├── .github
│   └── ISSUE_TEMPLATE
│       ├── bug_report.md
│       └── feature_request.md
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── buildspec.yaml
├── cloudformation
│   ├── ds_admin_detective.yaml
│   ├── ds_admin_principals.yaml
│   ├── ds_administration.yaml
│   ├── ds_env_backing_store.yaml
│   ├── ds_env_principals.yaml
│   ├── ds_env_sagemaker_studio.yaml
│   ├── ds_env_studio_user_profile_v1.yaml
│   ├── ds_env_studio_user_profile_v2.yaml
│   ├── ds_environment.yaml
│   ├── ds_shared_code_artifact.yaml
│   ├── ds_shared_data_lake.yaml
│   ├── ds_shared_ecr.yaml
│   └── ds_shared_sagemaker_network.yaml
├── code_artifact_login.sh
├── codebuild_build.sh
├── codebuild_local_readme.md
├── create-presigned-url.sh
├── customimage
│   ├── code-artifact-login.sh
│   ├── jupyter-docker-stacks-tensorflow
│   │   ├── Dockerfile
│   │   ├── app-image-config-input.json
│   │   ├── build-publish-sm-docker-image.sh
│   │   ├── create-and-attach-image.sh
│   │   └── update-domain-input.json
│   ├── setup-ds-env.sh
│   └── tensorflow25
│       ├── Dockerfile
│       ├── app-image-config-input.json
│       ├── build-publish-sm-docker-image.sh
│       ├── create-and-attach-image.sh
│       └── update-domain-input.json
├── docs
│   └── images
│       ├── hla.png
│       └── secure-ds-personas.png
├── install_code_artifact_pip_packages.sh
├── package_cloudformation.sh
├── pre-signedurl-input.json
├── src
│   ├── detective_control
│   │   └── inspect_sagemaker_resource.py
│   └── project_template
│       ├── 00_SageMaker-SysOps-Workflow.ipynb
│       ├── 01_SageMaker-DataScientist-Workflow.ipynb
│       ├── 02_SageMaker-DevOps-Workflow.ipynb
│       └── util
│           └── utilsspec.py
└── workshop-env.sh

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 | - OS: [e.g. iOS]
28 | - Browser [e.g. chrome, safari]
29 | - Version [e.g. 22]
30 | 
31 | **Smartphone (please complete the following information):**
32 | - Device: [e.g. iPhone6]
33 | - OS: [e.g. iOS8.1]
34 | - Browser [e.g. stock browser, safari]
35 | - Version [e.g. 22]
36 | 
37 | **Additional context**
38 | Add any other context about the problem here.
39 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # System Files
2 | **/.DS_Store
3 | 
4 | # IDEs
5 | # JetBrains IDEs (Intellij, Pycharm etc.)
6 | .idea
7 | .idea/
8 | 
9 | # VSCode
10 | **/.vscode
11 | 
12 | # Byte-compiled / optimized / DLL files
13 | __pycache__/
14 | *.py[cod]
15 | *$py.class
16 | 
17 | # C extensions
18 | *.so
19 | 
20 | # Distribution / packaging
21 | .Python
22 | build/
23 | develop-eggs/
24 | dist/
25 | downloads/
26 | eggs/
27 | .eggs/
28 | lib/
29 | lib64/
30 | parts/
31 | sdist/
32 | var/
33 | wheels/
34 | pip-wheel-metadata/
35 | share/python-wheels/
36 | *.egg-info/
37 | .installed.cfg
38 | *.egg
39 | MANIFEST
40 | 
41 | # PyInstaller
42 | # Usually these files are written by a python script from a template
43 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
44 | *.manifest
45 | *.spec
46 | 
47 | # Installer logs
48 | pip-log.txt
49 | pip-delete-this-directory.txt
50 | 
51 | # Unit test / coverage reports
52 | htmlcov/
53 | .tox/
54 | .nox/
55 | .coverage
56 | .coverage.*
57 | .cache
58 | nosetests.xml
59 | coverage.xml
60 | *.cover
61 | *.py,cover
62 | .hypothesis/
63 | .pytest_cache/
64 | 
65 | # Translations
66 | *.mo
67 | *.pot
68 | 
69 | # Django stuff:
70 | *.log
71 | local_settings.py
72 | db.sqlite3
73 | db.sqlite3-journal
74 | 
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 | 
79 | # Scrapy stuff:
80 | .scrapy
81 | 
82 | # Sphinx documentation
83 | docs/_build/
84 | 
85 | # PyBuilder
86 | target/
87 | 
88 | # Jupyter Notebook
89 | .ipynb_checkpoints
90 | 
91 | # IPython
92 | profile_default/
93 | ipython_config.py
94 | 
95 | # pyenv
96 | .python-version
97 | 
98 | # pipenv
99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
102 | # install all needed dependencies.
103 | #Pipfile.lock
104 | 
105 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
106 | __pypackages__/
107 | 
108 | # Celery stuff
109 | celerybeat-schedule
110 | celerybeat.pid
111 | 
112 | # SageMath parsed files
113 | *.sage.py
114 | 
115 | # Environments
116 | .env
117 | .venv
118 | env/
119 | venv/
120 | ENV/
121 | env.bak/
122 | venv.bak/
123 | 
124 | # Spyder project settings
125 | .spyderproject
126 | .spyproject
127 | 
128 | # Rope project settings
129 | .ropeproject
130 | 
131 | # mkdocs documentation
132 | /site
133 | 
134 | # mypy
135 | .mypy_cache/
136 | .dmypy.json
137 | dmypy.json
138 | 
139 | # Pyre type checker
140 | .pyre/
141 | 
142 | # CDK
143 | cdk.out/
144 | cdk.context.*
145 | 
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 | 
3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
4 | documentation, we greatly value feedback and contributions from our community.
5 | 
6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
7 | information to effectively respond to your bug report or contribution.
8 | 
9 | 
10 | ## Reporting Bugs/Feature Requests
11 | 
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 | 
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 | 
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 | 
22 | 
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 | 
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 | 
30 | To send us a pull request, please:
31 | 
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE](LICENSE) file for our project's licensing.
We will ask you to confirm the licensing of your contribution.
60 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
4 | this software and associated documentation files (the "Software"), to deal in
5 | the Software without restriction, including without limitation the rights to
6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7 | the Software, and to permit persons to whom the Software is furnished to do so.
8 | 
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 | 
16 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Secure Data Science With Amazon SageMaker Studio Reference Architecture
2 | 
3 | ## Overview
4 | 
5 | Amazon SageMaker is a powerful enabler and a key component of a data science environment, but it's only part of what is
6 | required to build a complete and secure data science environment. For more robust security you will need other AWS
7 | services such as Amazon VPC, AWS IAM, AWS KMS, Amazon CloudWatch, Amazon S3, and AWS Service Catalog, just to name a few.
8 | This project is an example of how to pull these services together and use them to create secure,
9 | self-service data science environments.
10 | 
11 | This GitHub repository is a companion to the [Secure Data Science with Amazon SageMaker Studio Workshop](
12 | https://catalog.us-east-1.prod.workshops.aws/v2/workshops/c882cd42-8ec8-4112-9469-9fab33471e85/en-US/). You can find detailed
13 | instructions for using the content of this GitHub repository in the workshop.
14 | 
15 | ## Table of Contents
16 | 
17 | 1. [Repository Breakdown](#repository-breakdown)
18 | 1. [Architecture and Process Overview](#architecture-and-process-overview)
19 | 1. [Getting Started](#getting-started)
20 | 1. [Features](#features)
21 | 
22 |     * [Private Network per Data Science Environment](#private-network-per-data-science-environment)
23 |     * [Authentication and Authorization](#authentication-and-authorization)
24 |     * [Data Protection](#data-protection)
25 |     * [Auditability](#auditability)
26 | 
27 | 1. [Further Reading](#further-reading)
28 | 1. [License](#license)
29 | 
30 | ## Repository Breakdown
31 | 
32 | This repository contains the following files:
33 | 
34 | ```bash
35 | ├── code_artifact_login.sh # Bash shell script to obtain a new AWS CodeArtifact token to install pre-approved pip packages
36 | ├── CODE_OF_CONDUCT.md # Guidance for participating in this open source project
37 | ├── CONTRIBUTING.md # Guidelines for contributing to this project
38 | ├── create-presigned-url.sh # Bash shell script to create a pre-signed URL for a user
39 | ├── install_code_artifact_pip_packages.sh # Bash shell script to install pre-approved pip packages
40 | ├── LICENSE # Details for the MIT-0 license
41 | ├── package_cloudformation.sh # Bash shell script to package and prepare CloudFormation for deployment
42 | ├── pre-signedurl-input.json # AWS cli input json for use with create-presigned-url.sh
43 | ├── README.md # This readme
44 | ├── cloudformation
45 | │   ├── ds_admin_detective.yaml # Deploys a detective control to manage SageMaker resources
46 | │   ├── ds_admin_principals.yaml # Deploys the Data Science Administrator role
47 | │   ├── ds_administration.yaml # Deploys nested stacks
48 | │   ├── ds_env_backing_store.yaml # Deploys a team's S3 buckets and CodeCommit repository
49 | │   ├── ds_env_principals.yaml # Creates a data science administrator and user
50 | │   ├── ds_env_sagemaker_studio.yaml # Onboards the SageMaker Studio domain in a VPC
51 | │   ├── ds_env_studio_user_profile_v1.yaml # Onboards a SageMaker Studio domain user profile for a team
52 | │   ├── ds_env_studio_user_profile_v2.yaml # Onboards a SageMaker Studio domain user profile for a team with preventative control
53 | │   ├── ds_environment.yaml # Manages nested stacks for a team
54 | │   ├── ds_shared_code_artifact.yaml # Creates shared CodeArtifact repository for the data science environment
55 | │   ├── ds_shared_ecr.yaml # Creates shared ECR repository for the data science environment
56 | │   └── ds_shared_sagemaker_network.yaml # Creates a shared services VPC with private subnets and no Internet access
57 | ├── customimage
58 | │   ├── jupyter-docker-stacks-tensorflow # SageMaker custom image TensorFlow 2.4.1 built with Jupyter docker stacks
59 | │   │   ├── app-image-config-input.json # AWS cli input json for use with create-and-attach-image.sh
60 | │   │   ├── build-publish-sm-docker-image.sh # Bash shell script to build the custom image, publish to ECR
61 | │   │   ├── create-and-attach-image.sh # Bash shell script to build the custom image, publish to ECR and attach to a SageMaker Studio domain
62 | │   │   ├── Dockerfile # Docker file for custom image
63 | │   │   └── update-domain-input.json # AWS cli input json for use with create-and-attach-image.sh
64 | │   ├── tensorflow25 # SageMaker custom image TensorFlow 2.5.0 built with official TensorFlow docker image
65 | │   │   ├── app-image-config-input.json # AWS cli input json for use with create-and-attach-image.sh
66 | │   │   ├── build-publish-sm-docker-image.sh # Bash shell script to build the custom image, publish to ECR
67 | │   │   ├── create-and-attach-image.sh # Bash shell script to build the custom image, publish to ECR and attach to a SageMaker Studio domain
68 | │   │   ├── Dockerfile # Docker file for custom image
69 | │   │   └── update-domain-input.json # AWS cli input json for use with create-and-attach-image.sh
70 | │   ├── code-artifact-login.sh # Bash shell script to obtain a new AWS CodeArtifact token inside SageMaker Studio docker image
71 | │   └── setup-ds-env.sh # Bash shell script to setup data science environment inside SageMaker Studio docker image
72 | ├── docs
73 | │   └── images
74 | │       └── hla.png # High Level Architecture diagram
75 | └── src
76 |     ├── detective_control
77 |     │   └── inspect_sagemaker_resource.py # Lambda function to detect non-VPC-attached SageMaker resources
78 |     └── project_template
79 |         ├── 00_SageMaker-SysOps-Workflow.ipynb # Sample Jupyter notebook to demonstrate security controls
80 |         ├── 01_SageMaker-DataScientist-Workflow.ipynb # Sample Jupyter notebook to demonstrate secure ML lifecycle
81 |         ├── 02_SageMaker-DevOps-Workflow.ipynb # Second half of a secure ML lifecycle
82 |         └── util
83 |             ├── __init__.py
84 |             └── utilsspec.py
85 | ```
86 | 
87 | ## Architecture and Process Overview
88 | 
89 | ![High-level Architecture](docs/images/hla.png)
90 | 
91 | The diagram shows the high-level architecture of the secure data science environment provisioned by the CloudFormation templates in
92 | this project.
93 | 
94 | Once deployed, this CloudFormation stack provides you with a Data Science Product Portfolio, powered by [AWS Service Catalog](https://aws.amazon.com/servicecatalog/).
95 | This allows users who have assumed the *Data Science Administrator* role to onboard a SageMaker Studio domain using the
96 | *SageMaker Studio Product*, deploy new data science environments using the *Data Science Team Environment* product, and onboard
97 | new users to the SageMaker Studio domain for a data science team using the *Data Science Studio User Profile* product within
98 | the catalog. Note that the SageMaker Studio domain is onboarded only once per region in an AWS account.
99 | 
100 | Data Science Administrators can onboard SageMaker Studio only with the **VPC only** [network access type](https://docs.aws.amazon.com/sagemaker/latest/dg/onboard-vpc.html),
101 | by specifying the Studio domain name and custom SageMaker image properties.
102 | AWS Service Catalog will then onboard SageMaker Studio to the shared service VPC:
103 | - A private, isolated, dedicated network environment built using an [Amazon VPC](https://aws.amazon.com/vpc/).
104 | - [Private connectivity](https://aws.amazon.com/privatelink/) to specific AWS services such as AWS CodeArtifact, Amazon ECR, AWS KMS,
105 | Amazon S3, SageMaker services, and AWS CodeCommit, to name a few. Since SageMaker Studio is onboarded in VPC only mode, it must
106 | use VPC interface endpoints to access AWS services.
107 | 
108 | Data Science Administrators can specify a team name, the environment type, and a few other criteria to launch the data science environment. AWS Service Catalog
109 | will then create a data science team environment consisting of:
110 | 
111 | - Private, dedicated [Amazon S3 buckets](https://aws.amazon.com/s3/) for a team's data and intellectual property
112 | - A team Git repository hosted by [AWS CodeCommit](https://aws.amazon.com/codecommit/)
113 | - Team-specific encryption keys managed by [AWS Key Management Service (KMS)](https://aws.amazon.com/kms/)
114 | - Dedicated [AWS Identity & Access Management (IAM)](https://aws.amazon.com/iam/) roles for team resources
115 | 
116 | To use the data science team environment, a data science team member can assume the *Data Science Administrator* role or
117 | the *Data Scientist User* role. Once they have assumed the Data Science Administrator role, users can provision AWS resources
118 | for which the role provides permissions within the data science team environment. Similarly, once a user has assumed the
119 | Data Scientist User role, they can launch the Amazon SageMaker Studio IDE and open Studio notebooks from the IDE.
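For reference, the `create-presigned-url.sh` script in this repository automates generating a pre-signed login URL for a Studio user. A minimal sketch of the underlying AWS CLI call, where the domain ID and profile name are illustrative placeholders:

```bash
# Sketch: generate a time-limited login URL for a Studio user profile.
# Replace d-xxxxxxxxxxxx and the profile name with values from your own domain.
aws sagemaker create-presigned-domain-url \
    --domain-id d-xxxxxxxxxxxx \
    --user-profile-name example-user-profile \
    --session-expiration-duration-in-seconds 1800
```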
120 | 
121 | When the SageMaker Studio IDE starts, it launches Amazon SageMaker Studio apps such as a Jupyter server. You will use
122 | a custom SageMaker image to configure the SageMaker Studio KernelGateway app environment to work within the secure data
123 | science team environment:
124 | 
125 | - When a Studio UserProfile is created for a user, an IAM role with permissions to access only team resources is
126 | associated with the Studio UserProfile
127 | - Access to a KMS-encrypted Amazon S3 bucket
128 | - Access to a KMS encryption key for encrypting data and models stored in S3 buckets
129 | - Studio notebook apps have no access to network resources outside of the shared service VPC
130 | - The Studio notebook apps access AWS resources using the VPC interface endpoints configured for the shared service VPC
131 | - User access to host `root` permissions is disabled by SageMaker Studio
132 | - A convenience Python module, generated with constants defined for AWS KMS key IDs, VPC subnet IDs, and security group IDs, is placed in the
133 | custom SageMaker docker image to set up the data science environment for all SageMaker images (prebuilt or custom)
134 | 
135 | ## Getting Started
136 | 
137 | #### Prerequisites
138 | 
139 | 1. An [AWS Account](https://aws.amazon.com/).
140 | 1. An AWS user with an Admin role (with the `AdministratorAccess` policy attached).
141 | 1. [AWS CLI](https://aws.amazon.com/cli/) installed on your developer machine.
142 | 1. [Docker CLI](https://docs.docker.com/get-docker/) installed on your developer machine.
143 | 1. [Git CLI](https://git-scm.com/) installed on your developer machine.
144 | 1. Access to [Docker Hub](https://hub.docker.com/).
145 | 1. A Unix-like environment - Linux or macOS.
146 | 
147 | We recommend that you use [AWS Cloud9](https://aws.amazon.com/cloud9/) as it will get you going fastest, with all the tools
148 | preinstalled. Cloud9 is a cloud-based integrated development environment (IDE) that lets you write, run, and debug your
149 | code with just a browser. It includes a code editor, debugger, and terminal. Cloud9 comes prepackaged with essential
150 | development tools, including the AWS CLI, Docker, Git, and much more. Cloud9 runs on an Amazon EC2 instance with Amazon Linux 2
151 | by default.
152 | 
153 | This project builds custom SageMaker Studio images and requires access to [Docker Hub](https://hub.docker.com/) in order
154 | to build custom docker images.
155 | 
156 | #### Step 1, as yourself with the AWS Administrator role, as part of the Cloud Platform Engineering team
157 | 
158 | Assuming you are signed into the AWS console with the `Admin` role, follow the instructions to [set up Cloud9](https://docs.aws.amazon.com/cloud9/latest/user-guide/setting-up.html)
159 | as an `Individual User`, accepting all defaults. Launch the Cloud9 IDE by [clicking on Open IDE](https://docs.aws.amazon.com/cloud9/latest/user-guide/open-environment.html)
160 | after your Cloud9 environment is ready. If you have used any IDE before, Cloud9 should feel very familiar. You can take
161 | a [quick tour](https://docs.aws.amazon.com/cloud9/latest/user-guide/tour-ide.html) of Cloud9 if needed to get
162 | familiar with the UI - in particular, how to launch a bash terminal, which is straightforward: use the top menu
163 | `Window -> New Terminal`. Most of the tasks will be executed in a bash shell by the Cloud Platform Engineering team.
164 | 
165 | The following setup steps are needed before you can deploy the Shared Service infrastructure:
166 | 1. Increase the disk space of the Cloud9 environment to 30 GB using the instructions for [resizing the Cloud9 EBS volume](
167 | https://docs.aws.amazon.com/cloud9/latest/user-guide/move-environment.html#move-environment-resize). You should restart the Cloud9
168 | environment after increasing the disk size by executing `sudo reboot` in a bash terminal of the IDE.
169 | 1. Install jq for JSON processing on the command line, along with bash-completion:
170 |     ```bash
171 |     sudo yum -y install jq bash-completion
172 |     ```
173 | 1. Set up the AWS_DEFAULT_REGION and AWS_ACCOUNT_ID environment variables for your bash shell environment:
174 |     ```bash
175 |     echo "export AWS_DEFAULT_REGION=`curl -s http://169.254.169.254/latest/dynamic/instance-identity/document|jq -r .region`" >> ~/.bash_profile
176 |     echo "export AWS_ACCOUNT_ID=`curl -s http://169.254.169.254/latest/dynamic/instance-identity/document|jq -r .accountId`" >> ~/.bash_profile
177 |     . ~/.bash_profile
178 |     ```
179 | 1. Clone this repo in the Cloud9 IDE:
180 |     ```bash
181 |     cd ~/environment
182 |     # Internal only
183 |     git clone https://gitlab.aws.dev/aws-ai-machine-learning-tfc/secure-data-science-with-sagemaker-studio.git
184 |     # Actual public repo
185 |     git clone https://github.com/aws-samples/secure-data-science-with-sagemaker-studio.git
186 |     ```
187 | 
188 | With the Cloud9 environment setup complete, you can deploy the Shared Service data science infrastructure.
189 | 
190 | **Upload the CloudFormation templates to an S3 bucket in your account:**
191 | Use the Cloud9 bash shell to execute the steps below:
192 | 1. `cd ~/environment/secure-data-science-with-sagemaker-studio`
193 | 1. Check that the AWS_DEFAULT_REGION and AWS_ACCOUNT_ID environment variables are set in your Cloud9 bash shell (these environment
194 | variables were configured above):
195 |     1. `echo $AWS_DEFAULT_REGION`, this should print the AWS region where Cloud9 is running.
196 |     1. `echo $AWS_ACCOUNT_ID`, this should print your AWS account number.
197 |     1. If any of the variables are not set, follow the instructions provided earlier.
198 | 1. Upload the CloudFormation templates to an S3 bucket in the target region by running the `./package_cloudformation.sh` script:
199 |     1. **Review the script:** You can review the script by double-clicking the `package_cloudformation.sh` file to open it in the Cloud9 IDE.
200 |     1. **Upload the CloudFormation templates to the S3 bucket:** run `./package_cloudformation.sh` in a bash terminal tab.
201 |     1. When complete, all the CloudFormation templates needed to create the infrastructure for the workshop are in an S3 bucket
202 | printed by the script. Make note of this S3 bucket name. You can use the [Amazon S3 console](https://console.aws.amazon.com/s3)
203 | or the AWS CLI (see the sketch below) to see the contents of this bucket.
204 |     1. When the script is done, it will print a command to run create-stack using the `ds_administration.yaml` template.
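As a quick check, you can list the uploaded templates from the same terminal. A minimal sketch, where the bucket name is a placeholder you substitute from the script's output:

```bash
# Sketch: list the packaged CloudFormation templates uploaded by the script.
# Replace <cfn-bucket-name> with the bucket name printed by package_cloudformation.sh.
aws s3 ls s3://<cfn-bucket-name>/ --recursive
```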
Run the command printed from previous step, it will look like this: 211 | ```bash 212 | aws cloudformation create-stack \ 213 | --template-url https://s3.${REGION}.amazonaws.com/${CFN_BUCKET_NAME}/${PROJECT_NAME}/ds_administration.yaml \ 214 | --region ${REGION} 215 | --stack-name secure-ds-shared-service \ 216 | --capabilities CAPABILITY_NAMED_IAM \ 217 | --parameters ParameterKey=QuickstartMode,ParameterValue=true 218 | ``` 219 | 220 | After approximately 5-7 minutes the stack will have deployed a Shared Services VPC with private subnets and no Internet access, 221 | a [Amazon Elastic Container Registry](https://aws.amazon.com/ecr/) repository, a [AWS CodeArtifact](https://aws.amazon.com/codeartifact/) repository 222 | to be leveraged across all data science environments. ECR repository is a shared resource to host pre-approved SageMaker container images. 223 | Similarly, CodeArtifact repository is for hosting pre-approved Python packages within your network. This step also installs 224 | [AWS Service Catalog](https://aws.amazon.com/servicecatalog/) products for Data Science administrator and Data Science users along with required IAM Roles. 225 | 226 | There are three additional steps required as AWS admin before proceeding to Step 2: 227 | Use Cloud9 bash shell to execute the steps below. 228 | 229 | 1. **Build and publish a custom SageMaker docker image to Elastic Container repository**: 230 | 1. `cd ~/environment/secure-data-science-with-sagemaker-studio`. 231 | 1. Change directory to custom images: `cd customimage/jupyter-docker-stacks-tensorflow`. 232 | 1. Run the script from bash terminal tab: `./build-publish-sm-docker-image.sh` from the `customimage/jupyter-docker-stacks-tensorflow` directory. 233 | 1. Verify that custom SageMaker docker image was uploaded to ECR: Visit [ECR console](https://console.aws.amazon.com/ecr/home) in AWS Management Console, once there: 234 | - Click on `Repositories` link in left navigation panel under `Amazon ECR` section. 235 | - Click on `ds-shared-container-images` in the list of Repositories in the **Private** tab of main window. This repository was created by Cloud Formation template earlier. 236 | This is the data science shared ECR repository for hosting custom SageMaker docker images. 237 | - Notice the custom SageMaker docker image named `ds-custom-tensorflow241` under `Images` that you built and pushed in previous step. 238 | 239 | 1. **Install PIP packages from public PyPI repository to CodeArtifact repository**: 240 | 1. `cd ~/environment/secure-data-science-with-sagemaker-studio`. 241 | 1. **Obtain CodeArtifact token:** Now run the script from bash terminal tab: `./code_artifact_login.sh`. You should see a 242 | message stating successfully configured pip to use AWS CodeArtifact repository and login expires in 12 hours. 243 | 1. **Install Python packages:** With pip configured, run the script to install approved Python packages from bash terminal tab: 244 | `./install_code_artifact_pip_packages.sh`. This script demonstrates installation of approved Python packages by data science administrators. Examine the Python packages 245 | installed by the script. 246 | 1. **Verify that Python packages were installed:** Visit Code Artifact console in AWS Management Console, once there: 247 | - Click on `Repositories` link in left navigation bar 248 | - Click on `ds-shared-repo` in the list of Repositories in the main window. 249 | - Notice the Python packages under `Packages` section. 250 | 251 | 1. 
251 | 1. **Upload the dataset for use in the Python notebooks to the S3 data lake bucket**: For the notebooks in the labs, you will
252 | use the public [Credit Card default dataset](https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients)
253 | from the UCI Machine Learning Repository, referenced in:
254 |     > Yeh, I. C., & Lien, C. H. (2009). The comparisons of data mining techniques for the predictive accuracy of
255 |     > probability of default of credit card clients. Expert Systems with Applications, 36(2), 2473-2480.
256 |     1. Download the `Credit Card default dataset`:
257 |         ```bash
258 |         curl -o credit_card_default_data.xls https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls
259 |         ```
260 |     1. Upload the dataset to the S3 data lake bucket:
261 |         1. Obtain the S3 data lake bucket name:
262 |             ```bash
263 |             export S3_DATA_LAKE=`aws s3 ls | grep ds-data-lake- | awk '{print $3}'`
264 |             ```
265 |         1. Upload the data using the following command:
266 |             ```bash
267 |             aws s3 cp credit_card_default_data.xls s3://${S3_DATA_LAKE}/secure-sagemaker-demo/credit-card-default/credit_card_default_data.xls
268 |             ```
269 | 
270 | With the dataset uploaded to the S3 data lake bucket, you can start using the Python notebooks and follow the instructions in each notebook.
271 | 
272 | 
273 | #### Step 2, as a Data Science Administrator
274 | 
275 | To access the Service Catalog portfolio, click the `Outputs` tab of the CloudFormation stack and use the `AssumeDataScienceAdminRole` link to become a Data Science Administrator,
276 | capable of onboarding an [Amazon SageMaker Studio](https://aws.amazon.com/sagemaker/studio/) domain to the AWS account configured to use the Shared Service VPC,
277 | creating data science team environments, and adding new SageMaker Studio user profiles for the data science teams.
278 | Once you've assumed the role you can visit the [AWS Service Catalog console](https://console.aws.amazon.com/servicecatalog/home?isSceuc=true#/products)
279 | to onboard SageMaker Studio, deploy a data science environment, and onboard users to the SageMaker Studio domain.
280 | 
281 | Keep the `Parameters` tab of the `ds_administration` stack deployed in Step 1 open in the CloudFormation console, as you will need the values that you provided earlier.
282 | 
283 | **Onboard the SageMaker Studio domain**: This is a one-time activity. Click the context menu icon next to the `SageMaker Studio Product` product
284 | and click `Launch product`. After you provide a name for the product launch, provide values for the following parameters as shown below, then click `Launch`:
285 | - StudioDomainName: `ds-studio-domain` (this is the default)
286 | - SharedServiceStackSetName: `DSSharedServices` (this is the default)
287 | - CustomImageEcrUri: `ECR URI for the custom SageMaker docker image published in Step 1`, e.g.
288 | ACCOUNT_NUMBER.dkr.ecr.REGION.amazonaws.com/sm-studio-workshop:ds-custom-tensorflow-2.4.1.
289 | - CustomImageName: `ds-custom-tensorflow241`
290 | - CustomImageDescription: `Custom TensorFlow v2.4.1 Image`
291 | - AppImageConfigName: `ds-tensorflow241-image-config`
292 | - KernelSpecsName: `python3`
293 | - KernelSpecsDisplayName: `Python 3 (TensorFlow 2.4.1)`
294 | 
295 | This will launch a CloudFormation stack to onboard the SageMaker Studio domain.
296 | This will require about 10 minutes to execute.
297 | 
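If you prefer to script these product launches instead of using the console, the AWS Service Catalog CLI supports name-based provisioning. A minimal sketch, assuming the product name shown above; the provisioning artifact (version) name and launch name are placeholders, and any remaining parameters are passed the same way:

```bash
# Sketch: launch the Service Catalog product from the CLI instead of the console.
# "v1.0" and "onboard-sm-studio" are placeholders; look up the actual
# provisioning artifact name in the Service Catalog console first.
aws servicecatalog provision-product \
    --product-name "SageMaker Studio Product" \
    --provisioning-artifact-name "v1.0" \
    --provisioned-product-name "onboard-sm-studio" \
    --provisioning-parameters \
        Key=StudioDomainName,Value=ds-studio-domain \
        Key=SharedServiceStackSetName,Value=DSSharedServices
```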
298 | **Create a Data Science Team Environment**: Visit the Service Catalog console and click the context menu icon next to the `Data Science Team Environment` product
299 | and click `Launch product`. After you provide a name for the product launch and provide a team name, click `Launch` and
300 | you will have created your first data science team environment. This will launch a CloudFormation stack to provision the
301 | data science environment. This will require about 5-7 minutes to execute.
302 | 
303 | **Onboard a user profile to the SageMaker Studio domain for the Data Science Team Environment**: Visit the Service Catalog console and click
304 | the context menu icon next to the `Data Science User Profile` product and click `Launch product`. After you provide a name for the product launch,
305 | provide values for the following parameters, then click `Launch`:
306 | - StudioUserProfileName: `userprofile name`, this will be the data science user.
307 | - SharedServiceStackSetName: `DSSharedServices` (this is the default).
308 | - TeamName: `team name` from the previous step, i.e. the data science team you want to onboard a new user to.
309 | - EnvType: `dev` (this is the default), the data science environment for the team you want to onboard a new user to.
310 | 
311 | This will launch a CloudFormation stack to provision a new SageMaker user profile in the data science environment.
312 | This will require about 1-2 minutes to execute.
313 | 
314 | #### Step 3, as a Data Scientist (a data science team member/end user)
315 | 
316 | When the data science team environment has completed its deployment, you will have two links available from the Service Catalog
317 | console to assume user roles in the data science environment. Click on `AssumeDataScientistUserRole`. Go to the SageMaker console and click
318 | `Amazon SageMaker Studio` in the left navigation bar; this will open the `SageMaker Studio Control Panel`. Click `Open Studio` next to the user name
319 | created in the previous step to launch the SageMaker Studio IDE. The first launch will take about 10 minutes as the SageMaker service prepares the environment for the
320 | newly created user profile.
321 | 
322 | #### Step 4, Explore
323 | 
324 | From the Jupyter notebook instance, using the sample notebooks, you can develop features, train, host, and monitor a machine learning model in a secure manner.
325 | If you assume your original AWS role you can also, from the AWS console, explore the various features deployed by the CloudFormation stacks.
326 | 
327 | 
328 | ## Features
329 | 
330 | This source code demonstrates a self-service model for enabling data science teams to create data science environments that employ a number of recommended security practices. Some of the more notable features are listed below. The controls, mechanisms, and services deployed by this source code are intended to provide operations and security teams with the assurance that best practices are being employed, while also enabling data science teams to self-serve, move quickly, and stay focused on the data science task at hand.
331 | 
332 | ### Private Network per Amazon SageMaker Studio and Data Science Environment
333 | 
334 | The SageMaker Studio domain is onboarded in [**VPC Only**](https://docs.aws.amazon.com/sagemaker/latest/dg/onboard-vpc.html) mode
335 | to use a Shared Services VPC; in **VPC Only** mode, Studio sends all traffic over your specified VPC. Every data science environment is configured to host Amazon SageMaker and other components of the data science environment in the Shared Services VPC. The VPC provides a familiar set of network-level controls to allow you to govern ingress and egress of data.
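These controls can be inspected directly. For example, you can confirm from the CLI that the deployed VPC has no Internet Gateway attached; a minimal sketch, where the tag filter is an assumption to adjust to however your VPC is actually named:

```bash
# Sketch: confirm no Internet Gateway is attached to the shared services VPC.
# The Name tag filter below is illustrative; use your VPC's actual tag or ID.
VPC_ID=$(aws ec2 describe-vpcs \
    --filters "Name=tag:Name,Values=*sagemaker*" \
    --query 'Vpcs[0].VpcId' --output text)
aws ec2 describe-internet-gateways \
    --filters "Name=attachment.vpc-id,Values=${VPC_ID}" \
    --query 'InternetGateways'   # expect an empty list: []
```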
336 | These templates create a VPC with no Internet Gateway (IGW), therefore all subnets are private, without Internet connectivity.
337 | Network connectivity with AWS services or your own shared services is provided using [VPC endpoints](https://docs.aws.amazon.com/vpc/latest/userguide/vpc-endpoints.html)
338 | and [PrivateLink](https://aws.amazon.com/privatelink/). Security groups are used to control traffic between different resources,
339 | allowing you to group like resources together and manage their ingress and egress traffic.
340 | 
341 | ### Authentication and Authorization
342 | 
343 | AWS Identity and Access Management (IAM) is used to create least-privilege, preventive controls for many aspects of the data science environments.
344 | These preventive controls, in the form of IAM policies, are used to control access to a team's data in Amazon S3, control who
345 | can access SageMaker resources like SageMaker Studio notebook apps (Jupyter server, kernel gateway, etc.), and are also applied as
346 | VPC endpoint policies to put explicit controls around the API endpoints created in a data science environment.
347 | 
348 | There are several IAM roles deployed by this source code to manage permissions and ensure separation of concerns at scale.
349 | The diagram below shows these roles and their intent:
350 | 
351 | ![Data Science Personas and Roles](docs/images/secure-ds-personas.png)
352 | 
353 | - **Cloud Administrator role (AWS Administrator)**
354 | 
355 |   This IAM role (an IAM `Admin` role with the `AdministratorAccess` IAM policy) is for the Cloud Platform Engineering team, with
356 |   full administrator access to the AWS cloud.
357 | 
358 |   This role is responsible for creating the shared services VPC, the shared ECR repository, and the shared CodeArtifact repository, and for provisioning the AWS Service Catalog portfolio and products
359 |   that enable self-service administration by the Data Science Administrator: onboarding the SageMaker Studio domain, creating data science team
360 |   environments, and onboarding users to the SageMaker Studio domain for a team. The Cloud Administrator also creates a number of IAM roles to enable
361 |   self-service and delegated administration by Data Science Administrators.
362 | 
363 | - **Data Science Administrator role**
364 | 
365 |   This role is for the Data Science Administrators who are delegated the data science administration responsibilities
366 |   and support multiple data science teams.
367 | 
368 |   Responsibilities: onboarding the SageMaker Studio domain, creating data science team environments, and onboarding SageMaker Studio User Profiles
369 |   (i.e. users) to a specific data science team, all using the AWS Service Catalog. The Data Science Administrator can add pre-approved pip packages to the
370 |   CodeArtifact repository, build and publish custom SageMaker images to the ECR repository, and attach SageMaker images and versions to the SageMaker Studio domain.
371 |   The role also grants permissions to administer team-specific resources.
372 | 
373 |   Examine the [Data Science Administrator Principals template](cloudformation/ds_admin_principals.yaml) to review the IAM role
374 |   and associated policy document.
375 | 
376 | - **AWS Service Catalog Launch role**
377 | 
378 |   The AWS [Service Catalog Launch role](https://docs.aws.amazon.com/servicecatalog/latest/adminguide/constraints-launch.html)
379 |   is the role that AWS Service Catalog assumes when an end user launches a product. This allows end users to provision
380 |   products and create AWS resources without giving end users (and their role) higher-level permissions, thus making it
381 |   possible to implement a least-privilege security model. A good example of this is IAM role creation, which is
382 |   a highly privileged permission. You use the Service Catalog launch role associated with a product to give end users the
383 |   ability to create the roles they need without granting them permission to create IAM roles directly.
384 | 
385 |   Examine the [Data Science Administrator Principals template](cloudformation/ds_admin_principals.yaml) to review the IAM role
386 |   and associated policy document.
387 | 
388 | - **Data Science Team Administrator role**
389 | 
390 |   This role is for use by the data science team, where one of the team members is given this role as the delegated data science
391 |   administrator for the team, allowing them to self-serve and support the team's admin needs.
392 | 
393 |   Examine the [Data Science Team Environment Principals template](cloudformation/ds_env_principals.yaml) to review the IAM roles
394 |   and associated policy document.
395 | 
396 | - **Data Scientist User role**
397 | 
398 |   This role is for the end users - the data scientists, ML developers, and engineers - who are members of a data science team.
399 | 
400 |   This role grants console access, permission to launch the SageMaker Studio IDE and Studio Jupyter notebooks, and permission to create
401 |   and share Jupyter notebooks.
402 | 
403 |   Note that this role is separate from the SageMaker notebook execution role attached to a SageMaker Studio UserProfile.
404 | 
405 |   Examine the [Data Science Team Environment Principals template](cloudformation/ds_env_principals.yaml) to review the IAM roles
406 |   and associated policy document.
407 | 
408 | - **SageMaker Studio UserProfile Execution role**
409 | 
410 |   This role is used by the SageMaker Studio UserProfile created for each user. It grants Studio apps such as the NotebookServer or
411 |   KernelGateway access to SageMaker services and AWS resources. This role gives data scientists permissions to perform
412 |   data processing, run training, deploy inference endpoints, use bias detection services, access S3 buckets and S3 data lakes, and so on.
413 | 
414 |   This role can be re-used for training jobs, batch transformations, and other Amazon SageMaker resources to support auditability.
415 | 
416 |   Examine the [Data Science Environment User Profile v1 template](cloudformation/ds_env_studio_user_profile_v1.yaml) and
417 |   [Data Science Environment User Profile v2 template](cloudformation/ds_env_studio_user_profile_v2.yaml) to review the IAM roles
418 |   and associated policy document.
419 | 
420 | The IAM policies created by this source code use many [IAM conditions](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_actions-resources-contextkeys.html)
421 | to grant powerful permissions but only under certain conditions.
422 | 
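As one concrete illustration, the templates scope `iam:PassRole` so that a role can only be handed to the SageMaker service. The same statement could be attached from the CLI like the following sketch, where the role and policy names are placeholders:

```bash
# Sketch: an iam:PassRole statement conditioned on the receiving service,
# mirroring the SageMakerIamPassRole statements in the CloudFormation templates.
# ExampleRole and PassToSageMakerOnly are placeholder names.
aws iam put-role-policy \
    --role-name ExampleRole \
    --policy-name PassToSageMakerOnly \
    --policy-document '{
      "Version": "2012-10-17",
      "Statement": [{
        "Effect": "Allow",
        "Action": "iam:PassRole",
        "Resource": "*",
        "Condition": {"StringEquals": {"iam:PassedToService": "sagemaker.amazonaws.com"}}
      }]
    }'
```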
423 | It is worth highlighting that the SageMaker service provides several [SageMaker IAM roles](
424 | https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) that you should learn more about, as you will
425 | need them in order to manage permissions and ensure separation of concerns at scale. Some of these roles are:
426 | 
427 | - **Training / Transform job execution role**
428 | 
429 |   Used by training jobs or batch transform jobs to access AWS resources like Amazon S3
430 | 
431 | - **Endpoint creation role**
432 | 
433 |   Used by your CI/CD pipeline to create hosted ML model endpoints
434 | 
435 | - **Endpoint hosting role**
436 | 
437 |   Used by a hosted ML model to access AWS resources such as Amazon S3
438 | 
439 | - **Endpoint invocation role**
440 | 
441 |   Used by an application to call a hosted ML model endpoint
442 | 
443 | ### Data Protection
444 | 
445 | It is assumed that a data science environment contains highly sensitive data to train ML models, and that there is also sensitive
446 | intellectual property in the form of algorithms, libraries, and trained models. There are many ways to protect data, such as
447 | the preventive controls described above, defined as IAM policies.
448 | In addition, this source code encrypts data at rest using managed encryption keys.
449 | 
450 | Many AWS services, including Amazon S3 and Amazon SageMaker, are integrated with AWS Key Management Service (KMS) to make
451 | it very easy to encrypt your data at rest. This source code takes advantage of these integrations to ensure that your
452 | data is encrypted in Amazon S3 and on Amazon SageMaker resources, end to end. The SageMaker Studio domain is configured to use
453 | AWS KMS: the Studio EFS volume is encrypted using a KMS key created specifically for use by the SageMaker Studio domain.
454 | This encryption is also applied to your intellectual property as it is being developed in the many places it may be stored, such as
455 | Amazon S3, EFS volumes, the ECR repository, or the AWS CodeCommit Git repository.
456 | 
457 | ### Auditability
458 | 
459 | Using cloud services in a safe and responsible manner is good, but being able to demonstrate to others that you are operating in a governed manner is even better. Developers and security officers alike will need to see activity logs for models being trained and persons interacting with the systems. Amazon CloudWatch Logs and AWS CloudTrail are there to help, receiving logs from many different parts of your data science environment, including:
460 | 
461 | - Amazon S3
462 | - Amazon SageMaker Studio Notebooks
463 | - Amazon SageMaker Training Jobs
464 | - Amazon SageMaker Hosted Models
465 | - VPC Flow Logs
466 | 
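For example, a quick way to review recent SageMaker API activity is to query CloudTrail from the CLI; a minimal sketch:

```bash
# Sketch: list recent SageMaker API calls recorded by CloudTrail.
aws cloudtrail lookup-events \
    --lookup-attributes AttributeKey=EventSource,AttributeValue=sagemaker.amazonaws.com \
    --max-results 20 \
    --query 'Events[].{Time:EventTime,Event:EventName,User:Username}'
```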
467 | ## Further Reading
468 | 
469 | There is a wealth of material and resources available to advise you on how to best support your business using AWS services. The following is a non-exhaustive list, in no particular order:
470 | 
471 | - [Amazon SageMaker documentation regarding security](https://docs.aws.amazon.com/sagemaker/latest/dg/security.html)
472 | - [AWS Well-Architected Framework: Machine Learning Lens](https://docs.aws.amazon.com/wellarchitected/latest/machine-learning-lens/welcome.html)
473 | - [Building secure machine learning environments with Amazon SageMaker](https://aws.amazon.com/blogs/machine-learning/building-secure-machine-learning-environments-with-amazon-sagemaker/)
474 | - [Setting up secure, well-governed machine learning environments on AWS](https://aws.amazon.com/blogs/mt/setting-up-machine-learning-environments-aws/)
475 | - [Configuring Amazon SageMaker Studio for teams and groups with complete resource isolation](https://aws.amazon.com/blogs/machine-learning/configuring-amazon-sagemaker-studio-for-teams-and-groups-with-complete-resource-isolation/)
476 | - [Securing Amazon SageMaker Studio connectivity using a private VPC](https://aws.amazon.com/blogs/machine-learning/securing-amazon-sagemaker-studio-connectivity-using-a-private-vpc/)
477 | - [Building secure Amazon SageMaker access URLs with AWS Service Catalog](https://aws.amazon.com/blogs/mt/building-secure-amazon-sagemaker-access-urls-with-aws-service-catalog/)
478 | - [Private package installation in Amazon SageMaker running in internet-free mode](https://aws.amazon.com/blogs/machine-learning/private-package-installation-in-amazon-sagemaker-running-in-internet-free-mode/)
479 | - [Bringing your own custom container image to Amazon SageMaker Studio notebooks](https://aws.amazon.com/blogs/machine-learning/bringing-your-own-custom-container-image-to-amazon-sagemaker-studio-notebooks/)
480 | - [Secure and Compliant Machine Learning Workflows with Amazon SageMaker (video)](https://youtu.be/HlSEUvApDZE)
481 | - [An Overview of Amazon SageMaker Security (video)](https://youtu.be/zTJTzKcNzMk)
482 | - [Building Secure Machine Learning Environments using Amazon SageMaker (video)](https://youtu.be/txr6CR87GXI)
483 | - [AWS Cloud Security](https://aws.amazon.com/security/)
484 | 
485 | ## License
486 | 
487 | This source code is licensed under the [MIT-0 License](https://github.com/aws/mit-0). See the [LICENSE](LICENSE) file for details.
488 | 
--------------------------------------------------------------------------------
/buildspec.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: MIT-0
3 | version: 0.2
4 | 
5 | phases:
6 |   pre_build:
7 |     commands:
8 |       - apt-get update
9 |       - apt-get install python3-pip zip -y
10 |       - pip3 install -U pip
11 |       - pip3 install awscli
12 | 
13 |   build:
14 |     commands:
15 |       - env
16 |       - echo Packaging CloudFormation and uploading to S3...
17 |       - ./package_cloudformation.sh
18 | 
--------------------------------------------------------------------------------
/cloudformation/ds_admin_detective.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: MIT-0
3 | Description: |
4 |   Deploy a Lambda function and CloudWatch trigger to inspect SageMaker resources and ensure they are attached to a VPC.
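# The control has three parts, defined below: an execution role for the Lambda
# function, the Lambda function itself (inspect_sagemaker_resource.py, packaged
# as vpc_detective_control.zip), and a CloudWatch Events rule that invokes the
# function whenever a CreateTrainingJob or CreateModel API call is recorded by
# CloudTrail.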
5 | 
6 | Parameters:
7 |   StackSetName:
8 |     Type: String
9 |     Description: A name to be used across nested stacks
10 | 
11 | Resources:
12 |   SageMakerDetectiveControlExecutionRole:
13 |     Type: 'AWS::IAM::Role'
14 |     Properties:
15 |       AssumeRolePolicyDocument:
16 |         Version: 2012-10-17
17 |         Statement:
18 |           - Effect: Allow
19 |             Principal:
20 |               Service: lambda.amazonaws.com
21 |             Action: 'sts:AssumeRole'
22 |       RoleName:
23 |         !Join
24 |           - ''
25 |           -
26 |             - !Sub '${StackSetName}-SageMaker-DetectiveControl-'
27 |             - !Select
28 |               - 4
29 |               - !Split
30 |                 - '-'
31 |                 - !Select
32 |                   - 2
33 |                   - !Split
34 |                     - /
35 |                     - !Ref 'AWS::StackId'
36 |       Policies:
37 |         - PolicyName: LambdaInlineForSageMaker
38 |           PolicyDocument:
39 |             Version: 2012-10-17
40 |             Statement:
41 |               - Sid: VisualEditor0
42 |                 Effect: Allow
43 |                 Action:
44 |                   - 'sagemaker:DeleteTags'
45 |                   - 'sagemaker:DeleteEndpointConfig'
46 |                   - 'sagemaker:ListTags'
47 |                   - 'sagemaker:ListTransformJobs'
48 |                   - 'sagemaker:StopTrainingJob'
49 |                   - 'sagemaker:DeleteModel'
50 |                   - 'sagemaker:ListTrainingJobs'
51 |                   - 'sagemaker:ListHyperParameterTuningJobs'
52 |                   - 'sagemaker:DeleteEndpoint'
53 |                   - 'sagemaker:ListModels'
54 |                   - 'sagemaker:StopTransformJob'
55 |                   - 'sagemaker:AddTags'
56 |                   - 'sagemaker:ListEndpoints'
57 |                 Resource: '*'
58 |       ManagedPolicyArns:
59 |         - 'arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
60 | 
61 |   SageMakerVPCDetectiveControl:
62 |     Type: 'AWS::Lambda::Function'
63 |     Properties:
64 |       FunctionName: SageMakerVPCEnforcer
65 |       Description: Detective control to enforce VPC attachment of SageMaker resources
66 |       Runtime: python3.7
67 |       Code: vpc_detective_control.zip
68 |       Handler: inspect_sagemaker_resource.lambda_handler
69 |       MemorySize: 320
70 |       Timeout: 180
71 |       Role: !GetAtt SageMakerDetectiveControlExecutionRole.Arn
72 | 
73 |   SageMakerVPCEnforcementRule:
74 |     Type: 'AWS::Events::Rule'
75 |     DependsOn: SageMakerVPCDetectiveControl
76 |     Properties:
77 |       Description: The Cloudwatch Rule checking VPC enablement of SageMaker resources
78 |       EventPattern:
79 |         source:
80 |           - aws.sagemaker
81 |         detail-type:
82 |           - AWS API Call via CloudTrail
83 |         detail:
84 |           eventSource:
85 |             - sagemaker.amazonaws.com
86 |           eventName:
87 |             - CreateTrainingJob
88 |             - CreateModel
89 |       Name: SageMakerVPCEnforcementRule
90 |       State: ENABLED
91 |       Targets:
92 |         - Arn: !GetAtt SageMakerVPCDetectiveControl.Arn
93 |           Id: SagemakerVPCEnforcementLambda
94 | 
95 |   InvokeLambdaPermission:
96 |     Type: 'AWS::Lambda::Permission'
97 |     DependsOn: SageMakerVPCEnforcementRule
98 |     Properties:
99 |       FunctionName: !GetAtt SageMakerVPCDetectiveControl.Arn
100 |       Action: 'lambda:InvokeFunction'
101 |       Principal: events.amazonaws.com
102 |       SourceArn: !GetAtt SageMakerVPCEnforcementRule.Arn
103 | 
--------------------------------------------------------------------------------
/cloudformation/ds_admin_principals.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: MIT-0
3 | Description: |
4 |   Creates data science administrator role and service catalog launch role
5 | 
6 | Outputs:
7 |   DSAdministratorName:
8 |     Description: The name of the DataScienceAdministrator role
9 |     Value: !Ref DataScienceAdministrator
10 | 
11 |   DSAdministratorArn:
12 |     Description: The ARN of the DataScienceAdministrator role
13 |     Value: !GetAtt DataScienceAdministrator.Arn
14 |     Export:
15 |       Name: !Sub 'ds-administrator-role-${StackSetName}-arn'
16 | 
17 |   SCLaunchRoleArn:
18 |     Description: The ARN of the Service Catalog launch role
19 |     Value: !GetAtt SCLaunchRole.Arn
20 | 
21 | Parameters:
22 |   StackSetName:
23 |     Type: String
24 |     Description: A name to be used across nested stacks
25 | 
26 | Resources:
27 |   DataScienceAdministrator:
28 |     Type: 'AWS::IAM::Role'
29 |     Properties:
30 |       AssumeRolePolicyDocument:
31 |         Version: 2012-10-17
32 |         Statement:
33 |           - Effect: Allow
34 |             Principal:
35 |               AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root'
36 |             Action:
37 |               - 'sts:AssumeRole'
38 |           - Sid: SageMakerTrustRelationship
39 |             Effect: Allow
40 |             Principal:
41 |               Service:
42 |                 - 'sagemaker.amazonaws.com'
43 |             Action:
44 |               - 'sts:AssumeRole'
45 |       RoleName:
46 |         !Join
47 |           - ''
48 |           -
49 |             - !Sub '${StackSetName}-DataScienceAdministrator-'
50 |             - !Select
51 |               - 4
52 |               - !Split
53 |                 - '-'
54 |                 - !Select
55 |                   - 2
56 |                   - !Split
57 |                     - /
58 |                     - !Ref 'AWS::StackId'
59 |       Policies:
60 |         - PolicyName: SageMakerAccessInlinePolicy
61 |           PolicyDocument:
62 |             Version: 2012-10-17
63 |             Statement:
64 |               - Sid: SageMakerDSAdminAccess
65 |                 Effect: Allow
66 |                 Action:
67 |                   - 'sagemaker:*'
68 |                   - 'iam:TagRole'
69 |                   - 'kms:CreateGrant'
70 |                   - 'kms:DescribeKey'
71 |                 Resource: '*'
72 |               - Sid: SageMakerIamPassRole
73 |                 Effect: Allow
74 |                 Action:
75 |                   - 'iam:PassRole'
76 |                 Resource: '*'
77 |                 Condition:
78 |                   StringEquals:
79 |                     'iam:PassedToService':
80 |                       - sagemaker.amazonaws.com
81 |       ManagedPolicyArns:
82 |         - 'arn:aws:iam::aws:policy/AWSServiceCatalogAdminFullAccess'
83 |         - 'arn:aws:iam::aws:policy/AWSKeyManagementServicePowerUser'
84 |         - 'arn:aws:iam::aws:policy/AmazonS3FullAccess'
85 |         - 'arn:aws:iam::aws:policy/AWSCloudFormationFullAccess'
86 |         - 'arn:aws:iam::aws:policy/ReadOnlyAccess'
87 |         - 'arn:aws:iam::aws:policy/AmazonSSMFullAccess'
88 |         - 'arn:aws:iam::aws:policy/AWSCodeCommitFullAccess'
89 |         - 'arn:aws:iam::aws:policy/AWSCodeArtifactAdminAccess'
90 |         - 'arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess'
91 | 
92 |   SCLaunchRole:
93 |     Type: 'AWS::IAM::Role'
94 |     Properties:
95 |       AssumeRolePolicyDocument:
96 |         Version: 2012-10-17
97 |         Statement:
98 |           - Effect: Allow
99 |             Principal:
100 |               Service: servicecatalog.amazonaws.com
101 |             Action: 'sts:AssumeRole'
102 |       RoleName:
103 |         !Join
104 |           - ''
105 |           -
106 |             - !Sub '${StackSetName}-ServiceCatalogLaunchRole-'
107 |             - !Select
108 |               - 4
109 |               - !Split
110 |                 - '-'
111 |                 - !Select
112 |                   - 2
113 |                   - !Split
114 |                     - /
115 |                     - !Ref 'AWS::StackId'
116 |       Policies:
117 |         - PolicyName: SCInlinePolicy
118 |           PolicyDocument:
119 |             Version: 2012-10-17
120 |             Statement:
121 |               - Sid: Policy1
122 |                 Effect: Allow
123 |                 Action:
124 |                   - 'cloudformation:CreateStack'
125 |                   - 'cloudformation:UpdateStack'
126 |                   - 'cloudformation:DeleteStack'
127 |                   - 'cloudformation:DescribeStacks'
128 |                   - 'cloudformation:DescribeStackEvents'
129 |                   - 'cloudformation:GetTemplateSummary'
130 |                   - 'cloudformation:ValidateTemplate'
131 |                   - 'codecommit:CreateCommit'
132 |                   - 'codecommit:CreateRepository'
133 |                   - 'codecommit:DeleteRepository'
134 |                   - 'codecommit:GetRepository'
135 |                   - 'codecommit:ListRepositories'
136 |                   - 'codecommit:TagResource'
137 |                   - 'config:DescribeConfigurationRecorderStatus'
138 |                   - 'config:DescribeConfigurationRecorders'
139 |                   - 'ec2:AssociateRouteTable'
140 |                   - 'ec2:AuthorizeSecurityGroupIngress'
141 |                   - 'ec2:CreateRouteTable'
142 |                   - 'ec2:CreateSecurityGroup'
143 |                   - 'ec2:CreateSubnet'
144 |                   - 'ec2:CreateTags'
145 |                   - 'ec2:CreateVpc'
146 |                   - 'ec2:CreateVpcEndpoint'
147 |                   - 'ec2:DeleteRouteTable'
148 |                   - 'ec2:DeleteSecurityGroup'
149 |                   - 'ec2:DeleteSubnet'
150 |                   - 'ec2:DeleteTags'
151 |                   - 'ec2:DeleteVpc'
152 |                   - 'ec2:DeleteVpcEndpoints'
153 |                   - 'ec2:DescribeRouteTables'
154 |                   - 'ec2:DescribeSecurityGroups'
155 |                   - 'ec2:DescribeSubnets'
156 |                   - 'ec2:DescribeTags'
157 |                   - 'ec2:DescribeVpcEndpoints'
158 |                   - 'ec2:DescribeVpcs'
159 |                   - 'ec2:DisassociateRouteTable'
160 |                   - 'ec2:ModifyVpcAttribute'
161 |                   - 'ec2:RevokeSecurityGroupIngress'
162 |                   - 'ecr:GetAuthorizationToken'
163 |                   - 'ecr:BatchCheckLayerAvailability'
164 |                   - 'ecr:GetDownloadUrlForLayer'
165 |                   - 'ecr:GetRepositoryPolicy'
166 |                   - 'ecr:SetRepositoryPolicy'
167 |                   - 'ecr:DescribeRepositories'
168 |                   - 'ecr:CreateRepository'
169 |                   - 'ecr:DeleteRepository'
171 |                   - 'ecr:ListImages'
172 |                   - 'ecr:PutImage'
173 |                   - 'ecr:DescribeImages'
174 |                   - 'ecr:BatchGetImage'
175 |                   - 'ecr:GetLifecyclePolicy'
176 |                   - 'ecr:GetLifecyclePolicyPreview'
177 |                   - 'ecr:ListTagsForResource'
178 |                   - 'ecr:TagResource'
179 |                   - 'ecr:UntagResource'
180 |                   - 'ecr:DescribeImageScanFindings'
181 |                   - 'ecr:InitiateLayerUpload'
182 |                   - 'ecr:UploadLayerPart'
183 |                   - 'ecr:CompleteLayerUpload'
184 |                   - 'iam:AttachRolePolicy'
185 |                   - 'iam:GetPolicy'
186 |                   - 'iam:CreatePolicy'
187 |                   - 'iam:CreatePolicyVersion'
188 |                   - 'iam:DeletePolicy'
189 |                   - 'iam:DeletePolicyVersion'
190 |                   - 'iam:CreateRole'
191 |                   - 'iam:DeleteRole'
192 |                   - 'iam:DeleteRolePolicy'
193 |                   - 'iam:DetachRolePolicy'
194 |                   - 'iam:GetRole'
195 |                   - 'iam:GetRolePolicy'
196 |                   - 'iam:ListPolicyVersions'
197 |                   - 'iam:PassRole'
198 |                   - 'iam:PutRolePolicy'
199 |                   - 'iam:CreateServiceLinkedRole'
200 |                   - 'iam:TagRole'
201 |                   - 'kms:CreateAlias'
202 |                   - 'kms:CreateGrant'
203 |                   - 'kms:CreateKey'
204 |                   - 'kms:Decrypt'
205 |                   - 'kms:DeleteAlias'
206 |                   - 'kms:DeleteCustomKeyStore'
207 |                   - 'kms:DeleteImportedKeyMaterial'
208 |                   - 'kms:DescribeKey'
209 |                   - 'kms:EnableKey'
210 |                   - 'kms:EnableKeyRotation'
211 |                   - 'kms:GenerateDataKey'
212 |                   - 'kms:ListAliases'
213 |                   - 'kms:PutKeyPolicy'
214 |                   - 'kms:ScheduleKeyDeletion'
215 |                   - 'kms:TagResource'
216 |                   - 'kms:UpdateAlias'
217 |                   - 'kms:UpdateCustomKeyStore'
218 |                   - 'kms:UpdateKeyDescription'
219 |                   - 'kms:GenerateDataKeyWithoutPlainText'
220 |                   - 'resource-groups:CreateGroup'
221 |                   - 'resource-groups:DeleteGroup'
222 |                   - 'resource-groups:Tag'
223 |                   - 'resource-groups:Untag'
224 |                   - 's3:CreateBucket'
225 |                   - 's3:DeleteBucket'
226 |                   - 's3:DeleteBucketPolicy'
227 |                   - 's3:GetBucketPolicy'
228 |                   - 's3:GetEncryptionConfiguration'
229 |                   - 's3:GetObject'
230 |                   - 's3:ListBucket'
231 |                   - 's3:PutBucketPolicy'
232 |                   - 's3:PutBucketTagging'
233 |                   - 's3:PutEncryptionConfiguration'
234 |                   - 's3:PutBucketPublicAccessBlock'
235 |                   - 'servicecatalog:AssociatePrincipalWithPortfolio'
236 |                   - 'servicecatalog:AssociateProductWithPortfolio'
237 |                   - 'servicecatalog:CreateConstraint'
238 |                   - 'servicecatalog:CreatePortfolio'
239 |                   - 'servicecatalog:CreateProduct'
240 |                   - 'servicecatalog:DeleteConstraint'
241 |                   - 'servicecatalog:DeletePortfolio'
242 |                   - 'servicecatalog:DeleteProduct'
'servicecatalog:DescribeConstraint' 244 | - 'servicecatalog:DescribeProductAsAdmin' 245 | - 'servicecatalog:DescribeProvisioningArtifact' 246 | - 'servicecatalog:DisassociatePrincipalFromPortfolio' 247 | - 'servicecatalog:DisassociateProductFromPortfolio' 248 | - 'ssm:AddTagsToResource' 249 | - 'ssm:DeleteParameter' 250 | - 'ssm:DeleteParameters' 251 | - 'ssm:GetParameter' 252 | - 'ssm:GetParameters' 253 | - 'ssm:PutParameter' 254 | - 'ssm:RemoveTagsFromResource' 255 | - 'elasticfilesystem:CreateFileSystem' 256 | - 'elasticfilesystem:TagResource' 257 | - 'elasticfilesystem:UntagResource' 258 | - 'sagemaker:AddTags' 259 | - 'sagemaker:CreateDomain' 260 | - 'sagemaker:CreateUserProfile' 261 | - 'sagemaker:CreateApp' 262 | - 'sagemaker:CreateAppImageConfig' 263 | - 'sagemaker:CreateImage' 264 | - 'sagemaker:CreateImageVersion' 265 | - 'sagemaker:DescribeDomain' 266 | - 'sagemaker:DescribeImage' 267 | - 'sagemaker:DescribeImageVersion' 268 | - 'sagemaker:DescribeUserProfile' 269 | - 'sagemaker:DeleteDomain' 270 | - 'sagemaker:DeleteImage' 271 | - 'sagemaker:DeleteImageVersion' 272 | - 'sagemaker:DeleteUserProfile' 273 | - 'sagemaker:DeleteApp' 274 | - 'sagemaker:DeleteNotebookInstance' 275 | - 'sagemaker:DeleteAppImageConfig' 276 | - 'sagemaker:DeleteTags' 277 | - 'sagemaker:ListTags' 278 | - 'sagemaker:UpdateAppImageConfig' 279 | - 'sagemaker:UpdateNotebookInstanceLifecycleConfig' 280 | - 'sagemaker:UpdateDomain' 281 | - 'sagemaker:UpdateUserProfile' 282 | - 'sagemaker:UpdateImage' 283 | Resource: '*' 284 | - Sid: SageMakerIamPassRole 285 | Effect: Allow 286 | Action: 287 | - 'iam:PassRole' 288 | Resource: '*' 289 | Condition: 290 | StringEquals: 291 | 'iam:PassedToService': 292 | - sagemaker.amazonaws.com 293 | -------------------------------------------------------------------------------- /cloudformation/ds_administration.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | # The following template is designed to provision and configure a shared service secure environment for data science teams. 5 | # This template creates shared service infrastructure: a VPC with private subnets, data science administrator & service 6 | # catalog launch roles, shared ECR repository, shared CodeArtifact repository, shared data lake S3 bucket, detective control 7 | # lambda function and service catalog portfolio and products. 8 | # Lastly the template stores outputs into Parameter Store so they can be referenced later by SC products. 
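#
# Deployment example (editor's sketch; the bucket and stack names below are
# illustrative, not taken from this repository — see package_cloudformation.sh
# for the project's own packaging script). Because the nested stacks below
# reference local template files, package the templates to S3 before deploying:
#
#   aws cloudformation package \
#     --template-file ds_administration.yaml \
#     --s3-bucket my-cfn-staging-bucket \
#     --output-template-file ds_administration.packaged.yaml
#
#   aws cloudformation deploy \
#     --template-file ds_administration.packaged.yaml \
#     --stack-name DSSharedServices \
#     --capabilities CAPABILITY_NAMED_IAM
#
# CAPABILITY_NAMED_IAM is required because the nested stacks create IAM roles
# with explicit RoleName properties.
#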
9 | Description: Creates Data Science Shared Service Infrastructure
10 | 
11 | Outputs:
12 |   AssumeDataScienceAdminRole:
13 |     Description: URL for assuming the role of a data science administrator
14 |     Value: !Sub 'https://signin.aws.amazon.com/switchrole?account=${AWS::AccountId}&roleName=${DataSciencePrincipals.Outputs.DSAdministratorName}&displayName=${DataSciencePrincipals.Outputs.DSAdministratorName}'
15 | 
16 |   QuickstartMode:
17 |     Description: Whether this stack set was deployed as a quickstart
18 |     Value: !Ref QuickstartMode
19 |     Export:
20 |       Name: !Sub '${SharedServiceStackSetName}-QuickstartMode'
21 | 
22 | Parameters:
23 |   SharedServiceStackSetName:
24 |     Type: String
25 |     Default: DSSharedServices
26 |     Description: Name to be used as a common root across all shared service resources for reference from other CloudFormation stacks
27 | 
28 |   SageMakerStudioVpcCIDR:
29 |     Type: String
30 |     Default: 10.2.0.0/16
31 |     Description: CIDR range for SageMaker Studio VPC
32 |     AllowedPattern: '^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$'
33 | 
34 |   SageMakerStudioSubnet1CIDR:
35 |     Type: String
36 |     Default: 10.2.1.0/24
37 |     Description: CIDR range for SageMaker Studio Subnet A
38 |     AllowedPattern: '^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$'
39 | 
40 |   SageMakerStudioSubnet2CIDR:
41 |     Type: String
42 |     Default: 10.2.2.0/24
43 |     Description: CIDR range for SageMaker Studio Subnet B
44 |     AllowedPattern: '^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$'
45 | 
46 |   SageMakerStudioSubnet3CIDR:
47 |     Type: String
48 |     Default: 10.2.3.0/24
49 |     Description: CIDR range for SageMaker Studio Subnet C
50 |     AllowedPattern: '^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(\/([0-9]|[1-2][0-9]|3[0-2]))$'
51 | 
52 |   SharedECRRepositoryName:
53 |     Type: String
54 |     Default: ds-shared-container-images
55 |     Description: Shared ECR Repository name to be used across all data science projects supported by this stack
56 | 
57 |   QuickstartMode:
58 |     Type: String
59 |     Default: true
60 |     AllowedValues:
61 |       - true
62 |       - false
63 |     Description: true indicates a quickstart-style deployment, false indicates a workshop deployment
64 | 
65 | Conditions:
66 |   IsAQuickstart: !Equals [ !Ref QuickstartMode, true ]
67 | 
68 | Resources:
69 |   DataScienceAdministratorPortfolio:
70 |     Type: 'AWS::ServiceCatalog::Portfolio'
71 |     Condition: IsAQuickstart
72 |     Properties:
73 |       ProviderName: 'Cloud Operations Team'
74 |       Description: 'This portfolio is a collection of products designed to support managing data science teams'
75 |       DisplayName: 'Data Science Administrator Portfolio'
76 | 
77 |   DSSageMakerStudioProduct:
78 |     Type: 'AWS::ServiceCatalog::CloudFormationProduct'
79 |     Condition: IsAQuickstart
80 |     Properties:
81 |       Name: 'SageMaker Studio Product'
82 |       Description: 'Onboards SageMaker Studio Domain to the AWS Account.'
83 | Owner: 'Data Science Administration Office' 84 | ProvisioningArtifactParameters: 85 | - Name: 'DS SageMaker Studio v1' 86 | Info: 87 | LoadTemplateFromURL: 'https://s3.amazonaws.com/< S3_CFN_STAGING_BUCKET_PATH >/ds_env_sagemaker_studio.yaml' 88 | 89 | DSEnvironmentProduct: 90 | Type: 'AWS::ServiceCatalog::CloudFormationProduct' 91 | Condition: IsAQuickstart 92 | Properties: 93 | Name: 'Data Science Team Environment' 94 | Description: 'S3 buckets for hosting data and model, KMS Key and AWS Code Commit git repository to support data science teams.' 95 | Owner: 'Data Science Administration Office' 96 | ProvisioningArtifactParameters: 97 | - Name: 'DS Environment v1' 98 | Info: 99 | LoadTemplateFromURL: 'https://s3.amazonaws.com/< S3_CFN_STAGING_BUCKET_PATH >/ds_environment.yaml' 100 | 101 | DSUserProfileProduct: 102 | Type: 'AWS::ServiceCatalog::CloudFormationProduct' 103 | Condition: IsAQuickstart 104 | Properties: 105 | Name: 'Data Science Studio User Profile' 106 | Description: 'Creates a SageMaker Studio User Profile.' 107 | Owner: 'Data Science Administration Office' 108 | ProvisioningArtifactParameters: 109 | - Name: 'DS SageMaker Studio User Profile v1' 110 | Info: 111 | LoadTemplateFromURL: 'https://s3.amazonaws.com/< S3_CFN_STAGING_BUCKET_PATH >/ds_env_studio_user_profile_v1.yaml' 112 | 113 | SCPortfolioSageMakerStudioProductAssociation: 114 | Type: 'AWS::ServiceCatalog::PortfolioProductAssociation' 115 | Condition: IsAQuickstart 116 | Properties: 117 | PortfolioId: !Ref DataScienceAdministratorPortfolio 118 | ProductId: !Ref DSSageMakerStudioProduct 119 | 120 | SCPortfolioDSEnvironmentProductAssociation: 121 | Type: 'AWS::ServiceCatalog::PortfolioProductAssociation' 122 | Condition: IsAQuickstart 123 | Properties: 124 | PortfolioId: !Ref DataScienceAdministratorPortfolio 125 | ProductId: !Ref DSEnvironmentProduct 126 | 127 | SCPortfolioDSUserProfileProductAssociation : 128 | Type: 'AWS::ServiceCatalog::PortfolioProductAssociation' 129 | Condition: IsAQuickstart 130 | Properties: 131 | PortfolioId: !Ref DataScienceAdministratorPortfolio 132 | ProductId: !Ref DSUserProfileProduct 133 | 134 | DSAdminPortfolioPrincipleAssociation: 135 | Type: 'AWS::ServiceCatalog::PortfolioPrincipalAssociation' 136 | Condition: IsAQuickstart 137 | Properties: 138 | PortfolioId: !Ref DataScienceAdministratorPortfolio 139 | PrincipalARN: !GetAtt DataSciencePrincipals.Outputs.DSAdministratorArn 140 | PrincipalType: IAM 141 | 142 | DSSageMakerStudioProductLaunchRoleConstraint: 143 | Type: 'AWS::ServiceCatalog::LaunchRoleConstraint' 144 | Condition: IsAQuickstart 145 | DependsOn: SCPortfolioSageMakerStudioProductAssociation 146 | Properties: 147 | Description: The Launch Role SC uses to launch product 148 | PortfolioId: !Ref DataScienceAdministratorPortfolio 149 | ProductId: !Ref DSSageMakerStudioProduct 150 | RoleArn: !GetAtt DataSciencePrincipals.Outputs.SCLaunchRoleArn 151 | 152 | DSEnvironmentProductLaunchRoleConstraint: 153 | Type: 'AWS::ServiceCatalog::LaunchRoleConstraint' 154 | Condition: IsAQuickstart 155 | DependsOn: SCPortfolioDSEnvironmentProductAssociation 156 | Properties: 157 | Description: The Launch Role SC uses to launch product 158 | PortfolioId: !Ref DataScienceAdministratorPortfolio 159 | ProductId: !Ref DSEnvironmentProduct 160 | RoleArn: !GetAtt DataSciencePrincipals.Outputs.SCLaunchRoleArn 161 | 162 | DSUserProfileProductLaunchRoleConstraint: 163 | Type: 'AWS::ServiceCatalog::LaunchRoleConstraint' 164 | Condition: IsAQuickstart 165 | DependsOn: 
SCPortfolioDSUserProfileProductAssociation
166 |     Properties:
167 |       Description: The Launch Role SC uses to launch product
168 |       PortfolioId: !Ref DataScienceAdministratorPortfolio
169 |       ProductId: !Ref DSUserProfileProduct
170 |       RoleArn: !GetAtt DataSciencePrincipals.Outputs.SCLaunchRoleArn
171 | 
172 |   DataSciencePrincipals:
173 |     Type: AWS::CloudFormation::Stack
174 |     Properties:
175 |       Parameters:
176 |         StackSetName: !Ref SharedServiceStackSetName
177 |       TemplateURL: ds_admin_principals.yaml
178 | 
179 |   SharedCloudArtifactRepository:
180 |     Type: AWS::CloudFormation::Stack
181 |     Properties:
182 |       Parameters:
183 |         StackSetName: !Ref SharedServiceStackSetName
184 |       TemplateURL: ds_shared_code_artifact.yaml
185 | 
186 |   SharedDataScienceECR:
187 |     Type: AWS::CloudFormation::Stack
188 |     Properties:
189 |       Parameters:
190 |         SharedServiceStackSetName: !Ref SharedServiceStackSetName
191 |         ECRRepositoryName: !Ref SharedECRRepositoryName
192 |       TemplateURL: ds_shared_ecr.yaml
193 | 
194 |   SharedSageMakerNetwork:
195 |     Type: AWS::CloudFormation::Stack
196 |     Properties:
197 |       Parameters:
198 |         SharedServiceStackSetName: !Ref SharedServiceStackSetName
199 |         SageMakerStudioVpcCIDR: !Ref SageMakerStudioVpcCIDR
200 |         SageMakerStudioSubnet1CIDR: !Ref SageMakerStudioSubnet1CIDR
201 |         SageMakerStudioSubnet2CIDR: !Ref SageMakerStudioSubnet2CIDR
202 |         SageMakerStudioSubnet3CIDR: !Ref SageMakerStudioSubnet3CIDR
203 |       TemplateURL: ds_shared_sagemaker_network.yaml
204 |       Tags:
205 |         - Key: Name
206 |           Value: !Sub "ds-vpc-${SharedServiceStackSetName}"
207 | 
208 |   SharedDataLake:
209 |     Type: AWS::CloudFormation::Stack
210 |     DependsOn: SharedSageMakerNetwork
211 |     Properties:
212 |       Parameters:
213 |         SharedServiceStackSetName: !Ref SharedServiceStackSetName
214 |       TemplateURL: ds_shared_data_lake.yaml
215 | 
216 |   SageMakerDetectiveControl:
217 |     Type: AWS::CloudFormation::Stack
218 |     Properties:
219 |       Parameters:
220 |         StackSetName: !Ref SharedServiceStackSetName
221 |       TemplateURL: ds_admin_detective.yaml
222 | 
--------------------------------------------------------------------------------
/cloudformation/ds_env_backing_store.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: MIT-0
3 | 
4 | # The following template provisions the backing store for a secure data science team environment.
5 | # This template creates a KMS CMK and data and model artifact S3 buckets whose bucket policies
6 | # restrict access to the shared S3 VPC endpoint, plus a CodeCommit repository seeded with the project template.
7 | # Lastly the template stores outputs into Parameter Store so they can be referenced later by SC products.
8 | Description: Data Science Environment S3 data storage
9 | 
10 | Parameters:
11 |   SharedServiceStackSetName:
12 |     Type: String
13 |     Description: Common root name used across shared service cloudformation resources
14 | 
15 |   TeamName:
16 |     Type: String
17 |     AllowedPattern: '[A-Za-z0-9\-]*'
18 |     Description: Please specify your team name. Used as a suffix for team's resource names.
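#
# Usage note (editor's sketch; the bucket name and object key are illustrative):
# the bucket policies below deny s3:GetObject, s3:PutObject and s3:ListBucket
# unless the request arrives through the shared S3 VPC endpoint, so a call like
#
#   aws s3 cp s3://ds-data-bucket-myteam-dev-abcd1234/raw/train.csv .
#
# succeeds only from compute inside the data science VPC (for example a Studio
# notebook) and fails with AccessDenied from outside it.
#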
19 | 
20 |   EnvType:
21 |     Description: System Environment
22 |     Type: String
23 |     Default: dev
24 | 
25 | Outputs:
26 |   KMSCMK:
27 |     Description: KMS Key ARN for the data and model buckets
28 |     Value: !GetAtt KMSCMK.Arn
29 |     Export:
30 |       Name: !Sub 'ds-kms-cmk-${TeamName}-${EnvType}-arn'
31 | 
32 |   DataBucket:
33 |     Description: Data bucket name
34 |     Value: !Ref DataBucket
35 |     Export:
36 |       Name: !Sub 'ds-s3-data-${TeamName}-${EnvType}'
37 | 
38 |   ModelArtifactsBucket:
39 |     Description: Model artifacts bucket
40 |     Value: !Ref ModelArtifactsBucket
41 |     Export:
42 |       Name: !Sub 'ds-s3-models-${TeamName}-${EnvType}'
43 | 
44 |   CodeCommitUrl:
45 |     Description: Code Commit Repository
46 |     Value: !GetAtt CodeCommitRepo.CloneUrlHttp
47 |     Export:
48 |       Name: !Sub 'ds-source-${TeamName}-${EnvType}-url'
49 | 
50 | Resources:
51 |   KMSCMK:
52 |     Type: 'AWS::KMS::Key'
53 |     Properties:
54 |       Description: KMS key for S3 buckets
55 |       EnableKeyRotation: true
56 |       KeyPolicy:
57 |         Id: key-policy-1
58 |         Version: 2012-10-17
59 |         Statement:
60 |           - Sid: Enable IAM User Permissions
61 |             Effect: Allow
62 |             Principal:
63 |               AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root"
64 |             Action: 'kms:*'
65 |             Resource: '*'
66 |           - Sid: Allow access for Key Administrators
67 |             Effect: Allow
68 |             Principal:
69 |               AWS:
70 |                 Fn::ImportValue:
71 |                   !Sub "ds-admin-role-${TeamName}-${EnvType}-arn"
72 |             Action:
73 |               - 'kms:Create*'
74 |               - 'kms:Describe*'
75 |               - 'kms:Enable*'
76 |               - 'kms:List*'
77 |               - 'kms:Put*'
78 |               - 'kms:Update*'
79 |               - 'kms:Revoke*'
80 |               - 'kms:Disable*'
81 |               - 'kms:Get*'
82 |               - 'kms:Delete*'
83 |               - 'kms:TagResource'
84 |               - 'kms:UntagResource'
85 |               - 'kms:ScheduleKeyDeletion'
86 |               - 'kms:CancelKeyDeletion'
87 |             Resource: '*'
88 |           - Sid: Allow access for Key Users
89 |             Effect: Allow
90 |             Principal:
91 |               AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root"
92 |               # TODO: reduce to team specific role
93 |               # AWS: !Sub "arn:aws:iam::${AWS::AccountId}:role/service-role/ds-notebook-role-${TeamName}-${EnvType}"
94 |               # AWS: !Sub "arn:aws:iam::${AWS::AccountId}:role/ds-user-role-${TeamName}-${EnvType}"
95 |             Action:
96 |               - 'kms:Encrypt'
97 |               - 'kms:Decrypt'
98 |               - 'kms:CreateGrant'
99 |               - 'kms:ReEncrypt*'
100 |               - 'kms:GenerateDataKey'
101 |               - 'kms:DescribeKey'
102 |             Resource: '*'
103 |             Condition:
104 |               StringNotEquals:
105 |                 'aws:SourceVpce':
106 |                   Fn::ImportValue:
107 |                     !Sub "ds-s3-endpoint-${SharedServiceStackSetName}-id"
108 |       Tags:
109 |         - Key: TeamName
110 |           Value: !Ref TeamName
111 |         - Key: EnvironmentType
112 |           Value: !Ref EnvType
113 | 
114 |   KMSCMKAlias:
115 |     Type: 'AWS::KMS::Alias'
116 |     Properties:
117 |       AliasName: !Sub "alias/ds-s3-kms-cmk-${TeamName}-${EnvType}"
118 |       TargetKeyId: !Ref KMSCMK
119 | 
120 |   KMSCMKArn:
121 |     Type: 'AWS::SSM::Parameter'
122 |     Properties:
123 |       Name: !Sub "ds-kms-cmk-${TeamName}-${EnvType}-arn"
124 |       Type: String
125 |       Value: !GetAtt
126 |         - KMSCMK
127 |         - Arn
128 |       Description: KMS CMK ARN for the team's data and model buckets
129 | 
130 |   DataBucket:
131 |     Type: 'AWS::S3::Bucket'
132 |     Properties:
133 |       BucketName:
134 |         !Join
135 |           - ''
136 |           -
137 |             - 'ds-data-bucket-'
138 |             - !Ref TeamName
139 |             - '-'
140 |             - !Ref EnvType
141 |             - '-'
142 |             - !Select
143 |               - 4
144 |               - !Split
145 |                 - '-'
146 |                 - !Select
147 |                   - 2
148 |                   - !Split
149 |                     - /
150 |                     - !Ref 'AWS::StackId'
151 |       PublicAccessBlockConfiguration:
152 |         BlockPublicAcls: TRUE
153 |         BlockPublicPolicy: TRUE
154 |         IgnorePublicAcls: TRUE
155 |         RestrictPublicBuckets: TRUE
156 |       BucketEncryption:
157 |         ServerSideEncryptionConfiguration:
158 |           - ServerSideEncryptionByDefault:
159 |               SSEAlgorithm: 'aws:kms'
160 |               KMSMasterKeyID: !Ref KMSCMK
161 |       Tags:
162 |         - Key: TeamName
163 |           Value: !Ref TeamName
164 |         - Key: EnvironmentType
165 |           Value: !Ref EnvType
166 | 
167 |   DataBucketPolicy:
168 |     Type: 'AWS::S3::BucketPolicy'
169 |     Properties:
170 |       Bucket: !Ref DataBucket
171 |       PolicyDocument:
172 |         Statement:
173 |           - Action:
174 |               - 's3:GetObject'
175 |               - 's3:PutObject'
176 |               - 's3:ListBucket'
177 |             Effect: Deny
178 |             Resource:
179 |               - !Sub "arn:aws:s3:::${DataBucket}/*"
180 |               - !Sub "arn:aws:s3:::${DataBucket}"
181 |             Principal: '*'
182 |             Condition:
183 |               StringNotEquals:
184 |                 'aws:SourceVpce':
185 |                   Fn::ImportValue:
186 |                     !Sub "ds-s3-endpoint-${SharedServiceStackSetName}-id"
187 | 
188 |   ModelArtifactsBucket:
189 |     Type: 'AWS::S3::Bucket'
190 |     Properties:
191 |       BucketName:
192 |         !Join
193 |           - ""
194 |           -
195 |             - "ds-model-bucket-"
196 |             - !Ref TeamName
197 |             - "-"
198 |             - !Ref EnvType
199 |             - "-"
200 |             - !Select
201 |               - 4
202 |               - !Split
203 |                 - '-'
204 |                 - !Select
205 |                   - 2
206 |                   - !Split
207 |                     - /
208 |                     - !Ref 'AWS::StackId'
209 |       PublicAccessBlockConfiguration:
210 |         BlockPublicAcls: TRUE
211 |         BlockPublicPolicy: TRUE
212 |         IgnorePublicAcls: TRUE
213 |         RestrictPublicBuckets: TRUE
214 |       BucketEncryption:
215 |         ServerSideEncryptionConfiguration:
216 |           - ServerSideEncryptionByDefault:
217 |               SSEAlgorithm: 'aws:kms'
218 |               KMSMasterKeyID: !Ref KMSCMK
219 |       Tags:
220 |         - Key: TeamName
221 |           Value: !Ref TeamName
222 |         - Key: EnvironmentType
223 |           Value: !Ref EnvType
224 | 
225 |   ModelArtifactsBucketPolicy:
226 |     Type: 'AWS::S3::BucketPolicy'
227 |     Properties:
228 |       Bucket: !Ref ModelArtifactsBucket
229 |       PolicyDocument:
230 |         Statement:
231 |           - Action:
232 |               - 's3:GetObject'
233 |               - 's3:PutObject'
234 |               - 's3:ListBucket'
235 |             Effect: Deny
236 |             Resource:
237 |               - !Sub "arn:aws:s3:::${ModelArtifactsBucket}/*"
238 |               - !Sub "arn:aws:s3:::${ModelArtifactsBucket}"
239 |             Principal: '*'
240 |             Condition:
241 |               StringNotEquals:
242 |                 'aws:SourceVpce':
243 |                   Fn::ImportValue:
244 |                     !Sub "ds-s3-endpoint-${SharedServiceStackSetName}-id"
245 | 
246 |   CodeCommitRepo:
247 |     Type: 'AWS::CodeCommit::Repository'
248 |     Properties:
249 |       RepositoryName: !Sub 'ds-source-${TeamName}-${EnvType}'
250 |       RepositoryDescription: Data science project code repository
251 |       Code:
252 |         S3:
253 |           Bucket: '< S3_CFN_STAGING_BUCKET >'
254 |           Key: '< S3_CFN_STAGING_PATH >/project_template.zip'
255 |       Tags:
256 |         - Key: TeamName
257 |           Value: !Ref TeamName
258 |         - Key: EnvironmentType
259 |           Value: !Ref EnvType
260 | 
261 |   S3DataBucketNameSSMParameter:
262 |     Type: 'AWS::SSM::Parameter'
263 |     Properties:
264 |       Name: !Sub "ds-s3-data-bucket-${TeamName}-${EnvType}"
265 |       Type: String
266 |       Value: !Ref DataBucket
267 |       Description: Data Science S3 data bucket name for the project
268 | 
269 |   S3ModelBucketNameSSMParameter:
270 |     Type: 'AWS::SSM::Parameter'
271 |     Properties:
272 |       Name: !Sub "ds-s3-model-artifact-bucket-${TeamName}-${EnvType}"
273 |       Type: String
274 |       Value: !Ref ModelArtifactsBucket
275 |       Description: Data Science S3 model artifact bucket name for the project
--------------------------------------------------------------------------------
/cloudformation/ds_env_principals.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: MIT-0
3 | Description: |
4 |   Template to create IAM principals for operation within the data science environment.
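#
# Usage note (editor's sketch; the account ID, team and env values are
# illustrative): after deployment, an authorized user can assume the data
# scientist role from the CLI,
#
#   aws sts assume-role \
#     --role-arn arn:aws:iam::111122223333:role/ds-user-role-myteam-dev \
#     --role-session-name myteam-dev-session
#
# or follow the AssumeDataScientistUserRole output URL to switch roles in the
# AWS console.
#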
5 | 
6 | Parameters:
7 |   TeamName:
8 |     Type: String
9 |     AllowedPattern: '[A-Za-z0-9\-]*'
10 |     Description: Please specify your team name. Used as a suffix for team's resource names.
11 | 
12 |   EnvType:
13 |     Description: System Environment
14 |     Type: String
15 |     Default: dev
16 | 
17 | Outputs:
18 |   DataScientistAdminRoleArn:
19 |     Description: ARN of the data science administration role for this project
20 |     Value: !GetAtt DataScientistAdminRole.Arn
21 |     Export:
22 |       Name: !Sub "ds-admin-role-${TeamName}-${EnvType}-arn"
23 | 
24 |   DataScientistUserRoleArn:
25 |     Description: ARN of the data science user role for this project
26 |     Value: !GetAtt DataScientistRole.Arn
27 |     Export:
28 |       Name: !Sub "ds-user-role-${TeamName}-${EnvType}-arn"
29 | 
30 |   AssumeDataScienceAdminRole:
31 |     Description: URL for assuming the role of a data science admin
32 |     Value: !Sub 'https://signin.aws.amazon.com/switchrole?account=${AWS::AccountId}&roleName=${DataScientistAdminRole}&displayName=${DataScientistAdminRole}'
33 | 
34 |   AssumeDataScientistUserRole:
35 |     Description: URL for assuming the role of a data science user
36 |     Value: !Sub 'https://signin.aws.amazon.com/switchrole?account=${AWS::AccountId}&roleName=${DataScientistRole}&displayName=${DataScientistRole}'
37 | 
38 | 
39 | Resources:
40 |   DataScientistAdminRole:
41 |     Type: 'AWS::IAM::Role'
42 |     Properties:
43 |       AssumeRolePolicyDocument:
44 |         Version: 2012-10-17
45 |         Statement:
46 |           - Effect: Allow
47 |             Principal:
48 |               AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root"
49 |             Action:
50 |               - 'sts:AssumeRole'
51 |           - Sid: SageMakerTrustRelationship
52 |             Effect: Allow
53 |             Principal:
54 |               Service:
55 |                 - 'sagemaker.amazonaws.com'
56 |             Action:
57 |               - 'sts:AssumeRole'
58 |       RoleName: !Sub "ds-admin-role-${TeamName}-${EnvType}"
59 |       Policies:
60 |         - PolicyName: SageMakerAccessInlinePolicy
61 |           PolicyDocument:
62 |             Version: 2012-10-17
63 |             Statement:
64 |               - Sid: DSAdminAdditionalPolicies
65 |                 Effect: Allow
66 |                 Action:
67 |                   - 'sagemaker:AddTags'
68 |                   - 'sagemaker:CreateUserProfile'
69 |                   - 'sagemaker:CreateNotebookInstance'
70 |                   - 'sagemaker:CreateNotebookInstanceLifecycleConfig'
71 |                   - 'sagemaker:CreateApp'
72 |                   - 'sagemaker:CreateAppImageConfig'
73 |                   - 'sagemaker:CreateImage'
74 |                   - 'sagemaker:CreateImageVersion'
75 |                   - 'sagemaker:DeleteImage'
76 |                   - 'sagemaker:DeleteImageVersion'
77 |                   - 'sagemaker:DeleteUserProfile'
78 |                   - 'sagemaker:DeleteApp'
79 |                   - 'sagemaker:DeleteNotebookInstance'
80 |                   - 'sagemaker:DeleteAppImageConfig'
81 |                   - 'sagemaker:DeleteNotebookInstanceLifecycleConfig'
82 |                   - 'sagemaker:DeleteTags'
83 |                   - 'sagemaker:UpdateNotebookInstance'
84 |                   - 'sagemaker:UpdateAppImageConfig'
85 |                   - 'sagemaker:UpdateNotebookInstanceLifecycleConfig'
86 |                   - 'sagemaker:UpdateDomain'
87 |                   - 'sagemaker:UpdateUserProfile'
88 |                   - 'iam:TagRole'
89 |                   - 'kms:CreateGrant'
90 |                   - 'kms:DescribeKey'
91 |                 Resource: '*'
92 |               - Sid: SageMakerIamPassRole
93 |                 Effect: Allow
94 |                 Action:
95 |                   - 'iam:PassRole'
96 |                 Resource: '*'
97 |                 Condition:
98 |                   StringEquals:
99 |                     'iam:PassedToService':
100 |                       - sagemaker.amazonaws.com
101 |       ManagedPolicyArns:
102 |         - 'arn:aws:iam::aws:policy/AWSServiceCatalogAdminFullAccess'
103 |         - 'arn:aws:iam::aws:policy/AWSKeyManagementServicePowerUser'
104 |         - 'arn:aws:iam::aws:policy/AmazonS3FullAccess'
105 |         - 'arn:aws:iam::aws:policy/AWSCloudFormationFullAccess'
106 |         - 'arn:aws:iam::aws:policy/ReadOnlyAccess'
107 |         - 'arn:aws:iam::aws:policy/AmazonSSMFullAccess'
108 |         - 'arn:aws:iam::aws:policy/AWSCodeCommitFullAccess'
109 |         - 'arn:aws:iam::aws:policy/AWSCodeArtifactAdminAccess'
110 |         - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'
111 |         - 'arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess'
112 |       Tags:
113 |         - Key: TeamName
114 |           Value: !Ref TeamName
115 |         - Key: EnvironmentType
116 |           Value: !Ref EnvType
117 | 
118 |   DataScientistRole:
119 |     Type: 'AWS::IAM::Role'
120 |     Properties:
121 |       AssumeRolePolicyDocument:
122 |         Version: 2012-10-17
123 |         Statement:
124 |           - Effect: Allow
125 |             Principal:
126 |               AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root"
127 |             Action:
128 |               - 'sts:AssumeRole'
129 |           - Sid: SageMakerTrustRelationship
130 |             Effect: Allow
131 |             Principal:
132 |               Service:
133 |                 - 'sagemaker.amazonaws.com'
134 |             Action:
135 |               - 'sts:AssumeRole'
136 |       RoleName: !Sub "ds-user-role-${TeamName}-${EnvType}"
137 |       Policies:
138 |         - PolicyName: SageMakerAccessInlinePolicy
139 |           PolicyDocument:
140 |             Version: 2012-10-17
141 |             Statement:
142 |               - Sid: DataScientistAdditionalPolicies
143 |                 Effect: Allow
144 |                 Action:
145 |                   - 'sagemaker:UpdateCodeRepository'
146 |                   - 'sagemaker:DeleteCodeRepository'
147 |                   - 'sagemaker:CreateCodeRepository'
148 |                   - 'sagemaker:StartNotebookInstance'
149 |                   - 'sagemaker:StopNotebookInstance'
150 |                   - 'sagemaker:CreatePresignedDomainUrl'
151 |                   - 'sagemaker:CreatePresignedNotebookInstanceUrl'
152 |                   - 'sagemaker:CreateApp'
153 |                   - 'sagemaker:AddTags'
154 |                   - 'sagemaker:DeleteApp'
155 |                   - 'sagemaker:GetSagemakerServicecatalogPortfolioStatus'
156 |                   - 'codecommit:BatchGetRepositories'
157 |                   - 'codecommit:ListRepositories'
158 |                   - 'iam:TagRole'
159 |                   - 'kms:CreateGrant'
160 |                   - 'kms:Decrypt'
161 |                   - 'kms:DescribeKey'
162 |                   - 'kms:Encrypt'
163 |                   - 'kms:ReEncrypt*'
164 |                   - 'kms:GenerateDataKey'
165 |                   - 'kms:ListAliases'
166 |                   - 'servicecatalog:ListAcceptedPortfolioShares'
167 |                   - 'servicecatalog:ListPrincipalsForPortfolio'
168 |                 Resource: '*'
169 |               - Sid: CodeCommitAccess
170 |                 Effect: Allow
171 |                 Action:
172 |                   - 'codecommit:GitPull'
173 |                   - 'codecommit:GitPush'
174 |                   - 'codecommit:CreateBranch'
175 |                   - 'codecommit:DeleteBranch'
176 |                   - 'codecommit:GetBranch'
177 |                   - 'codecommit:ListBranches'
178 |                   - 'codecommit:UpdateDefaultBranch'
179 |                   - 'codecommit:CreatePullRequest'
180 |                   - 'codecommit:CreatePullRequestApproval'
181 |                   - 'codecommit:GetPullRequest*'
182 |                   - 'codecommit:ListPullRequests'
183 |                   - 'codecommit:UpdatePullRequest*'
184 |                   - 'codecommit:DescribePullRequestEvents'
185 |                   - 'codecommit:CreateCommit'
186 |                   - 'codecommit:GetCommit'
187 |                   - 'codecommit:BatchGetCommits'
188 |                   - 'codecommit:GetCommitHistory'
189 |                   - 'codecommit:GetDifferences'
190 |                   - 'codecommit:GetReferences'
191 |                   - 'codecommit:GetRepository'
192 |                   - 'codecommit:GetMerge*'
193 |                   - 'codecommit:Merge*'
194 |                   - 'codecommit:DescribeMergeConflicts'
195 |                   - 'codecommit:GetComment*'
196 |                   - 'codecommit:PostComment*'
197 |                   - 'codecommit:PutCommentReaction'
198 |                   - 'codecommit:UpdateComment*'
199 |                   - 'codecommit:PutFile'
200 |                   - 'codecommit:GetFile'
201 |                   - 'codecommit:DeleteFile'
202 |                   - 'codecommit:GetFolder'
203 |                   - 'codecommit:GetBlob'
204 |                 Resource:
205 |                   - !Sub 'arn:aws:codecommit:${AWS::Region}:${AWS::AccountId}:ds-source-${TeamName}-${EnvType}'
206 |               - Sid: SageMakerIamPassRole
207 |                 Effect: Allow
208 |                 Action:
209 |                   - 'iam:PassRole'
210 |                 Resource: '*'
211 |                 Condition:
212 |                   StringEquals:
213 |                     'iam:PassedToService':
214 |                       - sagemaker.amazonaws.com
215 |       ManagedPolicyArns:
216 |         - 'arn:aws:iam::aws:policy/AWSServiceCatalogEndUserFullAccess'
217 |         - 'arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess'
218 |         - 'arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess'
219 |         - 'arn:aws:iam::aws:policy/AmazonSSMReadOnlyAccess'
220 |         - 'arn:aws:iam::aws:policy/AWSLambda_ReadOnlyAccess'
221 |         - 'arn:aws:iam::aws:policy/AWSCodeCommitReadOnly'
222 |         - 'arn:aws:iam::aws:policy/AWSCodeArtifactReadOnlyAccess'
223 |         - 'arn:aws:iam::aws:policy/AmazonSageMakerReadOnly'
224 |         - 'arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly'
225 |       Tags:
226 |         - Key: TeamName
227 |           Value: !Ref TeamName
228 |         - Key: EnvironmentType
229 |           Value: !Ref EnvType
230 | 
231 |   DataScientistRoleArn:
232 |     Type: 'AWS::SSM::Parameter'
233 |     Properties:
234 |       Name: !Sub "ds-user-role-${TeamName}-${EnvType}-arn"
235 |       Type: String
236 |       Value: !GetAtt DataScientistRole.Arn
237 |       Description: SSM-Parameter - DataScientist Role Arn
--------------------------------------------------------------------------------
/cloudformation/ds_env_sagemaker_studio.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: MIT-0
3 | 
4 | # The following template is designed to onboard a SageMaker Studio Domain in a VPCOnly secure environment for data science.
5 | # The template depends on the shared SageMaker Studio VPC and the DataScience Administration role created by ds_administration.yaml.
6 | # This template creates a KMS CMK, the resources required for a custom SageMaker image for TensorFlow, a default data scientist role,
7 | # and onboards the SageMaker Studio Domain with default user settings.
8 | Description: |
9 |   CloudFormation template for creating and onboarding SageMaker Studio
10 | 
11 | Parameters:
12 |   SharedServiceStackSetName:
13 |     Type: String
14 |     Description: Shared Service stack set name, common across data science stacks
15 | 
16 |   StudioDomainName:
17 |     Description: Studio domain name
18 |     AllowedPattern: '^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}'
19 |     Type: String
20 |     Default: ds-studio-domain
21 | 
22 |   KMSAlias:
23 |     Type: String
24 |     Default: ds-sagemaker-studio-kms
25 | 
26 |   CustomImageEcrUri:
27 |     Type: String
28 |     Description: Custom Images URI in ECR of format ACCOUNT_ID.dkr.ecr.REGION.amazonaws.com/ECR_REPOSITORY_NAME:IMAGE_NAME
29 | 
30 |   CustomImageName:
31 |     Type: String
32 |     Description: Custom Image name
33 | 
34 |   CustomImageDescription:
35 |     Type: String
36 |     Description: Custom Image description
37 | 
38 |   AppImageConfigName:
39 |     Type: String
40 |     Description: App image config name for SageMaker custom image
41 | 
42 |   KernelSpecsName:
43 |     Type: String
44 |     Description: Jupyter kernel name for the SageMaker custom image; the name must match a kernel defined in the custom image, per the Jupyter kernel specs https://jupyter-client.readthedocs.io/en/stable/kernels.html
45 |     Default: python3
46 | 
47 |   KernelSpecsDisplayName:
48 |     Type: String
49 |     Description: Jupyter kernel display name; this is the name displayed for the custom image kernel in Studio
50 |     Default: Python 3
51 | 
52 | Outputs:
53 |   StudioDomainId:
54 |     Description: SageMaker Studio Domain ID
55 |     Value: !Ref SageMakerStudioDomain
56 |     Export:
57 |       Name: !Sub 'ds-sagemaker-studio-${SharedServiceStackSetName}-domain-id'
58 | 
59 |   StudioUrl:
60 |     Description: Link to open SageMaker Studio
61 |     Value: !GetAtt SageMakerStudioDomain.Url
62 | 
63 |   EFSKmsKeyId:
64 |     Description: KMS Key Id for the Studio EFS encryption
65 |     Value: !Ref SagemakerStudioKMS
66 | 
67 |   EFSKmsKeyArn:
68 |     Description: KMS Key ARN for the Studio EFS encryption
69 |     Value: !GetAtt SagemakerStudioKMS.Arn
70 |     Export:
71 |       Name: !Sub 'ds-sagemaker-studio-kms-cmk-${SharedServiceStackSetName}-arn'
72 | 
73 | Resources:
74 |   SagemakerStudioKMS:
75 |     Type: 'AWS::KMS::Key'
76 |     Properties:
77 |       Description: Generated KMS key for SageMaker Studio's EFS encryption
78 |       EnableKeyRotation: true
79 |       Enabled: true
80 |       KeyPolicy:
81 |         Version: 2012-10-17
82 |         Id: KmsKey-EfsSagemakerStudioKey
83 |         Statement:
84 |           - Sid: Enable IAM User Permissions
85 |             Effect: Allow
86 |             Principal:
87 |               AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root"
88 |             Action: 'kms:*'
89 |             Resource: '*'
90 |           - Sid: Allow access for Key Administrators
91 |             Action:
92 |               - 'kms:Create*'
93 |               - 'kms:Describe*'
94 |               - 'kms:Enable*'
95 |               - 'kms:List*'
96 |               - 'kms:Put*'
97 |               - 'kms:Update*'
98 |               - 'kms:Revoke*'
99 |               - 'kms:Disable*'
100 |               - 'kms:Get*'
101 |               - 'kms:Delete*'
102 |               - 'kms:TagResource'
103 |               - 'kms:UntagResource'
104 |               - 'kms:ScheduleKeyDeletion'
105 |               - 'kms:CancelKeyDeletion'
106 |             Effect: Allow
107 |             Principal:
108 |               AWS:
109 |                 Fn::ImportValue:
110 |                   !Sub "ds-administrator-role-${SharedServiceStackSetName}-arn"
111 |             Resource: '*'
112 |           - Sid: Allow access for Key Users
113 |             Effect: Allow
114 |             Principal:
115 |               AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root"
116 |             Action:
117 |               - 'kms:Encrypt'
118 |               - 'kms:Decrypt'
119 |               - 'kms:CreateGrant'
120 |               - 'kms:ReEncrypt*'
121 |               - 'kms:GenerateDataKey'
122 |               - 'kms:DescribeKey'
123 |             Resource: '*'
124 |             Condition:
125 |               StringNotEquals:
126 |                 'aws:SourceVpce':
127 |                   Fn::ImportValue:
128 |                     !Sub "ds-s3-endpoint-${SharedServiceStackSetName}-id"
129 |       Tags:
130 |         - Key: StudioDomainName
131 |           Value: !Ref StudioDomainName
132 |         - Key: StackSetName
133 |           Value: !Ref SharedServiceStackSetName
134 | 
135 |   KeyAlias:
136 |     Type: AWS::KMS::Alias
137 |     Properties:
138 |       AliasName: !Sub alias/${KMSAlias}
139 |       TargetKeyId: !Ref SagemakerStudioKMS
140 | 
141 |   SageMakerCustomImage:
142 |     Type: AWS::SageMaker::Image
143 |     Properties:
144 |       ImageDescription: !Ref CustomImageDescription
145 |       ImageDisplayName: !Ref CustomImageDescription
146 |       ImageName: !Ref CustomImageName
147 |       # Requires SageMakerFullAccess
148 |       ImageRoleArn:
149 |         Fn::ImportValue:
150 |           !Sub 'ds-administrator-role-${SharedServiceStackSetName}-arn'
151 |       Tags:
152 |         - Key: StudioDomainName
153 |           Value: !Ref StudioDomainName
154 |         - Key: StackSetName
155 |           Value: !Ref SharedServiceStackSetName
156 | 
157 |   SageMakerCustomImageVersion:
158 |     Type: AWS::SageMaker::ImageVersion
159 |     DependsOn:
160 |       - SageMakerCustomImage
161 |     Properties:
162 |       BaseImage: !Ref CustomImageEcrUri
163 |       ImageName: !Ref CustomImageName
164 | 
165 |   SageMakerCustomImageAppConfig:
166 |     Type: AWS::SageMaker::AppImageConfig
167 |     DependsOn:
168 |       - SageMakerCustomImageVersion
169 |     Properties:
170 |       AppImageConfigName: !Ref AppImageConfigName
171 |       KernelGatewayImageConfig:
172 |         FileSystemConfig:
173 |           DefaultGid: 100
174 |           DefaultUid: 1000
175 |           MountPath: '/home/jovyan/work'
176 |         KernelSpecs:
177 |           - DisplayName: !Ref KernelSpecsDisplayName
178 |             Name: !Ref KernelSpecsName
179 |       Tags:
180 |         - Key: StudioDomainName
181 |           Value: !Ref StudioDomainName
182 |         - Key: StackSetName
183 |           Value: !Ref SharedServiceStackSetName
184 | 
185 |   DataScientistDefaultRole:
186 |     Type: 'AWS::IAM::Role'
187 |     Properties:
188 |       AssumeRolePolicyDocument:
189 |         Version: 2012-10-17
190 |         Statement:
191 |           - Effect: Allow
192 |             Principal:
193 |               AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root"
194 |             Action:
195 |               - 'sts:AssumeRole'
196 |           - Sid:
SageMakerTrustRelationship 197 | Effect: Allow 198 | Principal: 199 | Service: 200 | - 'sagemaker.amazonaws.com' 201 | Action: 202 | - 'sts:AssumeRole' 203 | RoleName: !Sub "ds-default-user-role-${SharedServiceStackSetName}" 204 | Policies: 205 | - PolicyName: SageMakerAccessInlinePolicy 206 | PolicyDocument: 207 | Version: 2012-10-17 208 | Statement: 209 | - Sid: DataScientistAdditionalPolicies 210 | Effect: Allow 211 | Action: 212 | - 'sagemaker:UpdateCodeRepository' 213 | - 'sagemaker:DeleteCodeRepository' 214 | - 'sagemaker:CreateCodeRepository' 215 | - 'sagemaker:StartNotebookInstance' 216 | - 'sagemaker:StopNotebookInstance' 217 | - 'sagemaker:CreateApp' 218 | - 'sagemaker:AddTags' 219 | - 'sagemaker:DeleteApp' 220 | - 'sagemaker:GetSagemakerServicecatalogPortfolioStatus' 221 | - 'codecommit:BatchGetRepositories' 222 | - 'codecommit:GitPull' 223 | - 'codecommit:GitPush' 224 | - 'codecommit:CreateBranch' 225 | - 'codecommit:DeleteBranch' 226 | - 'codecommit:GetBranch' 227 | - 'codecommit:ListBranches' 228 | - 'codecommit:CreatePullRequest' 229 | - 'codecommit:CreatePullRequestApproval' 230 | - 'codecommit:GetPullRequest' 231 | - 'codecommit:CreateCommit' 232 | - 'codecommit:GetCommit' 233 | - 'codecommit:GetCommitHistory' 234 | - 'codecommit:GetDifferences' 235 | - 'codecommit:GetReferences' 236 | - 'codecommit:CreateRepository' 237 | - 'codecommit:GetRepository' 238 | - 'codecommit:ListRepositories' 239 | - 'iam:TagRole' 240 | - 'kms:CreateGrant' 241 | - 'kms:DescribeKey' 242 | - 'servicecatalog:ListAcceptedPortfolioShares' 243 | Resource: '*' 244 | - Sid: SageMakerIamPassRole 245 | Effect: Allow 246 | Action: 247 | - 'iam:PassRole' 248 | Resource: '*' 249 | Condition: 250 | StringEquals: 251 | 'iam:PassedToService': 252 | - sagemaker.amazonaws.com 253 | ManagedPolicyArns: 254 | - 'arn:aws:iam::aws:policy/AWSServiceCatalogEndUserFullAccess' 255 | - 'arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess' 256 | - 'arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess' 257 | - 'arn:aws:iam::aws:policy/AmazonSSMReadOnlyAccess' 258 | - 'arn:aws:iam::aws:policy/AWSLambda_ReadOnlyAccess' 259 | - 'arn:aws:iam::aws:policy/AWSCodeCommitReadOnly' 260 | - 'arn:aws:iam::aws:policy/AWSCodeArtifactReadOnlyAccess' 261 | - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess' 262 | - 'arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly' 263 | Tags: 264 | - Key: StudioDomainName 265 | Value: !Ref StudioDomainName 266 | - Key: StackSetName 267 | Value: !Ref SharedServiceStackSetName 268 | 269 | DataScientistDefaultRoleArn: 270 | Type: 'AWS::SSM::Parameter' 271 | Properties: 272 | Name: !Sub "ds-default-user-role-${SharedServiceStackSetName}-arn" 273 | Type: String 274 | Value: !GetAtt DataScientistDefaultRole.Arn 275 | Description: SSM-Parameter - Default DataScientist Role Arn 276 | 277 | SageMakerStudioDomain: 278 | Type: AWS::SageMaker::Domain 279 | DependsOn: 280 | - SageMakerCustomImageAppConfig 281 | Properties: 282 | AppNetworkAccessType: VpcOnly 283 | AuthMode: IAM 284 | DefaultUserSettings: 285 | ExecutionRole: !GetAtt DataScientistDefaultRole.Arn 286 | SecurityGroups: 287 | - Fn::ImportValue: 288 | !Sub 'ds-sagemaker-vpc-sg-${SharedServiceStackSetName}' 289 | SharingSettings: 290 | NotebookOutputOption: Disabled 291 | KernelGatewayAppSettings: 292 | CustomImages: 293 | - AppImageConfigName: !Ref AppImageConfigName 294 | ImageName: !Ref CustomImageName 295 | DefaultResourceSpec: 296 | InstanceType: 'ml.t3.medium' 297 | SageMakerImageArn: !Ref SageMakerCustomImage 298 | 
SageMakerImageVersionArn: !GetAtt SageMakerCustomImageVersion.ImageVersionArn
299 |       DomainName: !Ref StudioDomainName
300 |       KmsKeyId: !Ref SagemakerStudioKMS
301 |       SubnetIds:
302 |         - Fn::ImportValue:
303 |             !Sub 'ds-subnet1-${SharedServiceStackSetName}'
304 |         - Fn::ImportValue:
305 |             !Sub 'ds-subnet2-${SharedServiceStackSetName}'
306 |         - Fn::ImportValue:
307 |             !Sub 'ds-subnet3-${SharedServiceStackSetName}'
308 |       VpcId:
309 |         Fn::ImportValue: !Sub 'ds-vpc-${SharedServiceStackSetName}'
310 |       Tags:
311 |         - Key: StackSetName
312 |           Value: !Ref SharedServiceStackSetName
313 | 
--------------------------------------------------------------------------------
/cloudformation/ds_env_studio_user_profile_v1.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: MIT-0
3 | Description: |
4 |   CloudFormation template for creating SageMaker Studio UserProfiles for a data science project and environment combination.
5 | 
6 | Parameters:
7 |   TeamName:
8 |     Type: String
9 |     AllowedPattern: '[A-Za-z0-9\-]*'
10 |     Description: Please specify your team name. Used as a suffix for team's resource names.
11 | 
12 |   EnvType:
13 |     Description: System Environment. Used as a suffix for team and environment resource names.
14 |     Type: String
15 |     Default: dev
16 | 
17 |   SharedServiceStackSetName:
18 |     Type: String
19 |     Default: DSSharedServices
20 |     Description: Common root name used across shared service cloudformation resources
21 | 
22 |   StudioUserProfileName:
23 |     Description: Studio user profile name
24 |     Type: String
25 | 
26 |   InstanceType:
27 |     Description: The instance type for the SageMaker Studio app. When creating the Jupyter Server app, do not modify the default.
28 | Type: String 29 | Default: ml.t3.medium 30 | AllowedValues: 31 | - ml.m5.2xlarge 32 | - ml.m5.4xlarge 33 | - ml.m5.8xlarge 34 | - ml.m5.large 35 | - ml.m5.xlarge 36 | - ml.p3.16xlarge 37 | - ml.p3.2xlarge 38 | - ml.p3.8xlarge 39 | - ml.t3.2xlarge 40 | - ml.t3.large 41 | - ml.t3.medium 42 | - ml.t3.micro 43 | - ml.t3.small 44 | - ml.t3.xlarge 45 | - system 46 | 47 | DeploymentInstanceType: 48 | Description: Allowed Instance Type to Deploy Models 49 | Type: String 50 | Default: ml.c5.large 51 | AllowedValues: 52 | - ml.m5.2xlarge 53 | - ml.m5.4xlarge 54 | - ml.m5.8xlarge 55 | - ml.c5.large 56 | - ml.t3.medium 57 | 58 | Outputs: 59 | StudioUserProfileName: 60 | Description: name of created user profile 61 | Value: !Ref SageMakerStudioUserProfile 62 | 63 | # NotebookUrl: 64 | # Description: Link to open Notebook 65 | # Value: !Sub 'https://${AWS::Region}.console.aws.amazon.com/sagemaker/home?region=${AWS::Region}#/notebook-instances/openNotebook/${NotebookInstance.NotebookInstanceName}?view=lab' 66 | 67 | Resources: 68 | SageMakerExecRole: 69 | Type: 'AWS::IAM::Role' 70 | Properties: 71 | AssumeRolePolicyDocument: 72 | Version: 2012-10-17 73 | Statement: 74 | - Effect: Allow 75 | Principal: 76 | Service: 77 | - sagemaker.amazonaws.com 78 | Action: 79 | - 'sts:AssumeRole' 80 | Path: /service-role/ 81 | RoleName: !Sub 'ds-notebook-role-${TeamName}-${EnvType}-${StudioUserProfileName}' 82 | ManagedPolicyArns: 83 | - !Ref SageMakerExecPolicy1 84 | - !Ref SageMakerExecPolicy2 85 | - 'arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess' 86 | - 'arn:aws:iam::aws:policy/AWSCodeArtifactReadOnlyAccess' 87 | Tags: 88 | - Key: TeamName 89 | Value: !Ref TeamName 90 | - Key: EnvironmentType 91 | Value: !Ref EnvType 92 | 93 | SageMakerExecPolicy1: 94 | Type: 'AWS::IAM::ManagedPolicy' 95 | Properties: 96 | ManagedPolicyName: !Sub 'ds-notebook-policy-${TeamName}-${EnvType}-${StudioUserProfileName}-1' 97 | PolicyDocument: 98 | Version: 2012-10-17 99 | Statement: 100 | - Effect: Allow 101 | Action: 102 | - 'ssm:GetParameters' 103 | - 'ssm:GetParameter' 104 | Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/ds-*' 105 | - Effect: Allow 106 | Action: 107 | - 'sagemaker:CreateHyperParameterTuningJob' 108 | - 'sagemaker:CreateProcessingJob' 109 | - 'sagemaker:CreateTrainingJob' 110 | - 'sagemaker:CreateAutoMLJob' 111 | - 'sagemaker:CreateTransformJob' 112 | Resource: '*' 113 | Condition: 114 | 'BoolIfExists': 115 | 'sagemaker:InterContainerTrafficEncryption': 'true' 116 | 'Null': 117 | 'sagemaker:VolumeKmsKey': 'false' 118 | 'sagemaker:OutputKmsKey': 'false' 119 | - Effect: Allow 120 | Action: 121 | - 'sagemaker:CreateEndpointConfig' 122 | Resource: '*' 123 | Condition: 124 | 'Null': 125 | 'sagemaker:VolumeKmsKey': 'false' 126 | 'ForAllValues:StringLike': 127 | 'sagemaker:InstanceTypes': !Ref DeploymentInstanceType 128 | - Effect: Allow 129 | Action: 130 | - 'sagemaker:AssociateTrialComponent' 131 | - 'sagemaker:CreateHyperParameterTuningJob' 132 | - 'sagemaker:CreateProcessingJob' 133 | - 'sagemaker:CreateTrainingJob' 134 | - 'sagemaker:CreateAutoMLJob' 135 | - 'sagemaker:CreateModel' 136 | - 'sagemaker:CreateExperiment' 137 | - 'sagemaker:CreateModelPackage' 138 | - 'sagemaker:CreateModelPackageGroup' 139 | - 'sagemaker:CreateTrial' 140 | - 'sagemaker:CreateTrialComponent' 141 | - 'sagemaker:CreateApp' 142 | - 'sagemaker:DeleteApp' 143 | - 'sagemaker:DescribeApp' 144 | - 'sagemaker:DeleteExperiment' 145 | - 'sagemaker:DeleteEndpointConfig' 146 | - 'sagemaker:DeleteEndpoint' 147 | - 
'sagemaker:DeleteModel'
148 |                   - 'sagemaker:DeleteModelPackage'
149 |                   - 'sagemaker:DeleteModelPackageGroup'
150 |                   - 'sagemaker:DeleteTrial'
151 |                   - 'sagemaker:DeleteTrialComponent'
152 |                   - 'sagemaker:StopAutoMLJob'
153 |                   - 'sagemaker:StopHyperParameterTuningJob'
154 |                   - 'sagemaker:StopTransformJob'
155 |                   - 'sagemaker:UpdateEndpoint'
156 |                   - 'sagemaker:UpdateEndpointWeightsAndCapacities'
157 |                   - 'sagemaker:UpdateExperiment'
158 |                   - 'sagemaker:UpdateTrial'
159 |                   - 'sagemaker:UpdateTrialComponent'
160 |                 Resource: '*'
161 |                 Condition:
162 |                   'ForAllValues:StringEqualsIfExists':
163 |                     'sagemaker:VpcSubnets':
164 |                       - Fn::ImportValue:
165 |                           !Sub 'ds-subnet1-${SharedServiceStackSetName}'
166 |                       - Fn::ImportValue:
167 |                           !Sub 'ds-subnet2-${SharedServiceStackSetName}'
168 |                       - Fn::ImportValue:
169 |                           !Sub 'ds-subnet3-${SharedServiceStackSetName}'
170 |                     'sagemaker:VpcSecurityGroupIds':
171 |                       - Fn::ImportValue:
172 |                           !Sub 'ds-sagemaker-vpc-sg-${SharedServiceStackSetName}'
173 |               - Effect: Allow
174 |                 Action:
175 |                   - 'sagemaker:List*'
176 |                   - 'sagemaker:Describe*'
177 |                   - 'sagemaker:Search'
178 |                   - 'application-autoscaling:DeleteScalingPolicy'
179 |                   - 'application-autoscaling:DeleteScheduledAction'
180 |                   - 'application-autoscaling:DeregisterScalableTarget'
181 |                   - 'application-autoscaling:DescribeScalableTargets'
182 |                   - 'application-autoscaling:DescribeScalingActivities'
183 |                   - 'application-autoscaling:DescribeScalingPolicies'
184 |                   - 'application-autoscaling:DescribeScheduledActions'
185 |                   - 'application-autoscaling:PutScalingPolicy'
186 |                   - 'application-autoscaling:PutScheduledAction'
187 |                   - 'application-autoscaling:RegisterScalableTarget'
188 |                   - 'cloudwatch:DeleteAlarms'
189 |                   - 'cloudwatch:DescribeAlarms'
190 |                   - 'cloudwatch:GetMetricData'
191 |                   - 'cloudwatch:GetMetricStatistics'
192 |                   - 'cloudwatch:ListMetrics'
193 |                   - 'cloudwatch:PutMetricAlarm'
194 |                   - 'cloudwatch:PutMetricData'
195 |                   - 'ec2:CreateNetworkInterface'
196 |                   - 'ec2:CreateNetworkInterfacePermission'
197 |                   - 'ec2:DeleteNetworkInterface'
198 |                   - 'ec2:DeleteNetworkInterfacePermission'
199 |                   - 'ec2:DescribeDhcpOptions'
200 |                   - 'ec2:DescribeNetworkInterfaces'
201 |                   - 'ec2:DescribeRouteTables'
202 |                   - 'ec2:DescribeSecurityGroups'
203 |                   - 'ec2:DescribeSubnets'
204 |                   - 'ec2:DescribeVpcEndpoints'
205 |                   - 'ec2:DescribeVpcs'
206 |                   - 'elastic-inference:Connect'
207 |                   - 'iam:ListRoles'
208 |                   - 'lambda:ListFunctions'
209 |                   - 'logs:CreateLogGroup'
210 |                   - 'logs:CreateLogStream'
211 |                   - 'logs:DescribeLogStreams'
212 |                   - 'logs:GetLogEvents'
213 |                   - 'logs:PutLogEvents'
214 |                   - 'sns:ListTopics'
215 |                   - 'codecommit:BatchGetRepositories'
216 |                   - 'codecommit:ListRepositories'
217 |                 Resource: '*'
218 | 
219 |   SageMakerExecPolicy2:
220 |     Type: 'AWS::IAM::ManagedPolicy'
221 |     Properties:
222 |       ManagedPolicyName: !Sub 'ds-notebook-policy-${TeamName}-${EnvType}-${StudioUserProfileName}-2'
223 |       PolicyDocument:
224 |         Version: 2012-10-17
225 |         Statement:
226 |           - Sid: KMSKeyAccess
227 |             Effect: Allow
228 |             Action:
229 |               - 'kms:CreateGrant'
230 |               - 'kms:Decrypt'
231 |               - 'kms:DescribeKey'
232 |               - 'kms:Encrypt'
233 |               - 'kms:ReEncrypt*'
234 |               - 'kms:GenerateDataKey'
235 |               - 'kms:ListAliases'
236 |             Resource:
237 |               - Fn::ImportValue:
238 |                   !Sub 'ds-kms-cmk-${TeamName}-${EnvType}-arn'
239 |               - Fn::ImportValue:
240 |                   !Sub 'ds-sagemaker-studio-kms-cmk-${SharedServiceStackSetName}-arn'
241 |               - Fn::ImportValue:
242 |                   !Sub 'ds-data-lake-kms-cmk-${SharedServiceStackSetName}-arn'
243 |           - Sid: CodeCommitAccess
244 |             Effect: Allow
245 |             Action:
246 |               - 'codecommit:GitPull'
247 |               -
'codecommit:GitPush' 248 | - 'codecommit:*Branch*' 249 | - 'codecommit:*PullRequest*' 250 | - 'codecommit:*Commit*' 251 | - 'codecommit:GetDifferences' 252 | - 'codecommit:GetReferences' 253 | - 'codecommit:GetRepository' 254 | - 'codecommit:GetMerge*' 255 | - 'codecommit:Merge*' 256 | - 'codecommit:DescribeMergeConflicts' 257 | - 'codecommit:*Comment*' 258 | - 'codecommit:*File' 259 | - 'codecommit:GetFolder' 260 | - 'codecommit:GetBlob' 261 | Resource: 262 | - !Sub 'arn:aws:codecommit:${AWS::Region}:${AWS::AccountId}:ds-source-${TeamName}-${EnvType}' 263 | - Sid: ECRAccess 264 | Effect: Allow 265 | Action: 266 | - 'ecr:BatchCheckLayerAvailability' 267 | - 'ecr:GetDownloadUrlForLayer' 268 | - 'ecr:GetRepositoryPolicy' 269 | - 'ecr:DescribeRepositories' 270 | - 'ecr:DescribeImages' 271 | - 'ecr:ListImages' 272 | - 'ecr:BatchGetImage' 273 | - 'ecr:GetLifecyclePolicy' 274 | - 'ecr:GetLifecyclePolicyPreview' 275 | - 'ecr:ListTagsForResource' 276 | - 'ecr:DescribeImageScanFindings' 277 | Resource: 278 | - 'arn:aws:ecr:*:*:repository/*sagemaker*' 279 | - Fn::Join: 280 | - '' 281 | - - 'arn:aws:ecr:*:*:repository/' 282 | - Fn::ImportValue: 283 | !Sub 'ds-shared-ecr-repository-${SharedServiceStackSetName}' 284 | - Sid: ECRAuthTokenAccess 285 | Effect: Allow 286 | Action: 287 | - 'ecr:GetAuthorizationToken' 288 | Resource: "*" 289 | - Effect: Allow 290 | Action: 291 | - 's3:GetObject' 292 | Resource: 293 | - "arn:aws:s3:::sagemaker-*/*" 294 | - Effect: Allow 295 | Action: 296 | - 's3:GetObject' 297 | - 's3:PutObject' 298 | - 's3:DeleteObject' 299 | - 's3:ListBucket' 300 | Resource: 301 | - !Sub 'arn:aws:s3:::ds-data-bucket-${TeamName}-${EnvType}-*' 302 | - !Sub 'arn:aws:s3:::ds-data-bucket-${TeamName}-${EnvType}-*/*' 303 | - !Sub 'arn:aws:s3:::ds-model-bucket-${TeamName}-${EnvType}-*' 304 | - !Sub 'arn:aws:s3:::ds-model-bucket-${TeamName}-${EnvType}-*/*' 305 | - 'arn:aws:s3:::ds-data-lake*' 306 | - 'arn:aws:s3:::ds-data-lake*/*' 307 | - Effect: Allow 308 | Action: 309 | - 's3:GetBucketLocation' 310 | - 's3:ListBucket*' 311 | - 's3:ListAllMyBuckets' 312 | Resource: '*' 313 | - Effect: Allow 314 | Action: 315 | - 'lambda:InvokeFunction' 316 | Resource: 317 | - 'arn:aws:lambda:*:*:function:*SageMaker*' 318 | - 'arn:aws:lambda:*:*:function:*sagemaker*' 319 | - 'arn:aws:lambda:*:*:function:*Sagemaker*' 320 | - 'arn:aws:lambda:*:*:function:*LabelingFunction*' 321 | - Action: 'iam:CreateServiceLinkedRole' 322 | Effect: Allow 323 | Resource: 'arn:aws:iam::*:role/aws-service-role/sagemaker.application-autoscaling.amazonaws.com/AWSServiceRoleForApplicationAutoScaling_SageMakerEndpoint' 324 | Condition: 325 | StringLike: 326 | 'iam:AWSServiceName': sagemaker.application-autoscaling.amazonaws.com 327 | - Effect: Allow 328 | Action: 329 | - 'sns:Subscribe' 330 | - 'sns:CreateTopic' 331 | Resource: 332 | - 'arn:aws:sns:*:*:*SageMaker*' 333 | - 'arn:aws:sns:*:*:*Sagemaker*' 334 | - 'arn:aws:sns:*:*:*sagemaker*' 335 | - Effect: Allow 336 | Action: 337 | - 'iam:PassRole' 338 | Resource: '*' 339 | Condition: 340 | StringEquals: 341 | 'iam:PassedToService': 342 | - sagemaker.amazonaws.com 343 | 344 | SageMakerStudioUserProfile: 345 | Type: AWS::SageMaker::UserProfile 346 | Properties: 347 | DomainId: 348 | Fn::ImportValue: 349 | !Sub 'ds-sagemaker-studio-${SharedServiceStackSetName}-domain-id' 350 | UserProfileName: !Ref StudioUserProfileName 351 | UserSettings: 352 | ExecutionRole: !GetAtt SageMakerExecRole.Arn 353 | SharingSettings: 354 | NotebookOutputOption: Disabled 355 | SecurityGroups: 356 | - 
Fn::ImportValue:
357 |               !Sub "ds-sagemaker-vpc-sg-${SharedServiceStackSetName}"
358 |           - Fn::ImportValue:
359 |               !Sub "ds-userprofile-sg-${SharedServiceStackSetName}"
360 |       Tags:
361 |         - Key: StackSetName
362 |           Value: !Ref SharedServiceStackSetName
363 |         - Key: TeamName
364 |           Value: !Ref TeamName
365 |         - Key: EnvironmentType
366 |           Value: !Ref EnvType
367 | 
--------------------------------------------------------------------------------
/cloudformation/ds_env_studio_user_profile_v2.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: MIT-0
3 | Description: |
4 |   CloudFormation template for creating SageMaker Studio UserProfiles for a data science project and environment combination.
5 | 
6 | Parameters:
7 |   TeamName:
8 |     Type: String
9 |     AllowedPattern: '[A-Za-z0-9\-]*'
10 |     Description: Please specify your team name. Used as a suffix for team's resource names.
11 | 
12 |   EnvType:
13 |     Description: System Environment. Used as a suffix for team and environment resource names.
14 |     Type: String
15 |     Default: dev
16 | 
17 |   SharedServiceStackSetName:
18 |     Type: String
19 |     Default: DSSharedServices
20 |     Description: Common root name used across shared service cloudformation resources
21 | 
22 |   StudioUserProfileName:
23 |     Description: Studio user profile name
24 |     Type: String
25 | 
26 |   InstanceType:
27 |     Description: The instance type for the SageMaker Studio app. When creating the Jupyter Server app, do not modify the default.
28 |     Type: String
29 |     Default: ml.t3.medium
30 |     AllowedValues:
31 |       - ml.m5.2xlarge
32 |       - ml.m5.4xlarge
33 |       - ml.m5.8xlarge
34 |       - ml.m5.large
35 |       - ml.m5.xlarge
36 |       - ml.p3.16xlarge
37 |       - ml.p3.2xlarge
38 |       - ml.p3.8xlarge
39 |       - ml.t3.2xlarge
40 |       - ml.t3.large
41 |       - ml.t3.medium
42 |       - ml.t3.micro
43 |       - ml.t3.small
44 |       - ml.t3.xlarge
45 |       - system
46 | 
47 |   DeploymentInstanceType:
48 |     Description: Allowed Instance Type to Deploy Models
49 |     Type: String
50 |     Default: ml.c5.large
51 |     AllowedValues:
52 |       - ml.m5.2xlarge
53 |       - ml.m5.4xlarge
54 |       - ml.m5.8xlarge
55 |       - ml.c5.large
56 |       - ml.t3.medium
57 | 
58 | Outputs:
59 |   StudioUserProfileName:
60 |     Description: name of created user profile
61 |     Value: !Ref SageMakerStudioUserProfile
62 | 
63 | #  NotebookUrl:
64 | #    Description: Link to open Notebook
65 | #    Value: !Sub 'https://${AWS::Region}.console.aws.amazon.com/sagemaker/home?region=${AWS::Region}#/notebook-instances/openNotebook/${NotebookInstance.NotebookInstanceName}?view=lab'
66 | 
67 | Resources:
68 |   SageMakerExecRole:
69 |     Type: 'AWS::IAM::Role'
70 |     Properties:
71 |       AssumeRolePolicyDocument:
72 |         Version: 2012-10-17
73 |         Statement:
74 |           - Effect: Allow
75 |             Principal:
76 |               Service:
77 |                 - sagemaker.amazonaws.com
78 |             Action:
79 |               - 'sts:AssumeRole'
80 |       Path: /service-role/
81 |       RoleName: !Sub 'ds-notebook-role-${TeamName}-${EnvType}-${StudioUserProfileName}'
82 |       ManagedPolicyArns:
83 |         - !Ref SageMakerExecPolicy1
84 |         - !Ref SageMakerExecPolicy2
85 |         - 'arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess'
86 |         - 'arn:aws:iam::aws:policy/AWSCodeArtifactReadOnlyAccess'
87 |       Tags:
88 |         - Key: TeamName
89 |           Value: !Ref TeamName
90 |         - Key: EnvironmentType
91 |           Value: !Ref EnvType
92 | 
93 |   SageMakerExecPolicy1:
94 |     Type: 'AWS::IAM::ManagedPolicy'
95 |     Properties:
96 |       ManagedPolicyName: !Sub 'ds-notebook-policy-${TeamName}-${EnvType}-${StudioUserProfileName}-1'
97 |       PolicyDocument:
98 |         Version: 2012-10-17
99 |         Statement:
100
| - Effect: Allow 101 | Action: 102 | - 'ssm:GetParameters' 103 | - 'ssm:GetParameter' 104 | Resource: !Sub 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/ds-*' 105 | - Effect: Allow 106 | Action: 107 | - 'sagemaker:CreateHyperParameterTuningJob' 108 | - 'sagemaker:CreateProcessingJob' 109 | - 'sagemaker:CreateTrainingJob' 110 | - 'sagemaker:CreateAutoMLJob' 111 | - 'sagemaker:CreateTransformJob' 112 | Resource: '*' 113 | Condition: 114 | 'BoolIfExists': 115 | 'sagemaker:InterContainerTrafficEncryption': 'true' 116 | 'Null': 117 | 'sagemaker:VolumeKmsKey': 'false' 118 | 'sagemaker:OutputKmsKey': 'false' 119 | - Effect: Deny 120 | Action: 121 | - 'sagemaker:CreateHyperParameterTuningJob' 122 | - 'sagemaker:CreateProcessingJob' 123 | - 'sagemaker:CreateTrainingJob' 124 | - 'sagemaker:CreateAutoMLJob' 125 | - 'sagemaker:CreateTransformJob' 126 | Resource: '*' 127 | Condition: 128 | 'Null': 129 | 'sagemaker:VpcSubnets': 'true' 130 | - Effect: Allow 131 | Action: 132 | - 'sagemaker:CreateEndpointConfig' 133 | Resource: '*' 134 | Condition: 135 | 'Null': 136 | 'sagemaker:VolumeKmsKey': 'false' 137 | 'ForAllValues:StringLike': 138 | 'sagemaker:InstanceTypes': !Ref DeploymentInstanceType 139 | - Effect: Allow 140 | Action: 141 | - 'sagemaker:*' 142 | Resource: '*' 143 | Condition: 144 | 'ForAllValues:StringEqualsIfExists': 145 | 'sagemaker:VpcSubnets': 146 | - Fn::ImportValue: 147 | !Sub 'ds-subnet1-${SharedServiceStackSetName}' 148 | - Fn::ImportValue: 149 | !Sub 'ds-subnet2-${SharedServiceStackSetName}' 150 | - Fn::ImportValue: 151 | !Sub 'ds-subnet3-${SharedServiceStackSetName}' 152 | 'sagemaker:VpcSecurityGroupIds': 153 | - Fn::ImportValue: 154 | !Sub 'ds-sagemaker-vpc-sg-${SharedServiceStackSetName}' 155 | - Effect: Allow 156 | Action: 157 | - 'application-autoscaling:DeleteScalingPolicy' 158 | - 'application-autoscaling:DeleteScheduledAction' 159 | - 'application-autoscaling:DeregisterScalableTarget' 160 | - 'application-autoscaling:DescribeScalableTargets' 161 | - 'application-autoscaling:DescribeScalingActivities' 162 | - 'application-autoscaling:DescribeScalingPolicies' 163 | - 'application-autoscaling:DescribeScheduledActions' 164 | - 'application-autoscaling:PutScalingPolicy' 165 | - 'application-autoscaling:PutScheduledAction' 166 | - 'application-autoscaling:RegisterScalableTarget' 167 | - 'cloudwatch:DeleteAlarms' 168 | - 'cloudwatch:DescribeAlarms' 169 | - 'cloudwatch:GetMetricData' 170 | - 'cloudwatch:GetMetricStatistics' 171 | - 'cloudwatch:ListMetrics' 172 | - 'cloudwatch:PutMetricAlarm' 173 | - 'cloudwatch:PutMetricData' 174 | - 'ec2:CreateNetworkInterface' 175 | - 'ec2:CreateNetworkInterfacePermission' 176 | - 'ec2:DeleteNetworkInterface' 177 | - 'ec2:DeleteNetworkInterfacePermission' 178 | - 'ec2:DescribeDhcpOptions' 179 | - 'ec2:DescribeNetworkInterfaces' 180 | - 'ec2:DescribeRouteTables' 181 | - 'ec2:DescribeSecurityGroups' 182 | - 'ec2:DescribeSubnets' 183 | - 'ec2:DescribeVpcEndpoints' 184 | - 'ec2:DescribeVpcs' 185 | - 'elastic-inference:Connect' 186 | - 'iam:ListRoles' 187 | - 'lambda:ListFunctions' 188 | - 'logs:CreateLogGroup' 189 | - 'logs:CreateLogStream' 190 | - 'logs:DescribeLogStreams' 191 | - 'logs:GetLogEvents' 192 | - 'logs:PutLogEvents' 193 | - 'sns:ListTopics' 194 | - 'codecommit:BatchGetRepositories' 195 | - 'codecommit:ListRepositories' 196 | Resource: '*' 197 | 198 | SageMakerExecPolicy2: 199 | Type: 'AWS::IAM::ManagedPolicy' 200 | Properties: 201 | ManagedPolicyName: !Sub 'ds-notebook-policy-${TeamName}-${EnvType}-${StudioUserProfileName}-2' 202 
| PolicyDocument: 203 | Version: 2012-10-17 204 | Statement: 205 | - Sid: KMSKeyAccess 206 | Effect: Allow 207 | Action: 208 | - 'kms:CreateGrant' 209 | - 'kms:Decrypt' 210 | - 'kms:DescribeKey' 211 | - 'kms:Encrypt' 212 | - 'kms:ReEncrypt' 213 | - 'kms:GenerateDataKey' 214 | - 'kms:ListAliases' 215 | Resource: 216 | - Fn::ImportValue: 217 | !Sub 'ds-kms-cmk-${TeamName}-${EnvType}-arn' 218 | - Fn::ImportValue: 219 | !Sub 'ds-sagemaker-studio-kms-cmk-${SharedServiceStackSetName}-arn' 220 | - Fn::ImportValue: 221 | !Sub 'ds-data-lake-kms-cmk-${SharedServiceStackSetName}-arn' 222 | - Sid: CodeCommitAccess 223 | Effect: Allow 224 | Action: 225 | - 'codecommit:GitPull' 226 | - 'codecommit:GitPush' 227 | - 'codecommit:*Branch*' 228 | - 'codecommit:*PullRequest*' 229 | - 'codecommit:*Commit*' 230 | - 'codecommit:GetDifferences' 231 | - 'codecommit:GetReferences' 232 | - 'codecommit:GetRepository' 233 | - 'codecommit:GetMerge*' 234 | - 'codecommit:Merge*' 235 | - 'codecommit:DescribeMergeConflicts' 236 | - 'codecommit:*Comment*' 237 | - 'codecommit:*File' 238 | - 'codecommit:GetFolder' 239 | - 'codecommit:GetBlob' 240 | Resource: 241 | - !Sub 'arn:aws:codecommit:${AWS::Region}:${AWS::AccountId}:ds-source-${TeamName}-${EnvType}' 242 | - Sid: ECRAccess 243 | Effect: Allow 244 | Action: 245 | - 'ecr:BatchCheckLayerAvailability' 246 | - 'ecr:GetDownloadUrlForLayer' 247 | - 'ecr:GetRepositoryPolicy' 248 | - 'ecr:DescribeRepositories' 249 | - 'ecr:DescribeImages' 250 | - 'ecr:ListImages' 251 | - 'ecr:BatchGetImage' 252 | - 'ecr:GetLifecyclePolicy' 253 | - 'ecr:GetLifecyclePolicyPreview' 254 | - 'ecr:ListTagsForResource' 255 | - 'ecr:DescribeImageScanFindings' 256 | Resource: 257 | - 'arn:aws:ecr:*:*:repository/*sagemaker*' 258 | - Fn::Join: 259 | - '' 260 | - - 'arn:aws:ecr:*:*:repository/' 261 | - Fn::ImportValue: 262 | !Sub 'ds-shared-ecr-repository-${SharedServiceStackSetName}' 263 | - Sid: ECRAuthTokenAccess 264 | Effect: Allow 265 | Action: 266 | - 'ecr:GetAuthorizationToken' 267 | Resource: "*" 268 | - Effect: Allow 269 | Action: 270 | - 's3:GetObject' 271 | Resource: 272 | - "arn:aws:s3:::sagemaker-*/*" 273 | - Effect: Allow 274 | Action: 275 | - 's3:GetObject' 276 | - 's3:PutObject' 277 | - 's3:DeleteObject' 278 | - 's3:ListBucket' 279 | Resource: 280 | - !Sub 'arn:aws:s3:::ds-data-bucket-${TeamName}-${EnvType}-*' 281 | - !Sub 'arn:aws:s3:::ds-data-bucket-${TeamName}-${EnvType}-*/*' 282 | - !Sub 'arn:aws:s3:::ds-model-bucket-${TeamName}-${EnvType}-*' 283 | - !Sub 'arn:aws:s3:::ds-model-bucket-${TeamName}-${EnvType}-*/*' 284 | - 'arn:aws:s3:::ds-data-lake*' 285 | - 'arn:aws:s3:::ds-data-lake*/*' 286 | - Effect: Allow 287 | Action: 288 | - 's3:GetBucketLocation' 289 | - 's3:ListBucket*' 290 | - 's3:ListAllMyBuckets' 291 | Resource: '*' 292 | - Effect: Allow 293 | Action: 294 | - 'lambda:InvokeFunction' 295 | Resource: 296 | - 'arn:aws:lambda:*:*:function:*SageMaker*' 297 | - 'arn:aws:lambda:*:*:function:*sagemaker*' 298 | - 'arn:aws:lambda:*:*:function:*Sagemaker*' 299 | - 'arn:aws:lambda:*:*:function:*LabelingFunction*' 300 | - Action: 'iam:CreateServiceLinkedRole' 301 | Effect: Allow 302 | Resource: 'arn:aws:iam::*:role/aws-service-role/sagemaker.application-autoscaling.amazonaws.com/AWSServiceRoleForApplicationAutoScaling_SageMakerEndpoint' 303 | Condition: 304 | StringLike: 305 | 'iam:AWSServiceName': sagemaker.application-autoscaling.amazonaws.com 306 | - Effect: Allow 307 | Action: 308 | - 'sns:Subscribe' 309 | - 'sns:CreateTopic' 310 | Resource: 311 | - 'arn:aws:sns:*:*:*SageMaker*' 312 | - 
'arn:aws:sns:*:*:*Sagemaker*' 313 | - 'arn:aws:sns:*:*:*sagemaker*' 314 | - Effect: Allow 315 | Action: 316 | - 'iam:PassRole' 317 | Resource: '*' 318 | Condition: 319 | StringEquals: 320 | 'iam:PassedToService': 321 | - sagemaker.amazonaws.com 322 | 323 | SageMakerStudioUserProfile: 324 | Type: AWS::SageMaker::UserProfile 325 | Properties: 326 | DomainId: 327 | Fn::ImportValue: 328 | !Sub 'ds-sagemaker-studio-${SharedServiceStackSetName}-domain-id' 329 | UserProfileName: !Ref StudioUserProfileName 330 | UserSettings: 331 | ExecutionRole: !GetAtt SageMakerExecRole.Arn 332 | SharingSettings: 333 | NotebookOutputOption: Disabled 334 | SecurityGroups: 335 | - Fn::ImportValue: 336 | !Sub "ds-sagemaker-vpc-sg-${SharedServiceStackSetName}" 337 | - Fn::ImportValue: 338 | !Sub "ds-userprofile-sg-${SharedServiceStackSetName}" 339 | Tags: 340 | - Key: StackSetName 341 | Value: !Ref SharedServiceStackSetName 342 | - Key: TeamName 343 | Value: !Ref TeamName 344 | - Key: EnvironmentType 345 | Value: !Ref EnvType 346 | -------------------------------------------------------------------------------- /cloudformation/ds_environment.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | # The following template is designed to provision and configure a secure environment for data science teams. 5 | # This template creates team and environment specific resources: a KMS CMK, a data science administrator and 6 | # data scientist role, and an S3 bucket. 7 | # Lastly, the template stores outputs into Parameter Store so they can be referenced later by SC products. 8 | Description: Data Science Environment 9 | 10 | Parameters: 11 | TeamName: 12 | Type: String 13 | AllowedPattern: '[a-z0-9\-]*' 14 | Description: Please specify your Team Name. Used as a suffix for team resource names. Mandatory LOWER CASE. 15 | 16 | EnvType: 17 | Description: >- 18 | Please specify the target Environment. Used for tagging and resource 19 | names. Mandatory LOWER CASE. 
20 | Type: String 21 | AllowedPattern: '[a-z0-9\-]*' 22 | Default: dev 23 | 24 | SharedServiceStackSetName: 25 | Type: String 26 | Default: DSSharedServices 27 | Description: Common root name used across shared service cloudformation resources 28 | 29 | Outputs: 30 | AssumeDataScienceAdminRole: 31 | Description: URL for assuming the role of a data science administrator 32 | Value: !GetAtt DSEnvironmentPrincipals.Outputs.AssumeDataScienceAdminRole 33 | 34 | AssumeDataScientistUserRole: 35 | Description: URL for assuming the role of a data science user 36 | Value: !GetAtt DSEnvironmentPrincipals.Outputs.AssumeDataScientistUserRole 37 | 38 | Resources: 39 | DSEnvironmentPrincipals: 40 | Type: AWS::CloudFormation::Stack 41 | Properties: 42 | Parameters: 43 | TeamName: !Ref TeamName 44 | EnvType: !Ref EnvType 45 | TemplateURL: ds_env_principals.yaml 46 | Tags: 47 | - Key: TeamName 48 | Value: !Ref TeamName 49 | - Key: EnvironmentType 50 | Value: !Ref EnvType 51 | 52 | DSEnvironmentBackingStore: 53 | Type: AWS::CloudFormation::Stack 54 | DependsOn: 55 | - DSEnvironmentPrincipals 56 | Properties: 57 | Parameters: 58 | SharedServiceStackSetName: !Ref SharedServiceStackSetName 59 | TeamName: !Ref TeamName 60 | EnvType: !Ref EnvType 61 | TemplateURL: ds_env_backing_store.yaml 62 | Tags: 63 | - Key: TeamName 64 | Value: !Ref TeamName 65 | - Key: EnvironmentType 66 | Value: !Ref EnvType 67 | 68 | DSEnvironmentResourceGroup: 69 | Type: "AWS::ResourceGroups::Group" 70 | Properties: 71 | Name: !Sub 'ds-${TeamName}-${EnvType}-resource-group' 72 | Description: !Sub 'AWS Resources belonging to ${TeamName} in its ${EnvType} environment.' 73 | ResourceQuery: 74 | Type: "TAG_FILTERS_1_0" 75 | Query: 76 | ResourceTypeFilters: 77 | - "AWS::AllSupported" 78 | TagFilters: 79 | - Key: "TeamName" 80 | Values: 81 | - !Sub '${TeamName}' 82 | - Key: "EnvironmentType" 83 | Values: 84 | - !Sub '${EnvType}' 85 | Tags: 86 | - Key: TeamName 87 | Value: !Ref TeamName 88 | - Key: EnvironmentType 89 | Value: !Ref EnvType -------------------------------------------------------------------------------- /cloudformation/ds_shared_code_artifact.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | Description: | 4 | Create a shared CodeArtifact repository to host Python modules from the public PyPI repository (https://pypi.org/) for 5 | a data science project team. 
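#
# Illustrative note (not part of the template): once this stack is deployed, a client
# can point pip at the shared repository; package requests that miss in the shared
# repo are resolved through the upstream repository's external connection to public
# PyPI. A minimal sketch, assuming the default parameter values below and an AWS CLI
# v2 recent enough to support codeartifact commands:
#
#   aws codeartifact login --tool pip \
#       --domain ds-domain \
#       --domain-owner <your-aws-account-id> \
#       --repository ds-shared-repo
#
# (code_artifact_login.sh at the repository root performs this same login.)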
6 | 7 | Parameters: 8 | StackSetName: 9 | Type: String 10 | Description: A name to be used across nested stacks 11 | 12 | CodeArtifactDomainName: 13 | Type: String 14 | Default: ds-domain 15 | Description: CodeArtifact shared data science domain name 16 | 17 | CodeArtifactRepositoryName: 18 | Type: String 19 | Default: ds-shared-repo 20 | Description: CodeArtifact shared data science repository name 21 | 22 | CodeArtifactPublicPyPIRepositoryName: 23 | Type: String 24 | Default: ds-public-upstream-repo 25 | Description: CodeArtifact upstream repository name with external connection to public PyPI 26 | 27 | Outputs: 28 | SharedCodeArtifactDomain: 29 | Description: The domain of the Shared CodeArtifact 30 | Value: !Ref CodeArtifactDomainName 31 | Export: 32 | Name: !Sub 'ds-shared-code-artifact-domain-${StackSetName}' 33 | 34 | SharedCodeArtifactRepository: 35 | Description: The shared CodeArtifact repository 36 | Value: !Ref CodeArtifactRepositoryName 37 | Export: 38 | Name: !Sub 'ds-shared-code-artifact-repository-${StackSetName}' 39 | 40 | Resources: 41 | SharedDSCodeArtifactDomain: 42 | Type: 'AWS::CodeArtifact::Domain' 43 | Properties: 44 | DomainName: !Ref CodeArtifactDomainName 45 | 46 | DSCodeArtifactPublicRepository: 47 | Type: 'AWS::CodeArtifact::Repository' 48 | Properties: 49 | Description: Upstream repository for shared data science repository with external connection to public PyPI repository 50 | RepositoryName: !Ref CodeArtifactPublicPyPIRepositoryName 51 | DomainName: !GetAtt SharedDSCodeArtifactDomain.Name 52 | ExternalConnections: 53 | - public:pypi 54 | 55 | SharedDSCodeArtifactRepository: 56 | Type: 'AWS::CodeArtifact::Repository' 57 | Properties: 58 | Description: Shared asset repository to host Python packages and modules for data science projects 59 | RepositoryName: !Ref CodeArtifactRepositoryName 60 | DomainName: !GetAtt SharedDSCodeArtifactDomain.Name 61 | Upstreams: 62 | - !GetAtt DSCodeArtifactPublicRepository.Name 63 | 64 | SSMCodeArtifactDomain: 65 | Type: 'AWS::SSM::Parameter' 66 | Properties: 67 | Name: "ds-codeartifact-domain-name" 68 | Type: String 69 | Value: !GetAtt SharedDSCodeArtifactDomain.Name 70 | Description: Data Science CodeArtifact Domain Name 71 | 72 | SSMCodeArtifactDomainOwner: 73 | Type: 'AWS::SSM::Parameter' 74 | Properties: 75 | Name: "ds-codeartifact-domain-owner" 76 | Type: String 77 | Value: !Ref "AWS::AccountId" 78 | Description: Data Science CodeArtifact Domain Owner, i.e. the AWS account ID 79 | 80 | SSMCodeArtifactRepository: 81 | Type: 'AWS::SSM::Parameter' 82 | Properties: 83 | Name: "ds-codeartifact-repository" 84 | Type: String 85 | Value: !GetAtt SharedDSCodeArtifactRepository.Name 86 | Description: Data Science CodeArtifact Repository Name -------------------------------------------------------------------------------- /cloudformation/ds_shared_data_lake.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | # This template creates a shared S3 bucket to serve as a data lake for data science teams. 5 | Description: Shared S3 data storage to serve as a data lake for data science teams. 
6 | 7 | Parameters: 8 | SharedServiceStackSetName: 9 | Type: String 10 | Description: Common root name used across shared service cloudformation resources 11 | 12 | Outputs: 13 | DataLakeKMSCMK: 14 | Description: KMS Key ARN for the shared data lake S3 bucket 15 | Value: !GetAtt KMSCMK.Arn 16 | Export: 17 | Name: !Sub 'ds-data-lake-kms-cmk-${SharedServiceStackSetName}-arn' 18 | 19 | DataLakeBucket: 20 | Description: Shared service data lake S3 bucket name 21 | Value: !Ref DataLakeBucket 22 | Export: 23 | Name: !Sub 'ds-s3-data-lake-${SharedServiceStackSetName}' 24 | 25 | Resources: 26 | KMSCMK: 27 | Type: 'AWS::KMS::Key' 28 | Properties: 29 | Description: KMS key for S3 data lake bucket 30 | EnableKeyRotation: true 31 | KeyPolicy: 32 | Id: key-policy-1 33 | Version: 2012-10-17 34 | Statement: 35 | - Sid: Enable IAM User Permissions 36 | Effect: Allow 37 | Principal: 38 | AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root" 39 | Action: 'kms:*' 40 | Resource: '*' 41 | - Sid: Allow access for Key Administrators 42 | Effect: Allow 43 | Principal: 44 | AWS: 45 | Fn::ImportValue: 46 | !Sub "ds-administrator-role-${SharedServiceStackSetName}-arn" 47 | Action: 48 | - 'kms:Create*' 49 | - 'kms:Describe*' 50 | - 'kms:Enable*' 51 | - 'kms:List*' 52 | - 'kms:Put*' 53 | - 'kms:Update*' 54 | - 'kms:Revoke*' 55 | - 'kms:Disable*' 56 | - 'kms:Get*' 57 | - 'kms:Delete*' 58 | - 'kms:TagResource' 59 | - 'kms:UntagResource' 60 | - 'kms:ScheduleKeyDeletion' 61 | - 'kms:CancelKeyDeletion' 62 | Resource: '*' 63 | - Sid: Allow access for Key Users 64 | Effect: Allow 65 | Principal: 66 | AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root" 67 | Action: 68 | - 'kms:Encrypt' 69 | - 'kms:Decrypt' 70 | - 'kms:CreateGrant' 71 | - 'kms:ReEncrypt*' 72 | - 'kms:GenerateDataKey' 73 | - 'kms:DescribeKey' 74 | Resource: '*' 75 | Condition: 76 | StringNotEquals: 77 | 'aws:SourceVpce': 78 | Fn::ImportValue: 79 | !Sub "ds-s3-endpoint-${SharedServiceStackSetName}-id" 80 | Tags: 81 | - Key: SharedServiceStackSetName 82 | Value: !Ref SharedServiceStackSetName 83 | 84 | KMSCMKAlias: 85 | Type: 'AWS::KMS::Alias' 86 | Properties: 87 | AliasName: !Sub "alias/ds-s3-data-lake-kms-cmk-${SharedServiceStackSetName}" 88 | TargetKeyId: !Ref KMSCMK 89 | 90 | KMSCMKArn: 91 | Type: 'AWS::SSM::Parameter' 92 | Properties: 93 | Name: !Sub "ds-s3-data-lake-kms-cmk-${SharedServiceStackSetName}-arn" 94 | Type: String 95 | Value: !GetAtt 96 | - KMSCMK 97 | - Arn 98 | Description: Shared data lake S3 KMS CMK ARN 99 | 100 | DataLakeBucket: 101 | Type: 'AWS::S3::Bucket' 102 | Properties: 103 | BucketName: 104 | !Join 105 | - '' 106 | - 107 | - 'ds-data-lake-' 108 | - !Select 109 | - 4 110 | - !Split 111 | - '-' 112 | - !Select 113 | - 2 114 | - !Split 115 | - / 116 | - !Ref 'AWS::StackId' 117 | PublicAccessBlockConfiguration: 118 | BlockPublicAcls: TRUE 119 | BlockPublicPolicy: TRUE 120 | IgnorePublicAcls: TRUE 121 | RestrictPublicBuckets: TRUE 122 | BucketEncryption: 123 | ServerSideEncryptionConfiguration: 124 | - ServerSideEncryptionByDefault: 125 | SSEAlgorithm: 'aws:kms' 126 | KMSMasterKeyID: !Ref KMSCMK 127 | Tags: 128 | - Key: SharedServiceStackSetName 129 | Value: !Ref SharedServiceStackSetName 130 | 131 | S3DataLakeBucketNameSSMParameter: 132 | Type: 'AWS::SSM::Parameter' 133 | Properties: 134 | Name: !Sub "ds-s3-data-lake-bucket-${SharedServiceStackSetName}" 135 | Type: String 136 | Value: !Ref DataLakeBucket 137 | Description: Shared S3 data lake bucket name for data science teams. 
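# Note on the DataLakeBucket name above: the nested !Select/!Split intrinsic
# functions derive a unique suffix from this stack's ID. For a hypothetical
# stack ID of
#   arn:aws:cloudformation:us-east-1:111122223333:stack/data-lake/9e6f0a10-54f2-11eb-b263-0a0b1c2d3e4f
# splitting on '/' and selecting index 2 yields the stack GUID, then splitting
# the GUID on '-' and selecting index 4 yields '0a0b1c2d3e4f', so the bucket
# would be named ds-data-lake-0a0b1c2d3e4f.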
138 | -------------------------------------------------------------------------------- /cloudformation/ds_shared_ecr.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | Description: | 4 | Create a shared service ECR repository. 5 | 6 | Parameters: 7 | SharedServiceStackSetName: 8 | Type: String 9 | Description: Common root name used across shared service cloudformation resources 10 | 11 | ECRRepositoryName: 12 | Type: String 13 | AllowedPattern: '(?:[a-z0-9]+(?:[._-][a-z0-9]+)*/)*[a-z0-9]+(?:[._-][a-z0-9]+)*' 14 | Description: Shared ECR Repository name for data science projects, see the Amazon ECR documentation for details on allowed patterns for names 15 | 16 | Outputs: 17 | DSECRRepositoryName: 18 | Description: The name of the Data Science Shared ECR Repository 19 | Value: !Ref ECRRepositoryName 20 | Export: 21 | Name: !Sub 'ds-shared-ecr-repository-${SharedServiceStackSetName}' 22 | 23 | Resources: 24 | DSECRRepository: 25 | Type: 'AWS::ECR::Repository' 26 | Properties: 27 | EncryptionConfiguration: 28 | EncryptionType: 'KMS' 29 | ImageScanningConfiguration: 30 | ScanOnPush: "true" 31 | ImageTagMutability: 'MUTABLE' 32 | RepositoryName: !Ref ECRRepositoryName 33 | # RepositoryPolicyText: | 34 | # { 35 | # "Version":"2012-10-17", 36 | # "Statement": 37 | # [ 38 | # { 39 | # "Sid": "FullECRAdminToDSAdmin", 40 | # "Effect": "Allow", 41 | # "Principal": { 42 | # "AWS": [ 43 | # { "Fn::Sub": "arn:aws:iam::${AWS::AccountId}:role/ds-admin-role*" } 44 | # ] 45 | # }, 46 | # "Action": [ 47 | # "ecr:*" 48 | # ] 49 | # }, 50 | # { 51 | # "Sid": "AllowPullToDataScientist", 52 | # "Effect": "Allow", 53 | # "Principal": { 54 | # "AWS": [ 55 | # { "Fn::Sub": "arn:aws:iam::${AWS::AccountId}:role/service-role/ds-notebook-role*" }, 56 | # { "Fn::Sub": "arn:aws:iam::${AWS::AccountId}:role/ds-user-role*" } 57 | # ] 58 | # }, 59 | # "Action": [ 60 | # "ecr:BatchCheckLayerAvailability", 61 | # "ecr:GetDownloadUrlForLayer", 62 | # "ecr:GetRepositoryPolicy", 63 | # "ecr:DescribeRepositories", 64 | # "ecr:DescribeImages", 65 | # "ecr:ListImages", 66 | # "ecr:BatchGetImage", 67 | # "ecr:GetLifecyclePolicy", 68 | # "ecr:GetLifecyclePolicyPreview", 69 | # "ecr:ListTagsForResource", 70 | # "ecr:DescribeImageScanFindings" 71 | # ] 72 | # } 73 | # ] 74 | # } 75 | Tags: 76 | - Key: SharedServiceStackSetName 77 | Value: !Ref SharedServiceStackSetName 78 | -------------------------------------------------------------------------------- /cloudformation/ds_shared_sagemaker_network.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | Description: | 5 | Create a secure VPC designed to host SageMaker Studio and data science project teams. 
6 | 7 | Parameters: 8 | SharedServiceStackSetName: 9 | Type: String 10 | Description: The data science shared services stack set name 11 | 12 | SageMakerStudioVpcCIDR: 13 | Type: String 14 | Default: 10.2.0.0/16 15 | Description: CIDR range for SageMaker Studio VPC 16 | 17 | SageMakerStudioSubnet1CIDR: 18 | Type: String 19 | Default: 10.2.1.0/24 20 | Description: CIDR range for SageMaker Studio Subnet A 21 | 22 | SageMakerStudioSubnet2CIDR: 23 | Type: String 24 | Default: 10.2.2.0/24 25 | Description: CIDR range for SageMaker Studio Subnet B 26 | 27 | SageMakerStudioSubnet3CIDR: 28 | Type: String 29 | Default: 10.2.3.0/24 30 | Description: CIDR range for SageMaker Studio Subnet C 31 | 32 | Outputs: 33 | S3VPCEndpointId: 34 | Description: The ID of the S3 VPC Endpoint 35 | Value: !Ref VPCEndpointS3 36 | Export: 37 | Name: !Sub "ds-s3-endpoint-${SharedServiceStackSetName}-id" 38 | 39 | CodeArtifactAPIEndpointDNS: 40 | Description: DNS entries for the CodeArtifact API VPC endpoint 41 | Value: 42 | Fn::Select: 43 | - 0 44 | - !GetAtt VPCEndpointCodeArtifactApi.DnsEntries 45 | Export: 46 | Name: !Sub 'ds-codeartifact-api-endpoint-${SharedServiceStackSetName}-dns' 47 | 48 | CodeArtifactRepositoryEndpointDNS: 49 | Description: DNS entries for the CodeArtifact Repository VPC endpoint 50 | Value: 51 | Fn::Select: 52 | - 0 53 | - !GetAtt VPCEndpointCodeArtifactRepository.DnsEntries 54 | Export: 55 | Name: !Sub 'ds-codeartifact-repository-endpoint-${SharedServiceStackSetName}-dns' 56 | 57 | SageMakerVPC: 58 | Description: The SageMaker VPC ID 59 | Value: !Ref SageMakerVPC 60 | Export: 61 | Name: !Sub 'ds-vpc-${SharedServiceStackSetName}' 62 | 63 | Subnet1Id: 64 | Description: The ID of the first Subnet 65 | Value: !Ref PrivateSubnetA 66 | Export: 67 | Name: !Sub 'ds-subnet1-${SharedServiceStackSetName}' 68 | 69 | Subnet2Id: 70 | Description: The ID of the second Subnet 71 | Value: !Ref PrivateSubnetB 72 | Export: 73 | Name: !Sub 'ds-subnet2-${SharedServiceStackSetName}' 74 | 75 | Subnet3Id: 76 | Description: The ID of the third Subnet 77 | Value: !Ref PrivateSubnetC 78 | Export: 79 | Name: !Sub 'ds-subnet3-${SharedServiceStackSetName}' 80 | 81 | SageMakerSecurityGroup: 82 | Description: Security Group ID for the SageMaker Resources 83 | Value: !GetAtt SageMakerSecurityGroup.GroupId 84 | Export: 85 | Name: !Sub "ds-sagemaker-vpc-sg-${SharedServiceStackSetName}" 86 | 87 | UserProfileSecurityGroup: 88 | Description: Security Group ID for the User Profile 89 | Value: !GetAtt UserProfileSecurityGroup.GroupId 90 | Export: 91 | Name: !Sub "ds-userprofile-sg-${SharedServiceStackSetName}" 92 | 93 | Mappings: 94 | RegionMap: 95 | us-east-1: 96 | S3BUCKETARN: "arn:aws:s3:::assets-193858265520-us-east-1" 97 | us-east-2: 98 | S3BUCKETARN: "arn:aws:s3:::assets-250872398865-us-east-2" 99 | us-west-2: 100 | S3BUCKETARN: "arn:aws:s3:::assets-787052242323-us-west-2" 101 | eu-west-1: 102 | S3BUCKETARN: "arn:aws:s3:::assets-438097961670-eu-west-1" 103 | eu-west-2: 104 | S3BUCKETARN: "arn:aws:s3:::assets-247805302724-eu-west-2" 105 | eu-west-3: 106 | S3BUCKETARN: "arn:aws:s3:::assets-762466490029-eu-west-3" 107 | eu-north-1: 108 | S3BUCKETARN: "arn:aws:s3:::assets-611884512288-eu-north-1" 109 | eu-south-1: 110 | S3BUCKETARN: "arn:aws:s3:::assets-484130244270-eu-south-1" 111 | eu-central-1: 112 | S3BUCKETARN: "arn:aws:s3:::assets-769407342218-eu-central-1" 113 | ap-northeast-1: 114 | S3BUCKETARN: "arn:aws:s3:::assets-660291247815-ap-northeast-1" 115 | ap-southeast-1: 116 | S3BUCKETARN: 
"arn:aws:s3:::assets-421485864821-ap-southeast-1" 117 | ap-southeast-2: 118 | S3BUCKETARN: "arn:aws:s3:::assets-860415559748-ap-southeast-2" 119 | ap-south-1: 120 | S3BUCKETARN: "arn:aws:s3:::assets-681137435769-ap-south-1" 121 | 122 | Resources: 123 | ######################### 124 | # 125 | # VPC AND SUBNETS 126 | # 127 | ######################### 128 | SageMakerVPC: 129 | Type: 'AWS::EC2::VPC' 130 | Properties: 131 | CidrBlock: !Ref SageMakerStudioVpcCIDR 132 | InstanceTenancy: default 133 | EnableDnsSupport: true 134 | EnableDnsHostnames: true 135 | Tags: 136 | - Key: Name 137 | Value: !Sub "ds-vpc-${SharedServiceStackSetName}" 138 | 139 | SageMakerVPCId: 140 | Type: 'AWS::SSM::Parameter' 141 | Properties: 142 | Name: !Sub "ds-vpc-${SharedServiceStackSetName}-id" 143 | Type: String 144 | Value: !Ref SageMakerVPC 145 | Description: SageMaker VPC ID 146 | 147 | PrivateSubnetA: 148 | Type: 'AWS::EC2::Subnet' 149 | Properties: 150 | VpcId: !Ref SageMakerVPC 151 | CidrBlock: !Ref SageMakerStudioSubnet1CIDR 152 | AvailabilityZone: !Sub "${AWS::Region}a" 153 | Tags: 154 | - Key: Name 155 | Value: !Sub "ds-subnet-a-${SharedServiceStackSetName}" 156 | 157 | PrivateSubnetAId: 158 | Type: 'AWS::SSM::Parameter' 159 | Properties: 160 | Name: !Sub "ds-subnet-a-${SharedServiceStackSetName}-id" 161 | Type: String 162 | Value: !Ref PrivateSubnetA 163 | Description: Private Subnet-A ID 164 | 165 | PrivateSubnetB: 166 | Type: 'AWS::EC2::Subnet' 167 | Properties: 168 | VpcId: !Ref SageMakerVPC 169 | CidrBlock: !Ref SageMakerStudioSubnet2CIDR 170 | AvailabilityZone: !Sub "${AWS::Region}b" 171 | Tags: 172 | - Key: Name 173 | Value: !Sub "ds-subnet-b-${SharedServiceStackSetName}" 174 | 175 | PrivateSubnetBId: 176 | Type: 'AWS::SSM::Parameter' 177 | Properties: 178 | Name: !Sub "ds-subnet-b-${SharedServiceStackSetName}-id" 179 | Type: String 180 | Value: !Ref PrivateSubnetB 181 | Description: Private Subnet-B ID 182 | 183 | PrivateSubnetC: 184 | Type: 'AWS::EC2::Subnet' 185 | Properties: 186 | VpcId: !Ref SageMakerVPC 187 | CidrBlock: !Ref SageMakerStudioSubnet3CIDR 188 | AvailabilityZone: !Sub "${AWS::Region}c" 189 | Tags: 190 | - Key: Name 191 | Value: !Sub "ds-subnet-c-${SharedServiceStackSetName}" 192 | 193 | PrivateSubnetCId: 194 | Type: 'AWS::SSM::Parameter' 195 | Properties: 196 | Name: !Sub "ds-subnet-c-${SharedServiceStackSetName}-id" 197 | Type: String 198 | Value: !Ref PrivateSubnetC 199 | Description: Private Subnet-B ID 200 | 201 | ######################### 202 | # 203 | # ROUTE TABLES 204 | # 205 | ######################### 206 | 207 | PrivateRouteTable: 208 | Type: 'AWS::EC2::RouteTable' 209 | Properties: 210 | VpcId: !Ref SageMakerVPC 211 | Tags: 212 | - Key: Name 213 | Value: !Sub "ds-vpc-rt-${SharedServiceStackSetName}" 214 | 215 | PrivateSubnetRouteTableAssociation1: 216 | Type: 'AWS::EC2::SubnetRouteTableAssociation' 217 | Properties: 218 | RouteTableId: !Ref PrivateRouteTable 219 | SubnetId: !Ref PrivateSubnetA 220 | 221 | PrivateSubnetRouteTableAssociation2: 222 | Type: 'AWS::EC2::SubnetRouteTableAssociation' 223 | Properties: 224 | RouteTableId: !Ref PrivateRouteTable 225 | SubnetId: !Ref PrivateSubnetB 226 | 227 | PrivateSubnetRouteTableAssociation3: 228 | Type: 'AWS::EC2::SubnetRouteTableAssociation' 229 | Properties: 230 | RouteTableId: !Ref PrivateRouteTable 231 | SubnetId: !Ref PrivateSubnetC 232 | 233 | ######################### 234 | # 235 | # SECURITY GROUPS 236 | # 237 | ######################### 238 | 239 | VPCEndpointSecurityGroup: 240 | Type: 'AWS::EC2::SecurityGroup' 241 | 
Properties: 242 | GroupDescription: Allow TLS for VPC Endpoint 243 | VpcId: !Ref SageMakerVPC 244 | SecurityGroupIngress: 245 | - IpProtocol: tcp 246 | FromPort: 443 247 | ToPort: 443 248 | SourceSecurityGroupId: !GetAtt SageMakerSecurityGroup.GroupId 249 | Tags: 250 | - Key: Name 251 | Value: !Sub "ds-vpc-https-ingress-sg-${SharedServiceStackSetName}" 252 | 253 | SageMakerSecurityGroup: 254 | Type: 'AWS::EC2::SecurityGroup' 255 | Properties: 256 | GroupDescription: 'Security Group for SageMaker Studio Notebook, Training Job and Hosting Endpoint' 257 | VpcId: !Ref SageMakerVPC 258 | Tags: 259 | - Key: Name 260 | Value: !Sub "ds-sagemaker-vpc-sg-${SharedServiceStackSetName}" 261 | 262 | SageMakerSecurityGroupId: 263 | Type: 'AWS::SSM::Parameter' 264 | Properties: 265 | Name: !Sub "ds-sagemaker-vpc-sg-${SharedServiceStackSetName}-id" 266 | Type: String 267 | Value: !GetAtt SageMakerSecurityGroup.GroupId 268 | Description: SageMaker Security Group ID 269 | 270 | SharedServicesVPCEndpointSecurityGroup: 271 | Type: 'AWS::EC2::SecurityGroup' 272 | Properties: 273 | GroupDescription: Allow HTTP for VPC Endpoint 274 | VpcId: !Ref SageMakerVPC 275 | SecurityGroupIngress: 276 | - IpProtocol: tcp 277 | FromPort: 80 278 | ToPort: 80 279 | SourceSecurityGroupId: !GetAtt SageMakerSecurityGroup.GroupId 280 | Tags: 281 | - Key: Name 282 | Value: !Sub "ds-vpc-http-ingress-sg-${SharedServiceStackSetName}" 283 | 284 | UserProfileSecurityGroup: 285 | Type: 'AWS::EC2::SecurityGroup' 286 | Properties: 287 | GroupDescription: 'Security Group for SageMaker Studio User Profile.' 288 | VpcId: !Ref SageMakerVPC 289 | Tags: 290 | - Key: Name 291 | Value: !Sub "ds-userprofile-sg-${SharedServiceStackSetName}" 292 | 293 | UserProfileSecurityGroupIngress: 294 | Type: AWS::EC2::SecurityGroupIngress 295 | Properties: 296 | IpProtocol: '-1' 297 | GroupId: !Ref UserProfileSecurityGroup 298 | SourceSecurityGroupId: !Ref UserProfileSecurityGroup 299 | 300 | UserProfileSecurityGroupId: 301 | Type: 'AWS::SSM::Parameter' 302 | Properties: 303 | Name: !Sub "ds-userprofile-sg-${SharedServiceStackSetName}-id" 304 | Type: String 305 | Value: !GetAtt UserProfileSecurityGroup.GroupId 306 | Description: SageMaker User Profile Security Group ID per user 307 | 308 | ######################### 309 | # 310 | # VPC ENDPOINTS 311 | # 312 | ######################### 313 | VPCEndpointS3: 314 | Type: 'AWS::EC2::VPCEndpoint' 315 | Properties: 316 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.s3' 317 | VpcEndpointType: Gateway 318 | VpcId: !Ref SageMakerVPC 319 | PolicyDocument: 320 | !Sub 321 | - | 322 | { 323 | "Version":"2012-10-17", 324 | "Statement":[{ 325 | "Effect":"Allow", 326 | "Principal": "*", 327 | "Action":[ 328 | "s3:GetObject", 329 | "s3:PutObject", 330 | "s3:ListBucket" 331 | ], 332 | "Resource":[ 333 | "arn:aws:s3:::ds-model-bucket-*", 334 | "arn:aws:s3:::ds-data-bucket-*", 335 | "arn:aws:s3:::ds-model-bucket-*/*", 336 | "arn:aws:s3:::ds-data-bucket-*/*", 337 | "arn:aws:s3:::*ds-data-lake*", 338 | "arn:aws:s3:::*ds-data-lake*/*" 339 | ] 340 | }, 341 | { 342 | "Sid": "S3PolicyForCodeArtifact", 343 | "Principal": "*", 344 | "Action": [ 345 | "s3:GetObject" 346 | ], 347 | "Effect": "Allow", 348 | "Resource": ["${CODE_ARTIFACT_ASSET_S3_BUCKET}/*"] 349 | }, 350 | { 351 | "Sid": "S3AccessToEcrSpecificBucket", 352 | "Principal": "*", 353 | "Action": [ 354 | "s3:GetObject" 355 | ], 356 | "Effect": "Allow", 357 | "Resource": ["arn:aws:s3:::prod-${AWS::Region}-starport-layer-bucket/*"] 358 | } 359 | ] 360 | } 361 | - 
CODE_ARTIFACT_ASSET_S3_BUCKET: 362 | Fn::FindInMap: 363 | - RegionMap 364 | - !Ref 'AWS::Region' 365 | - S3BUCKETARN 366 | RouteTableIds: 367 | - !Ref PrivateRouteTable 368 | 369 | VPCEndpointS3Id: 370 | Type: 'AWS::SSM::Parameter' 371 | Properties: 372 | Name: !Sub "ds-s3-endpoint-${SharedServiceStackSetName}-id" 373 | Type: String 374 | Value: !Ref VPCEndpointS3 375 | Description: S3 VPC Endpoint ID 376 | 377 | VPCEndpointSSM: 378 | Type: 'AWS::EC2::VPCEndpoint' 379 | Properties: 380 | VpcEndpointType: Interface 381 | PrivateDnsEnabled: true 382 | SubnetIds: 383 | - !Ref PrivateSubnetA 384 | - !Ref PrivateSubnetB 385 | - !Ref PrivateSubnetC 386 | SecurityGroupIds: 387 | - !Ref VPCEndpointSecurityGroup 388 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.ssm' 389 | VpcId: !Ref SageMakerVPC 390 | 391 | VPCEndpointCW: 392 | Type: 'AWS::EC2::VPCEndpoint' 393 | Properties: 394 | PolicyDocument: 395 | Version: 2012-10-17 396 | Statement: 397 | - Effect: Allow 398 | Principal: '*' 399 | Action: '*' 400 | Resource: '*' 401 | VpcEndpointType: Interface 402 | PrivateDnsEnabled: true 403 | SubnetIds: 404 | - !Ref PrivateSubnetA 405 | - !Ref PrivateSubnetB 406 | - !Ref PrivateSubnetC 407 | SecurityGroupIds: 408 | - !Ref VPCEndpointSecurityGroup 409 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.monitoring' 410 | VpcId: !Ref SageMakerVPC 411 | 412 | VPCEndpointCWL: 413 | Type: 'AWS::EC2::VPCEndpoint' 414 | Properties: 415 | PolicyDocument: 416 | Version: 2012-10-17 417 | Statement: 418 | - Effect: Allow 419 | Principal: '*' 420 | Action: '*' 421 | Resource: '*' 422 | VpcEndpointType: Interface 423 | PrivateDnsEnabled: true 424 | SubnetIds: 425 | - !Ref PrivateSubnetA 426 | - !Ref PrivateSubnetB 427 | - !Ref PrivateSubnetC 428 | SecurityGroupIds: 429 | - !Ref VPCEndpointSecurityGroup 430 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.logs' 431 | VpcId: !Ref SageMakerVPC 432 | 433 | VPCEndpointSagemakerAPI: 434 | Type: 'AWS::EC2::VPCEndpoint' 435 | Properties: 436 | PolicyDocument: 437 | Version: 2012-10-17 438 | Statement: 439 | - Effect: Allow 440 | Principal: '*' 441 | Action: '*' 442 | Resource: '*' 443 | VpcEndpointType: Interface 444 | PrivateDnsEnabled: true 445 | SubnetIds: 446 | - !Ref PrivateSubnetA 447 | - !Ref PrivateSubnetB 448 | - !Ref PrivateSubnetC 449 | SecurityGroupIds: 450 | - !Ref VPCEndpointSecurityGroup 451 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sagemaker.api' 452 | VpcId: !Ref SageMakerVPC 453 | 454 | VPCEndpointSageMakerRuntime: 455 | Type: 'AWS::EC2::VPCEndpoint' 456 | Properties: 457 | PolicyDocument: 458 | Version: 2012-10-17 459 | Statement: 460 | - Effect: Allow 461 | Principal: '*' 462 | Action: '*' 463 | Resource: '*' 464 | VpcEndpointType: Interface 465 | PrivateDnsEnabled: true 466 | SubnetIds: 467 | - !Ref PrivateSubnetA 468 | - !Ref PrivateSubnetB 469 | - !Ref PrivateSubnetC 470 | SecurityGroupIds: 471 | - !Ref VPCEndpointSecurityGroup 472 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sagemaker.runtime' 473 | VpcId: !Ref SageMakerVPC 474 | 475 | VPCEndpointSageMakerNotebook: 476 | Type: 'AWS::EC2::VPCEndpoint' 477 | Properties: 478 | PolicyDocument: 479 | Version: 2012-10-17 480 | Statement: 481 | - Effect: Allow 482 | Principal: '*' 483 | Action: '*' 484 | Resource: '*' 485 | VpcEndpointType: Interface 486 | PrivateDnsEnabled: true 487 | SubnetIds: 488 | - !Ref PrivateSubnetA 489 | - !Ref PrivateSubnetB 490 | - !Ref PrivateSubnetC 491 | SecurityGroupIds: 492 | - !Ref VPCEndpointSecurityGroup 493 | ServiceName: !Sub 
'aws.sagemaker.${AWS::Region}.notebook' 494 | VpcId: !Ref SageMakerVPC 495 | 496 | VPCEndpointSTS: 497 | Type: 'AWS::EC2::VPCEndpoint' 498 | Properties: 499 | PolicyDocument: 500 | Version: 2012-10-17 501 | Statement: 502 | - Effect: Allow 503 | Principal: '*' 504 | Action: '*' 505 | Resource: '*' 506 | VpcEndpointType: Interface 507 | PrivateDnsEnabled: true 508 | SubnetIds: 509 | - !Ref PrivateSubnetA 510 | - !Ref PrivateSubnetB 511 | - !Ref PrivateSubnetC 512 | SecurityGroupIds: 513 | - !Ref VPCEndpointSecurityGroup 514 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.sts' 515 | VpcId: !Ref SageMakerVPC 516 | 517 | VPCEndpointCodeCommit: 518 | Type: 'AWS::EC2::VPCEndpoint' 519 | Properties: 520 | PolicyDocument: 521 | Version: 2012-10-17 522 | Statement: 523 | - Effect: Allow 524 | Principal: '*' 525 | Action: '*' 526 | Resource: '*' 527 | VpcEndpointType: Interface 528 | PrivateDnsEnabled: true 529 | SubnetIds: 530 | - !Ref PrivateSubnetA 531 | - !Ref PrivateSubnetB 532 | - !Ref PrivateSubnetC 533 | SecurityGroupIds: 534 | - !Ref VPCEndpointSecurityGroup 535 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.git-codecommit' 536 | VpcId: !Ref SageMakerVPC 537 | 538 | VPCEndpointCodeCommitAPI: 539 | Type: 'AWS::EC2::VPCEndpoint' 540 | Properties: 541 | PolicyDocument: 542 | Version: 2012-10-17 543 | Statement: 544 | - Effect: Allow 545 | Principal: '*' 546 | Action: '*' 547 | Resource: '*' 548 | VpcEndpointType: Interface 549 | PrivateDnsEnabled: true 550 | SubnetIds: 551 | - !Ref PrivateSubnetA 552 | - !Ref PrivateSubnetB 553 | - !Ref PrivateSubnetC 554 | SecurityGroupIds: 555 | - !Ref VPCEndpointSecurityGroup 556 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.codecommit' 557 | VpcId: !Ref SageMakerVPC 558 | 559 | VPCEndpointCodeArtifactApi: 560 | Type: 'AWS::EC2::VPCEndpoint' 561 | Properties: 562 | PolicyDocument: 563 | Version: 2012-10-17 564 | Statement: 565 | - Effect: Allow 566 | Principal: '*' 567 | Action: 568 | - 'codeartifact:Describe*' 569 | - 'codeartifact:Get*' 570 | - 'codeartifact:List*' 571 | - 'codeartifact:ReadFromRepository' 572 | Resource: '*' 573 | # - !Sub 574 | # - 'arn:aws:codeartifact:*:*:domain/${CODE_ARTIFACT_DOMAIN_NAME}' 575 | # - CODE_ARTIFACT_DOMAIN_NAME: 576 | # Fn::ImportValue: !Sub 'ds-shared-code-artifact-domain-${SharedServiceStackSetName}' 577 | - Effect: Allow 578 | Principal: '*' 579 | Action: 'sts:GetServiceBearerToken' 580 | Resource: '*' 581 | Condition: 582 | StringEquals: 583 | 'sts:AWSServiceName': 'codeartifact.amazonaws.com' 584 | VpcEndpointType: Interface 585 | PrivateDnsEnabled: false 586 | SubnetIds: 587 | - !Ref PrivateSubnetA 588 | - !Ref PrivateSubnetB 589 | - !Ref PrivateSubnetC 590 | SecurityGroupIds: 591 | - !Ref VPCEndpointSecurityGroup 592 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.codeartifact.api' 593 | VpcId: !Ref SageMakerVPC 594 | 595 | SSMCodeArtifactApiVPCEndpointDNS: 596 | Type: 'AWS::SSM::Parameter' 597 | Properties: 598 | Name: "ds-codeartifact-api-dns" 599 | Type: String 600 | Value: 601 | Fn::Select: 602 | - 1 603 | - Fn::Split: 604 | - ":" 605 | - Fn::Select: 606 | - 0 607 | - !GetAtt VPCEndpointCodeArtifactApi.DnsEntries 608 | Description: Data Science CodeArtifact Service API VPC Endpoint DNS name 609 | 610 | VPCEndpointCodeArtifactRepository: 611 | Type: 'AWS::EC2::VPCEndpoint' 612 | Properties: 613 | PolicyDocument: 614 | Version: 2012-10-17 615 | Statement: 616 | - Effect: Allow 617 | Principal: '*' 618 | Action: 619 | - 'codeartifact:Describe*' 620 | - 'codeartifact:Get*' 621 | - 'codeartifact:List*' 
622 | - 'codeartifact:ReadFromRepository' 623 | Resource: '*' 624 | # - !Sub 625 | # - 'arn:aws:codeartifact:*:*:repository/${CODE_ARTIFACT_DOMAIN_NAME}/*' 626 | # - CODE_ARTIFACT_DOMAIN_NAME: 627 | # Fn::ImportValue: !Sub 'ds-shared-code-artifact-domain-${SharedServiceStackSetName}' 628 | VpcEndpointType: Interface 629 | PrivateDnsEnabled: true 630 | SubnetIds: 631 | - !Ref PrivateSubnetA 632 | - !Ref PrivateSubnetB 633 | - !Ref PrivateSubnetC 634 | SecurityGroupIds: 635 | - !Ref VPCEndpointSecurityGroup 636 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.codeartifact.repositories' 637 | VpcId: !Ref SageMakerVPC 638 | 639 | VPCEndpointECR: 640 | Type: 'AWS::EC2::VPCEndpoint' 641 | Properties: 642 | PolicyDocument: 643 | Version: 2012-10-17 644 | Statement: 645 | - Effect: Allow 646 | Principal: '*' 647 | Action: '*' 648 | Resource: '*' 649 | VpcEndpointType: Interface 650 | PrivateDnsEnabled: true 651 | SubnetIds: 652 | - !Ref PrivateSubnetA 653 | - !Ref PrivateSubnetB 654 | - !Ref PrivateSubnetC 655 | SecurityGroupIds: 656 | - !Ref VPCEndpointSecurityGroup 657 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.ecr.dkr' 658 | VpcId: !Ref SageMakerVPC 659 | 660 | VPCEndpointECRAPI: 661 | Type: 'AWS::EC2::VPCEndpoint' 662 | Properties: 663 | PolicyDocument: 664 | Version: 2012-10-17 665 | Statement: 666 | - Effect: Allow 667 | Principal: '*' 668 | Action: '*' 669 | Resource: '*' 670 | VpcEndpointType: Interface 671 | PrivateDnsEnabled: true 672 | SubnetIds: 673 | - !Ref PrivateSubnetA 674 | - !Ref PrivateSubnetB 675 | - !Ref PrivateSubnetC 676 | SecurityGroupIds: 677 | - !Ref VPCEndpointSecurityGroup 678 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.ecr.api' 679 | VpcId: !Ref SageMakerVPC 680 | 681 | VPCEndpointKMS: 682 | Type: 'AWS::EC2::VPCEndpoint' 683 | Properties: 684 | PolicyDocument: 685 | Version: 2012-10-17 686 | Statement: 687 | - Effect: Allow 688 | Principal: '*' 689 | Action: '*' 690 | Resource: '*' 691 | VpcEndpointType: Interface 692 | PrivateDnsEnabled: true 693 | SubnetIds: 694 | - !Ref PrivateSubnetA 695 | - !Ref PrivateSubnetB 696 | - !Ref PrivateSubnetC 697 | SecurityGroupIds: 698 | - !Ref VPCEndpointSecurityGroup 699 | ServiceName: !Sub 'com.amazonaws.${AWS::Region}.kms' 700 | VpcId: !Ref SageMakerVPC -------------------------------------------------------------------------------- /code_artifact_login.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: MIT-0 4 | 5 | set -e 6 | 7 | # This script configures pip to use CodeArtifact as its PyPI repository. 8 | # 9 | # This script should be run after the CodeArtifact repository has been created in the target AWS account. 10 | # 11 | # The script requires CodeArtifact admin permissions; add the following managed policy to the role used to run the script: 12 | # arn:aws:iam::aws:policy/AWSCodeArtifactAdminAccess 13 | # 14 | # This script assumes a Linux or macOS environment and relies on the following software packages being installed: 15 | # - AWS Command Line Interface (CLI) v2, a recent version that supports codeartifact commands 16 | # 
- Python 3 / pip3 17 | 18 | # Modify the variables for your environment 19 | # DOMAIN_OWNER is AWS Account Number 20 | CODE_ARTIFACT_DOMAIN_OWNER=${AWS_ACCOUNT_ID} 21 | DS_SHARED_CODE_ARTIFACT_DOMAIN=ds-domain 22 | DS_SHARED_CODE_ARTIFACT_REPO=ds-shared-repo 23 | # Set AWS_PROFILE variables in environment if needed 24 | 25 | # Login to CodeArtifact which will setup pip.conf in $HOME/.config/pip/pip.conf 26 | aws codeartifact login --tool pip \ 27 | --domain ${DS_SHARED_CODE_ARTIFACT_DOMAIN} \ 28 | --domain-owner ${CODE_ARTIFACT_DOMAIN_OWNER} \ 29 | --repository ${DS_SHARED_CODE_ARTIFACT_REPO} 30 | 31 | -------------------------------------------------------------------------------- /codebuild_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: MIT-0 5 | # 6 | function allOSRealPath() { 7 | if isOSWindows 8 | then 9 | path="" 10 | case $1 in 11 | .* ) path="$PWD/${1#./}" ;; 12 | /* ) path="$1" ;; 13 | * ) path="/$1" ;; 14 | esac 15 | 16 | echo "/$path" | sed -e 's/\\/\//g' -e 's/://' -e 's/./\U&/3' 17 | else 18 | case $1 in 19 | /* ) echo "$1"; exit;; 20 | * ) echo "$PWD/${1#./}"; exit;; 21 | esac 22 | fi 23 | } 24 | 25 | function isOSWindows() { 26 | if [ $OSTYPE == "msys" ] 27 | then 28 | return 0 29 | else 30 | return 1 31 | fi 32 | } 33 | 34 | function usage { 35 | echo "usage: codebuild_build.sh [-i image_name] [-a artifact_output_directory] [options]" 36 | echo "Required:" 37 | echo " -i Used to specify the customer build container image." 38 | echo " -a Used to specify an artifact output directory." 39 | echo "Options:" 40 | echo " -l IMAGE Used to override the default local agent image." 41 | echo " -s Used to specify source information. Defaults to the current working directory for primary source." 42 | echo " * First (-s) is for primary source" 43 | echo " * Use additional (-s) in : format for secondary source" 44 | echo " * For sourceIdentifier, use a value that is fewer than 128 characters and contains only alphanumeric characters and underscores" 45 | echo " -c Use the AWS configuration and credentials from your local host. This includes ~/.aws and any AWS_* environment variables." 46 | echo " -p Used to specify the AWS CLI Profile." 47 | echo " -b FILE Used to specify a buildspec override file. Defaults to buildspec.yml in the source directory." 48 | echo " -m Used to mount the source directory to the customer build container directly." 49 | echo " -d Used to run the build container in docker privileged mode." 50 | echo " -e FILE Used to specify a file containing environment variables." 
51 | echo " (-e) File format expectations:" 52 | echo " * Each line is in VAR=VAL format" 53 | echo " * Lines beginning with # are processed as comments and ignored" 54 | echo " * Blank lines are ignored" 55 | echo " * File can be of type .env or .txt" 56 | echo " * There is no special handling of quotation marks, meaning they will be part of the VAL" 57 | exit 1 58 | } 59 | 60 | image_flag=false 61 | artifact_flag=false 62 | awsconfig_flag=false 63 | mount_src_dir_flag=false 64 | docker_privileged_mode_flag=false 65 | 66 | while getopts "cmdi:a:s:b:e:l:p:h" opt; do 67 | case $opt in 68 | i ) image_flag=true; image_name=$OPTARG;; 69 | a ) artifact_flag=true; artifact_dir=$OPTARG;; 70 | b ) buildspec=$OPTARG;; 71 | c ) awsconfig_flag=true;; 72 | m ) mount_src_dir_flag=true;; 73 | d ) docker_privileged_mode_flag=true;; 74 | s ) source_dirs+=("$OPTARG");; 75 | e ) environment_variable_file=$OPTARG;; 76 | l ) local_agent_image=$OPTARG;; 77 | p ) aws_profile=$OPTARG;; 78 | h ) usage; exit;; 79 | \? ) echo "Unknown option: -$OPTARG" >&2; exit 1;; 80 | : ) echo "Missing option argument for -$OPTARG" >&2; exit 1;; 81 | * ) echo "Invalid option: -$OPTARG" >&2; exit 1;; 82 | esac 83 | done 84 | 85 | if ! $image_flag 86 | then 87 | echo "The image name flag (-i) must be included for a build to run" >&2 88 | fi 89 | 90 | if ! $artifact_flag 91 | then 92 | echo "The artifact directory (-a) must be included for a build to run" >&2 93 | fi 94 | 95 | if ! $image_flag || ! $artifact_flag 96 | then 97 | exit 1 98 | fi 99 | 100 | docker_command="docker run -it " 101 | if isOSWindows 102 | then 103 | docker_command+="-v //var/run/docker.sock:/var/run/docker.sock -e " 104 | else 105 | docker_command+="-v /var/run/docker.sock:/var/run/docker.sock -e " 106 | fi 107 | 108 | docker_command+="\"IMAGE_NAME=$image_name\" -e \ 109 | \"ARTIFACTS=$(allOSRealPath "$artifact_dir")\"" 110 | 111 | if [ -z "$source_dirs" ] 112 | then 113 | docker_command+=" -e \"SOURCE=$(allOSRealPath "$PWD")\"" 114 | else 115 | for index in "${!source_dirs[@]}"; do 116 | if [ $index -eq 0 ] 117 | then 118 | docker_command+=" -e \"SOURCE=$(allOSRealPath "${source_dirs[$index]}")\"" 119 | else 120 | identifier=${source_dirs[$index]%%:*} 121 | src_dir=$(allOSRealPath "${source_dirs[$index]#*:}") 122 | 123 | docker_command+=" -e \"SECONDARY_SOURCE_$index=$identifier:$src_dir\"" 124 | fi 125 | done 126 | fi 127 | 128 | if [ -n "$buildspec" ] 129 | then 130 | docker_command+=" -e \"BUILDSPEC=$(allOSRealPath "$buildspec")\"" 131 | fi 132 | 133 | if [ -n "$environment_variable_file" ] 134 | then 135 | environment_variable_file_path=$(allOSRealPath "$environment_variable_file") 136 | environment_variable_file_dir=$(dirname "$environment_variable_file_path") 137 | environment_variable_file_basename=$(basename "$environment_variable_file") 138 | docker_command+=" -v \"$environment_variable_file_dir:/LocalBuild/envFile/\" -e \"ENV_VAR_FILE=$environment_variable_file_basename\"" 139 | fi 140 | 141 | if [ -n "$local_agent_image" ] 142 | then 143 | docker_command+=" -e \"LOCAL_AGENT_IMAGE_NAME=$local_agent_image\"" 144 | fi 145 | 146 | if $awsconfig_flag 147 | then 148 | if [ -d "$HOME/.aws" ] 149 | then 150 | configuration_file_path=$(allOSRealPath "$HOME/.aws") 151 | docker_command+=" -e \"AWS_CONFIGURATION=$configuration_file_path\"" 152 | else 153 | docker_command+=" -e \"AWS_CONFIGURATION=NONE\"" 154 | fi 155 | 156 | if [ -n "$aws_profile" ] 157 | then 158 | docker_command+=" -e \"AWS_PROFILE=$aws_profile\"" 159 | fi 160 | 161 | 
docker_command+="$(env | grep ^AWS_ | while read -r line; do echo " -e \"$line\""; done )" 162 | fi 163 | 164 | if $mount_src_dir_flag 165 | then 166 | docker_command+=" -e \"MOUNT_SOURCE_DIRECTORY=TRUE\"" 167 | fi 168 | 169 | if $docker_privileged_mode_flag 170 | then 171 | docker_command+=" -e \"DOCKER_PRIVILEGED_MODE=TRUE\"" 172 | fi 173 | 174 | if isOSWindows 175 | then 176 | docker_command+=" -e \"INITIATOR=$USERNAME\"" 177 | else 178 | docker_command+=" -e \"INITIATOR=$USER\"" 179 | fi 180 | 181 | docker_command+=" amazon/aws-codebuild-local:latest" 182 | 183 | # Note we do not expose the AWS_SECRET_ACCESS_KEY or the AWS_SESSION_TOKEN 184 | exposed_command=$docker_command 185 | secure_variables=( "AWS_SECRET_ACCESS_KEY=" "AWS_SESSION_TOKEN=") 186 | for variable in "${secure_variables[@]}" 187 | do 188 | exposed_command="$(echo $exposed_command | sed "s/\($variable\)[^ ]*/\1********\"/")" 189 | done 190 | 191 | echo "Build Command:" 192 | echo "" 193 | echo $exposed_command 194 | echo "" 195 | 196 | eval $docker_command 197 | -------------------------------------------------------------------------------- /codebuild_local_readme.md: -------------------------------------------------------------------------------- 1 | # AWS CodeBuild Local Mode 2 | 3 | ## Overview 4 | 5 | AWS CodeBuild can be used locally through the use of Docker containers. This gives developers the ability to develop and test 6 | their buildspec.yaml files locally before commiting them to the Git repository. Equally it could be used to locally deploy and 7 | test code before publishing it to the repository. The rest of this readme details how to setup and use CodeBuild Local with 8 | this repository in case you're unfamiliar. 9 | 10 | ## Prerequisites 11 | 12 | AWS CodeBuild Local relies on 2 Docker Containers. The first, (aws-codebuild-local)[https://hub.docker.com/r/amazon/aws-codebuild-local/] 13 | acts as the service daemon, monitoring the build. The second container is the hosting contianer used to execute the project 14 | buildspec.yaml. For the purpose of this project the latest Ubuntu image should suffice. 15 | 16 | To get these two containers the following commands should be executed: 17 | 18 | ```bash 19 | docker pull amazon/aws-codebuild-local 20 | docker pull ubuntu 21 | ``` 22 | 23 | ## Execute the build 24 | 25 | To run CodeBuild locally use the shell script `codebuild_local.sh` to kick things off: 26 | 27 | ```bash 28 | ./codebuild_local.sh -i 'ubuntu:latest' -c -a /tmp -s . 29 | ``` 30 | 31 | This will install any dependencies such as zip, Python pip, and the AWS CLI. It will then execute the `package_cloudformation.sh` 32 | shell script to package this repository's cloudformation and publish it to Amazon S3 for your use. 33 | -------------------------------------------------------------------------------- /create-presigned-url.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: MIT-0 4 | 5 | # Set AWS_PROFILE in environment if needed 6 | aws sagemaker create-presigned-domain-url --cli-input-json file://pre-signedurl-input.json -------------------------------------------------------------------------------- /customimage/code-artifact-login.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: MIT-0 4 | set -e 5 | 6 | # Configure PIP 7 | ## get values from SSM Parameter Store 8 | CODE_ARTIFACT_DOMAIN=`aws ssm get-parameter --name ds-codeartifact-domain-name --query 'Parameter.Value' --output text` 9 | DOMAIN_OWNER=`aws ssm get-parameter --name ds-codeartifact-domain-owner --query 'Parameter.Value' --output text` 10 | CODE_ARTIFACT_REPO=`aws ssm get-parameter --name ds-codeartifact-repository --query 'Parameter.Value' --output text` 11 | CODE_ARTIFACT_API_DNS=`aws ssm get-parameter --name ds-codeartifact-api-dns --query 'Parameter.Value' --output text` 12 | 13 | ## Configure PIP by calling CodeArtifact login; it automatically updates ${HOME}/.config/pip/pip.conf 14 | echo "Configuring pip to use CodeArtifact" 15 | echo "aws codeartifact login --tool pip --domain ${CODE_ARTIFACT_DOMAIN} --domain-owner ${DOMAIN_OWNER} --repository ${CODE_ARTIFACT_REPO} --endpoint-url https://${CODE_ARTIFACT_API_DNS}" 16 | aws codeartifact login --tool pip --domain ${CODE_ARTIFACT_DOMAIN} --domain-owner ${DOMAIN_OWNER} --repository ${CODE_ARTIFACT_REPO} --endpoint-url https://${CODE_ARTIFACT_API_DNS} 17 | -------------------------------------------------------------------------------- /customimage/jupyter-docker-stacks-tensorflow/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | # TensorFlow image published by jupyter-docker-stacks 5 | # https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-tensorflow-notebook 6 | # latest is tensorflow 2.4.1 at the time of writing 7 | FROM jupyter/tensorflow-notebook:latest 8 | 9 | # Install the AWS CLI: 10 | # AWS CLI dependencies; these are not in the base Docker image 11 | # the shap package needs gcc, which is installed via build-essential 12 | USER root 13 | RUN apt-get update && \ 14 | apt-get install -y curl && \ 15 | apt-get install -y groff groff-base less && \ 16 | apt-get install -y build-essential && \ 17 | apt-get clean 18 | 19 | RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \ 20 | unzip awscliv2.zip 21 | RUN ./aws/install 22 | 23 | # install jq to parse json within bash scripts 24 | RUN curl -o /usr/local/bin/jq http://stedolan.github.io/jq/download/linux64/jq && \ 25 | chmod +x /usr/local/bin/jq 26 | 27 | COPY --chown=${NB_UID}:${NB_GID} setup-ds-env.sh "/home/${NB_USER}/bin/setup-ds-env.sh" 28 | COPY --chown=${NB_UID}:${NB_GID} code-artifact-login.sh "/home/${NB_USER}/bin/code-artifact-login.sh" 29 | COPY --chown=${NB_UID}:${NB_GID} notebooks/ "/home/${NB_USER}/notebooks/" 30 | 31 | USER ${NB_UID} 32 | # Install various Python utilities for SageMaker 33 | # (Pinned to last tested major version for repeatability) 34 | RUN pip install ipykernel && \ 35 | python -m ipykernel install --sys-prefix && \ 36 | pip install \ 37 | 'boto3>=1,<2' \ 38 | 'sagemaker>=2,<3' \ 39 | 'sagemaker-experiments>=0.1,<0.2' \ 40 | 'sagemaker-studio-image-build>=0.4,<0.5' \ 41 | 'smdebug>=0.9,<0.10' 42 | 43 | # Use pip install for the lab, shap requires gcc, it is installed above 44 | # RUN conda install -c conda-forge shap 45 | -------------------------------------------------------------------------------- /customimage/jupyter-docker-stacks-tensorflow/app-image-config-input.json: -------------------------------------------------------------------------------- 1 | { 2 | "AppImageConfigName": 
"ds-tensorflow241-image-config", 3 | "KernelGatewayImageConfig": { 4 | "KernelSpecs": [ 5 | { 6 | "Name": "python3", 7 | "DisplayName": "Python 3 (TensorFlow 2.4.1)" 8 | } 9 | ], 10 | "FileSystemConfig": { 11 | "MountPath": "/home/jovyan/work", 12 | "DefaultUid": 1000, 13 | "DefaultGid": 100 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /customimage/jupyter-docker-stacks-tensorflow/build-publish-sm-docker-image.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: MIT-0 4 | set -e 5 | 6 | # Set AWS_ACCOUNT_ID, AWS_DEFAULT_REGION, AWS_PROFILE variables in env if needed before running this script 7 | if [[ -z "${AWS_ACCOUNT_ID}" ]]; then 8 | echo "AWS_ACCOUNT_ID environment variable undefined, please define this environment variable and try again" 9 | exit 1 10 | fi 11 | 12 | if [[ -z "${AWS_DEFAULT_REGION}" ]]; then 13 | echo "AWS_DEFAULT_REGION environment variable undefined, please define this environment variable and try again" 14 | exit 1 15 | fi 16 | 17 | REPOSITORY_NAME=ds-shared-container-images 18 | IMAGE_NAME=ds-custom-tensorflow241 19 | DISPLAY_NAME="Custom TensorFlow v2.4.1 Image" 20 | 21 | # create a repository in ECR, and then login to ECR repository 22 | #aws ecr create-repository --repository-name ${REPOSITORY_NAME} 23 | aws ecr get-login-password | docker login --username AWS \ 24 | --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME} 25 | 26 | # cp data science setup script 27 | cp ../setup-ds-env.sh . 28 | cp ../code-artifact-login.sh . 29 | 30 | # copy example notebooks 31 | cp -r ../../src/project_template notebooks 32 | 33 | # Build the docker image and push to Amazon ECR (modify image tags and name as required) 34 | docker build . -t ${IMAGE_NAME} -t ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}:${IMAGE_NAME} 35 | docker push ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}:${IMAGE_NAME} 36 | 37 | # remove copied files 38 | rm -rf setup-ds-env.sh 39 | rm -rf code-artifact-login.sh 40 | rm -rf notebooks -------------------------------------------------------------------------------- /customimage/jupyter-docker-stacks-tensorflow/create-and-attach-image.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | # SPDX-License-Identifier: MIT-0 4 | set -e 5 | 6 | # Set AWS_ACCOUNT_ID, AWS_DEFAULT_REGION, AWS_PROFILE variables in env if needed before running this script 7 | if [[ -z "${AWS_ACCOUNT_ID}" ]]; then 8 | echo "AWS_ACCOUNT_ID environment variable undefined, please define this environment variable and try again" 9 | exit 1 10 | fi 11 | 12 | if [[ -z "${AWS_DEFAULT_REGION}" ]]; then 13 | echo "AWS_DEFAULT_REGION environment variable undefined, please define this environment variable and try again" 14 | exit 1 15 | fi 16 | 17 | STACK_SET_NAME=DSSharedServices # 18 | REPOSITORY_NAME=ds-shared-container-images 19 | IMAGE_NAME=ds-custom-tensorflow241 20 | DISPLAY_NAME="Custom TensorFlow v2.4.1 Image" 21 | 22 | # create a repository in ECR, and then login to ECR repository 23 | #aws ecr create-repository --repository-name ${REPOSITORY_NAME} 24 | aws ecr get-login-password | docker login --username AWS \ 25 | --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME} 26 | 27 | # cp data science setup script 28 | cp ../setup-ds-env.sh . 29 | cp ../code-artifact-login.sh . 30 | 31 | # copy example notebooks 32 | cp -r ../../src/project_template notebooks 33 | 34 | # Build the docker image and push to Amazon ECR (modify image tags and name as required) 35 | docker build . -t ${IMAGE_NAME} -t ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}:${IMAGE_NAME} 36 | docker push ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}:${IMAGE_NAME} 37 | 38 | # remove copied files 39 | rm -rf setup-ds-env.sh 40 | rm -rf code-artifact-login.sh 41 | rm -rf notebooks 42 | 43 | # Using with SageMaker Studio 44 | ## Create SageMaker Image with the image in ECR (modify image name as required) 45 | ROLE_ARN="arn:aws:iam::${AWS_ACCOUNT_ID}:role/ds-administrator-role-${STACK_SET_NAME}" 46 | 47 | aws sagemaker create-image \ 48 | --image-name ${IMAGE_NAME} \ 49 | --display-name "${DISPLAY_NAME}" \ 50 | --role-arn ${ROLE_ARN} 51 | 52 | aws sagemaker create-image-version \ 53 | --image-name ${IMAGE_NAME} \ 54 | --base-image "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}:${IMAGE_NAME}" 55 | 56 | ## Verify the image-version is created successfully. Do NOT proceed if image-version is in CREATE_FAILED state or in any other state apart from CREATED. 
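## A minimal sketch for automating that check rather than eyeballing the
## describe output below (assumes the same AWS CLI credentials; the
## ImageVersionStatus field is returned by describe-image-version):
#
# STATUS=$(aws sagemaker describe-image-version --image-name ${IMAGE_NAME} \
#   --query 'ImageVersionStatus' --output text)
# if [ "${STATUS}" != "CREATED" ]; then
#   echo "Image version is in ${STATUS} state, aborting" >&2
#   exit 1
# fi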
57 | aws sagemaker describe-image-version --image-name ${IMAGE_NAME} 58 | # 59 | ## Create AppImageConfig for this image 60 | #aws sagemaker create-app-image-config --cli-input-json file://app-image-config-input.json 61 | 62 | # Not ready to create or update a domain 63 | # Create domain 64 | #aws sagemaker create-domain --cli-input-json file://create-domain-input.json 65 | # OR 66 | # Update domain 67 | #aws sagemaker update-domain --cli-input-json file://update-domain-input.json -------------------------------------------------------------------------------- /customimage/jupyter-docker-stacks-tensorflow/update-domain-input.json: -------------------------------------------------------------------------------- 1 | { 2 | "DomainId": "d-f4begctixzdw", 3 | "DefaultUserSettings": { 4 | "KernelGatewayAppSettings": { 5 | "CustomImages": [ 6 | { 7 | "ImageName": "ds-custom-tensorflow241", 8 | "AppImageConfigName": "ds-tensorflow241-image-config" 9 | } 10 | ] 11 | } 12 | } 13 | } -------------------------------------------------------------------------------- /customimage/setup-ds-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: MIT-0 4 | set -e 5 | 6 | # NOTE: Update these variables before executing this script 7 | # AWS_DEFAULT_REGION is already defined by SageMaker as an environment variable in the container 8 | DS_SHARED_SERVICE_STACK_SET_NAME=DSSharedServices # default from workshop, update if different 9 | ENV_TYPE=dev # default from workshop, update if different 10 | TEAM_NAME=fsi-smteam # default from workshop, update if different 11 | USER_NAME=UPDATE_ME # user name for git 12 | USER_EMAIL=UPDATE_ME # user email for git 13 | 14 | # Detect directory of this script 15 | SCRIPT_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" 16 | 17 | ############################################# 18 | ## Create a simple Python library to make building VPC-based SageMaker resources easier 19 | ############################################# 20 | echo Building SageMaker convenience module: 21 | echo Sensing network settings... 
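## Illustrative note: the sagemaker_environment.py module written further down
## can then be imported from any notebook kernel, e.g. (hypothetical usage):
##   import sagemaker_environment as sme
##   # pass sme.SAGEMAKER_SUBNETS / sme.SAGEMAKER_SECURITY_GROUPS /
##   # sme.SAGEMAKER_KMS_KEY_ID to SageMaker Estimator or Processor arguments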
22 | ## Configure the appropriate VPC, subnet, security group values based on values in Parameter Store 23 | 24 | VPC_ID=`aws ssm get-parameter --name ds-vpc-${DS_SHARED_SERVICE_STACK_SET_NAME}-id --region ${AWS_DEFAULT_REGION} --query 'Parameter.Value' --output text` 25 | SUBNET_1=`aws ssm get-parameter --name ds-subnet-a-${DS_SHARED_SERVICE_STACK_SET_NAME}-id --region ${AWS_DEFAULT_REGION} --query 'Parameter.Value' --output text` 26 | SUBNET_2=`aws ssm get-parameter --name ds-subnet-b-${DS_SHARED_SERVICE_STACK_SET_NAME}-id --region ${AWS_DEFAULT_REGION} --query 'Parameter.Value' --output text` 27 | SUBNET_3=`aws ssm get-parameter --name ds-subnet-c-${DS_SHARED_SERVICE_STACK_SET_NAME}-id --region ${AWS_DEFAULT_REGION} --query 'Parameter.Value' --output text` 28 | SG_ID=`aws ssm get-parameter --name ds-sagemaker-vpc-sg-${DS_SHARED_SERVICE_STACK_SET_NAME}-id --region ${AWS_DEFAULT_REGION} --query 'Parameter.Value' --output text` 29 | KMS_ARN=`aws ssm get-parameter --name ds-kms-cmk-${TEAM_NAME}-${ENV_TYPE}-arn --region ${AWS_DEFAULT_REGION} --query 'Parameter.Value' --output text` 30 | S3_DATA_BUCKET=`aws ssm get-parameter --name ds-s3-data-bucket-${TEAM_NAME}-${ENV_TYPE} --region ${AWS_DEFAULT_REGION} --query 'Parameter.Value' --output text` 31 | S3_MODEL_BUCKET=`aws ssm get-parameter --name ds-s3-model-artifact-bucket-${TEAM_NAME}-${ENV_TYPE} --region ${AWS_DEFAULT_REGION} --query 'Parameter.Value' --output text` 32 | SHARED_DATA_LAKE_BUCKET=`aws ssm get-parameter --name ds-s3-data-lake-bucket-${DS_SHARED_SERVICE_STACK_SET_NAME} --region ${AWS_DEFAULT_REGION} --query 'Parameter.Value' --output text` 33 | 34 | echo Writing convenience module... 35 | ## Create the Python module in the iPython directory, which should be on every Python kernel path 36 | mkdir -p ${HOME}/.ipython 37 | cat <<EOF > ${HOME}/.ipython/sagemaker_environment.py 38 | SAGEMAKER_VPC="$VPC_ID" 39 | SAGEMAKER_SUBNETS = ["$SUBNET_1", "$SUBNET_2", "$SUBNET_3"] 40 | SAGEMAKER_SECURITY_GROUPS = ["$SG_ID"] 41 | SAGEMAKER_KMS_KEY_ID = "$KMS_ARN" 42 | SAGEMAKER_DATA_BUCKET = "${S3_DATA_BUCKET}" 43 | SAGEMAKER_MODEL_BUCKET = "${S3_MODEL_BUCKET}" 44 | SAGEMAKER_DATA_LAKE_BUCKET = "${SHARED_DATA_LAKE_BUCKET}" 45 | EOF 46 | 47 | # Configure git 48 | echo Configuring Git tooling... 49 | git config --global user.name "$USER_NAME" 50 | git config --global user.email "$USER_EMAIL" 51 | 52 | # Clone workshop repo 53 | #echo Cloning data science team CodeCommit repository... 54 | #git clone https://git-codecommit.${AWS_DEFAULT_REGION}.amazonaws.com/v1/repos/ds-source-${TEAM_NAME}-${ENV_TYPE} $HOME/work/ds-source-${TEAM_NAME}-${ENV_TYPE} 55 | 56 | ## Configure PIP to use AWS CodeArtifact 57 | $SCRIPT_PATH/code-artifact-login.sh 58 | -------------------------------------------------------------------------------- /customimage/tensorflow25/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# Configure git
echo Configuring Git tooling...
git config --global user.name "$USER_NAME"
git config --global user.email "$USER_EMAIL"

# Clone workshop repo
#echo Cloning data science team CodeCommit repository...
#git clone https://git-codecommit.${AWS_DEFAULT_REGION}.amazonaws.com/v1/repos/ds-source-${TEAM_NAME}-${ENV_TYPE} $HOME/work/ds-source-${TEAM_NAME}-${ENV_TYPE}

## Configure PIP to use AWS CodeArtifact
$SCRIPT_PATH/code-artifact-login.sh
--------------------------------------------------------------------------------
/customimage/tensorflow25/Dockerfile:
--------------------------------------------------------------------------------
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

# Official TensorFlow image published by TensorFlow
# https://www.tensorflow.org/install/docker
# latest 2.5.0 at the time of writing
FROM tensorflow/tensorflow:2.5.0

# Install the AWS CLI:
# AWS CLI dependencies, which are not in the base image
# the shap package needs gcc, which is installed via build-essential
USER root
RUN apt-get update && \
    apt-get install -y curl wget unzip vim && \
    apt-get install -y groff groff-base less && \
    apt-get install -y build-essential && \
    apt-get clean

RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip
RUN ./aws/install

# install jq to parse json within bash scripts (-L follows the redirect to HTTPS)
RUN curl -L -o /usr/local/bin/jq https://stedolan.github.io/jq/download/linux64/jq && \
    chmod +x /usr/local/bin/jq

# Note: NB_UID is defined by the jupyter-docker-stacks base images; the
# tensorflow/tensorflow base image does not set it, so define it before this
# line if a non-root user is intended.
USER $NB_UID
# Install various Python utilities for SageMaker
# (Pinned to last tested major version for repeatability)
RUN pip install ipykernel && \
    python -m ipykernel install --sys-prefix && \
    pip install \
    'boto3>=1,<2' \
    'sagemaker>=2,<3' \
    'sagemaker-experiments>=0.1,<0.2' \
    'sagemaker-studio-image-build>=0.4,<0.5' \
    'smdebug>=0.9,<0.10'
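# Annotation (not in the original Dockerfile): SageMaker Studio matches custom
# images to kernels by name, so the kernel registered by ipykernel above must
# match KernelSpecs[].Name ("python3") in app-image-config-input.json.
# A quick local sanity check (hypothetical, after building and tagging the image):
#   docker run --rm ds-custom-tensorflow250 jupyter kernelspec list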
COPY setup-ds-env.sh /root/setup-ds-env.sh
COPY code-artifact-login.sh /root/code-artifact-login.sh
COPY notebooks/ /root/notebooks/
--------------------------------------------------------------------------------
/customimage/tensorflow25/app-image-config-input.json:
--------------------------------------------------------------------------------
{
    "AppImageConfigName": "ds-tensorflow250-image-config",
    "KernelGatewayImageConfig": {
        "KernelSpecs": [
            {
                "Name": "python3",
                "DisplayName": "Python 3 (TensorFlow 2.5.0)"
            }
        ],
        "FileSystemConfig": {
            "MountPath": "/root/data",
            "DefaultUid": 0,
            "DefaultGid": 0
        }
    }
}
--------------------------------------------------------------------------------
/customimage/tensorflow25/build-publish-sm-docker-image.sh:
--------------------------------------------------------------------------------
#! /bin/bash
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
set -e
set -x

# Set AWS_ACCOUNT_ID, AWS_DEFAULT_REGION, AWS_PROFILE variables in env if needed before running this script

if [[ -z "${AWS_ACCOUNT_ID}" ]]; then
    echo "AWS_ACCOUNT_ID environment variable undefined, please define this environment variable and try again"
    exit 1
fi

if [[ -z "${AWS_DEFAULT_REGION}" ]]; then
    echo "AWS_DEFAULT_REGION environment variable undefined, please define this environment variable and try again"
    exit 1
fi

REPOSITORY_NAME=ds-shared-container-images
IMAGE_NAME=ds-custom-tensorflow250
DISPLAY_NAME="Custom TensorFlow v2.5.0 Image"

# create a repository in ECR, then log in to the ECR repository
#aws ecr create-repository --repository-name ${REPOSITORY_NAME}
aws ecr get-login-password | docker login --username AWS \
    --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}

# copy data science setup scripts
cp ../setup-ds-env.sh .
cp ../code-artifact-login.sh .

# copy example notebooks
cp -r ../../src/project_template notebooks

# Build the docker image and push to Amazon ECR (modify image tags and name as required)
docker build . -t ${IMAGE_NAME} -t ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}:${IMAGE_NAME}
docker push ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}:${IMAGE_NAME}

# remove copied files
rm -f setup-ds-env.sh
rm -f code-artifact-login.sh
rm -rf notebooks
--------------------------------------------------------------------------------
/customimage/tensorflow25/create-and-attach-image.sh:
--------------------------------------------------------------------------------
#! /bin/bash
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
set -e

# Set AWS_ACCOUNT_ID, AWS_DEFAULT_REGION, AWS_PROFILE variables in env if needed before running this script

if [[ -z "${AWS_ACCOUNT_ID}" ]]; then
    echo "AWS_ACCOUNT_ID environment variable undefined, please define this environment variable and try again"
    exit 1
fi

if [[ -z "${AWS_DEFAULT_REGION}" ]]; then
    echo "AWS_DEFAULT_REGION environment variable undefined, please define this environment variable and try again"
    exit 1
fi

STACK_SET_NAME=DSSharedServices #
REPOSITORY_NAME=ds-shared-container-images
IMAGE_NAME=ds-custom-tensorflow250
DISPLAY_NAME="Custom TensorFlow v2.5.0 Image"

# create a repository in ECR, then log in to the ECR repository
#aws ecr create-repository --repository-name ${REPOSITORY_NAME}
aws ecr get-login-password | docker login --username AWS \
    --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}

# copy data science setup scripts
cp ../setup-ds-env.sh .
cp ../code-artifact-login.sh .
# copy example notebooks
cp -r ../../src/project_template notebooks

# Build the docker image and push to Amazon ECR (modify image tags and name as required)
docker build . -t ${IMAGE_NAME} -t ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}:${IMAGE_NAME}
docker push ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}:${IMAGE_NAME}

# remove copied files
rm -f setup-ds-env.sh
rm -f code-artifact-login.sh
rm -rf notebooks

# Using with SageMaker Studio
## Create a SageMaker Image from the image in ECR (modify image name as required)
ROLE_ARN="arn:aws:iam::${AWS_ACCOUNT_ID}:role/ds-administrator-role-${STACK_SET_NAME}"
#
aws sagemaker create-image \
    --image-name ${IMAGE_NAME} \
    --display-name "${DISPLAY_NAME}" \
    --role-arn ${ROLE_ARN}

aws sagemaker create-image-version \
    --image-name ${IMAGE_NAME} \
    --base-image "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPOSITORY_NAME}:${IMAGE_NAME}"

## Verify the image-version was created successfully. Do NOT proceed if the image-version is in the CREATE_FAILED state or in any state other than CREATED.
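# A compact way to poll just the status (annotation, not in the original
# script; assumes the ImageVersionStatus field returned by describe-image-version):
#   aws sagemaker describe-image-version --image-name ${IMAGE_NAME} \
#     --query 'ImageVersionStatus' --output text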
aws sagemaker describe-image-version --image-name ${IMAGE_NAME}
#
## Create AppImageConfig for this image
aws sagemaker create-app-image-config --cli-input-json file://app-image-config-input.json

# Now ready to create or update a domain
# Create domain
#aws sagemaker create-domain --cli-input-json file://create-domain-input.json
# OR
# Update domain
aws sagemaker update-domain --cli-input-json file://update-domain-input.json
--------------------------------------------------------------------------------
/customimage/tensorflow25/update-domain-input.json:
--------------------------------------------------------------------------------
{
    "DomainId": "d-f4begctixzdw",
    "DefaultUserSettings": {
        "KernelGatewayAppSettings": {
            "CustomImages": [
                {
                    "ImageName": "ds-custom-tensorflow250",
                    "AppImageConfigName": "ds-tensorflow250-image-config"
                }
            ]
        }
    }
}
--------------------------------------------------------------------------------
/docs/images/hla.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-studio-secure-data-science-workshop/0958ad674e71700452eb55a764c8c6f690fd066d/docs/images/hla.png
--------------------------------------------------------------------------------
/docs/images/secure-ds-personas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-studio-secure-data-science-workshop/0958ad674e71700452eb55a764c8c6f690fd066d/docs/images/secure-ds-personas.png
--------------------------------------------------------------------------------
/install_code_artifact_pip_packages.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

set -e #-x

# This script installs pip packages into the shared data science CodeArtifact repository and then disassociates the
# external connection to the public PyPI repository.
#
# This script should be run after the CodeArtifact repository has been created in the target AWS account.
#
# The script requires CodeArtifact Admin permissions; add the following Managed Policy to the Role used to run the script:
# arn:aws:iam::aws:policy/AWSCodeArtifactAdminAccess
#
# This script assumes a Linux or MacOSX environment and relies on the following software packages being installed:
#   - AWS Command Line Interface (CLI) v2, latest version, which supports the codeartifact commands
#   - Python 3 / pip3
#
# This script assumes you have configured pip to use AWS CodeArtifact; if not, use the provided code_artifact_login.sh
# script to configure pip. Note that the CodeArtifact token is valid for 12 hours by default, so you may have to run
# that script again if the token has expired.
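# For reference, pip can also be pointed at CodeArtifact directly with the CLI
# login command (a sketch; <repository> is a placeholder for your pip-facing
# repository, and the provided code_artifact_login.sh remains the supported path):
#   aws codeartifact login --tool pip \
#     --domain <domain> --domain-owner <aws-account-id> --repository <repository>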
# Modify the variables for your environment
# DOMAIN_OWNER is the AWS Account Number
CODE_ARTIFACT_DOMAIN_OWNER=${AWS_ACCOUNT_ID}
DS_SHARED_CODE_ARTIFACT_DOMAIN=ds-domain
DS_SHARED_CODE_ARTIFACT_PUBLIC_UPSTREAM_REPO=ds-public-upstream-repo
EXTERNAL_CONNECTION_NAME=public:pypi # this doesn't change
# Set AWS_PROFILE variable in environment if needed

# Install pip packages; this will download and cache the packages in the CodeArtifact shared repo
pip3 install --no-cache-dir --user awswrangler==2.9.0 stepfunctions==2.2.0 smdebug==1.0.10 shap==0.39.0 sagemaker-experiments==0.1.33
pip3 install --no-cache-dir --user \
    --only-binary=:all: \
    numpy==1.20.3 pandas==1.2.5 protobuf==3.17.3 pyarrow==4.0.1 scikit-learn==0.24.2 scipy==1.7.0 psycopg2-binary xgboost==1.4.2

# Disassociate the external public PyPI connection to restrict package downloads from the public PyPI repo.
# As of this writing, the AWS CodeArtifact service will by default download packages from the public PyPI repo
# if an external connection exists.
# The connection can be associated again to install new packages in the shared repo by using:
# aws codeartifact associate-external-connection
aws codeartifact disassociate-external-connection \
    --domain ${DS_SHARED_CODE_ARTIFACT_DOMAIN} \
    --domain-owner ${CODE_ARTIFACT_DOMAIN_OWNER} \
    --repository ${DS_SHARED_CODE_ARTIFACT_PUBLIC_UPSTREAM_REPO} \
    --external-connection ${EXTERNAL_CONNECTION_NAME}
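# For example, re-opening the public connection later mirrors the call above:
#   aws codeartifact associate-external-connection \
#     --domain ${DS_SHARED_CODE_ARTIFACT_DOMAIN} \
#     --domain-owner ${CODE_ARTIFACT_DOMAIN_OWNER} \
#     --repository ${DS_SHARED_CODE_ARTIFACT_PUBLIC_UPSTREAM_REPO} \
#     --external-connection ${EXTERNAL_CONNECTION_NAME}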
--------------------------------------------------------------------------------
/package_cloudformation.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

set -e

# This script packages the CloudFormation templates in this directory, and the source code in this repository, and
# uploads them to Amazon S3 in preparation for deployment using the AWS CloudFormation service.
#
# This script exists because Service Catalog products that use relative references to CloudFormation templates are
# not properly packaged by the AWS CLI. Also, because the full stack involves two levels of Service Catalog
# deployment, it will not always package properly using the AWS CLI.

# This script treats the templates as source code and packages them, putting the results into a 'build' subdirectory.

# This script assumes a Linux or MacOSX environment and relies on the following software packages being installed:
#   - AWS Command Line Interface (CLI)
#   - sed
#   - Python 3 / pip3
#   - zip

# PLEASE NOTE this script will store all resources in an Amazon S3 bucket s3://${CFN_BUCKET_NAME}/${PROJECT_NAME}
# Set AWS_DEFAULT_REGION and AWS_PROFILE variables in environment if needed
QUICKSTART_MODE=true
CFN_BUCKET_NAME=${CFN_BUCKET_NAME:="secure-data-science-cloudformation-$RANDOM-$AWS_DEFAULT_REGION"}
PROJECT_NAME="quickstart"
# files that won't be uploaded by `aws cloudformation package`
UPLOAD_LIST="ds_environment.yaml project_template.zip ds_administration.yaml ds_env_studio_user_profile_v1.yaml ds_env_studio_user_profile_v2.yaml ds_env_sagemaker_studio.yaml"
# files that need to be scrubbed with sed to replace < S3_CFN_STAGING_BUCKET > with an actual S3 bucket name
SELF_PACKAGE_LIST="ds_administration.yaml ds_env_backing_store.yaml"
# files to be packaged using `aws cloudformation package`
AWS_PACKAGE_LIST="ds_environment.yaml ds_administration.yaml"
TMP_OUTPUT_DIR="/tmp/build/${AWS_DEFAULT_REGION}"
PUBLISH_PYPI=${PUBLISH_PYPI:=True}

if aws s3 ls s3://${CFN_BUCKET_NAME} 2>&1 | grep NoSuchBucket
then
    echo Creating Amazon S3 bucket ${CFN_BUCKET_NAME}
    aws s3 mb s3://${CFN_BUCKET_NAME}
    aws s3api put-public-access-block --bucket ${CFN_BUCKET_NAME} --public-access-block-configuration "BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true"
fi
echo Preparing content for publication to Amazon S3 s3://${CFN_BUCKET_NAME}

## clean away any previous builds of the CFN
rm -fr ${TMP_OUTPUT_DIR}
mkdir -p ${TMP_OUTPUT_DIR}
cp cloudformation/*.yaml ${TMP_OUTPUT_DIR}

echo "Zipping code sample..."
pushd src/project_template
zip -r ${TMP_OUTPUT_DIR}/project_template.zip ./*
popd

echo "Zipping detective control..."
pushd src/detective_control
zip -r ${TMP_OUTPUT_DIR}/vpc_detective_control.zip ./*
popd

## publish materials to the target AWS region
REGION=${AWS_DEFAULT_REGION:="us-west-2"}
echo Publishing CloudFormation to ${REGION}

echo "Clearing ${CFN_BUCKET_NAME}..."

echo "Self-packaging some CloudFormation templates..."
for fname in ${SELF_PACKAGE_LIST};
do
    sed -ie "s/< S3_CFN_STAGING_PATH >/${PROJECT_NAME}/" ${TMP_OUTPUT_DIR}/${fname}
    sed -ie "s/< S3_CFN_STAGING_BUCKET >/${CFN_BUCKET_NAME}/" ${TMP_OUTPUT_DIR}/${fname}
    sed -ie "s/< S3_CFN_STAGING_BUCKET_PATH >/${CFN_BUCKET_NAME}\/${PROJECT_NAME}/" ${TMP_OUTPUT_DIR}/${fname}
done
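# For illustration (hypothetical template content and bucket name): a reference such as
#   https://s3.us-west-2.amazonaws.com/< S3_CFN_STAGING_BUCKET >/< S3_CFN_STAGING_PATH >/ds_env_backing_store.yaml
# inside one of the SELF_PACKAGE_LIST templates would be rewritten by the loop above to
#   https://s3.us-west-2.amazonaws.com/secure-data-science-cloudformation-12345-us-west-2/quickstart/ds_env_backing_store.yaml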
echo "Packaging CloudFormation templates..."
for fname in ${AWS_PACKAGE_LIST};
do
    pushd ${TMP_OUTPUT_DIR}
    aws cloudformation package \
        --template-file ${fname} \
        --s3-bucket ${CFN_BUCKET_NAME} \
        --s3-prefix ${PROJECT_NAME} \
        --output-template-file ${TMP_OUTPUT_DIR}/${fname}-${REGION} \
        --region ${REGION}
    popd
done

# push files to S3; note this does not 'package' the templates
echo "Copying CloudFormation templates and files to S3..."
for fname in ${UPLOAD_LIST};
do
    if [ -f ${TMP_OUTPUT_DIR}/${fname}-${REGION} ]; then
        aws s3 cp ${TMP_OUTPUT_DIR}/${fname}-${REGION} s3://${CFN_BUCKET_NAME}/${PROJECT_NAME}/${fname}
    else
        aws s3 cp ${TMP_OUTPUT_DIR}/${fname} s3://${CFN_BUCKET_NAME}/${PROJECT_NAME}/${fname}
    fi
done

echo ==================================================
echo "Publication complete"
echo "To deploy execute:"
echo " aws cloudformation create-stack --template-url https://s3.${REGION}.amazonaws.com/${CFN_BUCKET_NAME}/${PROJECT_NAME}/ds_administration.yaml --region ${REGION} --stack-name secure-ds-shared-service --capabilities CAPABILITY_NAMED_IAM --parameters ParameterKey=QuickstartMode,ParameterValue=${QUICKSTART_MODE} "
--------------------------------------------------------------------------------
/pre-signedurl-input.json:
--------------------------------------------------------------------------------
{
    "DomainId": "",
    "UserProfileName": "",
    "SessionExpirationDurationInSeconds": 43200,
    "ExpiresInSeconds": 300
}
--------------------------------------------------------------------------------
/src/detective_control/inspect_sagemaker_resource.py:
--------------------------------------------------------------------------------
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

import boto3
import logging
import traceback

logger = logging.getLogger()
logger.setLevel(logging.INFO)


client = boto3.client('sagemaker')

def stop_training_job(trainingName):
    try:
        response = client.stop_training_job(
            TrainingJobName=trainingName
        )
        logger.info("Stopping training job: " + str(trainingName))
    except Exception:
        traceback.print_exc()

def delete_model(modelName):
    try:
        response = client.delete_model(
            ModelName=modelName
        )
        logger.info("Deleting Model: " + str(modelName))
    except Exception:
        traceback.print_exc()


def lambda_handler(event, context):
    logger.info("Event: " + str(event))
    eventName = event['detail']['eventName']
    requestParameters = event["detail"]["requestParameters"]

    if eventName == "CreateTrainingJob":
        if "vpcConfig" not in requestParameters:
            stop_training_job(requestParameters["trainingJobName"])
    elif eventName == "CreateModel":
        if "vpcConfig" not in requestParameters:
            delete_model(requestParameters["modelName"])
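# Annotation (not part of the original handler): the incoming event is the
# CloudTrail-sourced EventBridge payload, shaped roughly like:
#   {
#     "detail": {
#       "eventName": "CreateTrainingJob",
#       "requestParameters": {"trainingJobName": "...", "vpcConfig": {...}}
#     }
#   }
# Resources created without a vpcConfig are stopped or deleted as a detective control.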
--------------------------------------------------------------------------------
/src/project_template/util/utilsspec.py:
--------------------------------------------------------------------------------
# Utility functions to extract model artifacts and generate feature importance plots for an XGBoost model.
import boto3
import re
import tarfile
import matplotlib.pyplot as plt
import xgboost as xgb
# sklearn.externals.joblib was removed from scikit-learn; use the standalone joblib package instead
import joblib

def download_artifacts(job_name, local_fname):
    '''Given a trial name in a SageMaker Experiment, extract the model file and download it locally'''
    sm_client = boto3.Session().client('sagemaker')
    response = sm_client.describe_trial_component(TrialComponentName=job_name)
    model_artifacts_full_path = response['OutputArtifacts']['SageMaker.ModelArtifact']['Value']

    # Split the s3://bucket/key URI into bucket name and object key
    p = re.compile('(?<=s3://).*?/')
    s = p.search(model_artifacts_full_path)
    object_name_start = s.span()[1]
    object_name = model_artifacts_full_path[object_name_start:]
    bucket_name = s.group()[:-1]
    s3 = boto3.client('s3')
    s3.download_file(bucket_name, object_name, local_fname)

def unpack_model_file(fn):
    # Unpack the model archive and load the pickled XGBoost model
    _tar = tarfile.open(fn, 'r:gz')
    _tar.extractall()
    _fil = open('xgboost-model', 'rb')
    _model = joblib.load(_fil)
    print(_model)

    return _model

def plot_features(model, columns):
    num_features = len(columns)
    fig, ax = plt.subplots(figsize=(6, 6))
    xgb.plot_importance(model, max_num_features=num_features,
                        height=0.8, ax=ax, show_values=False)
    plt.title('Top Model Feature Importance')
    plt.show()
--------------------------------------------------------------------------------
/workshop-env.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0

# Environment variables used in the various scripts in the workshop
# Update them for your environment
# In a bash shell terminal, execute `. workshop-env.sh` or `source workshop-env.sh` to set up the environment
if [[ -z "${C9_PROJECT}" ]]; then
    echo "Not in AWS Cloud9 environment"
    export AWS_ACCOUNT_ID=#
    export AWS_DEFAULT_REGION=#
else
    # In Cloud9
    echo "In AWS Cloud9 environment"
    export AWS_ACCOUNT_ID=`curl -s http://169.254.169.254/latest/dynamic/instance-identity/document|jq -r .accountId`
    export AWS_DEFAULT_REGION=`curl -s http://169.254.169.254/latest/dynamic/instance-identity/document|jq -r .region`
fi

echo "AWS_ACCOUNT_ID set to $AWS_ACCOUNT_ID"
echo "AWS_DEFAULT_REGION set to $AWS_DEFAULT_REGION"

# set if other than "default"
#export AWS_PROFILE=#

--------------------------------------------------------------------------------