├── .infrastructure ├── .nvmrc ├── .python-version ├── cdk_src │ ├── __init__.py │ ├── smstudio │ │ ├── user │ │ │ ├── fn_user │ │ │ │ ├── requirements.txt │ │ │ │ └── main.py │ │ │ └── __init__.py │ │ ├── domain │ │ │ ├── fn_domain │ │ │ │ ├── requirements.txt │ │ │ │ └── vpctools.py │ │ │ └── __init__.py │ │ ├── lcc │ │ │ ├── fn_studio_lcconfig │ │ │ │ ├── requirements.txt │ │ │ │ └── main.py │ │ │ ├── nbi-onstart.sh │ │ │ ├── studio-jupyterlab-onstart.sh │ │ │ ├── studio-classic-onstart.sh │ │ │ └── __init__.py │ │ ├── cr_lambda_common │ │ │ ├── requirements.txt │ │ │ ├── sagemaker_util.py │ │ │ └── cfn.py │ │ ├── user_setup │ │ │ └── fn_user_setup │ │ │ │ ├── requirements.txt │ │ │ │ ├── main.py │ │ │ │ ├── smprojects.py │ │ │ │ ├── base.py │ │ │ │ └── content.py │ │ ├── cr_lambda_common.py │ │ ├── iam.py │ │ └── region_config.py │ ├── cdk_stack.py │ └── config_utils.py ├── requirements-dev.txt ├── pyproject.toml ├── source.bat ├── requirements.txt ├── package.json ├── package-lock.json ├── cdk_app.py ├── cdk.json └── README.md ├── custom_script_demos ├── keras_nlp │ ├── util │ │ ├── __init__.py │ │ ├── lab-widgets.sh │ │ └── preprocessing.py │ └── src │ │ └── main.py ├── pytorch_nlp │ ├── util │ │ ├── __init__.py │ │ ├── lab-widgets.sh │ │ └── preprocessing.py │ └── src │ │ └── main.py ├── sklearn_reg │ └── .gitignore └── huggingface_nlp │ └── scripts │ └── train.py ├── autopilot └── .gitignore ├── builtin_algorithm_hpo_tabular ├── .gitignore ├── util │ ├── __init__.py │ └── data.py └── img │ ├── canvas-01-launch.png │ ├── canvas-02-datasets-list.png │ ├── canvas-05-config-model.png │ ├── feature-store-features.png │ ├── model-registry-compare.png │ ├── canvas-03-data-selection.png │ └── canvas-04-select-dataset.png ├── migration_challenge ├── keras_mnist │ ├── util │ │ ├── __init__.py │ │ └── draw.py │ ├── src │ │ └── main.py │ └── README.md ├── pytorch_mnist │ ├── util │ │ ├── __init__.py │ │ └── draw.py │ ├── src │ │ └── main.py │ └── README.md └── sklearn_cls │ └── src │ └── main.py ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── .simple.cf.yaml ├── CONTRIBUTING.md └── README.md /.infrastructure/.nvmrc: -------------------------------------------------------------------------------- 1 | 22.15 2 | -------------------------------------------------------------------------------- /.infrastructure/.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /custom_script_demos/keras_nlp/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /custom_script_demos/pytorch_nlp/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autopilot/.gitignore: -------------------------------------------------------------------------------- 1 | autopilot_output/ 2 | data/ 3 | -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | -------------------------------------------------------------------------------- /custom_script_demos/sklearn_reg/.gitignore: -------------------------------------------------------------------------------- 1 | model/ 2 | 
src/ 3 | -------------------------------------------------------------------------------- /migration_challenge/keras_mnist/util/__init__.py: -------------------------------------------------------------------------------- 1 | from . import draw 2 | -------------------------------------------------------------------------------- /migration_challenge/pytorch_mnist/util/__init__.py: -------------------------------------------------------------------------------- 1 | from . import draw 2 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/__init__.py: -------------------------------------------------------------------------------- 1 | """CDK source code for workshop stack""" 2 | -------------------------------------------------------------------------------- /.infrastructure/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black==24.3.0 2 | cfn-lint==0.87 3 | pytest==6.2.5 4 | -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/util/__init__.py: -------------------------------------------------------------------------------- 1 | from . import reporting 2 | from . import data 3 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user/fn_user/requirements.txt: -------------------------------------------------------------------------------- 1 | # Nothing extra required beyond helper layer 2 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/domain/fn_domain/requirements.txt: -------------------------------------------------------------------------------- 1 | # Nothing extra required beyond helper layer 2 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/fn_studio_lcconfig/requirements.txt: -------------------------------------------------------------------------------- 1 | # Nothing extra required beyond helper layer 2 | -------------------------------------------------------------------------------- /.infrastructure/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | requires-python = ">= 3.9" 3 | 4 | [tool.black] 5 | extend-exclude = "^/(cdk\\.out|setup\\.py)" 6 | line-length = 100 7 | -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/canvas-01-launch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/canvas-01-launch.png -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/canvas-02-datasets-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/canvas-02-datasets-list.png -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/canvas-05-config-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/canvas-05-config-model.png 
-------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/feature-store-features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/feature-store-features.png -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/model-registry-compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/model-registry-compare.png -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/canvas-03-data-selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/canvas-03-data-selection.png -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/canvas-04-select-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/canvas-04-select-dataset.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files (same convention across exercises) 2 | data/ 3 | **.tmp.* 4 | 5 | # Operating systems 6 | .DS_Store 7 | 8 | # JavaScript 9 | node_modules/ 10 | 11 | # Python 12 | .ipynb_checkpoints 13 | __pycache__ 14 | **.pyc 15 | .venv/ 16 | 17 | # CDK & SAM 18 | .aws-sam 19 | cdk.out/ 20 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/cr_lambda_common/requirements.txt: -------------------------------------------------------------------------------- 1 | # Studio user settings incl `StudioWebPortal` (for domain `force_studio_classic`) and 2 | # `JupyterLabAppSettings` (for user) require an upgrade to the Lambda default versions of boto3 / 3 | # botocore: 4 | boto3>=1.34.33 5 | botocore>=1.34.33 6 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user_setup/fn_user_setup/requirements.txt: -------------------------------------------------------------------------------- 1 | # GitPython provides Python bindings for git *assuming you already have the git binaries installed* 2 | # - We've handled this via a 3rd party Lambda Layer, but you could instead consider using a 3 | # PyPI package like 'lambda-git' which bundles binaries. 
4 | gitpython>=3.1,<4 5 | -------------------------------------------------------------------------------- /.infrastructure/source.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem The sole purpose of this script is to make the command 4 | rem 5 | rem source .venv/bin/activate 6 | rem 7 | rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows. 8 | rem On Windows, this command just runs this batch file (the argument is ignored). 9 | rem 10 | rem Now we don't need to document a Windows command for activating a virtualenv. 11 | 12 | echo Executing .venv\Scripts\activate.bat for you 13 | .venv\Scripts\activate.bat 14 | -------------------------------------------------------------------------------- /custom_script_demos/keras_nlp/util/lab-widgets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Example for installing IPyWidgets extension from a SageMaker Lifecycle Configuration script 3 | sudo -u ec2-user -i <<EOF -------------------------------------------------------------------------------- /.infrastructure/requirements.txt: -------------------------------------------------------------------------------- 1 | # >=2.109.0 for Python 3.12 Lambda runtime, smstudio.domain to be able to set the 2 | # "StudioWebPortal" user setting to force classic Studio experience 3 | # >=2.140 to try and avoid 'Package @aws-sdk/client-cognito-identity-provider does not exist.' on 4 | # AwsCustomResource (this version includes fix for related failure to upgrade AWS SDK) 5 | # See: https://github.com/aws/aws-cdk/issues/30067 6 | aws-cdk-lib==2.158.0 7 | aws-cdk.aws-lambda-python-alpha==2.158.0-alpha.0 8 | cdk-nag==2.28 9 | constructs>=10.0.0,<11.0.0 10 | upsert-slr>=1.0.2,<2 11 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/nbi-onstart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Install extension for interactive canvas drawing: 5 | # ipywidgets is already present on al2-v2 NBIs. Pin versions to avoid reinstallations 6 | sudo -u ec2-user -i <<'EOF' 7 | source /home/ec2-user/anaconda3/bin/activate JupyterSystemEnv 8 | JUPYTERSERVER_VER=`pip show jupyter-server | grep 'Version:' | sed 's/Version: //'` 9 | IPYWIDGETS_VER=`pip show ipywidgets | grep 'Version:' | sed 's/Version: //'` 10 | pip install \ 11 | jupyter-server==$JUPYTERSERVER_VER \ 12 | ipywidgets==$IPYWIDGETS_VER \ 13 | 'ipycanvas<0.13' 14 | source /home/ec2-user/anaconda3/bin/deactivate 15 | EOF 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | -------------------------------------------------------------------------------- /migration_challenge/keras_mnist/src/main.py: -------------------------------------------------------------------------------- 1 | """CNN-based image classification on SageMaker with TensorFlow and Keras 2 | 3 | (Complete me with help from Local Notebook.ipynb, and the NLP example's src/main.py!) 4 | """ 5 | 6 | # Dependencies: 7 | import argparse 8 | # TODO: Others? 9 | 10 | def parse_args(): 11 | # TODO: Standard pattern for loading parameters in from SageMaker 12 | 13 | # TODO: Other function definitions, if you'd like to break up your code into functions? 14 | 15 | # Training script: 16 | if __name__ == "__main__": 17 | # Load arguments from CLI / environment variables: 18 | args, unknown = parse_args() 19 | 20 | # TODO: Load images from container filesystem into training / test data sets? 21 | 22 | # TODO: Create the Keras model? 23 | 24 | # Fit the Keras model: 25 | model.fit( 26 | ? 27 | ) 28 | 29 | # TODO: Evaluate model quality and log metrics? 30 | 31 | # TODO: Save outputs (trained model) to specified folder? 32 | model.save( 33 | ? 34 | ) 35 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/cr_lambda_common/sagemaker_util.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Shared utilities for CloudFormation Custom Resources working with SageMaker""" 4 | # Python Built-Ins: 5 | import logging 6 | import time 7 | from typing import Callable, TypeVar 8 | 9 | # External Dependencies: 10 | from botocore.exceptions import ClientError 11 | 12 | 13 | logger = logging.getLogger("sagemaker_util") 14 | TResponse = TypeVar("TResponse") 15 | 16 | 17 | def retry_if_already_updating(fn: Callable[[], TResponse], delay_secs: float = 10) -> TResponse: 18 | """Retry `fn` every `delay_secs` if it fails because a SageMaker Domain is already updating""" 19 | while True: 20 | try: 21 | return fn() 22 | except ClientError as err: 23 | if "is already being updated" in err.response["Error"]["Message"]: 24 | logger.info("Domain already updating - waiting to retry...") 25 | time.sleep(delay_secs) 26 | continue 27 | else: 28 | raise err 29 | -------------------------------------------------------------------------------- /migration_challenge/pytorch_mnist/src/main.py: -------------------------------------------------------------------------------- 1 | """CNN-based image classification on SageMaker with PyTorch 2 | 3 | (Complete me with help from Local Notebook.ipynb, and the NLP example's src/main.py!) 4 | """ 5 | 6 | # Dependencies: 7 | import argparse 8 | # TODO: Others? 9 | 10 | def parse_args(): 11 | # TODO: Standard pattern for loading parameters in from SageMaker 12 | 13 | def model_fn(model_dir): 14 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 15 | model = torch.jit.load(os.path.join(model_dir, 'model.pth')) 16 | return model 17 | 18 | # TODO: Other function definitions, if you'd like to break up your code into functions? 
19 | 20 | # Training script: 21 | if __name__ == "__main__": 22 | # TODO: Load arguments from CLI / environment variables? 23 | args, _ = parse_args() 24 | 25 | # TODO: Load images from container filesystem into training / test data sets? 26 | 27 | # TODO: Load dataset into a PyTorch Data Loader with correct batch size 28 | 29 | # TODO: Fit the PyTorch model? 30 | model = ? 31 | 32 | # TODO: Save outputs (trained model) to specified folder? -------------------------------------------------------------------------------- /.infrastructure/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sagemaker-101-workshop", 3 | "version": "0.1.0", 4 | "description": "CDK infrastructure for Amazon SageMaker 101 workshop", 5 | "main": "index.js", 6 | "directories": { 7 | "test": "tests" 8 | }, 9 | "scripts": { 10 | "cdk:bootstrap": "cdk bootstrap", 11 | "deploy": "npm run login:ecrpublic && cdk deploy --all", 12 | "destroy": "cdk destroy --all", 13 | "lint:cfn": "cfn-lint cfn_bootstrap.yaml", 14 | "lint:python": "black ./cdk_src", 15 | "lint": "npm run lint:cfn && npm run lint:python", 16 | "login:ecrpublic": "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws", 17 | "scan:cfn": "cfn_nag_scan --input-path cfn_bootstrap.yaml", 18 | "test": "echo \"Error: no test specified\" && exit 1" 19 | }, 20 | "keywords": [ 21 | "Workshop", 22 | "SageMaker", 23 | "AWS" 24 | ], 25 | "author": "Amazon Web Services", 26 | "license": "MIT-0", 27 | "private": true, 28 | "dependencies": { 29 | "aws-cdk": "2.158.0" 30 | }, 31 | "engines": { 32 | "node": ">=20" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /migration_challenge/keras_mnist/README.md: -------------------------------------------------------------------------------- 1 | # SageMaker Migration Exercise (TensorFlow) 2 | 3 | In this exercise, you'll migrate an [example notebook](Local%20Notebook.ipynb) (fitting a Keras CNN model on the MNIST Digits sample dataset) into the SageMaker data science workflow. 
4 | 5 | **To get started, clone this repository into a SageMaker Notebook instance (any instance type will do) and fire up the [Instructions.ipynb](Instructions.ipynb) notebook!** 6 | 7 | 8 | ## Prerequisites 9 | 10 | This practice exercise is intended to be delivered with in-person support, and assumes you: 11 | 12 | - Have had a high-level introduction to the SageMaker workflow, and: 13 | - Are familiar with using the AWS Console to access Amazon SageMaker and Amazon S3 14 | - Are familiar with configuring SageMaker Notebook Instance Execution Roles with appropriate Amazon S3 access 15 | 16 | If that doesn't sound like you, you might prefer to check out: 17 | 18 | - The official [Introductory Amazon SageMaker Tutorial](https://aws.amazon.com/getting-started/tutorials/build-train-deploy-machine-learning-model-sagemaker/) 19 | - The ["Get Started with the Amazon SageMaker Console"](https://docs.aws.amazon.com/sagemaker/latest/dg/gs-console.html) page in the [Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html) 20 | -------------------------------------------------------------------------------- /migration_challenge/pytorch_mnist/README.md: -------------------------------------------------------------------------------- 1 | # SageMaker Migration Exercise (PyTorch) 2 | 3 | In this exercise, you'll migrate an [example notebook](Local%20Notebook.ipynb) (fitting a PyTorch CNN model on the MNIST Digits sample dataset) into the SageMaker data science workflow. 4 | 5 | **To get started, clone this repository into a SageMaker Notebook instance (any instance type will do) and fire up the [Instructions.ipynb](Instructions.ipynb) notebook!** 6 | 7 | 8 | ## Prerequisites 9 | 10 | This practice exercise is intended to be delivered with in-person support, and assumes you: 11 | 12 | - Have had a high-level introduction to the SageMaker workflow, and: 13 | - Are familiar with using the AWS Console to access Amazon SageMaker and Amazon S3 14 | - Are familiar with configuring SageMaker Notebook Instance Execution Roles with appropriate Amazon S3 access 15 | 16 | If that doesn't sound like you, you might prefer to check out: 17 | 18 | - The official [Introductory Amazon SageMaker Tutorial](https://aws.amazon.com/getting-started/tutorials/build-train-deploy-machine-learning-model-sagemaker/) 19 | - The ["Get Started with the Amazon SageMaker Console"](https://docs.aws.amazon.com/sagemaker/latest/dg/gs-console.html) page in the [Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html) 20 | -------------------------------------------------------------------------------- /migration_challenge/sklearn_cls/src/main.py: -------------------------------------------------------------------------------- 1 | """SageMaker combined training/inference script for Scikit Learn random forest classifier""" 2 | # TODO: Add any other libraries you need below 3 | # Python Built-Ins: 4 | import argparse 5 | import os 6 | 7 | # External Dependencies: 8 | import joblib # Utilities for saving and re-loading models 9 | 10 | 11 | # Helper Functions 12 | 13 | 14 | # Main training script block: 15 | if __name__ == "__main__": 16 | # Parse input parameters from command line and environment variables: 17 | print("Parsing training arguments") 18 | parser = argparse.ArgumentParser() 19 | 20 | # TODO: Load RandomForest hyperparameters 21 | # TODO: Find data, model, and output directories from CLI/env vars 22 | 23 | args, _ = parser.parse_known_args() 24 | 25 | # 
TODO: Parse class names to Id mappings: 26 | 27 | # TODO: Load your data (both training and test) from container filesystem 28 | # (split into training and test datasets and identify correct features/labels) 29 | 30 | # TODO: Fit the random forest model 31 | 32 | # TODO: Save the model to the location specified by args.model_dir, using the joblib 33 | 34 | 35 | # TODO: Function to load the trained model at inference time 36 | 37 | 38 | # TODO: (Bonus!) Custom inference output_fn to return string labels instead of numeric class IDs 39 | -------------------------------------------------------------------------------- /.infrastructure/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sagemaker-101-workshop", 3 | "version": "0.1.0", 4 | "lockfileVersion": 3, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "sagemaker-101-workshop", 9 | "version": "0.1.0", 10 | "license": "MIT-0", 11 | "dependencies": { 12 | "aws-cdk": "2.158.0" 13 | }, 14 | "engines": { 15 | "node": ">=20" 16 | } 17 | }, 18 | "node_modules/aws-cdk": { 19 | "version": "2.158.0", 20 | "resolved": "https://registry.npmjs.org/aws-cdk/-/aws-cdk-2.158.0.tgz", 21 | "integrity": "sha512-UcrxBG02RACrnTvfuyZiTuOz8gqOpnqjCMTdVmdpExv5qk9hddhtRAubNaC4xleHuNJnvskYqqVW+Y3Abh6zGQ==", 22 | "bin": { 23 | "cdk": "bin/cdk" 24 | }, 25 | "engines": { 26 | "node": ">= 14.15.0" 27 | }, 28 | "optionalDependencies": { 29 | "fsevents": "2.3.2" 30 | } 31 | }, 32 | "node_modules/fsevents": { 33 | "version": "2.3.2", 34 | "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", 35 | "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", 36 | "hasInstallScript": true, 37 | "optional": true, 38 | "os": [ 39 | "darwin" 40 | ], 41 | "engines": { 42 | "node": "^8.16.0 || ^10.6.0 || >=11.0.0" 43 | } 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/cdk_stack.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | """CDK stack for AWS workshop with Amazon SageMaker""" 4 | # Python Built-Ins: 5 | from typing import Optional 6 | 7 | # External Dependencies: 8 | from aws_cdk import Stack 9 | from constructs import Construct 10 | from aws_cdk import aws_ec2 11 | 12 | # Local Dependencies: 13 | from .smstudio import WorkshopSageMakerEnvironment 14 | 15 | 16 | class WorkshopStack(Stack): 17 | def __init__( 18 | self, 19 | scope: Construct, 20 | construct_id: str, 21 | sagemaker_code_checkout: Optional[str] = None, 22 | sagemaker_code_repo: Optional[str] = None, 23 | ) -> None: 24 | super().__init__(scope, construct_id) 25 | 26 | # Shared VPC: 27 | vpc = aws_ec2.Vpc(self, "Vpc") 28 | 29 | # Deploy SageMaker Studio environment: 30 | sagemaker_env = WorkshopSageMakerEnvironment( 31 | self, 32 | "SageMakerEnvironment", 33 | vpc=vpc, 34 | code_checkout=sagemaker_code_checkout, 35 | code_repo=sagemaker_code_repo, 36 | create_nbi=False, # Don't create a 'Notebook Instance' (save costs, use Studio) 37 | domain_name="WorkshopDomain", 38 | instance_type="ml.t3.large", 39 | studio_classic=False, # Keep SMStudio classic disabled (save costs) 40 | ) 41 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/studio-jupyterlab-onstart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #### Clone sample code for labs 4 | # For new-style SMStudio we can't use EFS mounts to initialize user content, so have to use 5 | # this LCC. Repo name (and possibly branch config) below is populated by CDK. 6 | # `|| true` to swallow any errors (e.g. if folder already exists) - `set +e` doesn't work 7 | git clone {{CODE_REPO}} || true 8 | 9 | #### Docker installation (for SageMaker Local Mode) 10 | # As per: https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository 11 | # Add Docker's official GPG key: 12 | sudo apt-get update 13 | sudo apt-get -y install ca-certificates curl 14 | sudo install -m 0755 -d /etc/apt/keyrings 15 | sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc 16 | sudo chmod a+r /etc/apt/keyrings/docker.asc 17 | # Add the repository to Apt sources: 18 | echo \ 19 | "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ 20 | $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ 21 | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 22 | sudo apt-get update 23 | 24 | sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin 25 | 26 | #### JupyterLab extensions / etc 27 | # MNIST exercises require ipycanvas 28 | pip install "ipycanvas>=0.12,<0.14" 29 | restart-jupyter-server 30 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/config_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: MIT-0 4 | """Utilities for configuring the stack (e.g. 
environment variable parsing) 5 | """ 6 | # Python Built-Ins: 7 | import os 8 | from typing import Optional 9 | 10 | 11 | def bool_env_var(env_var_name: str, default: Optional[bool] = None) -> bool: 12 | """Parse a boolean environment variable 13 | 14 | Raises 15 | ------ 16 | ValueError : 17 | If environment variable `env_var_name` is not found and no `default` is specified, or if the 18 | raw value string could not be interpreted as a boolean. 19 | 20 | Returns 21 | ------- 22 | parsed : 23 | True if the env var has values such as `1`, `true`, `y`, `yes` (case-insensitive). False if 24 | opposite values `0`, `false`, `n`, `no` or empty string. 25 | """ 26 | raw = os.environ.get(env_var_name) 27 | if raw is None: 28 | if default is None: 29 | raise ValueError(f"Mandatory boolean env var '{env_var_name}' not found") 30 | return default 31 | raw = raw.lower() 32 | if raw in ("1", "true", "y", "yes"): 33 | return True 34 | elif raw in ("", "0", "false", "n", "no"): 35 | return False 36 | else: 37 | raise ValueError( 38 | "Couldn't interpret env var '%s' as boolean. Got: '%s'" % (env_var_name, raw) 39 | ) 40 | -------------------------------------------------------------------------------- /.infrastructure/cdk_app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: MIT-0 4 | """Main AWS CDK entry point for the workshop infrastructure 5 | """ 6 | # Python Built-Ins: 7 | import json 8 | import os 9 | 10 | # External Dependencies: 11 | import aws_cdk as cdk 12 | from cdk_nag import AwsSolutionsChecks # (Optional stack security checks) 13 | 14 | # Local Dependencies: 15 | from cdk_src.cdk_stack import WorkshopStack 16 | from cdk_src.config_utils import bool_env_var 17 | 18 | # Top-level configurations are loaded from environment variables at the point `cdk synth` or 19 | # `cdk deploy` is run (or you can override here): 20 | config = { 21 | # cdk_nag is a useful tool for auditing configuration security, but can sometimes be noisy: 22 | "cdk_nag": bool_env_var("CDK_NAG", default=False), 23 | "sagemaker_code_checkout": os.environ.get("SAGEMAKER_CODE_CHECKOUT"), 24 | "sagemaker_code_repo": os.environ.get( 25 | "SAGEMAKER_CODE_REPO", 26 | "https://github.com/aws-samples/sagemaker-101-workshop", 27 | ), 28 | } 29 | 30 | app = cdk.App() 31 | print(f"Preparing stack with configuration:\n{json.dumps(config, indent=2)}") 32 | llm_eval_wkshp_stack = WorkshopStack( 33 | app, 34 | "WorkshopStack", 35 | **{k: v for k, v in config.items() if k != "cdk_nag"}, 36 | ) 37 | 38 | if config["cdk_nag"]: 39 | print("Adding cdk_nag checks") 40 | cdk.Aspects.of(app).add(AwsSolutionsChecks()) 41 | else: 42 | print("Skipping cdk_nag checks") 43 | 44 | app.synth() 45 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user_setup/fn_user_setup/main.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Custom CloudFormation Resource for loading content to a SageMaker Studio user 4 | 5 | Updating or deleting this resource does not currently do anything. Errors in the setup process are 6 | also ignored (typically don't want to roll back the whole stack just because we couldn't clone a 7 | repo - as users can always do it manually!) 
8 | 9 | For input CloudFormation resource properties, see `StudioUserSetupResourceProperties` in base.py. 10 | 11 | CloudFormation Return Values 12 | ---------------------------- 13 | Direct .Ref : string 14 | SageMaker user profile name 15 | """ 16 | # Python Built-Ins: 17 | import logging 18 | 19 | logging.getLogger().setLevel(logging.INFO) # Set log level for AWS Lambda *BEFORE* other imports 20 | 21 | # Local Dependencies: 22 | from base import StudioUserSetupResourceProperties 23 | from cfn import CustomResourceEvent, CustomResourceRequestType 24 | import content 25 | import smprojects 26 | 27 | logger = logging.getLogger("main") 28 | 29 | 30 | def lambda_handler(event_raw: dict, context: dict): 31 | logger.info(event_raw) 32 | event = CustomResourceEvent(event_raw, StudioUserSetupResourceProperties) 33 | if event.request_type == CustomResourceRequestType.create: 34 | try: 35 | smprojects.on_create_update(event) 36 | except: 37 | logging.exception("Failed to set up user for SageMaker Projects") 38 | return content.handle_create(event, context) 39 | elif event.request_type == CustomResourceRequestType.update: 40 | try: 41 | smprojects.on_create_update(event) 42 | except: 43 | logging.exception("Failed to set up user for SageMaker Projects") 44 | return content.handle_update(event, context) 45 | elif event.request_type == CustomResourceRequestType.delete: 46 | return content.handle_delete(event, context) 47 | else: 48 | raise ValueError(f"Unsupported CFn RequestType '{event_raw['RequestType']}'") 49 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/cr_lambda_common.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Shared Lambda constructs to help with SageMaker Studio CDK""" 4 | # Python Built-Ins: 5 | import os 6 | from typing import Any, Dict, Sequence 7 | 8 | # External Dependencies: 9 | from aws_cdk import RemovalPolicy 10 | from aws_cdk.aws_lambda import Architecture, Runtime 11 | from aws_cdk.aws_lambda_python_alpha import BundlingOptions, PythonLayerVersion 12 | from constructs import Construct 13 | 14 | LAYER_CODE_PATH = os.path.join(os.path.dirname(__file__), "cr_lambda_common") 15 | 16 | 17 | class SMCustomResourceHelperLayer(PythonLayerVersion): 18 | """Lambda layer with helper functions/classes for SageMaker CloudFormation Custom Resources 19 | 20 | It works like a regular aws_cdk.aws_lambda_python_alpha.PythonLayerVersion, but the code 21 | location is already specified for you. 
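    A minimal usage sketch (the construct IDs, entry path and runtime below are illustrative
    assumptions, not values taken from this stack):

        layer = SMCustomResourceHelperLayer(self, "CrHelperLayer")
        handler = PythonFunction(
            self,
            "MyCustomResourceFn",
            entry="path/to/fn_code",  # hypothetical Lambda code folder
            runtime=Runtime.PYTHON_3_12,
            layers=[layer],  # makes the cfn.py / sagemaker_util.py helpers importable
        )
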
You probably don't need to specify 22 | """ 23 | 24 | def __init__( 25 | self, 26 | scope: Construct, 27 | id: str, 28 | *, 29 | bundling: BundlingOptions | Dict[str, Any] | None = None, 30 | compatible_architectures: Sequence[Architecture] | None = None, 31 | compatible_runtimes: Sequence[Runtime] | None = None, 32 | description: str | None = ( 33 | "Helper functions & classes for SageMaker CloudFormation custom resources" 34 | ), 35 | layer_version_name: str | None = None, 36 | license: str | None = None, 37 | removal_policy: RemovalPolicy | None = None, 38 | ) -> None: 39 | super().__init__( 40 | scope, 41 | id, 42 | entry=LAYER_CODE_PATH, 43 | bundling=bundling, 44 | compatible_architectures=compatible_architectures, 45 | compatible_runtimes=[ 46 | Runtime.PYTHON_3_8, 47 | Runtime.PYTHON_3_9, 48 | Runtime.PYTHON_3_10, 49 | Runtime.PYTHON_3_11, 50 | Runtime.PYTHON_3_12, 51 | ], 52 | description=description, 53 | layer_version_name=layer_version_name, 54 | license=license, 55 | removal_policy=removal_policy, 56 | ) 57 | -------------------------------------------------------------------------------- /.infrastructure/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 cdk_app.py", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "requirements*.txt", 11 | "source.bat", 12 | "**/__init__.py", 13 | "python/__pycache__", 14 | "tests" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 19 | "@aws-cdk/core:checkSecretUsage": true, 20 | "@aws-cdk/core:target-partitions": [ 21 | "aws", 22 | "aws-cn" 23 | ], 24 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 25 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 26 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 27 | "@aws-cdk/aws-iam:minimizePolicies": true, 28 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 29 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 30 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 31 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 32 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 33 | "@aws-cdk/core:enablePartitionLiterals": true, 34 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 35 | "@aws-cdk/aws-iam:standardizedServicePrincipals": true, 36 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 37 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 38 | "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 39 | "@aws-cdk/aws-route53-patters:useCertificate": true, 40 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 41 | "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 42 | "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, 43 | "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, 44 | "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, 45 | "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, 46 | "@aws-cdk/aws-redshift:columnId": true, 47 | "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, 48 | "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, 49 | "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, 50 | "@aws-cdk/aws-kms:aliasNameRef": true, 51 | "@aws-cdk/core:includePrefixInUniqueNameGeneration": true 52 | } 53 | } 54 | 
-------------------------------------------------------------------------------- /.simple.cf.yaml: -------------------------------------------------------------------------------- 1 | # This CloudFormation template provides a basic SageMaker Notebook Instance setup for you to try out 2 | # the workshop. The permissions are probably more generous than you'd want to grant in a production 3 | # account! 4 | AWSTemplateFormatVersion: '2010-09-09' 5 | Resources: 6 | SageMakerIamRole: 7 | Type: 'AWS::IAM::Role' 8 | Properties: 9 | AssumeRolePolicyDocument: 10 | Version: '2012-10-17' 11 | Statement: 12 | - 13 | Effect: Allow 14 | Principal: 15 | Service: sagemaker.amazonaws.com 16 | Action: sts:AssumeRole 17 | Path: / 18 | ManagedPolicyArns: 19 | - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess' 20 | - 'arn:aws:iam::aws:policy/AmazonS3FullAccess' 21 | 22 | # SageMaker notebook 23 | NotebookConfig: 24 | Type: 'AWS::SageMaker::NotebookInstanceLifecycleConfig' 25 | Properties: 26 | NotebookInstanceLifecycleConfigName: !Sub '${AWS::StackName}-LifecycleConfig' 27 | OnStart: 28 | - Content: 29 | Fn::Base64: !Sub | 30 | #!/bin/bash 31 | set -e 32 | 33 | # Install extension for interactive canvas drawing: 34 | # ipywidgets is already present on al2-v2 NBIs. Pin versions to avoid reinstallations 35 | sudo -u ec2-user -i <<'EOF' 36 | source /home/ec2-user/anaconda3/bin/activate JupyterSystemEnv 37 | JUPYTERSERVER_VER=`pip show jupyter-server | grep 'Version:' | sed 's/Version: //'` 38 | IPYWIDGETS_VER=`pip show ipywidgets | grep 'Version:' | sed 's/Version: //'` 39 | pip install \ 40 | jupyter-server==$JUPYTERSERVER_VER \ 41 | ipywidgets==$IPYWIDGETS_VER \ 42 | 'ipycanvas<0.13' 43 | source /home/ec2-user/anaconda3/bin/deactivate 44 | EOF 45 | 46 | NotebookInstance: 47 | Type: 'AWS::SageMaker::NotebookInstance' 48 | Properties: 49 | InstanceType: ml.t3.medium 50 | LifecycleConfigName: !GetAtt NotebookConfig.NotebookInstanceLifecycleConfigName 51 | # Otherwise it gets some garbage name by default: 52 | NotebookInstanceName: !Sub '${AWS::StackName}-Notebook' 53 | RoleArn: !GetAtt SageMakerIamRole.Arn 54 | VolumeSizeInGB: 20 55 | PlatformIdentifier: notebook-al2-v2 56 | DefaultCodeRepository: https://github.com/aws-samples/sagemaker-101-workshop 57 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/studio-classic-onstart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu 3 | 4 | echo "Checking conda environments" 5 | if conda info --envs | grep ^studio; then 6 | # Standard on JLv3 image at time of writing 7 | CONDA_ENV=studio 8 | else 9 | # Standard on JLv1 image at time of writing 10 | exit 0 11 | fi 12 | echo "Activating conda env $CONDA_ENV" 13 | source activate $CONDA_ENV 14 | 15 | BOTO3_VER=`pip show boto3 | grep 'Version:' | sed 's/Version: //'` 16 | BOTOCORE_VER=`pip show botocore | grep 'Version:' | sed 's/Version: //'` 17 | JUPYTERSERVER_VER=`pip show jupyter-server | grep 'Version:' | sed 's/Version: //'` 18 | 19 | echo "Installing CodeWhisperer, jupyterlab-lsp, language tools, canvas widget" 20 | pip install amazon-codewhisperer-jupyterlab-ext \ 21 | jupyterlab-lsp \ 22 | 'python-lsp-server[flake8,mccabe,pycodestyle,pydocstyle,pyflakes,pylint,rope]' \ 23 | jupyterlab-spellchecker \ 24 | jupyterlab-code-formatter black isort \ 25 | jupyterlab-s3-browser \ 26 | boto3==$BOTO3_VER \ 27 | botocore==$BOTOCORE_VER \ 28 | jupyter-server==$JUPYTERSERVER_VER \ 29 | 
'ipycanvas<0.13' 30 | # bash-language-server v5+ requires Node v16+ (not yet available): 31 | jlpm add --dev bash-language-server@"<5.0.0" dockerfile-language-server-nodejs 32 | 33 | # CodeWhisperer should be specifically enabled: 34 | jupyter server extension enable amazon_codewhisperer_jupyterlab_ext 35 | 36 | CMP_CONFIG_DIR=.jupyter/lab/user-settings/@krassowski/jupyterlab-lsp/ 37 | CMP_CONFIG_FILE=completion.jupyterlab-settings 38 | CMP_CONFIG_PATH="$CMP_CONFIG_DIR/$CMP_CONFIG_FILE" 39 | if test -f $CMP_CONFIG_PATH; then 40 | echo "jupyterlab-lsp config file already exists: Skipping default config setup" 41 | else 42 | echo "Setting continuous hinting to enabled by default" 43 | mkdir -p $CMP_CONFIG_DIR 44 | echo '{ "continuousHinting": true }' > $CMP_CONFIG_PATH 45 | fi 46 | 47 | FMT_CONFIG_DIR=~/.jupyter/lab/user-settings/@ryantam626/jupyterlab_code_formatter 48 | FMT_CONFIG_FILE=settings.jupyterlab-settings 49 | FMT_CONFIG_PATH="$FMT_CONFIG_DIR/$FMT_CONFIG_FILE" 50 | if test -f $FMT_CONFIG_PATH; then 51 | echo "jupyterlab-code-formatter config file already exists: Skipping default config setup" 52 | else 53 | echo "Configuring jupyterlab-code-formatter format on save and line width" 54 | mkdir -p $FMT_CONFIG_DIR 55 | # Could turn on "formatOnSave": true here, but would raise error messages for partial nbks 56 | cat > $FMT_CONFIG_PATH < ~/.config/pycodestyle < /dev/null 2>&1 -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. 
Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user_setup/fn_user_setup/smprojects.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Custom CloudFormation Resource for SageMaker Projects setup 4 | 5 | See `.base.StudioUserSetupResourceProperties` for CloudFormation input Properties, and main.py 6 | docstring for CloudFormation return values. 7 | 8 | This sub-resource handles granting (existing) SMStudio user profiles permission to view and launch 9 | SageMaker Project Templates, from CloudFormation. 
10 | """ 11 | # Python Built-Ins: 12 | from logging import getLogger 13 | 14 | # External Dependencies: 15 | import boto3 # AWS SDK for Python 16 | 17 | # Local Dependencies: 18 | from base import StudioUserSetupResourceProperties 19 | from cfn import CustomResourceEvent 20 | 21 | 22 | scclient = boto3.client("servicecatalog") 23 | smclient = boto3.client("sagemaker") 24 | logger = getLogger("smprojects") 25 | 26 | 27 | def enable_sm_projects_for_role(studio_role_arn: str) -> None: 28 | """Enable SageMaker Projects for a SageMaker Execution Role 29 | This function assumes you've already run Boto SageMaker 30 | enable_sagemaker_servicecatalog_portfolio() for the account as a whole 31 | """ 32 | portfolios_resp = scclient.list_accepted_portfolio_shares() 33 | 34 | portfolio_ids = set() 35 | for portfolio in portfolios_resp["PortfolioDetails"]: 36 | if portfolio["ProviderName"] == "Amazon SageMaker": 37 | portfolio_ids.add(portfolio["Id"]) 38 | 39 | logger.info(f"Adding {len(portfolio_ids)} SageMaker SC portfolios to role {studio_role_arn}") 40 | for portfolio_id in portfolio_ids: 41 | scclient.associate_principal_with_portfolio( 42 | PortfolioId=portfolio_id, PrincipalARN=studio_role_arn, PrincipalType="IAM" 43 | ) 44 | 45 | 46 | def disable_sm_projects_for_role(studio_role_arn: str) -> None: 47 | """Enable SageMaker Projects for a SageMaker Execution Role 48 | This function assumes you've already run Boto SageMaker 49 | enable_sagemaker_servicecatalog_portfolio() for the account as a whole 50 | """ 51 | portfolios_resp = scclient.list_accepted_portfolio_shares() 52 | 53 | portfolio_ids = set() 54 | for portfolio in portfolios_resp["PortfolioDetails"]: 55 | if portfolio["ProviderName"] == "Amazon SageMaker": 56 | portfolio_ids.add(portfolio["Id"]) 57 | 58 | logger.info( 59 | f"Removing {len(portfolio_ids)} SageMaker SC portfolios from role {studio_role_arn}" 60 | ) 61 | for portfolio_id in portfolio_ids: 62 | scclient.disassociate_principal_from_portfolio( 63 | PortfolioId=portfolio_id, 64 | PrincipalARN=studio_role_arn, 65 | ) 66 | 67 | 68 | def get_user_profile_role_arn(domain_id: str, user_profile_name: str) -> str: 69 | user_desc = smclient.describe_user_profile( 70 | DomainId=domain_id, UserProfileName=user_profile_name 71 | ) 72 | return user_desc["UserSettings"]["ExecutionRole"] 73 | 74 | 75 | def on_create_update(event: CustomResourceEvent[StudioUserSetupResourceProperties]) -> bool: 76 | logger.info("**Received create/update request") 77 | if event.props.enable_projects: 78 | logger.info("**Setting up SageMaker projects for user") 79 | role_arn = get_user_profile_role_arn(event.props.domain_id, event.props.user_profile_name) 80 | enable_sm_projects_for_role(role_arn) 81 | return True 82 | else: 83 | logger.info("**Skipping removing SM Projects from user") 84 | return False 85 | -------------------------------------------------------------------------------- /custom_script_demos/keras_nlp/src/main.py: -------------------------------------------------------------------------------- 1 | """CNN-based text classification on SageMaker with TensorFlow and Keras""" 2 | 3 | # Python Built-Ins: 4 | import argparse 5 | import os 6 | 7 | # External Dependencies: 8 | import numpy as np 9 | import tensorflow as tf 10 | from tensorflow.keras.layers import Conv1D, Dense, Dropout, Embedding, Flatten, MaxPooling1D 11 | from tensorflow.keras.models import Sequential 12 | 13 | ###### Helper functions ############ 14 | def load_training_data(base_dir): 15 | X_train = np.load(os.path.join(base_dir, 
"train_X.npy")) 16 | y_train = np.load(os.path.join(base_dir, "train_Y.npy")) 17 | return X_train, y_train 18 | 19 | def load_testing_data(base_dir): 20 | X_test = np.load(os.path.join(base_dir, "test_X.npy")) 21 | y_test = np.load(os.path.join(base_dir, "test_Y.npy")) 22 | return X_test, y_test 23 | 24 | def load_embeddings(base_dir): 25 | embedding_matrix = np.load(os.path.join(base_dir, "docs-embedding-matrix.npy")) 26 | return embedding_matrix 27 | 28 | def parse_args(): 29 | """Acquire hyperparameters and directory locations passed by SageMaker""" 30 | parser = argparse.ArgumentParser() 31 | 32 | # Hyperparameters sent by the client are passed as command-line arguments to the script. 33 | parser.add_argument("--epochs", type=int, default=1) 34 | parser.add_argument("--learning_rate", type=float, default=0.001) 35 | parser.add_argument("--num_classes", type=int, default=4) 36 | parser.add_argument("--max_seq_len", type=int, default=40) 37 | 38 | # Data, model, and output directories 39 | parser.add_argument("--output-data-dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR")) 40 | parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR")) 41 | parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN")) 42 | parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TEST")) 43 | parser.add_argument("--embeddings", type=str, default=os.environ.get("SM_CHANNEL_EMBEDDINGS")) 44 | 45 | return parser.parse_known_args() 46 | 47 | ###### Main application ############ 48 | if __name__ == "__main__": 49 | 50 | ###### Parse input arguments ############ 51 | args, unknown = parse_args() 52 | print(args) 53 | 54 | ###### Load data from input channels ############ 55 | X_train, y_train = load_training_data(args.train) 56 | X_test, y_test = load_testing_data(args.test) 57 | embedding_matrix = load_embeddings(args.embeddings) 58 | 59 | 60 | ###### Setup model architecture ############ 61 | model = Sequential() 62 | model.add(Embedding( 63 | embedding_matrix.shape[0], # Final vocabulary size 64 | embedding_matrix.shape[1], # Word vector dimensions 65 | weights=[embedding_matrix], 66 | input_length=args.max_seq_len, 67 | trainable=False, 68 | name="embed", 69 | )) 70 | model.add(Conv1D(filters=128, kernel_size=3, activation="relu", name="conv_1")) 71 | model.add(MaxPooling1D(pool_size=5, name="maxpool_1")) 72 | model.add(Flatten(name="flat_1")) 73 | model.add(Dropout(0.3, name="dropout_1")) 74 | model.add(Dense(128, activation="relu", name="dense_1")) 75 | model.add(Dense(args.num_classes, activation="softmax", name="out_1")) 76 | 77 | ###### Compile the model ############ 78 | optimizer = tf.keras.optimizers.RMSprop(learning_rate=args.learning_rate) 79 | model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["acc"]) 80 | 81 | model.summary() 82 | 83 | print("Training model") 84 | model.fit(X_train, y_train, batch_size=16, epochs=args.epochs, verbose=2) 85 | print("Evaluating model") 86 | # TODO: Better differentiate train vs val loss in logs 87 | scores = model.evaluate(X_test, y_test, verbose=2) 88 | print( 89 | "Validation results: " 90 | + "; ".join(map( 91 | lambda i: f"{model.metrics_names[i]}={scores[i]:.5f}", range(len(model.metrics_names)) 92 | )) 93 | ) 94 | 95 | 96 | ###### Save Keras model for TensorFlow Serving ############ 97 | print(f"------ save model to {os.path.join(args.model_dir, 'model/1/')}") 98 | model.save(os.path.join(args.model_dir, "model/1")) 99 | 
-------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/cr_lambda_common/cfn.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Types/classes for working with CloudFormation Custom Resource events in Python Lambda functions 4 | 5 | TODO: Assess `aws-lambda-powertools` and/or `crhelper` instead 6 | 7 | https://docs.powertools.aws.dev/lambda/python/ 8 | https://github.com/aws-cloudformation/custom-resource-helper 9 | """ 10 | # Python Built-Ins: 11 | from enum import Enum 12 | from logging import getLogger 13 | from typing import Generic, Optional, Type, TypeVar, Union 14 | 15 | logger = getLogger("cfn") 16 | 17 | 18 | class CustomResourceRequestType(str, Enum): 19 | "Enumeration of CloudFormation event 'RequestType's received by a custom resource" 20 | create = "Create" 21 | update = "Update" 22 | delete = "Delete" 23 | 24 | 25 | def parse_cfn_boolean(raw: Union[bool, str], var_name: Optional[str] = None) -> bool: 26 | """Parse a boolean value from (potentially stringified/text) CloudFormation event properties 27 | 28 | Common text values like 'true', 'yes', etc are supported. Raises a ValueError if the raw 29 | value is `None` or cannot be interpreted as boolean. 30 | 31 | Parameters 32 | ---------- 33 | raw : 34 | The raw value from CloudFormation, which might be a string 35 | var_name : 36 | Optional name of the variable to be parsed (only used for error messages) 37 | """ 38 | if isinstance(raw, bool): 39 | return raw 40 | if isinstance(raw, str): 41 | if raw in ("1", "t", "true", "y", "yes"): 42 | return True 43 | elif raw in ("0", "f", "false", "n", "no"): 44 | return False 45 | else: 46 | raise ValueError( 47 | f"Invalid {(var_name + ' ') if var_name else ''}string value '{raw}' (expected boolean)" 48 | ) 49 | else: 50 | raise ValueError( 51 | f"Invalid {(var_name + ' ') if var_name else ''}value type '{type(raw)}' (expected boolean)" 52 | ) 53 | 54 | 55 | TResourceProps = TypeVar("TResourceProps") 56 | 57 | 58 | class CustomResourceEvent(Generic[TResourceProps]): 59 | """Class to parse a CFn Custom Resource event 60 | 61 | This is a generic class: TResourceProps should be a class that can be initialized with the 62 | dict of CloudFormation resource properties for your specific custom resource - and raises an 63 | exception if the properties are invalid. 64 | """ 65 | 66 | physical_id: Optional[str] 67 | props: Optional[TResourceProps] 68 | old_props: Optional[TResourceProps] 69 | request_type: CustomResourceRequestType 70 | resource_type: str 71 | 72 | def __init__(self, event: dict, PropertiesClass: Type[TResourceProps]): 73 | """Create a CustomResourceEvent 74 | 75 | Parameters 76 | ---------- 77 | event : 78 | Raw event dict from AWS Lambda 79 | PropertiesClass : 80 | Python class that should be created for the resource properties. Your class will be 81 | instantiated with one constructor argument - the raw properties dictionary. If this 82 | is an 'Update' event, another instance will be created from the OldResourceProperties. 83 | If the OldResourceProperties cannot be parsed, an exception will be logged but not 84 | raised. 
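        Example
        -------
        A minimal handler sketch (`MyProps` is a hypothetical properties class, not part of this
        module):

            class MyProps:
                def __init__(self, props: dict):
                    # Raise (e.g. KeyError) here if the CloudFormation properties are invalid:
                    self.bucket_name = props["BucketName"]

            def lambda_handler(event_raw: dict, context):
                event = CustomResourceEvent(event_raw, MyProps)
                if event.request_type == CustomResourceRequestType.create:
                    ...  # create the resource using event.props.bucket_name
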
85 | """ 86 | self.physical_id = event.get("PhysicalResourceId") 87 | self.request_type = CustomResourceRequestType(event["RequestType"]) 88 | self.resource_type = event["ResourceType"] 89 | resource_properties = event.get("ResourceProperties") 90 | if resource_properties: 91 | self.props = PropertiesClass(resource_properties) 92 | else: 93 | self.props = None 94 | # Only present for 'Update' requests: 95 | old_resource_properties = event.get("OldResourceProperties") 96 | if old_resource_properties: 97 | try: 98 | self.old_props = PropertiesClass(old_resource_properties) 99 | except Exception: 100 | logger.exception("Failed to parse OldResourceProperties of Update event") 101 | self.old_props = None 102 | else: 103 | self.old_props = None 104 | -------------------------------------------------------------------------------- /custom_script_demos/pytorch_nlp/util/preprocessing.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | # Python Built-Ins: 4 | import gzip 5 | import os 6 | import shutil 7 | import subprocess 8 | import tarfile 9 | import time 10 | from typing import Optional 11 | 12 | # External Dependencies: 13 | import numpy as np 14 | from sklearn import preprocessing 15 | import torchtext 16 | 17 | 18 | def wait_for_file_stable(path: str, stable_secs: int=60, poll_secs: Optional[int]=None) -> bool: 19 | """Wait for a file to become stable (not recently modified) & return existence 20 | 21 | Returns False if file does not exist. Raises FileNotFoundError if file deleted during polling. 22 | 23 | When running through the two notebooks at the same time in parallel, this helps to minimize any 24 | errors caused by initiating multiple downloads/extractions/etc on the same file in parallel. 25 | """ 26 | if not poll_secs: 27 | poll_secs = stable_secs / 4 28 | try: 29 | init_stat = os.stat(path) 30 | except FileNotFoundError: 31 | return False 32 | 33 | if (time.time() - init_stat.st_mtime) < stable_secs: 34 | print(f"Waiting for file to stabilize... {path}") 35 | while (time.time() - os.stat(path).st_mtime) < stable_secs: 36 | time.sleep(poll_secs) 37 | print("File ready") 38 | 39 | return True 40 | 41 | def dummy_encode_labels(df,label): 42 | encoder = preprocessing.LabelEncoder() 43 | encoded_y = encoder.fit_transform(df[label].values) 44 | num_classes = len(encoder.classes_) 45 | # convert integers to dummy variables (i.e. one hot encoded) 46 | dummy_y = np.eye(num_classes, dtype="float32")[encoded_y] 47 | return dummy_y, encoder.classes_ 48 | 49 | 50 | def tokenize_and_pad_docs(df, columns, max_length=40): 51 | docs = df[columns].values 52 | 53 | t = torchtext.data.Field( 54 | lower = True, 55 | tokenize = "basic_english", 56 | fix_length = max_length 57 | ) 58 | docs = list(map(t.preprocess, docs)) 59 | padded_docs = t.pad(docs) 60 | t.build_vocab(padded_docs) 61 | print(f"Vocabulary size: {len(t.vocab)}") 62 | numericalized_docs = [] 63 | for d in padded_docs: 64 | temp = [] 65 | for c in d: 66 | temp.append(t.vocab.stoi[c]) 67 | numericalized_docs.append(temp) 68 | print(f"Number of headlines: {len(numericalized_docs)}") 69 | return np.array(numericalized_docs), t 70 | 71 | 72 | def get_word_embeddings(t, folder, lang="en"): 73 | """Download pre-trained word vectors and construct an embedding matrix for tokenizer `t` 74 | 75 | Any tokens in `t` not found in the embedding vectors are mapped to all-zeros. 
76 | """ 77 | vecs_url = f"https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.{lang}.300.vec.gz" 78 | vecs_gz_filename = vecs_url.rpartition("/")[2] 79 | os.makedirs(folder, exist_ok=True) 80 | vecs_gz_filepath = os.path.join(folder, vecs_gz_filename) 81 | 82 | tokenizer_vocab_size = len(t.vocab) 83 | 84 | if wait_for_file_stable(vecs_gz_filepath): 85 | print("Using existing embeddings file") 86 | else: 87 | print("Downloading word vectors...") 88 | subprocess.run( 89 | [" ".join(["curl", vecs_url, "-o", vecs_gz_filepath])], check=True, shell=True 90 | ) 91 | 92 | print("Loading into memory...") 93 | embeddings_index = dict() 94 | with gzip.open(vecs_gz_filepath, "rt") as zipf: 95 | firstline = zipf.readline() 96 | emb_vocab_size, emb_d = firstline.split(" ") 97 | emb_vocab_size = int(emb_vocab_size) 98 | emb_d = int(emb_d) 99 | for line in zipf: 100 | values = line.split() 101 | word = values[0] 102 | # Only load subset of the embeddings recognised by the tokenizer: 103 | if word in t.vocab.stoi: 104 | coefs = np.asarray(values[1:], dtype="float32") 105 | embeddings_index[word] = coefs 106 | print("Loaded {} of {} word vectors for tokenizer vocabulary length {}".format( 107 | len(embeddings_index), 108 | emb_vocab_size, 109 | tokenizer_vocab_size, 110 | )) 111 | 112 | # create a weight matrix for words in training docs 113 | embedding_matrix = np.zeros((tokenizer_vocab_size, emb_d)) 114 | for word, i in t.vocab.stoi.items(): 115 | embedding_vector = embeddings_index.get(word) 116 | if embedding_vector is not None: 117 | embedding_matrix[i] = embedding_vector 118 | 119 | return embedding_matrix 120 | -------------------------------------------------------------------------------- /custom_script_demos/keras_nlp/util/preprocessing.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | # Python Built-Ins: 4 | import gzip 5 | import os 6 | import shutil 7 | import subprocess 8 | import tarfile 9 | import time 10 | from typing import Optional 11 | 12 | # External Dependencies: 13 | import numpy as np 14 | from sklearn import preprocessing 15 | import tensorflow as tf 16 | from tensorflow.keras.preprocessing.text import Tokenizer 17 | from tensorflow.keras.preprocessing.sequence import pad_sequences 18 | 19 | 20 | def wait_for_file_stable(path: str, stable_secs: int=60, poll_secs: Optional[int]=None) -> bool: 21 | """Wait for a file to become stable (not recently modified) & return existence 22 | 23 | Returns False if file does not exist. Raises FileNotFoundError if file deleted during polling. 24 | 25 | When running through the two notebooks at the same time in parallel, this helps to minimize any 26 | errors caused by initiating multiple downloads/extractions/etc on the same file in parallel. 27 | """ 28 | if not poll_secs: 29 | poll_secs = stable_secs / 4 30 | try: 31 | init_stat = os.stat(path) 32 | except FileNotFoundError: 33 | return False 34 | 35 | if (time.time() - init_stat.st_mtime) < stable_secs: 36 | print(f"Waiting for file to stabilize... {path}") 37 | while (time.time() - os.stat(path).st_mtime) < stable_secs: 38 | time.sleep(poll_secs) 39 | print("File ready") 40 | 41 | return True 42 | 43 | 44 | def dummy_encode_labels(df,label): 45 | encoder = preprocessing.LabelEncoder() 46 | encoded_y = encoder.fit_transform(df[label].values) 47 | num_classes = len(encoder.classes_) 48 | # convert integers to dummy variables (i.e. 
one hot encoded) 49 | dummy_y = np.eye(num_classes, dtype="float32")[encoded_y] 50 | return dummy_y, encoder.classes_ 51 | 52 | 53 | def tokenize_and_pad_docs(df, columns, max_length=40): 54 | docs = df[columns].values 55 | # prepare tokenizer 56 | t = Tokenizer() 57 | t.fit_on_texts(docs) 58 | vocab_size = len(t.word_index) + 1 59 | # integer encode the documents 60 | encoded_docs = t.texts_to_sequences(docs) 61 | print(f"Vocabulary size: {vocab_size}") 62 | print("Padding docs to max_length={} (truncating {} docs)".format( 63 | max_length, 64 | sum(1 for doc in encoded_docs if len(doc) > max_length), 65 | )) 66 | padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding="post") 67 | print(f"Number of headlines: {len(padded_docs)}") 68 | return padded_docs, t 69 | 70 | 71 | def get_word_embeddings(t, folder, lang="en"): 72 | """Download pre-trained word vectors and construct an embedding matrix for tokenizer `t` 73 | 74 | Any tokens in `t` not found in the embedding vectors are mapped to all-zeros. 75 | """ 76 | vecs_url = f"https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.{lang}.300.vec.gz" 77 | vecs_gz_filename = vecs_url.rpartition("/")[2] 78 | os.makedirs(folder, exist_ok=True) 79 | vecs_gz_filepath = os.path.join(folder, vecs_gz_filename) 80 | 81 | # Tokenizer.num_words is nullable, and there's an OOV token, so: 82 | tokenizer_vocab_size = len(t.word_index) + 1 83 | 84 | if wait_for_file_stable(vecs_gz_filepath): 85 | print("Using existing embeddings file") 86 | else: 87 | print("Downloading word vectors...") 88 | subprocess.run( 89 | [" ".join(["curl", vecs_url, "-o", vecs_gz_filepath])], check=True, shell=True 90 | ) 91 | 92 | print("Loading into memory...") 93 | embeddings_index = dict() 94 | with gzip.open(vecs_gz_filepath, "rt") as zipf: 95 | firstline = zipf.readline() 96 | emb_vocab_size, emb_d = firstline.split(" ") 97 | emb_vocab_size = int(emb_vocab_size) 98 | emb_d = int(emb_d) 99 | for line in zipf: 100 | values = line.split() 101 | word = values[0] 102 | # Only load subset of the embeddings recognised by the tokenizer: 103 | if word in t.word_index: 104 | coefs = np.asarray(values[1:], dtype="float32") 105 | embeddings_index[word] = coefs 106 | print("Loaded {} of {} word vectors for tokenizer vocabulary length {}".format( 107 | len(embeddings_index), 108 | emb_vocab_size, 109 | tokenizer_vocab_size, 110 | )) 111 | 112 | # create a weight matrix for words in training docs 113 | embedding_matrix = np.zeros((tokenizer_vocab_size, emb_d)) 114 | for word, i in t.word_index.items(): 115 | embedding_vector = embeddings_index.get(word) 116 | if embedding_vector is not None: 117 | embedding_matrix[i] = embedding_vector 118 | 119 | return embedding_matrix 120 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user_setup/fn_user_setup/base.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Shared (CloudFormation resource property) definitions""" 4 | # Python Built-Ins: 5 | from __future__ import annotations 6 | import json 7 | from typing import Optional, Union 8 | 9 | 10 | class StudioUserSetupResourceProperties: 11 | """Parser for CloudFormation resource properties for this Custom Resource 12 | 13 | Resource Properties 14 | ------------------- 15 | DomainId: str 16 | ID of the (already existing) target SageMaker Studio domain. 
17 | HomeEfsFileSystemUid : Union[str, int] 18 | EFS user ID (numeric) of the target SageMaker Studio user. You can get this from the 19 | SageMaker DescribeUserProfile API. 20 | UserProfileName : str 21 | Name of the target SageMaker Studio user profile. 22 | TargetPath : Optional[str] 23 | Path (relative to Studio home folder) where the content should be loaded. If not set, this 24 | will default to the repository name or source file name. Trying to escape the Studio home 25 | folder with '../' is not supported and may have unintended consequences (including possibly 26 | writing to other users' folders). 27 | GitRepository : Optional[str] 28 | (Required if using git) A `git clone`able URL. 29 | GitCheckout : Optional[str] 30 | (Only used if `GitRepository` is set) A `git checkout`able name (e.g. branch name) in your 31 | target repository. If not provided, the cloned repository will remain on the default 32 | branch. 33 | ContentS3Uri : Optional[str] 34 | s3://doc-example-bucket/path URI for fetching the content. Currently only an individual 35 | object is supported (not folder prefix). 36 | AuthenticateS3 : Optional[bool] 37 | (Only if using `ContentS3Uri`) Set true to authenticate S3 requests with this Lambda's IAM 38 | identity. By default (false), requests will be anonymous/unsigned - which is appropriate 39 | for public buckets such as sample data and the AWS Open Data Registry. 40 | ExtractContent (bool, optional): 41 | (Only if using `ContentS3Uri`) Set true to unzip the content after download. By default 42 | (false), the object will simply be downloaded as-is. Tarballs and other archive formats 43 | apart from zip files are not currently supported. 44 | """ 45 | 46 | # Common parameters: 47 | domain_id: str 48 | home_efs_file_system_uid: Union[str, int] 49 | user_profile_name: str 50 | target_path: Optional[str] 51 | # Parameters for Git content: 52 | git_repository: Optional[str] 53 | git_checkout: Optional[str] 54 | # Parameters for S3 content: 55 | content_s3_uri: Optional[str] 56 | authenticate_s3: bool 57 | extract_content: bool 58 | # Parameters for SageMaker projects: 59 | enable_projects: bool 60 | 61 | def __init__(self, resource_properties: dict): 62 | self.domain_id = resource_properties["DomainId"] 63 | self.home_efs_file_system_uid = resource_properties["HomeEfsFileSystemUid"] 64 | self.user_profile_name = resource_properties["UserProfileName"] 65 | self.target_path = resource_properties.get("TargetPath") 66 | 67 | # Git content: 68 | self.git_checkout = resource_properties.get("GitCheckout") 69 | self.git_repository = resource_properties.get("GitRepository") 70 | 71 | # S3 content: 72 | self.authenticate_s3 = resource_properties.get("AuthenticateS3", False) 73 | self.content_s3_uri = resource_properties.get("ContentS3Uri") 74 | self.extract_content = resource_properties.get("ExtractContent", False) 75 | 76 | # SageMaker projects: 77 | self.enable_projects = resource_properties.get("EnableProjects", False) 78 | 79 | # Validations: 80 | if self.git_repository and self.content_s3_uri: 81 | raise ValueError( 82 | "Cannot set both GitRepository and ContentS3Uri: Create a separate custom " 83 | "resource instance for your git and S3 content items" 84 | ) 85 | if not (self.git_repository or self.content_s3_uri): 86 | raise ValueError( 87 | "Must set either GitRepository (git content) or ContentS3Uri (S3 content)" 88 | ) 89 | 90 | def __str__(self): 91 | dict_val = { 92 | "DomainId": self.domain_id, 93 | "HomeEfsFileSystemUid": self.home_efs_file_system_uid, 94 | 
"UserProfileName": self.user_profile_name, 95 | } 96 | if self.target_path: 97 | dict_val["TargetPath"] = self.target_path 98 | if self.git_checkout: 99 | dict_val["GitCheckout"] = self.git_checkout 100 | if self.git_repository: 101 | dict_val["GitRepository"] = self.git_repository 102 | if self.content_s3_uri: 103 | dict_val["ContentS3Uri"] = self.content_s3_uri 104 | if self.authenticate_s3: 105 | dict_val["AuthenticateS3"] = self.authenticate_s3 106 | if self.extract_content: 107 | dict_val["ExtractContent"] = self.extract_content 108 | return json.dumps(dict_val) 109 | 110 | @classmethod 111 | def from_str(cls, str_val) -> StudioUserSetupResourceProperties: 112 | return cls(json.loads(str_val)) 113 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/domain/fn_domain/vpctools.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Utilities for analyzing VPCs for use with SageMaker Studio""" 4 | # Python Built-Ins: 5 | import ipaddress 6 | import logging 7 | from typing import Tuple, Union 8 | 9 | # External Dependencies: 10 | import boto3 11 | 12 | logger = logging.getLogger("vpctools") 13 | ec2 = boto3.client("ec2") 14 | 15 | 16 | def get_studio_efs_security_group_ids( 17 | studio_domain_id: str, vpc_id: str 18 | ) -> Tuple[Union[str, None], Union[str, None]]: 19 | """Retrieve the security groups you need for [inbound, outbound] comms with SMStudio EFS filesystem 20 | 21 | Returns 22 | ------- 23 | inbound : Union[str, None] 24 | Security Group ID for inbound connection from SMStudio filesystem, or None if could not be found 25 | outbound : str 26 | Secrity Group ID for outbound connection to SMStudio filesystem, or None if could nom be found 27 | 28 | Raises 29 | ------ 30 | ValueError : 31 | If multiple potential SGs are found for either inbound or outbound connection (suggests duplication 32 | or otherwise erroneous SMStudio/VPC setup). 33 | Other : 34 | As per boto3 EC2 describe_security_groups() 35 | """ 36 | inbound_sg_name = f"security-group-for-inbound-nfs-{studio_domain_id}" 37 | outbound_sg_name = f"security-group-for-outbound-nfs-{studio_domain_id}" 38 | nfs_sgs = ec2.describe_security_groups( 39 | Filters=[ 40 | {"Name": "vpc-id", "Values": [vpc_id]}, 41 | {"Name": "group-name", "Values": [inbound_sg_name, outbound_sg_name]}, 42 | ], 43 | )["SecurityGroups"] 44 | inbound_sgs = list( 45 | filter( 46 | lambda sg: sg["GroupName"] == inbound_sg_name, 47 | nfs_sgs, 48 | ) 49 | ) 50 | n_inbound_sgs = len(inbound_sgs) 51 | outbound_sgs = list( 52 | filter( 53 | lambda sg: sg["GroupName"] == outbound_sg_name, 54 | nfs_sgs, 55 | ) 56 | ) 57 | n_outbound_sgs = len(outbound_sgs) 58 | if n_inbound_sgs > 1 or n_outbound_sgs > 1: 59 | raise ValueError( 60 | "Found duplicate EFS security groups for SMStudio {}: Got {} inbound, {} outbound".format( 61 | studio_domain_id, 62 | n_inbound_sgs, 63 | n_outbound_sgs, 64 | ) 65 | ) 66 | return ( 67 | inbound_sgs[0]["GroupId"] if n_inbound_sgs else None, 68 | outbound_sgs[0]["GroupId"] if n_outbound_sgs else None, 69 | ) 70 | 71 | 72 | def propose_subnet(vpc_id, new_subnet_prefixlen=26): 73 | """Propose a valid configuration for a new (IPv4) subnet to add to the VPC for CF stack purposes. 
74 | 75 | Parameters 76 | ---------- 77 | vpc_id : str 78 | ID of the VPC to propose a subnet for 79 | new_subnet_prefixlen : int (optional) 80 | CIDR mask length in bits for requested *new* subnet to propose. Defaults to 26 bits (64 IPs) 81 | """ 82 | logger.info(f"Proposing admin subnet for VPC {vpc_id}...") 83 | # Get VPC info: 84 | vpc_list = ec2.describe_vpcs( 85 | Filters=[{"Name": "vpc-id", "Values": [vpc_id]}], 86 | )["Vpcs"] 87 | if not len(vpc_list): 88 | raise ValueError(f"VPC ID {vpc_id} not found") 89 | vpc_description = vpc_list[0] 90 | existing_subnets = ec2.describe_subnets( 91 | Filters=[{"Name": "vpc-id", "Values": [vpc_id]}], 92 | )["Subnets"] 93 | 94 | # Load CIDRs of provided VPC and existing subnets with Python ipaddress library: 95 | logger.info(f"Parsing existing CIDRs...") 96 | vpc_net = ipaddress.ip_network(vpc_description["CidrBlock"]) 97 | existing_nets = list( 98 | map( 99 | lambda subnet: ipaddress.ip_network(subnet["CidrBlock"]), 100 | existing_subnets, 101 | ) 102 | ) 103 | 104 | # Validate existing configuration: 105 | # (Could probably skip this since we just retrieved fresh data, but might help to prevent any weird 106 | # errors manifesting as harder-to-interpret issues further down) 107 | for subnet in existing_nets: 108 | if not subnet.subnet_of(vpc_net): 109 | raise ValueError(f"Listed 'subnet' {subnet} is not inside VPC {vpc_net}") 110 | for checknet in existing_nets: 111 | if checknet != subnet and subnet.overlaps(checknet): 112 | raise ValueError(f"Listed subnets {subnet} and {checknet} overlap") 113 | 114 | # Calculate remaining vacant ranges: 115 | logger.info(f"Calculating remaining vacant ranges...") 116 | available_nets = [vpc_net] 117 | for subnet in existing_nets: 118 | next_available = [] 119 | for vacancy in available_nets: 120 | if vacancy.subnet_of(subnet): 121 | # This gap is fully contained by `subnet` 122 | continue 123 | try: 124 | # Preserve the list of subranges in `vacancy` after excluding `subnet`: 125 | next_available += list(vacancy.address_exclude(subnet)) 126 | except ValueError: 127 | # This `vacancy` does not contain `subnet`: 128 | next_available.append(vacancy) 129 | available_nets = next_available 130 | available_nets.sort() 131 | 132 | # Select the first available subnet of requested size: 133 | try: 134 | parent = next( 135 | filter( 136 | lambda n: n.prefixlen <= new_subnet_prefixlen, 137 | available_nets, 138 | ) 139 | ) 140 | except StopIteration: 141 | raise ValueError(f"No vacant subnets of requested size /{new_subnet_prefixlen} left in VPC") 142 | 143 | if parent.prefixlen == new_subnet_prefixlen: 144 | proposed_net = parent 145 | else: 146 | diff = new_subnet_prefixlen - parent.prefixlen 147 | proposed_net = next(parent.subnets(diff)) 148 | 149 | return {"CidrBlock": str(proposed_net)} 150 | -------------------------------------------------------------------------------- /custom_script_demos/pytorch_nlp/src/main.py: -------------------------------------------------------------------------------- 1 | """CNN-based text classification on SageMaker with PyTorch""" 2 | 3 | # Python Built-Ins: 4 | import argparse 5 | import os 6 | import io 7 | import logging 8 | import sys 9 | 10 | # External Dependencies: 11 | import numpy as np 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | import torch.optim as optim 16 | from torch.utils.data import DataLoader 17 | 18 | # Configure log level & destination for running nicely in SageMaker: 19 | logger = logging.getLogger(__name__) 20 | 
logger.setLevel(logging.DEBUG) 21 | logger.addHandler(logging.StreamHandler(sys.stdout)) 22 | 23 | 24 | class Net(nn.Module): 25 | """Custom PyTorch model definition: A basic 1D CNN for text""" 26 | 27 | def __init__(self, vocab_size=400000, emb_dim=300, num_classes=4): 28 | super(Net, self).__init__() 29 | self.embedding = nn.Embedding(vocab_size, emb_dim) 30 | self.conv1 = nn.Conv1d(emb_dim, 128, kernel_size=3) 31 | self.max_pool1d = nn.MaxPool1d(5) 32 | self.flatten1 = nn.Flatten() 33 | self.dropout1 = nn.Dropout(p=0.3) 34 | self.fc1 = nn.Linear(896, 128) 35 | self.fc2 = nn.Linear(128, num_classes) 36 | 37 | def forward(self, x): 38 | x = self.embedding(x) 39 | x = torch.transpose(x,1,2) 40 | x = self.flatten1(self.max_pool1d(self.conv1(x))) 41 | x = self.dropout1(x) 42 | x = F.relu(self.fc1(x)) 43 | x = self.fc2(x) 44 | return F.softmax(x, dim=-1) 45 | 46 | 47 | class Dataset(torch.utils.data.Dataset): 48 | """Custom PyTorch dataset for text classification""" 49 | 50 | def __init__(self, data: np.array, labels: np.array): 51 | "Initialization" 52 | self.labels = labels 53 | self.data = data 54 | 55 | def __len__(self): 56 | "Denotes the total number of samples" 57 | return len(self.data) 58 | 59 | def __getitem__(self, index): 60 | # Load data and get label 61 | X = torch.as_tensor(self.data[index]).long() 62 | y = torch.as_tensor(self.labels[index]) 63 | return X, y 64 | 65 | 66 | def load_training_data(base_dir): 67 | X_train = np.load(os.path.join(base_dir, "train_X.npy")) 68 | y_train = np.load(os.path.join(base_dir, "train_Y.npy")) 69 | return DataLoader(Dataset(X_train, y_train), batch_size=16) 70 | 71 | 72 | def load_testing_data(base_dir): 73 | X_test = np.load(os.path.join(base_dir, "test_X.npy")) 74 | y_test = np.load(os.path.join(base_dir, "test_Y.npy")) 75 | return DataLoader(Dataset(X_test, y_test), batch_size=1) 76 | 77 | 78 | def load_embeddings(base_dir): 79 | embedding_matrix = np.load(os.path.join(base_dir, "docs-embedding-matrix.npy")) 80 | return embedding_matrix 81 | 82 | 83 | def parse_args(): 84 | """Acquire hyperparameters and directory locations passed by SageMaker""" 85 | parser = argparse.ArgumentParser() 86 | 87 | # Hyperparameters sent by the client are passed as command-line arguments to the script. 
88 | parser.add_argument("--epochs", type=int, default=1) 89 | parser.add_argument("--learning_rate", type=float, default=0.001) 90 | parser.add_argument("--num_classes", type=int, default=4) 91 | parser.add_argument("--max_seq_len", type=int, default=40) 92 | 93 | # Data, model, and output directories 94 | parser.add_argument("--output-data-dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR")) 95 | parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR")) 96 | parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN")) 97 | parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TEST")) 98 | parser.add_argument("--embeddings", type=str, default=os.environ.get("SM_CHANNEL_EMBEDDINGS")) 99 | 100 | return parser.parse_known_args() 101 | 102 | 103 | def test(model, test_loader, device): 104 | model.eval() 105 | test_loss = 0.0 106 | correct = 0 107 | with torch.no_grad(): 108 | for data, target in test_loader: 109 | data, target = data.to(device), target.to(device) 110 | output = model(data) 111 | test_loss += F.binary_cross_entropy(output, target, reduction="sum").item() 112 | pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability 113 | target_index = target.max(1, keepdim=True)[1] 114 | correct += pred.eq(target_index).sum().item() 115 | 116 | test_loss /= len(test_loader.dataset) # Average loss over dataset samples 117 | print(f"val_loss: {test_loss:.4f}, val_acc: {correct/len(test_loader.dataset):.4f}") 118 | 119 | 120 | def train(args): 121 | ###### Load data from input channels ############ 122 | train_loader = load_training_data(args.train) 123 | test_loader = load_testing_data(args.test) 124 | embedding_matrix = load_embeddings(args.embeddings) 125 | 126 | ###### Setup model architecture ############ 127 | model = Net( 128 | vocab_size=embedding_matrix.shape[0], 129 | emb_dim=embedding_matrix.shape[1], 130 | num_classes=args.num_classes, 131 | ) 132 | model.embedding.weight = torch.nn.parameter.Parameter(torch.FloatTensor(embedding_matrix), False) 133 | device = torch.device("cpu") 134 | if torch.cuda.is_available(): 135 | device = torch.device("cuda") 136 | model.to(device) 137 | optimizer = optim.RMSprop(model.parameters(), lr=args.learning_rate) 138 | 139 | for epoch in range(1, args.epochs + 1): 140 | model.train() 141 | running_loss = 0.0 142 | n_batches = 0 143 | for batch_idx, (X_train, y_train) in enumerate(train_loader, 1): 144 | data, target = X_train.to(device), y_train.to(device) 145 | optimizer.zero_grad() 146 | output = model(data) 147 | loss = F.binary_cross_entropy(output, target) 148 | loss.backward() 149 | optimizer.step() 150 | running_loss += loss.item() 151 | n_batches += 1 152 | print(f"epoch: {epoch}, train_loss: {running_loss / n_batches:.6f}") # (Avg over batches) 153 | print("Evaluating model") 154 | test(model, test_loader, device) 155 | save_model(model, args.model_dir, args.max_seq_len) 156 | 157 | 158 | def save_model(model, model_dir, max_seq_len): 159 | path = os.path.join(model_dir, "model.pth") 160 | x = torch.randint(0, 10, (1, max_seq_len)) 161 | model = model.cpu() 162 | model.eval() 163 | m = torch.jit.trace(model, x) 164 | torch.jit.save(m, path) 165 | 166 | 167 | def model_fn(model_dir): 168 | """Customized model loading function for inference 169 | 170 | https://sagemaker.readthedocs.io/en/stable/frameworks/pytorch/using_pytorch.html#load-a-model 171 | """ 172 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 173 | 
model = torch.jit.load(os.path.join(model_dir, "model.pth")).to(device) 174 | return model 175 | 176 | ###### Main application ############ 177 | if __name__ == "__main__": 178 | 179 | ###### Parse input arguments ############ 180 | args, unknown = parse_args() 181 | 182 | train(args) 183 | -------------------------------------------------------------------------------- /.infrastructure/README.md: -------------------------------------------------------------------------------- 1 | # Workshop infrastructure 2 | 3 | This project provides infrastructure-as-code to deploy an [Amazon SageMaker Studio Domain](https://docs.aws.amazon.com/sagemaker/latest/dg/sm-domain.html) pre-configured and ready to use in a guided workshop setting (in case you don't have one already). 4 | 5 | 6 | ## Architecture overview 7 | 8 | This infrastructure, including the optional SageMaker Studio Domain deployment, is implemented in and deployed through [AWS CDK for Python](https://aws.amazon.com/cdk/). Since deploying CDK code requires setting up a development environment (as detailed below), we also provide a directly-deployable ["bootstrap" CloudFormation template](cfn_bootstrap.yaml) which fetches this repository and runs the CDK deployment via [AWS CodeBuild](https://aws.amazon.com/codebuild/). 9 | 10 | > ⚠️ **Note:** The above CloudFormation template creates an AWS CodeBuild Project with broad IAM permissions to deploy the solution on your behalf. It's not recommended for use in production environments where [least-privilege principles](https://aws.amazon.com/blogs/security/techniques-for-writing-least-privilege-iam-policies/) should be followed. 11 | 12 | For a detailed list of other security configurations you might want to optimize before using the stack in production, you can enable [cdk-nag](https://github.com/cdklabs/cdk-nag) by running the build with the `CDK_NAG=true` environment variable or editing the defaults in [cdk_app.py](cdk_app.py). You don't need to request stack deployment to complete this analysis: running `npx cdk synth` would show the same error list. 13 | 14 | 15 | ## Development environment pre-requisites 16 | 17 | To customize and deploy from source code, you'll need: 18 | 19 | - [NodeJS](https://nodejs.org/en) installed 20 | - The minimum required version is specified in the [package.json](package.json) `engines` field and the canonical development version is specified in [.nvmrc](.nvmrc) 21 | - If you work across multiple projects and need to manage multiple parallel versions of NodeJS on your system, you may want to install it via [NVM](https://github.com/nvm-sh/nvm) or [NVM-Windows](https://github.com/coreybutler/nvm-windows) 22 | - [Python](https://www.python.org/) 23 | - The minimum required version is specified in [pyproject.toml](pyproject.toml) and the canonical development version is specified in [.python-version](.python-version) 24 | - If you work across multiple projects and need to manage multiple parallel versions of Python on your system, you may want to install it via [pyenv](https://github.com/pyenv/pyenv) or [pyenv for Windows](https://github.com/pyenv-win/pyenv-win) 25 | - The [AWS CLI](https://aws.amazon.com/cli/) installed and [configured / logged in](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) with access to your AWS Account 26 | - [Docker Desktop](https://www.docker.com/products/docker-desktop/) (or a suitable alternative) installed for building (and optionally testing locally) container images.
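As a quick sanity check of the pre-requisites above, you can run the following from a Bash-like shell (the commands are illustrative - compare the reported versions against the files referenced above):

```sh
$ node --version              # Should satisfy the `engines` range in package.json
$ python3 --version           # Should match .python-version
$ aws sts get-caller-identity # Should return the AWS account you intend to deploy to
$ docker info                 # Should succeed if Docker (or your alternative) is running
```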
27 | 28 | 29 | ## Getting started 30 | 31 | The following commands assume you're working in a terminal from the same directory as this README. 32 | 33 | These examples use `$` to indicate the prompt of a Bash/POSIX-like shell (e.g. on macOS or Linux), and `%` to indicate a Windows-like shell. You only need to type the commands **after** the prompt! 34 | 35 | ### Install & activate 36 | 37 | If you haven't already, consider setting your `AWS_REGION` and `AWS_DEFAULT_REGION` environment variables to your target AWS Region for deployment: 38 | 39 | ```sh 40 | $ export AWS_REGION="us-west-2" 41 | $ export AWS_DEFAULT_REGION="us-west-2" 42 | ``` 43 | 44 | **IF** you're using NVM and/or pyenv, first activate the target versions of NodeJS and/or Python: 45 | 46 | ```sh 47 | $ nvm use # Should discover version from .nvmrc file 48 | $ pyenv local # Should discover version from .python-version file 49 | ``` 50 | 51 | Install the [CDK Toolkit CLI](https://docs.aws.amazon.com/cdk/v2/guide/cli.html) and any other NodeJS dependencies from [package.json](package.json) **locally** in the project: 52 | 53 | ```sh 54 | $ npm install # Will enable locally-versioned `npx cdk deploy` rather than global `cdk` CLI 55 | ``` 56 | 57 | The initialization process [should *automatically* create](https://docs.aws.amazon.com/cdk/v2/guide/work-with-cdk-python.html) a Python virtualenv when you first run e.g. `npx cdk synth`, if you have the `virtualenv` package installed - but if you prefer to create one manually you can run: 58 | 59 | ```sh 60 | $ python3 -m venv .venv 61 | ``` 62 | 63 | After the init process completes and the virtualenv is created, you can use the following 64 | step to activate your virtualenv (from Bash/POSIX-like shells): 65 | 66 | ```sh 67 | $ source .venv/bin/activate 68 | ``` 69 | 70 | If you are on a Windows platform, you would activate the virtualenv like this: 71 | 72 | ``` 73 | % .venv\Scripts\activate.bat 74 | ``` 75 | 76 | Once the virtualenv is activated, you can install the required dependencies: 77 | 78 | ``` 79 | (.venv) $ pip install -r requirements.txt 80 | ``` 81 | 82 | (If you need to add any dependencies, simply add them to your requirements.txt and re-run this installation in your Python virtual environment) 83 | 84 | 85 | ### Synthesizing and deploying with CDK 86 | 87 | Once your AWS CLI is configured, virtual environment activated and dependencies installed, you should be able to use the CDK application. If you haven't already deployed CDK-based infrastructure in your AWS Account & Region, first [bootstrap](https://docs.aws.amazon.com/cdk/v2/guide/cli.html#cli-bootstrap) your environment by running: 88 | 89 | ```sh 90 | $ npm run cdk:bootstrap 91 | ``` 92 | 93 | Then, you should be able to directly synthesize and deploy this project by running: 94 | 95 | ```sh 96 | $ npm run deploy 97 | 98 | # Or optionally to suppress approval prompts: 99 | $ npm run deploy -- --require-approval never 100 | ``` 101 | 102 | To delete your deployed stacks, you can run: 103 | 104 | ```sh 105 | $ npm run destroy 106 | 107 | # Or optionally to suppress approval prompts: 108 | $ npm run destroy -- --force 109 | ``` 110 | 111 | The NPM `deploy` script (and others) are defined in the `scripts` field of [package.json](package.json) and run inside the NPM context, so they have access to the locally-installed version of the `cdk` CLI. The `--` separates arguments for NPM from those that should be passed through to the underlying script.
The `app` field of [cdk.json](cdk.json) defines the entry-point command for `cdk` commands. 112 | 113 | You can also run CDK commands directly via [npx](https://docs.npmjs.com/cli/v7/commands/npx) if you prefer - for example to **just synthesize** the CloudFormation template(s) instead of also deploying them: 114 | 115 | ```sh 116 | $ npx cdk synth --all # Note no extra '--' required here 117 | ``` 118 | 119 | See the [CDK Toolkit CLI docs](https://docs.aws.amazon.com/cdk/v2/guide/cli.html) for other useful commands you can run (but add the `npx` prefix!). 120 | 121 | 122 | ## Re-configuring the stack 123 | 124 | [cdk_app.py](cdk_app.py) accepts some configuration parameters as environment variables. [cfn_bootstrap.yaml](cfn_bootstrap.yaml) uses these same environment variables to pass CloudFormation stack parameters through to the CDK build & deployment process. 125 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user/fn_user/main.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Custom CloudFormation Resource for a SageMaker Studio User Profile 4 | 5 | See `StudioUserResourceProperties` for expected CloudFormation resource properties. 6 | 7 | CloudFormation Return Values 8 | ---------------------------- 9 | Direct .Ref : 10 | Name of the created SageMaker Studio user profile 11 | UserProfileName : 12 | Name of the created SageMaker Studio user profile 13 | HomeEfsFileSystemUid : 14 | Home EFS File System POSIX user ID allocated for the created SageMaker Studio user (the UID 15 | they'll appear as when mounting the Studio Domain EFS). 16 | """ 17 | # Python Built-Ins: 18 | from __future__ import annotations 19 | import json 20 | import logging 21 | import time 22 | 23 | logging.getLogger().setLevel(logging.INFO) # Set log level for AWS Lambda *BEFORE* other imports 24 | 25 | # External Dependencies: 26 | import boto3 27 | 28 | # Local Dependencies: 29 | from cfn import CustomResourceEvent, CustomResourceRequestType 30 | 31 | logger = logging.getLogger("main") 32 | smclient = boto3.client("sagemaker") 33 | 34 | 35 | class StudioUserResourceProperties: 36 | """Parser for CloudFormation resource properties for this Custom Resource 37 | 38 | Resource Properties 39 | ------------------- 40 | 41 | DomainId : str 42 | (Required) SageMaker Studio Domain ID to create the profile on. 43 | UserProfileName : str 44 | (Required) Domain-unique name to give the user profile (update requires replacement). 45 | UserSettings : dict 46 | Optional user settings object to apply to the user profile. Default `{}`. 
47 | """ 48 | 49 | domain_id: str 50 | user_profile_name: str 51 | user_settings: dict 52 | 53 | def __init__(self, resource_properties: dict): 54 | self.domain_id = resource_properties["DomainId"] 55 | self.user_profile_name = resource_properties["UserProfileName"] 56 | self.user_settings = resource_properties.get("UserSettings", {}) 57 | 58 | def __str__(self): 59 | dict_val = { 60 | "DomainId": self.domain_id, 61 | "UserProfileName": self.user_profile_name, 62 | "UserSettings": self.user_settings, 63 | } 64 | return json.dumps(dict_val) 65 | 66 | @classmethod 67 | def from_str(cls, str_val) -> StudioUserResourceProperties: 68 | return cls(json.loads(str_val)) 69 | 70 | 71 | def lambda_handler(event_raw: dict, context: dict): 72 | """Main entry point for (CDK) Custom Resource Lambda""" 73 | logger.info(event_raw) 74 | event = CustomResourceEvent(event_raw, StudioUserResourceProperties) 75 | if event.request_type == CustomResourceRequestType.create: 76 | return handle_create(event, context) 77 | elif event.request_type == CustomResourceRequestType.update: 78 | return handle_update(event, context) 79 | elif event.request_type == CustomResourceRequestType.delete: 80 | return handle_delete(event, context) 81 | else: 82 | raise ValueError(f"Unsupported CFn RequestType '{event_raw['RequestType']}'") 83 | 84 | 85 | def handle_create(event: CustomResourceEvent[StudioUserResourceProperties], context): 86 | logging.info("**Received create request") 87 | 88 | logging.info("**Creating user profile") 89 | result = create_user_profile(event.props) 90 | # TODO: Do we need to wait for completion? 91 | response = { 92 | "UserProfileName": result["UserProfileName"], 93 | "HomeEfsFileSystemUid": result["HomeEfsFileSystemUid"], 94 | } 95 | print(response) 96 | return { 97 | "PhysicalResourceId": result["UserProfileName"], 98 | "Data": response, 99 | } 100 | 101 | 102 | def handle_delete(event: CustomResourceEvent[StudioUserResourceProperties], context): 103 | logging.info("**Received delete event") 104 | domain_id = event.props.domain_id 105 | try: 106 | smclient.describe_user_profile(DomainId=domain_id, UserProfileName=event.physical_id) 107 | except smclient.exceptions.ResourceNotFound: 108 | # Not found -> Treat as deletion successful 109 | return {"PhysicalResourceId": event.physical_id, "Data": {}} 110 | delete_user_profile(domain_id, event.physical_id) 111 | return {"PhysicalResourceId": event.physical_id, "Data": {}} 112 | 113 | 114 | def handle_update(event: CustomResourceEvent[StudioUserResourceProperties], context): 115 | logging.info("**Received update event") 116 | update_user_profile( 117 | domain_id=event.props.domain_id, 118 | user_profile_name=event.physical_id, 119 | user_settings=event.props.user_settings, 120 | ) 121 | return {"PhysicalResourceId": event.physical_id, "Data": {}} 122 | 123 | 124 | def create_user_profile(config: StudioUserResourceProperties): 125 | domain_id = config.domain_id 126 | user_profile_name = config.user_profile_name 127 | 128 | response = smclient.create_user_profile( 129 | DomainId=domain_id, 130 | UserProfileName=user_profile_name, 131 | UserSettings=config.user_settings, 132 | ) 133 | created = False 134 | time.sleep(0.2) 135 | while not created: 136 | response = smclient.describe_user_profile( 137 | DomainId=domain_id, UserProfileName=user_profile_name 138 | ) 139 | status_lower = response["Status"].lower() 140 | if status_lower == "inservice": 141 | created = True 142 | break 143 | elif "failed" in status_lower: 144 | raise ValueError( 145 | f"User 
'{user_profile_name}' entered Failed state during creation (domain {domain_id})", 146 | ) 147 | time.sleep(5) 148 | 149 | logging.info("**SageMaker user profile created successfully: %s (domain %s)", user_profile_name, domain_id) 150 | return response 151 | 152 | 153 | def delete_user_profile(domain_id: str, user_profile_name: str): 154 | response = smclient.delete_user_profile( 155 | DomainId=domain_id, 156 | UserProfileName=user_profile_name, 157 | ) 158 | deleted = False 159 | time.sleep(0.2) 160 | while not deleted: 161 | try: 162 | response = smclient.describe_user_profile( 163 | DomainId=domain_id, UserProfileName=user_profile_name 164 | ) 165 | status_lower = response["Status"].lower() 166 | if "failed" in status_lower: 167 | raise ValueError( 168 | f"User '{user_profile_name}' entered Failed state during deletion (domain {domain_id})", 169 | ) 170 | elif "deleting" not in status_lower: 171 | raise ValueError( 172 | f"User '{user_profile_name}' no longer 'Deleting' but not deleted (domain {domain_id})", 173 | ) 174 | except smclient.exceptions.ResourceNotFound: 175 | logging.info("Deleted user %s from domain %s", user_profile_name, domain_id) 176 | deleted = True 177 | break 178 | time.sleep(5) 179 | return response 180 | 181 | 182 | def update_user_profile(domain_id: str, user_profile_name: str, user_settings: dict): 183 | response = smclient.update_user_profile( 184 | DomainId=domain_id, 185 | UserProfileName=user_profile_name, 186 | UserSettings=user_settings, 187 | ) 188 | updated = False 189 | time.sleep(0.2) 190 | while not updated: 191 | response = smclient.describe_user_profile( 192 | DomainId=domain_id, UserProfileName=user_profile_name 193 | ) 194 | status_lower = response["Status"].lower() 195 | if status_lower == "inservice": 196 | updated = True 197 | break 198 | elif "failed" in status_lower: 199 | raise ValueError( 200 | f"User '{user_profile_name}' entered Failed state during update (domain {domain_id})", 201 | ) 202 | time.sleep(5) 203 | return response 204 | -------------------------------------------------------------------------------- /custom_script_demos/huggingface_nlp/scripts/train.py: -------------------------------------------------------------------------------- 1 | """Transformer-based text classification on SageMaker with Hugging Face""" 2 | 3 | # Python Built-Ins: 4 | import argparse 5 | import logging 6 | import os 7 | import sys 8 | from typing import List, Optional 9 | 10 | # External Dependencies: 11 | import datasets 12 | #from datasets import disable_progress_bar as disable_datasets_progress_bar 13 | from transformers import ( 14 | AutoModelForSequenceClassification, 15 | Trainer, 16 | TrainingArguments, 17 | AutoTokenizer, 18 | DataCollatorWithPadding, 19 | ) 20 | from sklearn.metrics import accuracy_score, precision_recall_fscore_support 21 | 22 | # Set up logging: 23 | logging.basicConfig( 24 | level=logging.getLevelName("INFO"), 25 | handlers=[logging.StreamHandler(sys.stdout)], 26 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 27 | ) 28 | logger = logging.getLogger(__name__) 29 | datasets.disable_progress_bar() # Too noisy on conventional log streams 30 | 31 | # Factoring your code out into smaller helper functions can help with debugging: 32 | 33 | 34 | def parse_args(): 35 | """Parse hyperparameters and data args from CLI arguments and environment variables""" 36 | parser = argparse.ArgumentParser() 37 | 38 | # hyperparameters sent by the client are passed as command-line arguments to the script.
39 | parser.add_argument("--model_id", type=str, required=True) 40 | parser.add_argument("--class_names", type=lambda s: s.split(","), required=True) 41 | parser.add_argument("--learning_rate", type=float, default=5e-5) 42 | parser.add_argument("--warmup_steps", type=int, default=500) 43 | parser.add_argument("--epochs", type=int, default=3) 44 | parser.add_argument("--train_max_steps", type=int, default=-1) 45 | parser.add_argument("--train_batch_size", type=int, default=32) 46 | parser.add_argument("--eval_batch_size", type=int, default=64) 47 | parser.add_argument("--fp16", type=int, default=1) 48 | 49 | # Data, model, and output folders are set by combination of CLI args and env vars: 50 | parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN")) 51 | parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TEST")) 52 | parser.add_argument("--model_dir", type=str, default=os.environ.get("SM_MODEL_DIR")) 53 | parser.add_argument("--output_data_dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR")) 54 | # parser.add_argument("--n_gpus", type=int, default=os.environ.get("SM_NUM_GPUS")) 55 | 56 | args, _ = parser.parse_known_args() 57 | return args 58 | 59 | 60 | def compute_metrics(pred): 61 | labels = pred.label_ids 62 | preds = pred.predictions.argmax(-1) 63 | precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="micro") 64 | acc = accuracy_score(labels, preds) 65 | return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall} 66 | 67 | 68 | def get_model(model_id: str, class_names: List[str]) -> ( 69 | AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding 70 | ): 71 | """Set up tokenizer, model, data_collator from job parameters""" 72 | tokenizer = AutoTokenizer.from_pretrained(model_id) 73 | 74 | model = AutoModelForSequenceClassification.from_pretrained( 75 | model_id, num_labels=len(class_names) 76 | ) 77 | model.config.label2id = {name: ix for ix, name in enumerate(class_names)} 78 | model.config.id2label = {ix: name for ix, name in enumerate(class_names)} 79 | 80 | data_collator = DataCollatorWithPadding(tokenizer=tokenizer) 81 | 82 | return tokenizer, model, data_collator 83 | 84 | 85 | def load_datasets(tokenizer: AutoTokenizer, train_dir: str, test_dir: Optional[str] = None) -> ( 86 | datasets.Dataset, Optional[datasets.Dataset] 87 | ): 88 | """Load and pre-process training (+ validation?) 
dataset(s)""" 89 | 90 | def preprocess(batch): 91 | """Tokenize and pre-process raw examples for training/validation""" 92 | result = tokenizer(batch["title"], truncation=True) 93 | result["label"] = batch["category"] 94 | return result 95 | 96 | 97 | raw_train_dataset = datasets.load_dataset( 98 | "csv", 99 | data_files=[os.path.join(train_dir, f) for f in os.listdir(train_dir)], 100 | column_names=["category", "title", "content"], 101 | split=datasets.Split.ALL, 102 | ) 103 | train_dataset = raw_train_dataset.map( 104 | preprocess, batched=True, batch_size=1000, remove_columns=raw_train_dataset.column_names 105 | ) 106 | logger.info(f"Loaded train_dataset length is: {len(train_dataset)}") 107 | if test_dir: 108 | # test channel is optional: 109 | raw_test_dataset = datasets.load_dataset( 110 | "csv", 111 | data_files=[os.path.join(test_dir, f) for f in os.listdir(test_dir)], 112 | column_names=["category", "title", "content"], 113 | split=datasets.Split.ALL, 114 | ) 115 | test_dataset = raw_test_dataset.map( 116 | preprocess, batched=True, batch_size=1000, remove_columns=raw_test_dataset.column_names 117 | ) 118 | logger.info(f"Loaded test_dataset length is: {len(test_dataset)}") 119 | else: 120 | test_dataset = None 121 | logger.info("No test_dataset provided") 122 | return train_dataset, test_dataset 123 | 124 | 125 | # Only run this main block if running as a script (e.g. in training), not when imported as a module 126 | # (which would be the case if used at inference): 127 | if __name__ == "__main__": 128 | # Load job parameters: 129 | args = parse_args() 130 | training_args = TrainingArguments( 131 | max_steps=args.train_max_steps, 132 | num_train_epochs=args.epochs, 133 | per_device_train_batch_size=args.train_batch_size, 134 | per_device_eval_batch_size=args.eval_batch_size, 135 | fp16=bool(args.fp16), 136 | evaluation_strategy="epoch", 137 | save_strategy="epoch", 138 | load_best_model_at_end=True, 139 | metric_for_best_model="f1", 140 | learning_rate=args.learning_rate, 141 | warmup_steps=args.warmup_steps, 142 | disable_tqdm=True, # Interactive progress bars too noisy on conventional log streams 143 | # You could save checkpoints & logs under args.output_data_dir to upload them, but it 144 | # increases job run time by a few minutes: 145 | output_dir="/tmp/transformers/checkpoints", 146 | logging_dir="/tmp/transformers/logs", 147 | ) 148 | 149 | # Load tokenizer/model/collator: 150 | tokenizer, model, collator = get_model(model_id=args.model_id, class_names=args.class_names) 151 | 152 | # Load and pre-process the dataset: 153 | train_dataset, test_dataset = load_datasets( 154 | tokenizer=tokenizer, 155 | train_dir=args.train, 156 | test_dir=args.test, 157 | ) 158 | 159 | # Create Trainer instance 160 | trainer = Trainer( 161 | model=model, 162 | args=training_args, 163 | compute_metrics=compute_metrics, 164 | train_dataset=train_dataset, 165 | eval_dataset=test_dataset, 166 | tokenizer=tokenizer, 167 | data_collator=collator, 168 | ) 169 | 170 | # Train the model 171 | trainer.train() 172 | 173 | # Save the model output 174 | trainer.save_model(args.model_dir) 175 | 176 | # Evaluate the final model and save a report, if test dataset provided: 177 | if test_dataset: 178 | eval_result = trainer.evaluate(eval_dataset=test_dataset) 179 | # The 'output' folder will also (separately from model) get uploaded to S3 by SageMaker: 180 | if args.output_data_dir: 181 | os.makedirs(args.output_data_dir, exist_ok=True) 182 | with open(os.path.join(args.output_data_dir, "eval_results.txt"), 
"w") as writer: 183 | print("***** Eval results *****") 184 | for key, value in sorted(eval_result.items()): 185 | writer.write(f"{key} = {value}\n") 186 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/iam.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """AWS CDK constructs for IAM roles in Amazon SageMaker workshops 4 | """ 5 | # Python Built-Ins: 6 | from typing import Mapping, Optional, Sequence 7 | 8 | # External Dependencies: 9 | from aws_cdk import Duration 10 | import aws_cdk.aws_iam as iam 11 | from aws_cdk.aws_iam import IManagedPolicy, IPrincipal, PolicyDocument 12 | from constructs import Construct 13 | 14 | 15 | class WorkshopSageMakerExecutionRole(iam.Role): 16 | """An IAM role set up for Amazon SageMaker execution in workshops 17 | 18 | This construct sets permissive permissions by default and is not recommended for production use 19 | """ 20 | 21 | def __init__( 22 | self, 23 | scope: Construct, 24 | id: str, 25 | *, 26 | assumed_by_extra: Optional[IPrincipal] = None, 27 | description: Optional[str] = None, 28 | enable_bedrock: bool = True, 29 | enable_codewhisperer: bool = True, 30 | enable_glueis: bool = True, 31 | enable_iamfullaccess: bool = False, 32 | enable_s3fullaccess: bool = True, 33 | enable_sagemakerfullaccess: bool = True, 34 | external_ids: Optional[Sequence[str]] = None, 35 | extras_inline_policy_name: str = "WorkshopExtras", 36 | inline_policies: Optional[Mapping[str, PolicyDocument]] = None, 37 | managed_policies: Optional[Sequence[IManagedPolicy]] = None, 38 | max_session_duration: Optional[Duration] = None, 39 | path: Optional[str] = None, 40 | permissions_boundary: Optional[IManagedPolicy] = None, 41 | role_name: Optional[str] = None, 42 | ) -> None: 43 | """Create a WorkshopSageMakerExecutionRole 44 | 45 | Parameters are generally as per CDK iam.Role, but with customized default values. 46 | 47 | Parameters 48 | ---------- 49 | scope : 50 | CDK construct scope 51 | id : 52 | CDK construct ID 53 | assumed_by_extra : 54 | Optionally provide an extra Principal this role should trust. SageMaker and (if 55 | `enable_glueis` is set) AWS Glue principals will already be trusted: You only need to 56 | set this parameter if needing to add an additional principal. 57 | description : 58 | A description of the role 59 | enable_bedrock : 60 | This construct will grant bedrock:* permissions in an inline policy by default. Set 61 | False to prevent this. 62 | enable_codewhisperer : 63 | This construct will grant the codewhisperer:GenerateRecommendations permission in an 64 | inline policy by default. Set False to prevent this. 65 | enable_glueis : 66 | This construct will trust the AWS Glue service and apply the AWS Managed 67 | AwsGlueSessionUserRestrictedServiceRole by default, for using Glue Interactive Sessions 68 | within SageMaker Studio notebooks. Set False to prevent this. 69 | enable_iamfullaccess : 70 | You can attach the AWS Managed IAMFullAccess policy to your role by setting this to 71 | `True`... But since this is a very broad permission, it's `False` by default. 72 | enable_s3fullaccess : 73 | By default, this construct will append the AmazonS3FullAccess AWS Managed Policy to 74 | your `managed_policies`. Set False to prevent this. 
75 | enable_sagemakerfullaccess : 76 | By default, this construct will append the AmazonSageMakerFullAccess AWS Managed Policy 77 | to your `managed_policies`. Set False to prevent this. 78 | external_ids : 79 | A list of external IDs that are allowed to assume the role 80 | extras_inline_policy_name : 81 | The name to use for the auto-generated Inline Policy of extra permissions for 82 | SageMaker workshops. 83 | inline_policies : 84 | Inline policies to attach to the role 85 | managed_policies : 86 | By default, we'll apply AWS policies AmazonSageMakerFullAccess, AmazonS3FullAccess, 87 | AwsGlueSessionUserRestrictedServiceRole, and IAMFullAccess. You only need to set this 88 | parameter if you want to override this. 89 | max_session_duration : 90 | The maximum session duration for the role 91 | path : 92 | The path for the role 93 | permissions_boundary : 94 | The permissions boundary for the role 95 | role_name : 96 | The name of the role 97 | """ 98 | principals = [iam.ServicePrincipal("sagemaker.amazonaws.com")] 99 | extra_managed_policies = [] 100 | inline_policy_statements = [] 101 | 102 | # Parse required extra principals/policies/statements from the config options: 103 | if enable_bedrock: 104 | inline_policy_statements.append( 105 | iam.PolicyStatement(actions=["bedrock:*"], resources=["*"], sid="BedrockAccess") 106 | ) 107 | if enable_codewhisperer: 108 | inline_policy_statements.append( 109 | iam.PolicyStatement( 110 | actions=["codewhisperer:GenerateRecommendations"], 111 | resources=["*"], 112 | sid="CodeWhispererPermissions", 113 | ) 114 | ) 115 | if enable_glueis: 116 | principals.append(iam.ServicePrincipal("glue.amazonaws.com")) 117 | extra_managed_policies.append( 118 | iam.ManagedPolicy.from_aws_managed_policy_name( 119 | "service-role/AwsGlueSessionUserRestrictedServiceRole" 120 | ) 121 | ) 122 | inline_policy_statements.append( 123 | # TODO: Scope this down better 124 | iam.PolicyStatement( 125 | actions=["iam:GetRole", "iam:PassRole", "sts:GetCallerIdentity"], 126 | resources=["*"], 127 | sid="GlueSessionsIAMPerms", 128 | ) 129 | ) 130 | if enable_iamfullaccess: 131 | extra_managed_policies.append( 132 | iam.ManagedPolicy.from_aws_managed_policy_name("IAMFullAccess") 133 | ) 134 | if enable_s3fullaccess: 135 | extra_managed_policies.append( 136 | iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess") 137 | ) 138 | if enable_sagemakerfullaccess: 139 | extra_managed_policies.append( 140 | iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess") 141 | ) 142 | 143 | # Apply the extras to the core iam.Role arguments: 144 | if assumed_by_extra: 145 | principals.append(assumed_by_extra) 146 | assumed_by = iam.CompositePrincipal(*principals) 147 | if len(extra_managed_policies): 148 | if not managed_policies: 149 | managed_policies = [] 150 | managed_policies = [*managed_policies, *extra_managed_policies] 151 | if len(inline_policy_statements): 152 | if not inline_policies: 153 | inline_policies = {} 154 | if extras_inline_policy_name in inline_policies: 155 | inline_policies[extras_inline_policy_name].add_statements(inline_policy_statements) 156 | else: 157 | inline_policies[extras_inline_policy_name] = iam.PolicyDocument( 158 | statements=inline_policy_statements, 159 | ) 160 | 161 | # Call iam.Role with the updated args: 162 | super().__init__( 163 | scope, 164 | id, 165 | assumed_by=assumed_by, 166 | description=description, 167 | external_ids=external_ids, 168 | inline_policies=inline_policies, 169 | managed_policies=managed_policies, 
170 | max_session_duration=max_session_duration, 171 | path=path, 172 | permissions_boundary=permissions_boundary, 173 | role_name=role_name, 174 | ) 175 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Getting Started with "Amazon SageMaker 101" 2 | 3 | This repository accompanies a hands-on training event to introduce data scientists (and ML-ready developers / technical leaders) to core model training and deployment workflows with [Amazon SageMaker](https://aws.amazon.com/sagemaker/). 4 | 5 | Like a "101" course in [the academic sense](https://en.wikipedia.org/wiki/101_(topic)), this will likely **not** be the simplest introduction to SageMaker you can find; nor the fastest way to get started with advanced features like [optimized SageMaker Distributed training](https://docs.aws.amazon.com/sagemaker/latest/dg/distributed-training.html) or [SageMaker Clarify for bias and explainability analyses](https://aws.amazon.com/sagemaker/clarify/). 6 | 7 | Instead, these exercises are chosen to demonstrate some core build/train/deploy patterns that we've found help new users to first get productive with SageMaker - and to later understand how the more advanced features fit in. 8 | 9 | ## Agenda 10 | 11 | An interactive walkthrough of the content with screenshots is available at: 12 | 13 | > **[https://sagemaker-101-workshop.workshop.aws/](https://sagemaker-101-workshop.workshop.aws/)** 14 | 15 | Sessions in suggested order: 16 | 17 | 1. [builtin_algorithm_hpo_tabular](builtin_algorithm_hpo_tabular): Explore some **pre-built algorithms** and tools for tabular data, including [SageMaker Canvas](https://aws.amazon.com/sagemaker/canvas/), [SageMaker AutoML APIs](https://docs.aws.amazon.com/sagemaker/latest/dg/use-auto-ml.html), the [XGBoost built-in algorithm](https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html), and [automatic hyperparameter tuning](https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning.html) 18 | - This module also includes a quick initial look at [SageMaker Feature Store](https://docs.aws.amazon.com/sagemaker/latest/dg/feature-store.html), [SageMaker Model Registry](https://docs.aws.amazon.com/sagemaker/latest/dg/model-registry.html), and the [AutoGluon built-in algorithm](https://docs.aws.amazon.com/sagemaker/latest/dg/autogluon-tabular.html) - but you don't need to dive deep on these topics. 19 | 1. [custom_script_demos](custom_script_demos): See how you can train and deploy your own models on SageMaker with **custom Python scripts** and the pre-built framework containers 20 | - (Optional) Start with [sklearn_reg](custom_script_demos/sklearn_reg) for an introduction if you're new to deep learning but familiar with Scikit-Learn 21 | - See [huggingface_nlp](custom_script_demos/huggingface_nlp) (preferred) for a side-by-side comparison of in-notebook versus on-SageMaker model training and inference for text classification - or alternatively the custom CNN-based [keras_nlp](custom_script_demos/keras_nlp) or [pytorch_nlp](custom_script_demos/pytorch_nlp) examples. 22 | 1. 
[migration_challenge](migration_challenge): **Apply** what you learned to port an in-notebook workflow to a SageMaker training job + endpoint deployment on your own
23 | - Choose the [sklearn_cls](migration_challenge/sklearn_cls), [keras_mnist](migration_challenge/keras_mnist) or [pytorch_mnist](migration_challenge/pytorch_mnist) challenge, depending on which ML framework you're most comfortable with.
24 |
25 |
26 | ## Deploying in Your Own Account
27 |
28 | The recommended way to explore these exercises is through Amazon SageMaker AI Studio - and you can deploy the [**template in .infrastructure/cfn_bootstrap.yaml**](.infrastructure/cfn_bootstrap.yaml) from the [AWS CloudFormation Console](https://console.aws.amazon.com/cloudformation/home) to get started with the same environment configuration we use for AWS-guided deliveries of this workshop.
29 |
30 | > ⚠️ Our `.infrastructure` is optimized for getting started easily with SageMaker Studio, but is not recommended for use in production environments!
31 |
32 | You can also [read more about how to onboard to SageMaker Studio](https://docs.aws.amazon.com/sagemaker/latest/dg/gs-studio-onboard.html) in the SageMaker AI Developer Guide, and learn [how SageMaker Studio Notebooks are different from Notebook Instances](https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-comparison.html). A more basic Notebook Instance-based CloudFormation stack is also available in [.simple.cf.yaml](.simple.cf.yaml), but some features of the labs will not be available.
33 |
34 | Depending on your setup, you may be asked to **choose a kernel** when opening some notebooks. There should be guidance at the top of each notebook on suggested kernel types, but if you can't find any, `Data Science 3.0 (Python 3)` (on Studio) or `conda_python3` (on Notebook Instances) are likely good options.
35 |
36 | ### Setting up widgets and code completion (JupyterLab extensions)
37 |
38 | Some of the examples depend on [ipywidgets](https://ipywidgets.readthedocs.io/en/latest/) and [ipycanvas](https://ipycanvas.readthedocs.io/en/latest/) for interactive inference demo widgets (but do provide code-only alternatives).
39 |
40 | We also usually enable some additional JupyterLab extensions powered by [jupyterlab-lsp](https://github.com/jupyter-lsp/jupyterlab-lsp#readme) and [jupyterlab-s3-browser](https://github.com/IBM/jupyterlab-s3-browser#readme) to improve user experience. You can find more information about these extensions in [this AWS ML blog post](https://aws.amazon.com/blogs/machine-learning/amazon-sagemaker-studio-and-sagemaker-notebook-instance-now-come-with-jupyterlab-3-notebooks-to-boost-developer-productivity/).
41 |
42 | `ipywidgets` should be available by default on SageMaker Studio, but was not pre-installed on Notebook Instances when we last tested. The other extensions require installation.
43 |
44 | To see how we automate these extra setup steps for AWS-run events, you can refer to the **lifecycle configuration scripts** in our CloudFormation templates. For a [Notebook Instance LCC](https://docs.amazonaws.cn/en_us/sagemaker/latest/dg/notebook-lifecycle-config.html), see the `AWS::SageMaker::NotebookInstanceLifecycleConfig` in [.simple.cf.yaml](.simple.cf.yaml). For a [SageMaker Studio LCC](https://docs.amazonaws.cn/en_us/sagemaker/latest/dg/studio-lcc-create.html), see the `Custom::StudioLifecycleConfig` in [.infrastructure/template.sam.yaml](.infrastructure/template.sam.yaml).
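If you're not sure whether your environment already has the widget libraries, here's a quick (purely illustrative) check you can run in a notebook cell before starting the interactive demos - the only assumption is the two package names:

```python
# Check that the widget libraries used by the interactive demos are importable:
import importlib.util

for pkg in ("ipywidgets", "ipycanvas"):
    if importlib.util.find_spec(pkg) is None:
        print(f"{pkg} is not installed - try `%pip install {pkg}` and restart the kernel")
    else:
        print(f"{pkg} is available")
```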
45 | 46 | 47 | ## Security 48 | 49 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 50 | 51 | 52 | ## License 53 | 54 | This library is licensed under the MIT-0 License. See the LICENSE file. 55 | 56 | 57 | ## Further Reading 58 | 59 | One major focus of this workshop is how SageMaker helps us right-size and segregate compute resources for different ML tasks, without sacrificing (and ideally accelerating!) data scientist productivity. For more information on this topic, see this post on the AWS Machine Learning Blog: [Right-sizing resources and avoiding unnecessary costs in Amazon SageMaker](https://aws.amazon.com/blogs/machine-learning/right-sizing-resources-and-avoiding-unnecessary-costs-in-amazon-sagemaker/) 60 | 61 | For a workshop that starts with a similar migration-based approach, but dives further into automated pipelines and CI/CD, check out [aws-samples/amazon-sagemaker-from-idea-to-production](https://github.com/aws-samples/amazon-sagemaker-from-idea-to-production). 62 | 63 | As you continue to explore Amazon SageMaker, you'll also find many more useful resources in: 64 | 65 | - The official **[Amazon SageMaker Examples repository](https://github.com/aws/amazon-sagemaker-examples)**: with a broad range of code samples covering SageMaker use cases from beginner to expert. 66 | - The **[documentation](https://sagemaker.readthedocs.io/en/stable/)** (and maybe even the [source code](https://github.com/aws/sagemaker-python-sdk)) for the **SageMaker Python SDK**: The high-level, open-source [PyPI library](https://pypi.org/project/sagemaker/) we use when we `import sagemaker`. 67 | - The **[Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html)**: documenting the SageMaker service itself. 68 | 69 | More advanced users may also find it helpful to refer to: 70 | 71 | - The **[boto3 reference for SageMaker](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html)** and the **[SageMaker API reference](https://docs.aws.amazon.com/sagemaker/latest/APIReference/Welcome.html)**: in case you have use cases for SageMaker where you want (or need) to use low-level APIs directly, instead of through the `sagemaker` library. 72 | - The **[AWS Deep Learning Containers](https://github.com/aws/deep-learning-containers)** and **[SageMaker Scikit-Learn Containers](https://github.com/aws/sagemaker-scikit-learn-container)** **source code**: For a deeper understanding of the framework container environments. 73 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user_setup/fn_user_setup/content.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Custom CloudFormation Resource for loading content to a SageMaker Studio user 4 | 5 | See `.base.StudioUserSetupResourceProperties` for CloudFormation input Properties, and main.py 6 | docstring for CloudFormation return values. 7 | 8 | This sub-resource either clones a (public) git repository or downloads content from Amazon S3, into 9 | a SageMaker Studio user's home folder on create. Updating and Deleting the resource currently do 10 | nothing as it's designed for one-off account setup. 
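For illustration (`GitRepository` and `ContentS3Uri` are the property names referenced by the code
below; the other names are inferred from the attribute names in
`.base.StudioUserSetupResourceProperties`, and all values are placeholders), a resource using this
handler might be configured with properties along these lines::

    {
        "DomainId": "d-xxxxxxxxxxxx",
        "UserProfileName": "workshop-user",
        "HomeEfsFileSystemUid": "200005",
        "GitRepository": "https://github.com/example-org/example-repo.git",
        "TargetPath": "example-repo",
    }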
11 | """ 12 | # Python Built-Ins: 13 | import logging 14 | import os 15 | import traceback 16 | from typing import Optional, Union 17 | import zipfile 18 | 19 | # External Dependencies: 20 | import boto3 21 | from botocore import UNSIGNED 22 | from botocore.config import Config 23 | from git import Repo 24 | 25 | # Local Dependencies: 26 | from base import StudioUserSetupResourceProperties 27 | from cfn import CustomResourceEvent 28 | 29 | anons3config = Config(signature_version=UNSIGNED) 30 | smclient = boto3.client("sagemaker") 31 | 32 | 33 | def handle_create(event: CustomResourceEvent[StudioUserSetupResourceProperties], context): 34 | """Handle a resource creation Lambda event from CloudFormation""" 35 | logging.info("**Received create request") 36 | logging.info("**Setting up user content") 37 | try: 38 | # Check home folder exists and is assigned to correct EFS owner: 39 | home_folder = ensure_home_dir(event.props.home_efs_file_system_uid) 40 | 41 | # Now ready to clone in Git content (or whatever else...) 42 | if event.props.git_repository: 43 | output_content_path = clone_git_repository( 44 | home_folder, 45 | event.props.git_repository, 46 | event.props.target_path, 47 | event.props.git_checkout, 48 | ) 49 | elif event.props.content_s3_uri: 50 | output_content_path = copy_s3_content( 51 | home_folder, 52 | event.props.content_s3_uri, 53 | event.props.target_path, 54 | event.props.extract_content, 55 | event.props.authenticate_s3, 56 | ) 57 | else: 58 | logging.warning("Neither GitRepository nor ContentS3Uri set - nothing to create") 59 | 60 | # Remember to set ownership/permissions for all the stuff we just created, to give the 61 | # user write access: 62 | chown_recursive(output_content_path, uid=event.props.home_efs_file_system_uid) 63 | print("All done") 64 | except Exception as e: 65 | # Don't bring the entire CF stack down just because we couldn't copy a repo: 66 | print("IGNORING CONTENT SETUP ERROR") 67 | traceback.print_exc() 68 | 69 | logging.info("**SageMaker Studio user '%s' set up successfully", event.props.user_profile_name) 70 | return { 71 | "PhysicalResourceId": event.props.user_profile_name, 72 | "Data": {"UserProfileName": event.props.user_profile_name}, 73 | } 74 | 75 | 76 | def handle_delete(event: CustomResourceEvent[StudioUserSetupResourceProperties], context): 77 | """Handle a resource deletion Lambda event from CloudFormation (a no-op for this resource)""" 78 | logging.info("**Received delete event") 79 | # Since this is a no-op, there's no point strictly parsing the props (risking failures): 80 | logging.info( 81 | "**Deleting user setup is a no-op: user '%s' on domain '%s", 82 | event.physical_id, 83 | event.props.domain_id, 84 | ) 85 | return {"PhysicalResourceId": event.physical_id, "Data": {}} 86 | 87 | 88 | def handle_update(event: CustomResourceEvent[StudioUserSetupResourceProperties], context): 89 | """Handle a resource update Lambda event from CloudFormation (a no-op for this resource)""" 90 | logging.info("**Received update event") 91 | # Since this is a no-op, there's no point strictly parsing the props (risking failures): 92 | logging.info( 93 | "**Updating user setup is a no-op: user '%s' on domain '%s", 94 | event.physical_id, 95 | event.props.domain_id, 96 | ) 97 | return {"PhysicalResourceId": event.physical_id, "Data": {}} 98 | 99 | 100 | def ensure_home_dir(efs_uid: Union[int, str]) -> str: 101 | """Check the EFS home folder for the given user ID exists with correct ownership 102 | 103 | The root of the EFS contains folders named for each 
user UID, but these may not be created 104 | before the user has first logged in (could os.listdir("/mnt/efs") to check). 105 | """ 106 | print("Creating/checking home folder...") 107 | home_folder = f"/mnt/efs/{efs_uid}" 108 | os.makedirs(home_folder, exist_ok=True) 109 | # Set correct ownership permissions for this folder straight away, in case a later process errors out 110 | os.chown(home_folder, int(efs_uid), -1) 111 | return home_folder 112 | 113 | 114 | def clone_git_repository( 115 | base_folder: str, git_repo: str, as_folder: Optional[str] = None, checkout: Optional[str] = None 116 | ) -> str: 117 | """Clone a git repository into `base_folder/as_folder` and optionally check out `checkout` 118 | 119 | DOES NOT CONFIGURE FILE OWNERSHIP PERMISSIONS! Run chown_recursive if required. 120 | """ 121 | print(f"Cloning code... {git_repo}") 122 | if not as_folder: 123 | # Infer target folder name from repo URL if not specified: 124 | as_folder = git_repo.rpartition("/")[2] 125 | if as_folder.lower().endswith(".git"): 126 | as_folder = as_folder[: -len(".git")] 127 | target_folder = os.path.join(base_folder, as_folder) 128 | repo = Repo.clone_from(git_repo, target_folder) 129 | if checkout: 130 | print(f"Checking out '{checkout}'...") 131 | repo.git.checkout(checkout) 132 | else: 133 | print("No specific checkout branch/commit specified - keeping default") 134 | return target_folder 135 | 136 | 137 | def copy_s3_content( 138 | base_folder: str, 139 | content_s3uri: str, 140 | target_path: Optional[str] = None, 141 | extract: Optional[bool] = False, 142 | authenticate_s3: Optional[bool] = False, 143 | ) -> str: 144 | """Download content from Amazon S3 to `base_folder/target_path` 145 | 146 | DOES NOT CONFIGURE FILE OWNERSHIP PERMISSIONS! Run chown_recursive if required. 
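For example (illustrative values only - the EFS path and bucket are placeholders), downloading and
extracting a zip archive into a user's home directory::

    copy_s3_content(
        "/mnt/efs/200005",
        "s3://example-bucket/workshop-content.zip",
        extract=True,
    )
    # Returns "/mnt/efs/200005/workshop-content.zip", which now holds the extracted files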
147 | """ 148 | if not content_s3uri.lower().startswith("s3://"): 149 | raise ValueError("Content URI must start with 's3://'") 150 | bucket_name, _, key_prefix = content_s3uri[len("s3://") :].partition("/") 151 | 152 | # Set up S3 client as anonymous or authenticated, depending on resource config: 153 | s3 = boto3.resource("s3", config=(None if authenticate_s3 else anons3config)) 154 | s3client = boto3.client("s3", config=(None if authenticate_s3 else anons3config)) 155 | 156 | # Check if the provided content URI is a valid object (vs folder/prefix): 157 | bucket = s3.Bucket(bucket_name) 158 | print(f"Checking s3://{bucket_name}/{key_prefix}") 159 | try: 160 | content_type = bucket.Object(key_prefix).content_type 161 | if content_type and content_type.lower() == "application/x-directory": 162 | is_object = False 163 | else: 164 | is_object = True 165 | except s3client.exceptions.ClientError as err: 166 | if err.response["Error"]["Code"] == "404": 167 | is_object = False 168 | else: 169 | raise err 170 | 171 | if is_object: 172 | if target_path is None: 173 | target_path = os.path.basename(key_prefix) 174 | full_target_path = os.path.join(base_folder, target_path) 175 | print(f"Downloading {content_s3uri}") 176 | bucket.download_file(key_prefix, full_target_path) 177 | 178 | if not extract: 179 | return full_target_path 180 | # Otherwise, extract compressed file: 181 | # A file without a dot/extension will produce ("", "", "wholename"): 182 | basename, _, file_ext = key_prefix.rpartition(".") 183 | file_ext = file_ext.lower() 184 | extract_path = full_target_path + "-tmp" 185 | if file_ext == "zip" or not basename: 186 | # (Assume zip for files with no extension if extract specified) 187 | print(f"Extracting to {extract_path}") 188 | with zipfile.ZipFile(full_target_path, "r") as zip_ref: 189 | zip_ref.extractall(extract_path) 190 | else: 191 | raise NotImplementedError(f"File extension '{file_ext}' not supported for extraction") 192 | print(f"Replacing compressed {full_target_path} with {extract_path}") 193 | os.remove(full_target_path) 194 | os.rename(extract_path, full_target_path) 195 | return full_target_path 196 | 197 | # Otherwise looks like a folder 198 | raise NotImplementedError( 199 | f"Object not found and prefix/folder download not yet supported: ${content_s3uri}" 200 | ) 201 | 202 | 203 | def chown_recursive(path: str, uid: Union[str, int] = -1, gid: Union[str, int] = -1): 204 | """Workaround for os.chown() not having a recursive option for folders""" 205 | uid = int(uid) 206 | gid = int(gid) 207 | if os.path.isfile(path): 208 | os.chown(path, uid, gid) 209 | else: 210 | for dirpath, dirnames, filenames in os.walk(path): 211 | os.chown(dirpath, uid, gid) 212 | for filename in filenames: 213 | os.chown(os.path.join(dirpath, filename), uid, gid) 214 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/region_config.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | """CDK constructs for cross-regional configuration mapping of SageMaker resources 4 | """ 5 | # Python Built-Ins: 6 | from typing import Optional, Tuple 7 | 8 | # External Dependencies: 9 | from aws_cdk import CfnMapping 10 | from constructs import Construct 11 | 12 | 13 | STUDIO_APP_ARNS_BY_REGION = { 14 | "us-east-1": { 15 | "datascience": "arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0", 16 | "datascience2": "arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-38", 17 | "datascience3": "arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-310-v1", 18 | "jlabv3": "arn:aws:sagemaker:us-east-1:081325390199:image/jupyter-server-3", 19 | }, 20 | "us-east-2": { 21 | "datascience": "arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0", 22 | "datascience2": "arn:aws:sagemaker:us-east-2:429704687514:image/sagemaker-data-science-38", 23 | "datascience3": "arn:aws:sagemaker:us-east-2:429704687514:image/sagemaker-data-science-310-v1", 24 | "jlabv3": "arn:aws:sagemaker:us-east-2:429704687514:image/jupyter-server-3", 25 | }, 26 | "us-west-1": { 27 | "datascience": "arn:aws:sagemaker:us-west-1:742091327244:image/datascience-1.0", 28 | "datascience2": "arn:aws:sagemaker:us-west-1:742091327244:image/sagemaker-data-science-38", 29 | "datascience3": "arn:aws:sagemaker:us-west-1:742091327244:image/sagemaker-data-science-310-v1", 30 | "jlabv3": "arn:aws:sagemaker:us-west-1:742091327244:image/jupyter-server-3", 31 | }, 32 | "us-west-2": { 33 | "datascience": "arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0", 34 | "datascience2": "arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-38", 35 | "datascience3": "arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1", 36 | "jlabv3": "arn:aws:sagemaker:us-west-2:236514542706:image/jupyter-server-3", 37 | }, 38 | "af-south-1": { 39 | "datascience": "arn:aws:sagemaker:af-south-1:559312083959:image/datascience-1.0", 40 | "datascience2": "arn:aws:sagemaker:af-south-1:559312083959:image/sagemaker-data-science-38", 41 | "datascience3": "arn:aws:sagemaker:af-south-1:559312083959:image/sagemaker-data-science-310-v1", 42 | "jlabv3": "arn:aws:sagemaker:af-south-1:559312083959:image/jupyter-server-3", 43 | }, 44 | "ap-east-1": { 45 | "datascience": "arn:aws:sagemaker:ap-east-1:493642496378:image/datascience-1.0", 46 | "datascience2": "arn:aws:sagemaker:ap-east-1:493642496378:image/sagemaker-data-science-38", 47 | "datascience3": "arn:aws:sagemaker:ap-east-1:493642496378:image/sagemaker-data-science-310-v1", 48 | "jlabv3": "arn:aws:sagemaker:ap-east-1:493642496378:image/jupyter-server-3", 49 | }, 50 | "ap-south-1": { 51 | "datascience": "arn:aws:sagemaker:ap-south-1:394103062818:image/datascience-1.0", 52 | "datascience2": "arn:aws:sagemaker:ap-south-1:394103062818:image/sagemaker-data-science-38", 53 | "datascience3": "arn:aws:sagemaker:ap-south-1:394103062818:image/sagemaker-data-science-310-v1", 54 | "jlabv3": "arn:aws:sagemaker:ap-south-1:394103062818:image/jupyter-server-3", 55 | }, 56 | "ap-northeast-2": { 57 | "datascience": "arn:aws:sagemaker:ap-northeast-2:806072073708:image/datascience-1.0", 58 | "datascience2": "arn:aws:sagemaker:ap-northeast-2:806072073708:image/sagemaker-data-science-38", 59 | "datascience3": "arn:aws:sagemaker:ap-northeast-2:806072073708:image/sagemaker-data-science-310-v1", 60 | "jlabv3": "arn:aws:sagemaker:ap-northeast-2:806072073708:image/jupyter-server-3", 61 | }, 62 | "ap-southeast-1": 
{ 63 | "datascience": "arn:aws:sagemaker:ap-southeast-1:492261229750:image/datascience-1.0", 64 | "datascience2": "arn:aws:sagemaker:ap-southeast-1:492261229750:image/sagemaker-data-science-38", 65 | "datascience3": "arn:aws:sagemaker:ap-southeast-1:492261229750:image/sagemaker-data-science-310-v1", 66 | "jlabv3": "arn:aws:sagemaker:ap-southeast-1:492261229750:image/jupyter-server-3", 67 | }, 68 | "ap-southeast-2": { 69 | "datascience": "arn:aws:sagemaker:ap-southeast-2:452832661640:image/datascience-1.0", 70 | "datascience2": "arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-38", 71 | "datascience3": "arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1", 72 | "jlabv3": "arn:aws:sagemaker:ap-southeast-2:452832661640:image/jupyter-server-3", 73 | }, 74 | "ap-southeast-3": { 75 | "datascience": "arn:aws:sagemaker:ap-southeast-3:276181064229:image/datascience-1.0", 76 | "datascience2": "arn:aws:sagemaker:ap-southeast-3:276181064229:image/sagemaker-data-science-38", 77 | "datascience3": "arn:aws:sagemaker:ap-southeast-3:276181064229:image/sagemaker-data-science-310-v1", 78 | "jlabv3": "arn:aws:sagemaker:ap-southeast-3:276181064229:image/jupyter-server-3", 79 | }, 80 | "ap-northeast-1": { 81 | "datascience": "arn:aws:sagemaker:ap-northeast-1:102112518831:image/datascience-1.0", 82 | "datascience2": "arn:aws:sagemaker:ap-northeast-1:102112518831:image/sagemaker-data-science-38", 83 | "datascience3": "arn:aws:sagemaker:ap-northeast-1:102112518831:image/sagemaker-data-science-310-v1", 84 | "jlabv3": "arn:aws:sagemaker:ap-northeast-1:102112518831:image/jupyter-server-3", 85 | }, 86 | # TODO: ap-northeast-2 and ap-northeast-3 if available? 87 | "ca-central-1": { 88 | "datascience": "arn:aws:sagemaker:ca-central-1:310906938811:image/datascience-1.0", 89 | "datascience2": "arn:aws:sagemaker:ca-central-1:310906938811:image/sagemaker-data-science-38", 90 | "datascience3": "arn:aws:sagemaker:ca-central-1:310906938811:image/sagemaker-data-science-310-v1", 91 | "jlabv3": "arn:aws:sagemaker:ca-central-1:310906938811:image/jupyter-server-3", 92 | }, 93 | "eu-central-1": { 94 | "datascience": "arn:aws:sagemaker:eu-central-1:936697816551:image/datascience-1.0", 95 | "datascience2": "arn:aws:sagemaker:eu-central-1:936697816551:image/sagemaker-data-science-38", 96 | "datascience3": "arn:aws:sagemaker:eu-central-1:936697816551:image/sagemaker-data-science-310-v1", 97 | "jlabv3": "arn:aws:sagemaker:eu-central-1:936697816551:image/jupyter-server-3", 98 | }, 99 | # TODO: eu-central-2 if available? 
100 | "eu-west-1": { 101 | "datascience": "arn:aws:sagemaker:eu-west-1:470317259841:image/datascience-1.0", 102 | "datascience2": "arn:aws:sagemaker:eu-west-1:470317259841:image/sagemaker-data-science-38", 103 | "datascience3": "arn:aws:sagemaker:eu-west-1:470317259841:image/sagemaker-data-science-310-v1", 104 | "jlabv3": "arn:aws:sagemaker:eu-west-1:470317259841:image/jupyter-server-3", 105 | }, 106 | "eu-west-2": { 107 | "datascience": "arn:aws:sagemaker:eu-west-2:712779665605:image/datascience-1.0", 108 | "datascience2": "arn:aws:sagemaker:eu-west-2:712779665605:image/sagemaker-data-science-38", 109 | "datascience3": "arn:aws:sagemaker:eu-west-2:712779665605:image/sagemaker-data-science-310-v1", 110 | "jlabv3": "arn:aws:sagemaker:eu-west-2:712779665605:image/jupyter-server-3", 111 | }, 112 | "eu-west-3": { 113 | "datascience": "arn:aws:sagemaker:eu-west-3:615547856133:image/datascience-1.0", 114 | "datascience2": "arn:aws:sagemaker:eu-west-3:615547856133:image/sagemaker-data-science-38", 115 | "datascience3": "arn:aws:sagemaker:eu-west-3:615547856133:image/sagemaker-data-science-310-v1", 116 | "jlabv3": "arn:aws:sagemaker:eu-west-3:615547856133:image/jupyter-server-3", 117 | }, 118 | "eu-north-1": { 119 | "datascience": "arn:aws:sagemaker:eu-north-1:243637512696:image/datascience-1.0", 120 | "datascience2": "arn:aws:sagemaker:eu-north-1:243637512696:image/sagemaker-data-science-38", 121 | "datascience3": "arn:aws:sagemaker:eu-north-1:243637512696:image/sagemaker-data-science-310-v1", 122 | "jlabv3": "arn:aws:sagemaker:eu-north-1:243637512696:image/jupyter-server-3", 123 | }, 124 | "eu-south-1": { 125 | "datascience": "arn:aws:sagemaker:eu-south-1:592751261982:image/datascience-1.0", 126 | "datascience2": "arn:aws:sagemaker:eu-south-1:592751261982:image/sagemaker-data-science-38", 127 | "datascience3": "arn:aws:sagemaker:eu-south-1:592751261982:image/sagemaker-data-science-310-v1", 128 | "jlabv3": "arn:aws:sagemaker:eu-south-1:592751261982:image/jupyter-server-3", 129 | }, 130 | # TODO: me-central-1 and me-south-1 if available? 
131 | "sa-east-1": { 132 | "datascience": "arn:aws:sagemaker:sa-east-1:782484402741:image/datascience-1.0", 133 | "datascience2": "arn:aws:sagemaker:sa-east-1:782484402741:image/sagemaker-data-science-38", 134 | "datascience3": "arn:aws:sagemaker:sa-east-1:782484402741:image/sagemaker-data-science-310-v1", 135 | "jlabv3": "arn:aws:sagemaker:sa-east-1:782484402741:image/jupyter-server-3", 136 | }, 137 | } 138 | 139 | 140 | class CfnSageMakerAppsByRegionMapping(CfnMapping): 141 | """Construct for a CloudFormation Mapping of common SMStudio app ARNs by region""" 142 | 143 | def __init__( 144 | self, 145 | scope: Construct, 146 | id: str, 147 | *, 148 | lazy: Optional[bool] = None, 149 | ) -> None: 150 | super().__init__(scope, id, lazy=lazy, mapping=STUDIO_APP_ARNS_BY_REGION) 151 | 152 | @property 153 | def supported_regions(self) -> Tuple[str]: 154 | """Alphabetically sorted list of all regions supported in the map""" 155 | return tuple(sorted(STUDIO_APP_ARNS_BY_REGION.keys())) 156 | 157 | @property 158 | def supported_apps(self) -> Tuple[str]: 159 | """Alphabetically sorted list of all Studio app names supported in the map""" 160 | return next(tuple(sorted(vals)) for _, vals in STUDIO_APP_ARNS_BY_REGION) 161 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/fn_studio_lcconfig/main.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """CDK Custom Resource Lambda for a SageMaker Studio Lifecycle Configuration Script 4 | 5 | See `StudioLCCResourceProperties` for expected CloudFormation resource properties. 6 | 7 | CloudFormation Return Values 8 | ---------------------------- 9 | Direct .Ref : 10 | ARN of the created lifecycle configuration script 11 | AppType : 12 | As per resource properties .AppType 13 | Name : 14 | As per resource properties .Name 15 | """ 16 | # Python Built-Ins: 17 | from __future__ import annotations 18 | import json 19 | import logging 20 | import time 21 | from typing import Optional 22 | 23 | logging.getLogger().setLevel(logging.INFO) # Set log level for AWS Lambda *BEFORE* other imports 24 | 25 | # External Dependencies: 26 | import boto3 27 | 28 | # Local Dependencies 29 | from cfn import CustomResourceEvent, CustomResourceRequestType 30 | from sagemaker_util import retry_if_already_updating 31 | 32 | logger = logging.getLogger("main") 33 | smclient = boto3.client("sagemaker") 34 | 35 | 36 | class StudioLCCResourceProperties: 37 | """Parser for CloudFormation resource properties for this Custom Resource 38 | 39 | Resource Properties 40 | ------------------- 41 | 42 | AppType : str 43 | (Required) 'JupyterLab' or 'CodeEditor' for new-style (2024+) SMStudio Spaces, or else 44 | 'JupyterServer' or 'KernelGateway' for SageMaker Studio Classic. 45 | Name : str 46 | (Required) Name of the lifecycle config script to create 47 | Content : str 48 | (Required) Base64-encoded script content, similar to the usage of 49 | `Properties.OnStart[].Content` in AWS::SageMaker::NotebookInstanceLifecycleConfig 50 | Tags : Optional[List[Dict['Key': str, 'Value': str]]] 51 | Optional AWS resource tags 52 | DomainId : Optional[str] 53 | Optional SageMaker Studio Domain ID to associate the script to. (You usually need to attach 54 | the script to a domain if you want to use it!). 
55 | """ 56 | 57 | app_type: str 58 | content: str 59 | name: str 60 | domain_id: Optional[str] 61 | tags: Optional[dict] 62 | 63 | def __init__(self, resource_properties: dict): 64 | self.app_type = resource_properties["AppType"] 65 | self.content = resource_properties["Content"] 66 | self.name = resource_properties["Name"] 67 | self.domain_id = resource_properties.get("DomainId") 68 | self.tags = resource_properties.get("Tags", []) 69 | 70 | def __str__(self): 71 | dict_val = { 72 | "AppType": self.app_type, 73 | "Content": self.content, 74 | "Name": self.name, 75 | "Tags": self.tags, 76 | } 77 | if self.domain_id: 78 | dict_val["DomainId"] = self.domain_id 79 | return json.dumps(dict_val) 80 | 81 | @classmethod 82 | def from_str(cls, str_val) -> StudioLCCResourceProperties: 83 | return cls(json.loads(str_val)) 84 | 85 | 86 | def lambda_handler(event_raw: dict, context: dict): 87 | """Main entry point for (CDK) Custom Resource Lambda""" 88 | logger.info(event_raw) 89 | event = CustomResourceEvent(event_raw, StudioLCCResourceProperties) 90 | if event.request_type == CustomResourceRequestType.create: 91 | return handle_create(event, context) 92 | elif event.request_type == CustomResourceRequestType.update: 93 | return handle_update(event, context) 94 | elif event.request_type == CustomResourceRequestType.delete: 95 | return handle_delete(event, context) 96 | else: 97 | raise ValueError(f"Unsupported CFn RequestType '{event_raw['RequestType']}'") 98 | 99 | 100 | def handle_create(event: CustomResourceEvent[StudioLCCResourceProperties], context: dict): 101 | logger.info("**Received create request") 102 | 103 | logger.info("**Creating lifecycle config script") 104 | resp = smclient.create_studio_lifecycle_config( 105 | StudioLifecycleConfigName=event.props.name, 106 | StudioLifecycleConfigContent=event.props.content, 107 | StudioLifecycleConfigAppType=event.props.app_type, 108 | Tags=event.props.tags or [], 109 | ) 110 | script_arn = resp["StudioLifecycleConfigArn"] 111 | domain_id = event.props.domain_id 112 | if domain_id is not None: 113 | try: 114 | attach_lcc_to_domain( 115 | domain_id=domain_id, 116 | script_arn=script_arn, 117 | app_type=event.props.app_type, 118 | ) 119 | except Exception as e: 120 | # If creation succeeded but attachment failed, send explicit fail response to try and 121 | # make sure the physical resource ID is set correctly and therefore enable rollback of 122 | # the resource: 123 | logger.exception("Failed to attach LCC to SM domain") 124 | raise e 125 | 126 | return { 127 | "PhysicalResourceId": script_arn, 128 | "Data": { 129 | "AppType": event.props.app_type, 130 | "Name": event.props.name, 131 | }, 132 | } 133 | 134 | 135 | def handle_delete(event: CustomResourceEvent[StudioLCCResourceProperties], context: dict): 136 | logger.info("**Received delete event") 137 | lcc_id = event.physical_id 138 | lcc_name = lcc_id.rpartition("/")[2] 139 | 140 | domain_id = event.props.domain_id 141 | app_type = event.props.app_type 142 | if domain_id is not None and app_type is not None: 143 | try: 144 | remove_lcc_from_domain(domain_id=domain_id, script_arn=lcc_id, app_type=app_type) 145 | except: 146 | logger.exception("Failed to detach LCC from domain - trying to delete LCC anyway...") 147 | 148 | try: 149 | logger.info(f"Deleting lifecycle config script {lcc_name}") 150 | smclient.delete_studio_lifecycle_config(StudioLifecycleConfigName=lcc_name) 151 | except smclient.exceptions.ResourceNotFound: 152 | pass 153 | 154 | # Already does not exist -> deletion success 155 | 
return {
156 | "PhysicalResourceId": lcc_id,
157 | "Data": {},
158 | }
159 |
160 |
161 | def handle_update(event: CustomResourceEvent[StudioLCCResourceProperties], context: dict):
162 | logger.info("**Received update event")
163 |
164 | script_location_modified = not (
165 | (event.props.name == event.old_props.name)
166 | and (event.props.app_type == event.old_props.app_type)
167 | )
168 | script_modified = script_location_modified or not (
169 | (event.props.content == event.old_props.content)
170 | )
171 | new_domain = event.props.domain_id
172 | old_domain = event.old_props.domain_id
173 |
174 | if old_domain and (script_location_modified or (new_domain != old_domain)):
175 | remove_lcc_from_domain(
176 | domain_id=old_domain,
177 | script_arn=event.physical_id,
178 | app_type=event.old_props.app_type,
179 | )
180 |
181 | if script_modified:
182 | # For any modification we have to replace the script:
183 | try:
184 | old_name = event.old_props.name
185 | logger.info(f"Deleting lifecycle config script {old_name}")
186 | smclient.delete_studio_lifecycle_config(StudioLifecycleConfigName=old_name)
187 | except smclient.exceptions.ResourceNotFound:
188 | pass
189 | resp = smclient.create_studio_lifecycle_config(
190 | StudioLifecycleConfigName=event.props.name,
191 | StudioLifecycleConfigContent=event.props.content,
192 | StudioLifecycleConfigAppType=event.props.app_type,
193 | Tags=event.props.tags or [],
194 | )
195 |
196 | if new_domain and (script_location_modified or (new_domain != old_domain)):
197 | attach_lcc_to_domain(
198 | domain_id=new_domain,
199 | script_arn=resp["StudioLifecycleConfigArn"] if script_modified else event.physical_id,
200 | app_type=event.props.app_type,
201 | )
202 |
203 | return {
204 | "PhysicalResourceId": resp["StudioLifecycleConfigArn"] if script_modified else event.physical_id,
205 | "Data": {
206 | "AppType": event.props.app_type,
207 | "Name": event.props.name,
208 | },
209 | }
210 |
211 |
212 | def attach_lcc_to_domain(domain_id: str, script_arn: str, app_type: str):
213 | domain_desc = smclient.describe_domain(DomainId=domain_id)
214 |
215 | default_settings = domain_desc["DefaultUserSettings"]
216 |
217 | app_settings_field = f"{app_type}AppSettings" # e.g. "JupyterServerAppSettings"
218 | if not default_settings.get(app_settings_field):
219 | default_settings[app_settings_field] = {}
220 | if not default_settings[app_settings_field].get("LifecycleConfigArns"):
221 | default_settings[app_settings_field]["LifecycleConfigArns"] = []
222 |
223 | default_scripts = default_settings[app_settings_field]["LifecycleConfigArns"]
224 | if script_arn not in default_scripts:
225 | logger.info(f"Adding script to domain:\n{script_arn}")
226 | default_scripts.append(script_arn)
227 | retry_if_already_updating(
228 | lambda: smclient.update_domain(
229 | DomainId=domain_id,
230 | DefaultUserSettings=default_settings,
231 | ),
232 | )
233 | time.sleep(10)
234 | else:
235 | logger.info(f"Script already default on domain:\n{script_arn}")
236 |
237 |
238 | def remove_lcc_from_domain(domain_id: str, script_arn: str, app_type: str):
239 | domain_desc = smclient.describe_domain(DomainId=domain_id)
240 |
241 | default_settings = domain_desc["DefaultUserSettings"]
242 |
243 | app_settings_field = f"{app_type}AppSettings" # e.g.
"JupyterServerAppSettings" 244 | if not default_settings.get(app_settings_field): 245 | default_settings[app_settings_field] = {} 246 | if not default_settings[app_settings_field].get("LifecycleConfigArns"): 247 | default_settings[app_settings_field]["LifecycleConfigArns"] = [] 248 | 249 | default_scripts = default_settings[app_settings_field]["LifecycleConfigArns"] 250 | if script_arn in default_scripts: 251 | logger.info(f"Removing script from domain:\n{script_arn}") 252 | default_scripts.remove(script_arn) 253 | retry_if_already_updating( 254 | lambda: smclient.update_domain( 255 | DomainId=domain_id, 256 | DefaultUserSettings=default_settings, 257 | ), 258 | ) 259 | time.sleep(10) 260 | else: 261 | logger.info("Script already deleted from domain:\n{script_arn}") 262 | -------------------------------------------------------------------------------- /migration_challenge/keras_mnist/util/draw.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """An ipycanvas-based interactive widget for drawing PIL-compatible doodles in JupyterLab 4 | """ 5 | 6 | # Python Built-Ins: 7 | from math import floor 8 | from typing import Tuple, Union 9 | 10 | # External Dependenices: 11 | import numpy as np 12 | from ipycanvas import Canvas, hold_canvas 13 | from IPython.display import display 14 | from ipywidgets import HTML, Button, Layout, Output, VBox 15 | from matplotlib.colors import to_hex, to_rgb 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | class ValidatedColor: 20 | """Canvas expects different color repr from PIL/image, so this class stores both""" 21 | 22 | hexa: str 23 | np_8bit: np.ndarray 24 | 25 | def __init__(self, color: Union[Tuple[float], np.ndarray]): 26 | self.set_color(color) 27 | 28 | def set_color(self, color: Union[Tuple[float], np.ndarray]): 29 | """Use this method to update all stored representations at once""" 30 | self.hexa = to_hex(color) 31 | self.np_8bit = (255 * np.array(to_rgb(color))).astype(int) 32 | 33 | 34 | class PixelDrawCanvas: 35 | """JupyterLab widget to interactively draw on a canvas and export the pixel data to Python 36 | 37 | This widget maintains a buffer of pixel values and draws individual pixel rects to canvas (in 38 | batches, at least) to canvas on each mouse event... More toy/demo than an optimized design! 39 | 40 | Usage 41 | ----- 42 | After creating the PixelDrawCanvas you can either call `.display()` to directly display it in 43 | the notebook, or access the `.widget` property if you want to embed the UI it in another 44 | ipywidgets widget. 45 | 46 | Draw on the canvas by clicking and dragging, or press the "Clear" button to start again. 47 | 48 | You can read the 0-255, 3-channel (height, width, 3) pixel data numpy array from `.data`. 49 | `matplotlib.pyplot.imshow(data)` should confirm that what you see in the widget matches this. 50 | 51 | You can also programmatically `.clear()` the drawing from Python if you like. 52 | """ 53 | 54 | def __init__( 55 | self, 56 | width: int = 28, 57 | height: int = 28, 58 | color_bg: Tuple[float, float, float] = (0, 0, 0), 59 | color_fg: Tuple[float, float, float] = (1.0, 1.0, 1.0), 60 | pen_size: int = 3, 61 | title_html: str = "
Draw a digit!
", 62 | ): 63 | """Create a PixelDrawCanvas""" 64 | self.col_bg = ValidatedColor(color_bg) 65 | self.col_fg = ValidatedColor(color_fg) 66 | 67 | # -- Create individual widget components: 68 | self.canvas = Canvas(width=width, height=height, image_smoothing_enabled=False) 69 | # (Without explicit canvas.layout width, VBox/HBox fills full available width) 70 | self.canvas.layout.height = f"{max(200, min(1000, height))}px" 71 | self.canvas.layout.width = f"{max(200, min(1000, width))}px" 72 | self.canvas.image_smoothing_enabled = False 73 | self._clear_button = Button( 74 | description="Clear", 75 | icon="eraser", 76 | tooltip="Clear the drawing to a blank image", 77 | ) 78 | self._console = Output( 79 | layout=Layout( 80 | max_height="140px", 81 | overflow_y="auto", 82 | ) 83 | ) 84 | self._title = HTML(title_html) 85 | 86 | # -- Initialize state: 87 | self.is_drawing = False 88 | # (Temporary data __init__ to be overridden by clear() shortly:) 89 | self.data = np.zeros((height, width, 3)) 90 | self.set_pen(pen_size=pen_size) 91 | 92 | # -- Set up listeners: 93 | # Wrap widget event listener member functions so they have access to this `self` instance 94 | # when called and are also able to `print()` to the console output if needed. 95 | @self._console.capture() 96 | def on_mouse_down(*args, **kwargs): 97 | return self._on_mouse_down(*args, **kwargs) 98 | 99 | @self._console.capture() 100 | def on_mouse_move(*args, **kwargs): 101 | return self._on_mouse_move(*args, **kwargs) 102 | 103 | @self._console.capture() 104 | def on_mouse_out(*args, **kwargs): 105 | return self._on_mouse_out(*args, **kwargs) 106 | 107 | @self._console.capture() 108 | def on_mouse_up(*args, **kwargs): 109 | return self._on_mouse_up(*args, **kwargs) 110 | 111 | @self._console.capture() 112 | def on_clear_click(*args, **kwargs): 113 | return self.clear() 114 | 115 | self.canvas.on_mouse_down(on_mouse_down) 116 | self.canvas.on_mouse_move(on_mouse_move) 117 | self.canvas.on_mouse_out(on_mouse_out) 118 | self.canvas.on_mouse_up(on_mouse_up) 119 | self._clear_button.on_click(on_clear_click) 120 | 121 | # Set up composite view with the different widget components: 122 | self.widget = VBox( 123 | [self._title, self._clear_button, self.canvas, self._console], 124 | width=f"{width}px", 125 | ) 126 | 127 | # Finally initialize to clear state ready to use: 128 | with self._console: 129 | self.clear() 130 | 131 | def clear(self): 132 | """Clear the drawing""" 133 | height = self.canvas.height 134 | width = self.canvas.width 135 | with hold_canvas(self.canvas): 136 | self.canvas.clear() 137 | self.canvas.fill_style = self.col_bg.hexa 138 | self.canvas.fill_rect(0, 0, width, height) 139 | self.canvas.fill_style = self.col_fg.hexa 140 | self.data = np.tile(self.col_bg.np_8bit, (height, width, 1)) 141 | print("Cleared drawing") 142 | 143 | def draw_from_buffer(self): 144 | """Draw the contents of the .data buffer to the canvas 145 | 146 | This reproduces steps from clear() instead of calling it internally, to avoid flicker. Only 147 | pixels of the current col_fg in the buffer will be drawn (doesn't support changing col_fg 148 | dynamically or drawing multiple colors). 
149 | """ 150 | height = self.canvas.height 151 | width = self.canvas.width 152 | fg_mask = (self.data == np.expand_dims(self.col_fg.np_8bit, (0, 1))).all(-1) 153 | with hold_canvas(self.canvas): 154 | self.canvas.clear() 155 | self.canvas.fill_style = self.col_bg.hexa 156 | self.canvas.fill_rect(0, 0, width, height) 157 | self.canvas.fill_style = self.col_fg.hexa 158 | fg_coords = np.argwhere(fg_mask) # N entries of (x, y) pairs 159 | self.canvas.fill_rects(fg_coords[:, 1], fg_coords[:, 0], 1, 1) 160 | 161 | def display(self): 162 | """Display the widget (in a Jupyter/Lab notebook)""" 163 | display(self.widget) 164 | 165 | def _on_mouse_down(self, x, y): 166 | self.is_drawing = True 167 | self.paint(x, y) 168 | 169 | def _on_mouse_move(self, x, y): 170 | if self.is_drawing: 171 | self.paint(x, y) 172 | 173 | def _on_mouse_out(self, x, y): 174 | """Re-draw from data buffer on each mouse-out in case anything weird happened""" 175 | self.is_drawing = False 176 | self.draw_from_buffer() 177 | 178 | def _on_mouse_up(self, x, y): 179 | self.is_drawing = False 180 | 181 | def set_pen(self, pen_size: int = 15) -> np.ndarray: 182 | """Set up the pen/brush (define pen_mask matrix) 183 | 184 | We pre-calculate and store a boolean `.pen_mask` matrix for the requested brush size (and 185 | assumed circular shape). If you wanted, you could set other whacky shapes by replacing your 186 | own boolean matrix (True where the pen marks, False where it doesn't). 187 | 188 | Returns 189 | ------- 190 | pen_mask : 191 | The same boolean 2D matrix this function saves to `self.pen_mask`. 192 | """ 193 | # No sense re-inventing the "pixellated circle" wheel, so use PIL: 194 | mask_img = Image.new("1", (pen_size, pen_size)) 195 | draw = ImageDraw.Draw(mask_img) 196 | draw.ellipse((0, 0, pen_size - 1, pen_size - 1), fill="white") 197 | self.pen_mask = np.array(mask_img) # (pen_size, pen_size) boolean array 198 | return self.pen_mask 199 | 200 | def paint(self, x, y): 201 | """Mark the given location with the current pen""" 202 | # Truncate the current pen mask if required (if location is close to edge of image): 203 | x_floor = floor(x) 204 | y_floor = floor(y) 205 | 206 | pen_mask = self.pen_mask 207 | x_maskstart = floor(x - (pen_mask.shape[1] / 2)) 208 | if x_maskstart < 0: 209 | pen_mask = pen_mask[:, -x_maskstart:] # Truncate left of pen 210 | x_maskstart = 0 211 | x_pixelsafter = self.data.shape[1] - (x_maskstart + pen_mask.shape[1]) 212 | if x_pixelsafter < 0: 213 | pen_mask = pen_mask[:, :x_pixelsafter] # Truncate right of pen 214 | x_pixelsafter = 0 215 | 216 | y_maskstart = floor(y - (pen_mask.shape[0] / 2)) 217 | if y_maskstart < 0: 218 | pen_mask = pen_mask[-y_maskstart:, :] # Truncate top of pen 219 | y_maskstart = 0 220 | y_pixelsafter = self.data.shape[0] - (y_maskstart + pen_mask.shape[0]) 221 | if y_pixelsafter < 0: 222 | pen_mask = pen_mask[:y_pixelsafter, :] # Truncate bottom of pen 223 | y_pixelsafter = 0 224 | 225 | x_maskend = x_maskstart + pen_mask.shape[1] 226 | y_maskend = y_maskstart + pen_mask.shape[0] 227 | 228 | # Check which pixels will be actually updated to avoid drawing unnecessary canvas rects: 229 | new_fg_pixels_offset = np.argwhere( 230 | pen_mask 231 | & ( 232 | self.data[ 233 | y_maskstart:(y_maskstart + pen_mask.shape[0]), 234 | x_maskstart:(x_maskstart + pen_mask.shape[1]), 235 | :, 236 | ] 237 | != np.expand_dims(self.col_fg.np_8bit, (0, 1)) 238 | ).all(-1) 239 | ) 240 | 241 | # Update the data buffer: 242 | full_mask = np.zeros_like(self.data) 243 | 
full_mask[y_maskstart:y_maskend, x_maskstart:x_maskend, :] = np.expand_dims(pen_mask, -1) 244 | self.data = np.where(full_mask, self.col_fg.np_8bit, self.data) 245 | 246 | # Draw the canvas updates: 247 | with hold_canvas(self.canvas): 248 | self.canvas.fill_style = self.col_fg.hexa 249 | self.canvas.fill_rects( 250 | new_fg_pixels_offset[:, 1] + x_maskstart, 251 | new_fg_pixels_offset[:, 0] + y_maskstart, 252 | 1, 253 | 1, 254 | ) 255 | self.canvas.fill_rect(x_floor, y_floor, 1, 1) 256 | -------------------------------------------------------------------------------- /migration_challenge/pytorch_mnist/util/draw.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """An ipycanvas-based interactive widget for drawing PIL-compatible doodles in JupyterLab 4 | """ 5 | 6 | # Python Built-Ins: 7 | from math import floor 8 | from typing import Tuple, Union 9 | 10 | # External Dependenices: 11 | import numpy as np 12 | from ipycanvas import Canvas, hold_canvas 13 | from IPython.display import display 14 | from ipywidgets import HTML, Button, Layout, Output, VBox 15 | from matplotlib.colors import to_hex, to_rgb 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | class ValidatedColor: 20 | """Canvas expects different color repr from PIL/image, so this class stores both""" 21 | 22 | hexa: str 23 | np_8bit: np.ndarray 24 | 25 | def __init__(self, color: Union[Tuple[float], np.ndarray]): 26 | self.set_color(color) 27 | 28 | def set_color(self, color: Union[Tuple[float], np.ndarray]): 29 | """Use this method to update all stored representations at once""" 30 | self.hexa = to_hex(color) 31 | self.np_8bit = (255 * np.array(to_rgb(color))).astype(int) 32 | 33 | 34 | class PixelDrawCanvas: 35 | """JupyterLab widget to interactively draw on a canvas and export the pixel data to Python 36 | 37 | This widget maintains a buffer of pixel values and draws individual pixel rects to canvas (in 38 | batches, at least) to canvas on each mouse event... More toy/demo than an optimized design! 39 | 40 | Usage 41 | ----- 42 | After creating the PixelDrawCanvas you can either call `.display()` to directly display it in 43 | the notebook, or access the `.widget` property if you want to embed the UI it in another 44 | ipywidgets widget. 45 | 46 | Draw on the canvas by clicking and dragging, or press the "Clear" button to start again. 47 | 48 | You can read the 0-255, 3-channel (height, width, 3) pixel data numpy array from `.data`. 49 | `matplotlib.pyplot.imshow(data)` should confirm that what you see in the widget matches this. 50 | 51 | You can also programmatically `.clear()` the drawing from Python if you like. 52 | """ 53 | 54 | def __init__( 55 | self, 56 | width: int = 28, 57 | height: int = 28, 58 | color_bg: Tuple[float, float, float] = (0, 0, 0), 59 | color_fg: Tuple[float, float, float] = (1.0, 1.0, 1.0), 60 | pen_size: int = 3, 61 | title_html: str = "
Draw a digit!
", 62 | ): 63 | """Create a PixelDrawCanvas""" 64 | self.col_bg = ValidatedColor(color_bg) 65 | self.col_fg = ValidatedColor(color_fg) 66 | 67 | # -- Create individual widget components: 68 | self.canvas = Canvas(width=width, height=height, image_smoothing_enabled=False) 69 | # (Without explicit canvas.layout width, VBox/HBox fills full available width) 70 | self.canvas.layout.height = f"{max(200, min(1000, height))}px" 71 | self.canvas.layout.width = f"{max(200, min(1000, width))}px" 72 | self.canvas.image_smoothing_enabled = False 73 | self._clear_button = Button( 74 | description="Clear", 75 | icon="eraser", 76 | tooltip="Clear the drawing to a blank image", 77 | ) 78 | self._console = Output( 79 | layout=Layout( 80 | max_height="140px", 81 | overflow_y="auto", 82 | ) 83 | ) 84 | self._title = HTML(title_html) 85 | 86 | # -- Initialize state: 87 | self.is_drawing = False 88 | # (Temporary data __init__ to be overridden by clear() shortly:) 89 | self.data = np.zeros((height, width, 3)) 90 | self.set_pen(pen_size=pen_size) 91 | 92 | # -- Set up listeners: 93 | # Wrap widget event listener member functions so they have access to this `self` instance 94 | # when called and are also able to `print()` to the console output if needed. 95 | @self._console.capture() 96 | def on_mouse_down(*args, **kwargs): 97 | return self._on_mouse_down(*args, **kwargs) 98 | 99 | @self._console.capture() 100 | def on_mouse_move(*args, **kwargs): 101 | return self._on_mouse_move(*args, **kwargs) 102 | 103 | @self._console.capture() 104 | def on_mouse_out(*args, **kwargs): 105 | return self._on_mouse_out(*args, **kwargs) 106 | 107 | @self._console.capture() 108 | def on_mouse_up(*args, **kwargs): 109 | return self._on_mouse_up(*args, **kwargs) 110 | 111 | @self._console.capture() 112 | def on_clear_click(*args, **kwargs): 113 | return self.clear() 114 | 115 | self.canvas.on_mouse_down(on_mouse_down) 116 | self.canvas.on_mouse_move(on_mouse_move) 117 | self.canvas.on_mouse_out(on_mouse_out) 118 | self.canvas.on_mouse_up(on_mouse_up) 119 | self._clear_button.on_click(on_clear_click) 120 | 121 | # Set up composite view with the different widget components: 122 | self.widget = VBox( 123 | [self._title, self._clear_button, self.canvas, self._console], 124 | width=f"{width}px", 125 | ) 126 | 127 | # Finally initialize to clear state ready to use: 128 | with self._console: 129 | self.clear() 130 | 131 | def clear(self): 132 | """Clear the drawing""" 133 | height = self.canvas.height 134 | width = self.canvas.width 135 | with hold_canvas(self.canvas): 136 | self.canvas.clear() 137 | self.canvas.fill_style = self.col_bg.hexa 138 | self.canvas.fill_rect(0, 0, width, height) 139 | self.canvas.fill_style = self.col_fg.hexa 140 | self.data = np.tile(self.col_bg.np_8bit, (height, width, 1)) 141 | print("Cleared drawing") 142 | 143 | def draw_from_buffer(self): 144 | """Draw the contents of the .data buffer to the canvas 145 | 146 | This reproduces steps from clear() instead of calling it internally, to avoid flicker. Only 147 | pixels of the current col_fg in the buffer will be drawn (doesn't support changing col_fg 148 | dynamically or drawing multiple colors). 
149 | """ 150 | height = self.canvas.height 151 | width = self.canvas.width 152 | fg_mask = (self.data == np.expand_dims(self.col_fg.np_8bit, (0, 1))).all(-1) 153 | with hold_canvas(self.canvas): 154 | self.canvas.clear() 155 | self.canvas.fill_style = self.col_bg.hexa 156 | self.canvas.fill_rect(0, 0, width, height) 157 | self.canvas.fill_style = self.col_fg.hexa 158 | fg_coords = np.argwhere(fg_mask) # N entries of (x, y) pairs 159 | self.canvas.fill_rects(fg_coords[:, 1], fg_coords[:, 0], 1, 1) 160 | 161 | def display(self): 162 | """Display the widget (in a Jupyter/Lab notebook)""" 163 | display(self.widget) 164 | 165 | def _on_mouse_down(self, x, y): 166 | self.is_drawing = True 167 | self.paint(x, y) 168 | 169 | def _on_mouse_move(self, x, y): 170 | if self.is_drawing: 171 | self.paint(x, y) 172 | 173 | def _on_mouse_out(self, x, y): 174 | """Re-draw from data buffer on each mouse-out in case anything weird happened""" 175 | self.is_drawing = False 176 | self.draw_from_buffer() 177 | 178 | def _on_mouse_up(self, x, y): 179 | self.is_drawing = False 180 | 181 | def set_pen(self, pen_size: int = 15) -> np.ndarray: 182 | """Set up the pen/brush (define pen_mask matrix) 183 | 184 | We pre-calculate and store a boolean `.pen_mask` matrix for the requested brush size (and 185 | assumed circular shape). If you wanted, you could set other whacky shapes by replacing your 186 | own boolean matrix (True where the pen marks, False where it doesn't). 187 | 188 | Returns 189 | ------- 190 | pen_mask : 191 | The same boolean 2D matrix this function saves to `self.pen_mask`. 192 | """ 193 | # No sense re-inventing the "pixellated circle" wheel, so use PIL: 194 | mask_img = Image.new("1", (pen_size, pen_size)) 195 | draw = ImageDraw.Draw(mask_img) 196 | draw.ellipse((0, 0, pen_size - 1, pen_size - 1), fill="white") 197 | self.pen_mask = np.array(mask_img) # (pen_size, pen_size) boolean array 198 | return self.pen_mask 199 | 200 | def paint(self, x, y): 201 | """Mark the given location with the current pen""" 202 | # Truncate the current pen mask if required (if location is close to edge of image): 203 | x_floor = floor(x) 204 | y_floor = floor(y) 205 | 206 | pen_mask = self.pen_mask 207 | x_maskstart = floor(x - (pen_mask.shape[1] / 2)) 208 | if x_maskstart < 0: 209 | pen_mask = pen_mask[:, -x_maskstart:] # Truncate left of pen 210 | x_maskstart = 0 211 | x_pixelsafter = self.data.shape[1] - (x_maskstart + pen_mask.shape[1]) 212 | if x_pixelsafter < 0: 213 | pen_mask = pen_mask[:, :x_pixelsafter] # Truncate right of pen 214 | x_pixelsafter = 0 215 | 216 | y_maskstart = floor(y - (pen_mask.shape[0] / 2)) 217 | if y_maskstart < 0: 218 | pen_mask = pen_mask[-y_maskstart:, :] # Truncate top of pen 219 | y_maskstart = 0 220 | y_pixelsafter = self.data.shape[0] - (y_maskstart + pen_mask.shape[0]) 221 | if y_pixelsafter < 0: 222 | pen_mask = pen_mask[:y_pixelsafter, :] # Truncate bottom of pen 223 | y_pixelsafter = 0 224 | 225 | x_maskend = x_maskstart + pen_mask.shape[1] 226 | y_maskend = y_maskstart + pen_mask.shape[0] 227 | 228 | # Check which pixels will be actually updated to avoid drawing unnecessary canvas rects: 229 | new_fg_pixels_offset = np.argwhere( 230 | pen_mask 231 | & ( 232 | self.data[ 233 | y_maskstart:(y_maskstart + pen_mask.shape[0]), 234 | x_maskstart:(x_maskstart + pen_mask.shape[1]), 235 | :, 236 | ] 237 | != np.expand_dims(self.col_fg.np_8bit, (0, 1)) 238 | ).all(-1) 239 | ) 240 | 241 | # Update the data buffer: 242 | full_mask = np.zeros_like(self.data) 243 | 
full_mask[y_maskstart:y_maskend, x_maskstart:x_maskend, :] = np.expand_dims(pen_mask, -1) 244 | self.data = np.where(full_mask, self.col_fg.np_8bit, self.data) 245 | 246 | # Draw the canvas updates: 247 | with hold_canvas(self.canvas): 248 | self.canvas.fill_style = self.col_fg.hexa 249 | self.canvas.fill_rects( 250 | new_fg_pixels_offset[:, 1] + x_maskstart, 251 | new_fg_pixels_offset[:, 0] + y_maskstart, 252 | 1, 253 | 1, 254 | ) 255 | self.canvas.fill_rect(x_floor, y_floor, 1, 1) 256 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """AWS CDK constructs for creating SageMaker Studio Users with advanced configuration options 4 | """ 5 | # Python Built-Ins: 6 | import os 7 | from typing import Any, Dict, Optional, Sequence, Union 8 | 9 | # External Dependencies: 10 | from aws_cdk import CustomResource, Duration, RemovalPolicy, Stack 11 | import aws_cdk.aws_ec2 as aws_ec2 12 | import aws_cdk.aws_iam as aws_iam 13 | import aws_cdk.aws_kms as aws_kms 14 | from aws_cdk.aws_lambda import ILayerVersion, Runtime as LambdaRuntime 15 | from aws_cdk.aws_lambda_python_alpha import PythonFunction 16 | import aws_cdk.aws_logs as aws_logs 17 | import aws_cdk.custom_resources as cr 18 | from constructs import Construct 19 | 20 | # Local Dependencies: 21 | from ..region_config import CfnSageMakerAppsByRegionMapping 22 | 23 | 24 | LAMBDA_PATH = os.path.join(os.path.dirname(__file__), "fn_user") 25 | 26 | 27 | class SMStudioUserCustomResourceProvider(cr.Provider): 28 | """Provider (AWS Lambda) for a CFn Custom Resource for SMStudio User Profile 29 | 30 | If you're only creating one LCC in your stack, you probably don't need to create this 31 | explicitly: Just use `SageMakerStudioUser` direct. 32 | """ 33 | 34 | def __init__( 35 | self, 36 | scope: Construct, 37 | id: str, 38 | smcr_helper_layer: ILayerVersion, 39 | *, 40 | eligible_domain_execution_role_arns: Optional[str] = None, 41 | log_retention: Optional[aws_logs.RetentionDays] = None, 42 | provider_function_env_encryption: Optional[aws_kms.IKey] = None, 43 | provider_function_name: Optional[str] = None, 44 | role: Optional[aws_iam.IRole] = None, 45 | security_groups: Optional[Sequence[aws_ec2.ISecurityGroup]] = None, 46 | total_timeout: Optional[Duration] = None, 47 | vpc: Optional[aws_ec2.IVpc] = None, 48 | vpc_subnets: Optional[Union[aws_ec2.SubnetSelection, Dict[str, Any]]] = None, 49 | ) -> None: 50 | """Create a SMStudioUserCustomResourceProvider 51 | 52 | Most parameters are as per parent aws_cdk.custom_resources.Provider, with the below 53 | exceptions: 54 | 55 | Parameters 56 | ---------- 57 | smcr_helper_layer : 58 | Shared Lambda layer with helper functions for SageMaker custom resources (see 59 | `cr_lambda_common`) 60 | eligible_domain_execution_role_arns : 61 | Set this optional ARN pattern to restrict the iam:PassRole permissions of the provider 62 | to a particular SageMaker Execution Role or wildcard pattern. By default (`None`), the 63 | provider will be created with permission to create Domains using any IAM Role 64 | role : 65 | By default, we'll create a role with required SageMaker and IAM accesses. If you 66 | provide your own role, you'll need to ensure these permissions are set up. 
This role is 67 | used for the Custom Resource event handler function, not the CDK CR framework function. 68 | """ 69 | if not role: 70 | role = aws_iam.Role( 71 | scope, 72 | "SMUserProviderRole", 73 | assumed_by=aws_iam.ServicePrincipal("lambda.amazonaws.com"), 74 | description=( 75 | "Execution role for CFN Custom Resource Lambda providing SageMaker Studio " 76 | "User Profiles" 77 | ), 78 | inline_policies={ 79 | "SageMakerLCCAdmin": aws_iam.PolicyDocument( 80 | statements=[ 81 | aws_iam.PolicyStatement( 82 | actions=[ 83 | "sagemaker:CreateUserProfile", 84 | "sagemaker:DeleteUserProfile", 85 | "sagemaker:DescribeUserProfile", 86 | ], 87 | resources=["*"], 88 | ), 89 | aws_iam.PolicyStatement( 90 | actions=["iam:PassRole"], 91 | resources=[eligible_domain_execution_role_arns or "*"], 92 | ), 93 | ], 94 | ), 95 | }, 96 | managed_policies=[ 97 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 98 | "service-role/AWSLambdaBasicExecutionRole", 99 | ), 100 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 101 | "AWSXRayDaemonWriteAccess", 102 | ), 103 | ], 104 | ) 105 | if not smcr_helper_layer: 106 | raise ValueError("smcr_helper_layer is required") 107 | on_event_handler = PythonFunction( 108 | scope, 109 | "SMUserEventHandler", 110 | description=("CFn custom resource handler to create SageMaker Studio User Profiles"), 111 | entry=LAMBDA_PATH, 112 | environment_encryption=provider_function_env_encryption, 113 | index="main.py", 114 | handler="lambda_handler", 115 | layers=[smcr_helper_layer], 116 | memory_size=128, 117 | role=role, 118 | runtime=LambdaRuntime.PYTHON_3_12, 119 | timeout=Duration.minutes(10), # Can take some time to wait for create/delete 120 | vpc=vpc, 121 | vpc_subnets=vpc_subnets, 122 | ) 123 | super().__init__( 124 | scope, 125 | id, 126 | on_event_handler=on_event_handler, 127 | log_retention=log_retention, 128 | provider_function_env_encryption=provider_function_env_encryption, 129 | provider_function_name=provider_function_name, 130 | security_groups=security_groups, 131 | total_timeout=total_timeout, 132 | vpc=vpc, 133 | vpc_subnets=vpc_subnets, 134 | ) 135 | 136 | 137 | class SageMakerStudioUser(CustomResource): 138 | """AWS CDK Construct for a SageMaker Studio User Profile with additional features 139 | 140 | Unlike the CDK's built-in construct for a SMStudio User, this construct is backed by a Custom 141 | Resource Lambda and: 142 | - Exposes the EFS POSIX user ID mapped for the created SageMaker Studio user profile 143 | """ 144 | 145 | def __init__( 146 | self, 147 | scope: Construct, 148 | id: str, 149 | app_arn_map: CfnSageMakerAppsByRegionMapping, 150 | domain_id: str, 151 | name: str, 152 | role_arn: str, # TODO: Support default role creation? 153 | *, 154 | lcc_classic_arn: Optional[str] = None, 155 | lcc_jupyterlab_arn: Optional[str] = None, 156 | provider: Optional[SMStudioUserCustomResourceProvider] = None, 157 | removal_policy: Optional[RemovalPolicy] = None, 158 | resource_type: str = "Custom::SageMakerStudioUserProfile", 159 | smcr_helper_layer: Optional[ILayerVersion] = None, 160 | ) -> None: 161 | """Create a SageMakerStudioUser 162 | 163 | Parameters 164 | ---------- 165 | app_arn_map : 166 | CFn mapping by AWS Region containing "jlabv3" default (classic) SageMaker Studio 167 | JupyterServer app image. See `..smstudio.region_config.STUDIO_APP_ARNS_BY_REGION`.
168 | domain_id : 169 | SageMaker Studio Domain ID to create the User Profile in 170 | name : 171 | (Domain-unique) name of the user profile 172 | role_arn : 173 | ARN of the SageMaker execution role to assign the user (which dictates their 174 | permissions once logged in to the notebook environment) 175 | lcc_classic_arn : 176 | Optional JupyterServer (classic) LifeCycle Configuration Script to enable for the user. 177 | lcc_jupyterlab_arn : 178 | Optional (new-style) JupyterLab space LifeCycle Configuration Script to enable for the 179 | user. 180 | removal_policy : 181 | Optional CDK `RemovalPolicy` to apply to the created user profile custom resource 182 | (e.g. `RemovalPolicy.DESTROY`). 183 | provider : 184 | Optional `SMStudioUserCustomResourceProvider` if you'd like to customize provider 185 | configuration or re-use the Custom Resource Lambda across multiple users in your CDK app 186 | smcr_helper_layer : 187 | (Required if `provider` is not set) Shared Lambda layer with helper functions for 188 | SageMaker custom resources (see `cr_lambda_common`). 189 | """ 190 | if not domain_id: 191 | raise ValueError("You must provide a SageMaker Studio domain_id") 192 | if not name: 193 | raise ValueError("You must provide a Domain-unique user profile name") 194 | if not provider: 195 | provider = SMStudioUserCustomResourceProvider( 196 | scope, "StudioUserProvider", smcr_helper_layer=smcr_helper_layer 197 | ) 198 | 199 | resource_props = { 200 | "DomainId": domain_id, 201 | "UserProfileName": name, 202 | "UserSettings": { 203 | "ExecutionRole": role_arn, 204 | # Set new-style JupyterLab space defaults: 205 | "JupyterLabAppSettings": { 206 | "DefaultResourceSpec": { 207 | # TODO: Is this necessary or can we omit it? 208 | "InstanceType": "ml.t3.medium", 209 | }, 210 | }, 211 | # Set classic JupyterLabv3 default and attach the lifecycle configuration script: 212 | "JupyterServerAppSettings": { 213 | "DefaultResourceSpec": { 214 | "SageMakerImageArn": app_arn_map.find_in_map( 215 | Stack.of(scope).region, "jlabv3" 216 | ), 217 | "InstanceType": "system", 218 | }, 219 | }, 220 | }, 221 | } 222 | if lcc_classic_arn: 223 | resource_props["UserSettings"]["JupyterServerAppSettings"]["DefaultResourceSpec"][ 224 | "LifecycleConfigArn" 225 | ] = lcc_classic_arn 226 | if lcc_jupyterlab_arn: 227 | resource_props["UserSettings"]["JupyterLabAppSettings"]["DefaultResourceSpec"][ 228 | "LifecycleConfigArn" 229 | ] = lcc_jupyterlab_arn 230 | 231 | super().__init__( 232 | scope, 233 | id, 234 | service_token=provider.service_token, 235 | # pascal_case_properties=None, 236 | properties=resource_props, 237 | removal_policy=removal_policy, 238 | resource_type=resource_type, 239 | ) 240 | 241 | @property 242 | def home_efs_file_system_uid(self): 243 | return self.get_att("HomeEfsFileSystemUid") 244 | 245 | @property 246 | def name(self): 247 | return self.ref 248 | -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/util/data.py: -------------------------------------------------------------------------------- 1 | # Python Built-Ins: 2 | from io import BytesIO 3 | import os 4 | from time import sleep 5 | from typing import Callable, Dict, Iterable, Optional 6 | from urllib.request import urlopen 7 | from zipfile import ZipFile 8 | 9 | # External Dependencies: 10 | import botocore 11 | import numpy as np 12 | import pandas as pd 13 | import sagemaker 14 | from sagemaker.feature_store.feature_definition import FeatureDefinition 15 | from
sagemaker.feature_store.feature_group import FeatureGroup, FeatureParameter 16 | 17 | 18 | def fetch_sample_data( 19 | zip_url: str = "https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip", 20 | local_folder: str = "data", 21 | target_file: str = "bank-additional/bank-additional-full.csv", 22 | ) -> str: 23 | """Fetch the raw sample dataset, download and extract it locally, and return the local file path 24 | """ 25 | target_file_path = os.path.join(local_folder, target_file) 26 | 27 | if os.path.isdir(local_folder) and os.path.isfile(target_file_path): 28 | print(f"Skipping download - file already exists {target_file_path}") 29 | else: 30 | print(f"Downloading zip data...\n{zip_url}") 31 | with urlopen(zip_url) as resp: 32 | with ZipFile(BytesIO(resp.read())) as zip_file: 33 | print(f"Extracting to {local_folder}...") 34 | zip_file.extractall(local_folder) 35 | 36 | return target_file_path 37 | 38 | 39 | 40 | def transform_df(df: pd.DataFrame) -> pd.DataFrame: 41 | # Indicator variable to capture when pdays takes a value of 999 42 | df["no_previous_contact"] = np.where(df["pdays"] == 999, 1, 0) 43 | 44 | # Indicator for individuals not actively employed 45 | df["not_working"] = np.where( 46 | np.in1d(df["job"], ["student", "retired", "unemployed"]), 1, 0 47 | ) 48 | 49 | # df = pd.get_dummies(df) # Convert categorical variables to sets of indicators 50 | 51 | # Replace "y_no" and "y_yes" with a single label column, and bring it to the front: 52 | # df_model_data = pd.concat( 53 | # [ 54 | # df_model_data["y_yes"].rename("y"), 55 | # df_model_data.drop(["y_no", "y_yes"], axis=1), 56 | # ], 57 | # axis=1, 58 | # ) 59 | 60 | # Encode 'y' to numeric so AutoGluon-Tabular predictions can be mapped to labels: 61 | assert "yes" in df["y"].unique(), "Expected 'y' column to contain 'yes' and 'no'" 62 | df["y"] = df["y"].apply(lambda y: int(y == "yes")) 63 | 64 | # Move 'y' to front: 65 | df = df.loc[:, ["y"] + [col for col in df.columns if col != "y"]] 66 | 67 | # Add record identifier and event timestamp fields required for SageMaker Feature Store: 68 | df["customer_id"] = df.index.to_series().apply(lambda num: f"C-{num:08}") 69 | df["event_time"] = (pd.Timestamp.utcnow() - pd.DateOffset(years=1)).timestamp() 70 | 71 | return df 72 | 73 | 74 | def load_sample_data( 75 | raw_file_path: str, 76 | fg_s3_uri: str, 77 | ignore_cols: Iterable[str] = ( 78 | "duration", "emp.var.rate", "cons.price.idx", "cons.conf.idx", "euribor3m", "nr.employed" 79 | ), 80 | transform_fn: Callable[[pd.DataFrame], pd.DataFrame] = transform_df, 81 | feature_group_name: str = "sm101-direct-marketing", 82 | feature_group_description: str = ( 83 | "Demo Bank Marketing dataset for 'SageMaker 101' workshop, based on " 84 | "http://archive.ics.uci.edu/ml/datasets/Bank+Marketing" 85 | # "Demo Bank marketing dataset for 'SageMaker 101' introductory workshop.\n\n" 86 | # "This is a transformed version of the 'Bank Marketing' UCI dataset for research. Please " 87 | # "cite: S. Moro, P. Cortez and P. Rita. A Data-Driven Approach to Predict the Success of " 88 | # "Bank Telemarketing. 
Decision Support Systems, In press, " 89 | # "http://dx.doi.org/10.1016/j.dss.2014.03.001\n\n" 90 | # "Data description at: http://archive.ics.uci.edu/ml/datasets/Bank+Marketing" 91 | ), 92 | feature_descriptions: Dict[str, str] = { 93 | "customer_id": ( 94 | "Unique customer identifier (dummy added for purpose of SageMaker Feature Store)" 95 | ), 96 | "event_time": "Event/update timestamp (dummy added for purpose of SageMaker Feature Store)", 97 | "y": ( 98 | "Has the client subscribed a term deposit? (binary: 0/1). This is the target variable " 99 | "for our direct marketing example." 100 | ), 101 | ## Bank client data: 102 | "age": "Client's age in years", 103 | "job": ( 104 | 'Type of job (categorical: "admin.","blue-collar","entrepreneur","housemaid",' 105 | '"management","retired","self-employed","services","student","technician","unemployed",' 106 | '"unknown")' 107 | ), 108 | "marital": ( 109 | 'Marital status (categorical: "divorced","married","single","unknown"; note: ' 110 | '"divorced" means divorced or widowed)' 111 | ), 112 | "education": ( 113 | 'Highest education (categorical: "basic.4y","basic.6y","basic.9y","high.school",' 114 | '"illiterate","professional.course","university.degree","unknown")' 115 | ), 116 | "default": 'Has credit in default? (categorical: "no","yes","unknown")', 117 | "housing": 'Has housing loan? (categorical: "no","yes","unknown")', 118 | "loan": 'Has personal loan? (categorical: "no","yes","unknown")', 119 | ## Related with last contact of current campaign: 120 | "contact": 'Contact communication type (categorical: "cellular","telephone")', 121 | "day_of_week": 'Last contact day of the week (categorical: "mon","tue","wed","thu","fri")', 122 | # "duration": ( 123 | # 'Last contact duration, in seconds (numeric). Important note: this attribute highly ' 124 | # 'affects the output target (e.g., if duration=0 then y="no"). Yet, the duration is not ' 125 | # 'known before a call is performed. Also, after the end of the call y is obviously ' 126 | # 'known. Thus, this input should only be included for benchmark purposes and should be ' 127 | # 'discarded if the intention is to have a realistic predictive model.' 
128 | # ), 129 | ## Other attributes: 130 | "campaign": ( 131 | "Number of contacts performed during this campaign and for this client (numeric, " 132 | "includes last contact)" 133 | ), 134 | "pdays": ( 135 | "Number of days that passed by after the client was last contacted from a previous " 136 | "campaign (numeric; 999 means client was not previously contacted)" 137 | ), 138 | "previous": ( 139 | "Number of contacts performed before this campaign and for this client (numeric)" 140 | ), 141 | "poutcome": ( 142 | 'Outcome of the previous marketing campaign (categorical: "failure","nonexistent",' 143 | '"success")' 144 | ), 145 | ## Social and economic context attributes: 146 | # "emp.var.rate": "Employment variation rate - quarterly indicator (numeric)", 147 | # "cons.price.idx": "Consumer price index - monthly indicator (numeric)", 148 | # "cons.conf.idx": "Consumer confidence index - monthly indicator (numeric)", 149 | # "euribor3m": "EURIBOR 3 month rate - daily indicator (numeric)", 150 | # "nr.employed": "Number of employees - quarterly indicator (numeric)", 151 | ## Synthetics from transform_fn: 152 | "no_previous_contact": ( 153 | "Boolean indicator for clients not previously contacted (pdays=999)" 154 | ), 155 | "not_working": "Boolean indicator for individuals not actively employed", 156 | }, 157 | feature_parameters: Dict[str, Dict[str, str]] = { 158 | "Source": { 159 | "bank-client": ["age", "job", "marital", "education", "default", "housing", "loan"], 160 | "last-contact": ["contact", "day_of_week"], 161 | "other": ["campaign", "pdays", "previous", "poutcome"], 162 | "subscriptions": ["y"], 163 | "transforms": ["no_previous_contact", "not_working"], 164 | }, 165 | }, 166 | fg_record_identifier_field: str = "customer_id", 167 | fg_event_timestamp_field: str = "event_time", 168 | sagemaker_session: Optional[sagemaker.Session] = None, 169 | ) -> None: 170 | print(f"Loading {raw_file_path}...") 171 | df = pd.read_csv(raw_file_path) 172 | print("Transforming dataframe...") 173 | df.drop(columns=[col for col in ignore_cols], inplace=True) 174 | df = transform_fn(df) 175 | 176 | print(f"Setting up SageMaker Feature Store feature group: {feature_group_name}") 177 | if not sagemaker_session: 178 | sagemaker_session = sagemaker.Session() 179 | feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=sagemaker_session) 180 | 181 | # Pandas defaults string fields to 'object' dtype, which FS type inference doesn't like: 182 | for col in df: 183 | if pd.api.types.is_object_dtype(df[col].dtype): 184 | df[col] = df[col].astype(pd.StringDtype()) 185 | 186 | #print(df.info()) 187 | feature_group.load_feature_definitions(data_frame=df) 188 | 189 | feature_group.create( 190 | s3_uri=fg_s3_uri, 191 | record_identifier_name=fg_record_identifier_field, 192 | event_time_feature_name=fg_event_timestamp_field, 193 | role_arn=sagemaker.get_execution_role(sagemaker_session), 194 | enable_online_store=True, 195 | description=feature_group_description, 196 | ) 197 | wait_for_fg_creation(feature_group) 198 | 199 | ingestion_manager = feature_group.ingest(data_frame=df, max_processes=16, wait=False) 200 | 201 | print("Configuring feature metadata...") 202 | update_meta_calls = {} 203 | for feature_name, desc in feature_descriptions.items(): 204 | update_meta_calls[feature_name] = {"description": desc} 205 | for param_name, spec in feature_parameters.items(): 206 | for param_value, features in spec.items(): 207 | for feature_name in features: 208 | if feature_name not in update_meta_calls: 209 
| update_meta_calls[feature_name] = {} 210 | feature_spec = update_meta_calls[feature_name] 211 | if param_value is None: 212 | if "parameter_removals" not in feature_spec: 213 | feature_spec["parameter_removals"] = [param_name] 214 | else: 215 | feature_spec["parameter_removals"].append(param_name) 216 | else: 217 | if "parameter_additions" not in feature_spec: 218 | feature_spec["parameter_additions"] = [ 219 | FeatureParameter(key=param_name, value=param_value), 220 | ] 221 | else: 222 | feature_spec["parameter_additions"].append( 223 | FeatureParameter(key=param_name, value=param_value), 224 | ) 225 | for feature_name, feature_spec in update_meta_calls.items(): 226 | feature_group.update_feature_metadata(feature_name, **feature_spec) 227 | sleep(2) 228 | 229 | print("Ingesting data to SageMaker Feature Store...") 230 | ingestion_manager.wait() 231 | ingest_timestamp = pd.Timestamp.now() 232 | 233 | 234 | print("Waiting for propagation to offline Feature Store...") 235 | ingest_wait_period = pd.DateOffset( 236 | minutes=5, # Technically can take 15mins, but who has time for that 237 | ) 238 | sleep(((ingest_timestamp + ingest_wait_period) - pd.Timestamp.now()).seconds) 239 | 240 | print("Done!") 241 | return feature_group_name 242 | 243 | 244 | def describe_fg_if_exists(feature_group: FeatureGroup) -> Optional[dict]: 245 | try: 246 | return feature_group.describe() 247 | except botocore.exceptions.ClientError as e: 248 | if "Not Found" in e.response["Error"]["Message"]: 249 | return None 250 | else: 251 | raise e 252 | 253 | 254 | def wait_for_fg_creation(feature_group): 255 | status = feature_group.describe().get("FeatureGroupStatus") 256 | print( 257 | f"Waiting for creation of Feature Group {feature_group.name} (Initial status {status})", 258 | end="", 259 | ) 260 | while status == "Creating": 261 | print(".", end="") 262 | sleep(5) 263 | status = feature_group.describe().get("FeatureGroupStatus") 264 | print() 265 | if status != "Created": 266 | raise RuntimeError(f"Failed to create feature group {feature_group.name}: {status}") 267 | print(f"Feature Group {feature_group.name} successfully created.") -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | """AWS CDK constructs for LifeCycle Configurations in Amazon SageMaker 4 | """ 5 | # Python Built-Ins: 6 | import os 7 | from typing import Any, Dict, Optional, Sequence, TextIO, Union 8 | 9 | # External Dependencies: 10 | from aws_cdk import CustomResource, Duration, Fn, RemovalPolicy, Stack 11 | import aws_cdk.aws_ec2 as aws_ec2 12 | import aws_cdk.aws_iam as aws_iam 13 | import aws_cdk.aws_kms as aws_kms 14 | from aws_cdk.aws_lambda import ILayerVersion, Runtime as LambdaRuntime 15 | from aws_cdk.aws_lambda_python_alpha import PythonFunction 16 | import aws_cdk.aws_logs as aws_logs 17 | import aws_cdk.aws_sagemaker as sagemaker_cdk 18 | import aws_cdk.custom_resources as cr 19 | from constructs import Construct 20 | 21 | 22 | CR_LAMBDA_PATH = os.path.join(os.path.dirname(__file__), "fn_studio_lcconfig") 23 | 24 | 25 | class SageMakerNotebookLifecycleConfig(Construct): 26 | """AWS CDK Construct for a SageMaker Notebook Instance Lifecycle Configuration Script 27 | 28 | See also 29 | -------- 30 | https://docs.aws.amazon.com/sagemaker/latest/dg/notebook-lifecycle-config.html 31 | https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-sagemaker-notebookinstancelifecycleconfig.html 32 | """ 33 | 34 | cfn_construct: sagemaker_cdk.CfnNotebookInstanceLifecycleConfig 35 | 36 | def __init__( 37 | self, 38 | scope: Construct, 39 | id: str, 40 | *args, 41 | name: Optional[str] = None, 42 | on_create_script: Optional[Union[str, TextIO]] = None, 43 | on_start_script: Optional[Union[str, TextIO]] = None, 44 | ) -> None: 45 | """Create a SageMakerNotebookLifecycleConfig 46 | 47 | Parameters 48 | ---------- 49 | name : 50 | If not provided, a default will be generated based on the stack name 51 | on_create_script : 52 | The text of the shell script you'd like to run on Notebook Instance creation (one-off), 53 | or an open file handle from which the script may be `.read()`. This script may contain 54 | placeholder variables to be filled in by `Fn::Sub`. 55 | on_start_script : 56 | The text of the shell script you'd like to run on Notebook Instance start (every time), 57 | or an open file handle from which the script may be `.read()`. This script may contain 58 | placeholder variables to be filled in by `Fn::Sub`. 59 | """ 60 | super().__init__(scope, id) 61 | stack = Stack.of(self) 62 | 63 | self.to_string() 64 | if name is None: 65 | # TODO: How to get fully qualified construct name? 66 | name = f"{stack.stack_name}-LCC" 67 | 68 | self.cfn_construct = sagemaker_cdk.CfnNotebookInstanceLifecycleConfig( 69 | self, 70 | id, 71 | notebook_instance_lifecycle_config_name=name, # (Name prop is mandatory) 72 | on_create=( 73 | [self._script_to_lcc_hook_property(on_create_script)] if on_create_script else None 74 | ), 75 | on_start=( 76 | [self._script_to_lcc_hook_property(on_start_script)] if on_start_script else None 77 | ), 78 | ) 79 | 80 | @staticmethod 81 | def _script_to_lcc_hook_property(script: Union[str, TextIO], enable_substitution: bool = True): 82 | """Convert a LCC script (string or file handle) to a CFn LCC hook property 83 | 84 | Parameters 85 | ---------- 86 | script : 87 | String content of the shell script, or an open file from which the content may be 88 | `.read()` 89 | enable_substitution : 90 | Whether to pass the script content through CloudFormation Fn::Sub variable resolution. 91 | Default True. 
92 | """ 93 | content = script if isinstance(script, str) else script.read() 94 | 95 | if enable_substitution: 96 | content = Fn.sub(content) 97 | return ( 98 | sagemaker_cdk.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty( 99 | content=Fn.base64(content) 100 | ) 101 | ) 102 | # return { 103 | # "content": Fn.base64(content) 104 | # } 105 | 106 | @property 107 | def name(self) -> str: 108 | return self.cfn_construct.attr_notebook_instance_lifecycle_config_name 109 | 110 | 111 | class SMStudioLCCCustomResourceProvider(cr.Provider): 112 | """Provider (AWS Lambda) for a CFn Custom Resource for SMStudio Lifecycle Configuration 113 | 114 | If you're only creating one LCC in your stack, you probably don't need to create this 115 | explicitly: Just use `SageMakerStudioLifecycleConfig` direct. 116 | """ 117 | 118 | def __init__( 119 | self, 120 | scope: Construct, 121 | id: str, 122 | smcr_helper_layer: ILayerVersion, 123 | *, 124 | eligible_domain_execution_role_arns: Optional[str] = None, 125 | log_retention: Optional[aws_logs.RetentionDays] = None, 126 | provider_function_env_encryption: Optional[aws_kms.IKey] = None, 127 | provider_function_name: Optional[str] = None, 128 | role: Optional[aws_iam.IRole] = None, 129 | security_groups: Optional[Sequence[aws_ec2.ISecurityGroup]] = None, 130 | total_timeout: Optional[Duration] = None, 131 | vpc: Optional[aws_ec2.IVpc] = None, 132 | vpc_subnets: Optional[Union[aws_ec2.SubnetSelection, Dict[str, Any]]] = None, 133 | ) -> None: 134 | """Create a SMStudioLCCCustomResourceProvider 135 | 136 | Most parameters are as per parent aws_cdk.custom_resources.Provider, with the below 137 | exceptions: 138 | 139 | Parameters 140 | ---------- 141 | eligible_domain_execution_role_arns : 142 | Set this optional ARN pattern to restrict the iam:PassRole permissions of the provider 143 | to a particular SageMaker Execution Role or wildcard pattern. By default (`None`), the 144 | provider will be created with permission to create Domains using any IAM Role 145 | role : 146 | By default, we'll create a role with required SageMaker and IAM accesses. If you 147 | provide your own role, you'll need to ensure these permissions are set up. This role is 148 | used for the Custom Resource event handler function, not the CDK CR framework function. 
149 | smcr_helper_layer : 150 | Shared Lambda layer with helper functions for SageMaker custom resources (see 151 | `cr_lambda_common`) 152 | """ 153 | if not role: 154 | role = aws_iam.Role( 155 | scope, 156 | "Role", 157 | assumed_by=aws_iam.ServicePrincipal("lambda.amazonaws.com"), 158 | description=( 159 | "Execution role for CFN Custom Resource Lambda providing SageMaker Studio " 160 | "Lifecycle Configuration Scripts" 161 | ), 162 | inline_policies={ 163 | "SageMakerLCCAdmin": aws_iam.PolicyDocument( 164 | statements=[ 165 | aws_iam.PolicyStatement( 166 | actions=[ 167 | "sagemaker:CreateStudioLifecycleConfig", 168 | "sagemaker:DeleteStudioLifecycleConfig", 169 | "sagemaker:DescribeDomain", 170 | "sagemaker:UpdateDomain", 171 | ], 172 | resources=["*"], 173 | ), 174 | aws_iam.PolicyStatement( 175 | actions=["iam:PassRole"], 176 | resources=[eligible_domain_execution_role_arns or "*"], 177 | ), 178 | ], 179 | ), 180 | }, 181 | managed_policies=[ 182 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 183 | "service-role/AWSLambdaBasicExecutionRole", 184 | ), 185 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 186 | "AWSXRayDaemonWriteAccess", 187 | ), 188 | ], 189 | ) 190 | if not smcr_helper_layer: 191 | raise ValueError("smcr_helper_layer is required") 192 | on_event_handler = PythonFunction( 193 | scope, 194 | "EventHandler", 195 | description=( 196 | "CFn custom resource handler to create SageMaker Studio Lifecycle Configurations" 197 | ), 198 | entry=CR_LAMBDA_PATH, 199 | environment_encryption=provider_function_env_encryption, 200 | index="main.py", 201 | handler="lambda_handler", 202 | layers=[smcr_helper_layer], 203 | memory_size=128, 204 | role=role, 205 | runtime=LambdaRuntime.PYTHON_3_12, 206 | security_groups=security_groups, 207 | timeout=Duration.minutes(10), # Can take a while if it has to wait for updating domain 208 | vpc=vpc, 209 | vpc_subnets=vpc_subnets, 210 | ) 211 | super().__init__( 212 | scope, 213 | id, 214 | on_event_handler=on_event_handler, 215 | log_retention=log_retention, 216 | provider_function_env_encryption=provider_function_env_encryption, 217 | provider_function_name=provider_function_name, 218 | # TODO: Add support for `role` without circular dependency 219 | # role=role, 220 | security_groups=security_groups, 221 | total_timeout=total_timeout, 222 | vpc=vpc, 223 | vpc_subnets=vpc_subnets, 224 | ) 225 | 226 | 227 | class SageMakerStudioLifecycleConfig(CustomResource): 228 | """AWS CDK Construct for a SageMaker Studio Lifecycle Configuration Script""" 229 | 230 | def __init__( 231 | self, 232 | scope: Construct, 233 | id: str, 234 | content: Union[str, TextIO], 235 | *, 236 | app_type: str = "JupyterServer", 237 | domain_id: Optional[str] = None, 238 | enable_content_substitution: bool = True, 239 | name: Optional[str] = None, 240 | provider: Optional[SMStudioLCCCustomResourceProvider] = None, 241 | removal_policy: Optional[RemovalPolicy] = None, 242 | resource_type: str = "Custom::SageMakerStudioLifecycleConfiguration", 243 | smcr_helper_layer: Optional[ILayerVersion] = None, 244 | ) -> None: 245 | """Create a SageMakerStudioLifecycleConfig 246 | 247 | Parameters 248 | ---------- 249 | app_type : 250 | SageMaker Studio App Type e.g. "JupyterServer" or "KernelGateway" 251 | domain_id : 252 | SageMaker Studio Domain ID to associate the LCC to (will not be associated, if not set) 253 | enable_content_substitution : 254 | Set `True` to enable CloudFormation `!Sub` substitution on the provided script content, 255 | or `False` to disable. 
256 | name : 257 | (Account+region unique) name of the LifeCycle Configuration script to create 258 | removal_policy : 259 | Optional CDK `RemovalPolicy` to apply to the created Lifecycle Configuration resource 260 | provider : 261 | Optional `SMStudioLCCCustomResourceProvider` if you'd like to customize provider 262 | configuration or re-use the Custom Resource Lambda across multiple LCCs in your CDK app 263 | smcr_helper_layer : 264 | (Required if `provider` is not set) Shared Lambda layer with helper functions for 265 | SageMaker custom resources (see `cr_lambda_common`). 266 | """ 267 | if not isinstance(content, str): 268 | content = content.read() 269 | if enable_content_substitution: 270 | content = Fn.sub(content) 271 | if not provider: 272 | provider = SMStudioLCCCustomResourceProvider( 273 | scope, "StudioLCCProvider", smcr_helper_layer=smcr_helper_layer 274 | ) 275 | if not name: 276 | raise NotImplementedError("TODO: generate a name by default!") 277 | 278 | props = {"AppType": app_type, "Name": name, "Content": Fn.base64(content)} 279 | if domain_id: 280 | props["DomainId"] = domain_id 281 | 282 | super().__init__( 283 | scope, 284 | id, 285 | service_token=provider.service_token, 286 | # pascal_case_properties=None, 287 | properties=props, 288 | removal_policy=removal_policy, 289 | resource_type=resource_type, 290 | ) 291 | 292 | @property 293 | def arn(self): 294 | return self.ref 295 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/domain/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """AWS CDK constructs for creating SageMaker Domains with advanced configuration options 4 | """ 5 | # Python Built-Ins: 6 | import os 7 | from typing import Any, Dict, List, Optional, Sequence, Union 8 | 9 | # External Dependencies: 10 | from aws_cdk import CustomResource, Duration, RemovalPolicy 11 | import aws_cdk.aws_ec2 as aws_ec2 12 | import aws_cdk.aws_iam as aws_iam 13 | from aws_cdk.aws_lambda import ILayerVersion 14 | import aws_cdk.aws_kms as aws_kms 15 | from aws_cdk.aws_lambda import Runtime as LambdaRuntime 16 | from aws_cdk.aws_lambda_python_alpha import PythonFunction 17 | import aws_cdk.aws_logs as aws_logs 18 | import aws_cdk.custom_resources as cr 19 | from constructs import Construct 20 | 21 | 22 | LAMBDA_PATH = os.path.join(os.path.dirname(__file__), "fn_domain") 23 | 24 | 25 | class SMStudioDomainCustomResourceProvider(cr.Provider): 26 | """Provider (AWS Lambda) for a CFn Custom Resource for SMStudio Domain 27 | 28 | If you're only creating one Domain in your stack, you probably don't need to create this 29 | explicitly: Just use `SageMakerStudioDomain` direct.
30 | """ 31 | 32 | def __init__( 33 | self, 34 | scope: Construct, 35 | id: str, 36 | smcr_helper_layer: ILayerVersion, 37 | *, 38 | eligible_domain_execution_role_arns: Optional[str] = None, 39 | log_retention: Optional[aws_logs.RetentionDays] = None, 40 | provider_function_env_encryption: Optional[aws_kms.IKey] = None, 41 | provider_function_name: Optional[str] = None, 42 | role: Optional[aws_iam.IRole] = None, 43 | security_groups: Optional[Sequence[aws_ec2.ISecurityGroup]] = None, 44 | total_timeout: Optional[Duration] = None, 45 | vpc: Optional[aws_ec2.IVpc] = None, 46 | vpc_subnets: Optional[Union[aws_ec2.SubnetSelection, Dict[str, Any]]] = None, 47 | ) -> None: 48 | """Create a SMStudioDomainCustomResourceProvider 49 | 50 | Most parameters are as per parent aws_cdk.custom_resources.Provider, with the below 51 | exceptions: 52 | 53 | Parameters 54 | ---------- 55 | smcr_helper_layer : 56 | Shared Lambda layer with helper functions for SageMaker custom resources (see 57 | `cr_lambda_common`) 58 | eligible_domain_execution_role_arns : 59 | Set this optional ARN pattern to restrict the iam:PassRole permissions of the provider 60 | to a particular SageMaker Execution Role or wildcard pattern. By default (`None`), the 61 | provider will be created with permission to create Domains using any IAM Role 62 | role : 63 | By default, we'll create a role with required SageMaker, VPC, and IAM accesses. If you 64 | provide your own role, you'll need to ensure these permissions are set up. This role is 65 | used for the Custom Resource event handler function, not the CDK CR framework function. 66 | """ 67 | if not role: 68 | role = aws_iam.Role( 69 | scope, 70 | "SMDomainProviderRole", 71 | assumed_by=aws_iam.ServicePrincipal("lambda.amazonaws.com"), 72 | description=( 73 | "Execution role for CFN Custom Resource Lambda providing SageMaker Studio " 74 | "Domains" 75 | ), 76 | inline_policies={ 77 | "SageMakerDomainAdmin": aws_iam.PolicyDocument( 78 | statements=[ 79 | aws_iam.PolicyStatement( 80 | actions=[ 81 | "ec2:DescribeSecurityGroups", 82 | "ec2:DescribeSubnets", 83 | "ec2:DescribeVpcs", 84 | # IAM access to create service roles if not already existing: 85 | # (e.g. 'AWSServiceRoleForAmazonSageMakerNotebooks') 86 | "iam:CreateServiceLinkedRole", 87 | "iam:DeleteServiceLinkedRole", 88 | "iam:ListRoles", 89 | "sagemaker:CreateDomain", 90 | "sagemaker:DeleteDomain", 91 | "sagemaker:DescribeDomain", 92 | # TODO: Any other service catalog / IAM / etc permissions needed? 
93 | "sagemaker:EnableSagemakerServicecatalogPortfolio", 94 | "sagemaker:UpdateDomain", 95 | # For enabling SageMaker Project Templates: 96 | "servicecatalog:AcceptPortfolioShare", 97 | "servicecatalog:AssociatePrincipalWithPortfolio", 98 | "servicecatalog:ListAcceptedPortfolioShares", 99 | ], 100 | resources=["*"], 101 | ), 102 | aws_iam.PolicyStatement( 103 | actions=["iam:PassRole"], 104 | resources=[eligible_domain_execution_role_arns or "*"], 105 | ), 106 | ], 107 | ), 108 | }, 109 | managed_policies=[ 110 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 111 | "service-role/AWSLambdaBasicExecutionRole", 112 | ), 113 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 114 | "AWSXRayDaemonWriteAccess", 115 | ), 116 | ], 117 | ) 118 | if not smcr_helper_layer: 119 | raise ValueError("smcr_helper_layer is required") 120 | on_event_handler = PythonFunction( 121 | scope, 122 | "SMDomainEventHandler", 123 | description=("CFn custom resource handler to create SageMaker Studio Domains"), 124 | entry=LAMBDA_PATH, 125 | environment_encryption=provider_function_env_encryption, 126 | index="main.py", 127 | handler="lambda_handler", 128 | layers=[smcr_helper_layer], 129 | memory_size=128, 130 | role=role, 131 | runtime=LambdaRuntime.PYTHON_3_12, 132 | timeout=Duration.seconds(895), # Needs to wait for domain so can take a while 133 | vpc=vpc, 134 | vpc_subnets=vpc_subnets, 135 | ) 136 | super().__init__( 137 | scope, 138 | id, 139 | on_event_handler=on_event_handler, 140 | # is_complete_handler=is_complete_handler, 141 | log_retention=log_retention, 142 | provider_function_env_encryption=provider_function_env_encryption, 143 | provider_function_name=provider_function_name, 144 | # query_interval=query_interval, 145 | # TODO: Add support for `role` without circular dependency 146 | # role=role, 147 | security_groups=security_groups, 148 | total_timeout=total_timeout, 149 | vpc=vpc, 150 | vpc_subnets=vpc_subnets, 151 | ) 152 | 153 | 154 | class SageMakerStudioDomain(CustomResource): 155 | """AWS CDK Construct for a SageMaker Studio Domain with additional features 156 | 157 | Unlike the CDK's built-in construct for a SMStudio Domain, this construct is backed by a Custom 158 | Resource Lambda and: 159 | - Defaults to the Default VPC (or else the first available VPC) in the account automatically, 160 | if a VPC is not specified. 161 | - Defaults to all default subnets (or else all available subnets in the VPC) if VPC subnets are 162 | not specified. 163 | - Optionally proposes a new small IPv4 CIDR for administrative tasks (e.g. EFS), compatible 164 | with the seleted VPC, at deploy time if `propose_admin_subnet` is set to `True`. (This is 165 | not so useful in CDK because of how constructs deal with VPC, but can be useful for SAM). 
166 | - Optionally enables SageMaker Projects (SageMaker Service Catalog portfolio) 167 | """ 168 | 169 | _propose_admin_subnet: bool 170 | 171 | def __init__( 172 | self, 173 | scope: Construct, 174 | id: str, 175 | *, 176 | default_space_settings: Optional[dict] = None, 177 | default_user_settings: Optional[dict] = None, 178 | enable_docker_access: bool = True, 179 | enable_projects: bool = True, 180 | name: Optional[str] = None, 181 | propose_admin_subnet: bool = False, 182 | provider: Optional[SMStudioDomainCustomResourceProvider] = None, 183 | removal_policy: Optional[RemovalPolicy] = None, 184 | resource_type: str = "Custom::SageMakerStudioDomain", 185 | smcr_helper_layer: Optional[ILayerVersion] = None, 186 | subnet_ids: Optional[List[str]] = None, 187 | use_vpc_internet: bool = False, 188 | vpc_id: Optional[str] = None, 189 | ) -> None: 190 | """Create a SageMakerStudioDomain 191 | 192 | Parameters 193 | ---------- 194 | default_space_settings : 195 | Dictionary as per SageMaker CreateDomain/UpdateDomain API 196 | default_user_settings : 197 | Dictionary as per SageMaker CreateDomain/UpdateDomain API 198 | enable_docker_access : 199 | Enable docker access within Studio (Does not *install* docker by itself) 200 | name : 201 | Name for the SageMaker Studio Domain to create (must be unique in account+region) 202 | propose_admin_subnet : 203 | Whether to propose a new administrative subnet IPv4 CIDR at deploy-time 204 | provider : 205 | Optional `SMStudioDomainCustomResourceProvider` if you'd like to customize provider 206 | configuration or re-use the Custom Resource Lambda across multiple Domains in your CDK 207 | app 208 | smcr_helper_layer : 209 | (Required if `provider` is not set) Shared Lambda layer with helper functions for 210 | SageMaker custom resources (see `cr_lambda_common`). 
211 | use_vpc_internet : 212 | Whether spaces in the SageMaker Studio Domain should use the VPC (True) or direct 213 | connections (False) to access the internet 214 | """ 215 | if not provider: 216 | provider = SMStudioDomainCustomResourceProvider( 217 | scope, "StudioDomainProvider", smcr_helper_layer=smcr_helper_layer 218 | ) 219 | if not name: 220 | raise NotImplementedError("TODO: generate a name by default!") 221 | 222 | self._propose_admin_subnet = propose_admin_subnet 223 | resource_props = { 224 | "DomainName": name, 225 | "DomainSettings": { 226 | "DockerSettings": { 227 | "EnableDockerAccess": "ENABLED" if enable_docker_access else "DISABLED", 228 | }, 229 | }, 230 | "AppNetworkAccessType": "VpcOnly" if use_vpc_internet else "PublicInternetOnly", 231 | "EnableProjects": enable_projects, 232 | "ProposeAdminSubnet": propose_admin_subnet, 233 | } 234 | if default_space_settings: 235 | resource_props["DefaultSpaceSettings"] = default_space_settings 236 | if default_user_settings: 237 | resource_props["DefaultUserSettings"] = default_user_settings 238 | if subnet_ids: 239 | resource_props["SubnetIds"] = subnet_ids 240 | if vpc_id: 241 | resource_props["VpcId"] = vpc_id 242 | 243 | super().__init__( 244 | scope, 245 | id, 246 | service_token=provider.service_token, 247 | # pascal_case_properties=None, 248 | properties=resource_props, 249 | removal_policy=removal_policy, 250 | resource_type=resource_type, 251 | ) 252 | 253 | @property 254 | def domain_id(self) -> str: 255 | return self.get_att_string("DomainId") 256 | 257 | @property 258 | def domain_name(self) -> str: 259 | return self.get_att_string("DomainName") 260 | 261 | @property 262 | def home_efs_filesystem_id(self) -> str: 263 | return self.get_att_string("HomeEfsFileSystemId") 264 | 265 | @property 266 | def subnet_ids(self) -> str: 267 | """Returns *comma-separated string* of subnet IDs 268 | 269 | TODO: Refer to underlying subnets construct instead? 270 | """ 271 | return self.get_att_string("SubnetIds") 272 | 273 | @property 274 | def url(self) -> str: 275 | return self.get_att_string("Url") 276 | 277 | @property 278 | def vpc_id(self) -> str: 279 | return self.get_att_string("VpcId") 280 | 281 | @property 282 | def proposed_admin_subnet_cidr(self) -> str: 283 | """Deploy-time-generated IPv4 CIDR of the proposed administrative subnet""" 284 | if self._propose_admin_subnet: 285 | return self.get_att_string("ProposedAdminSubnetCidr") 286 | raise ValueError( 287 | "ProposedAdminSubnetCidr attr not available if property propose_admin_subnet=False" 288 | ) 289 | 290 | @property 291 | def inbound_efs_security_group_id(self) -> str: 292 | return self.get_att_string("InboundEFSSecurityGroupId") 293 | 294 | @property 295 | def outbound_efs_security_group_id(self) -> str: 296 | return self.get_att_string("OutboundEFSSecurityGroupId") 297 | --------------------------------------------------------------------------------
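
Usage sketch (illustrative only, not a file in this repository): the domain, lcc, and user constructs above are designed to share one helper Lambda layer built from `cr_lambda_common` and to chain their outputs together - the domain's ID into the LCC and user profile, and the LCC's ARN into the user profile. The construct IDs, layer entry path, execution role ARN, script choice, `app_type` value, and the `app_arn_map` argument (a `CfnSageMakerAppsByRegionMapping` built elsewhere from `region_config`) below are assumptions for the example, not values taken from this repository.

# Illustrative sketch only - identifiers below are assumptions, not repository code.
from aws_cdk import Stack
from aws_cdk.aws_lambda_python_alpha import PythonLayerVersion
from constructs import Construct

from cdk_src.smstudio.domain import SageMakerStudioDomain
from cdk_src.smstudio.lcc import SageMakerStudioLifecycleConfig
from cdk_src.smstudio.user import SageMakerStudioUser


class ExampleStudioStack(Stack):
    """Hypothetical stack wiring a Domain, an LCC, and a User Profile together"""

    def __init__(self, scope: Construct, construct_id: str, sm_execution_role_arn: str, app_arn_map, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # One helper layer shared by all three custom resource providers (see cr_lambda_common):
        smcr_helper_layer = PythonLayerVersion(
            self, "SMCRHelperLayer", entry="cdk_src/smstudio/cr_lambda_common"
        )

        # Domain: falls back to the default VPC/subnets because none are specified here
        domain = SageMakerStudioDomain(
            self,
            "StudioDomain",
            name="example-domain",
            default_user_settings={"ExecutionRole": sm_execution_role_arn},
            smcr_helper_layer=smcr_helper_layer,
        )

        # Lifecycle config: script content is read from file and passed through Fn::Sub
        with open("cdk_src/smstudio/lcc/studio-jupyterlab-onstart.sh") as f:
            lcc = SageMakerStudioLifecycleConfig(
                self,
                "OnStartLCC",
                content=f,
                app_type="JupyterLab",
                domain_id=domain.domain_id,
                name="example-onstart",
                smcr_helper_layer=smcr_helper_layer,
            )

        # User profile: attaches the LCC to new-style JupyterLab spaces for this user
        SageMakerStudioUser(
            self,
            "ExampleUser",
            app_arn_map=app_arn_map,
            domain_id=domain.domain_id,
            name="example-user",
            role_arn=sm_execution_role_arn,
            lcc_jupyterlab_arn=lcc.arn,
            smcr_helper_layer=smcr_helper_layer,
        )

If a stack creates several LCCs or user profiles, the corresponding provider class can be instantiated once and passed via the `provider` argument so the handler Lambda functions are not duplicated.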
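
Similarly, a brief sketch of how the Feature Store helpers in builtin_algorithm_hpo_tabular/util/data.py might be driven from the workshop notebook, assuming the working directory is builtin_algorithm_hpo_tabular and a SageMaker execution role is available; the S3 prefix is an assumption. Note that `load_sample_data` returns the feature group name even though it is currently annotated `-> None`.

# Illustrative sketch only - the bucket prefix and notebook context are assumptions.
import sagemaker

from util import data

session = sagemaker.Session()
bucket = session.default_bucket()  # Any bucket the execution role can write to would do

raw_csv_path = data.fetch_sample_data()  # Downloads and extracts the sample CSV under ./data
feature_group_name = data.load_sample_data(
    raw_file_path=raw_csv_path,
    fg_s3_uri=f"s3://{bucket}/sm101/feature-store",  # Offline store location (assumed prefix)
    sagemaker_session=session,
)
print(f"Feature group ready: {feature_group_name}")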