├── .infrastructure ├── .nvmrc ├── .python-version ├── cdk_src │ ├── __init__.py │ ├── smstudio │ │ ├── user │ │ │ ├── fn_user │ │ │ │ ├── requirements.txt │ │ │ │ └── main.py │ │ │ └── __init__.py │ │ ├── domain │ │ │ ├── fn_domain │ │ │ │ ├── requirements.txt │ │ │ │ └── vpctools.py │ │ │ └── __init__.py │ │ ├── lcc │ │ │ ├── fn_studio_lcconfig │ │ │ │ ├── requirements.txt │ │ │ │ └── main.py │ │ │ ├── nbi-onstart.sh │ │ │ ├── studio-jupyterlab-onstart.sh │ │ │ ├── studio-classic-onstart.sh │ │ │ └── __init__.py │ │ ├── cr_lambda_common │ │ │ ├── requirements.txt │ │ │ ├── sagemaker_util.py │ │ │ └── cfn.py │ │ ├── user_setup │ │ │ └── fn_user_setup │ │ │ │ ├── requirements.txt │ │ │ │ ├── main.py │ │ │ │ ├── smprojects.py │ │ │ │ ├── base.py │ │ │ │ └── content.py │ │ ├── cr_lambda_common.py │ │ ├── iam.py │ │ └── region_config.py │ ├── cdk_stack.py │ └── config_utils.py ├── requirements-dev.txt ├── pyproject.toml ├── source.bat ├── requirements.txt ├── package.json ├── package-lock.json ├── cdk_app.py ├── cdk.json └── README.md ├── custom_script_demos ├── keras_nlp │ ├── util │ │ ├── __init__.py │ │ ├── lab-widgets.sh │ │ └── preprocessing.py │ └── src │ │ └── main.py ├── pytorch_nlp │ ├── util │ │ ├── __init__.py │ │ ├── lab-widgets.sh │ │ └── preprocessing.py │ └── src │ │ └── main.py ├── sklearn_reg │ └── .gitignore └── huggingface_nlp │ └── scripts │ └── train.py ├── autopilot └── .gitignore ├── builtin_algorithm_hpo_tabular ├── .gitignore ├── util │ ├── __init__.py │ └── data.py └── img │ ├── canvas-01-launch.png │ ├── canvas-02-datasets-list.png │ ├── canvas-05-config-model.png │ ├── feature-store-features.png │ ├── model-registry-compare.png │ ├── canvas-03-data-selection.png │ └── canvas-04-select-dataset.png ├── migration_challenge ├── keras_mnist │ ├── util │ │ ├── __init__.py │ │ └── draw.py │ ├── src │ │ └── main.py │ └── README.md ├── pytorch_mnist │ ├── util │ │ ├── __init__.py │ │ └── draw.py │ ├── src │ │ └── main.py │ └── README.md └── sklearn_cls │ └── src │ └── main.py ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── .simple.cf.yaml ├── CONTRIBUTING.md └── README.md /.infrastructure/.nvmrc: -------------------------------------------------------------------------------- 1 | 22.15 2 | -------------------------------------------------------------------------------- /.infrastructure/.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /custom_script_demos/keras_nlp/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /custom_script_demos/pytorch_nlp/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /autopilot/.gitignore: -------------------------------------------------------------------------------- 1 | autopilot_output/ 2 | data/ 3 | -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | -------------------------------------------------------------------------------- /custom_script_demos/sklearn_reg/.gitignore: -------------------------------------------------------------------------------- 1 | model/ 2 | 
src/ 3 | -------------------------------------------------------------------------------- /migration_challenge/keras_mnist/util/__init__.py: -------------------------------------------------------------------------------- 1 | from . import draw 2 | -------------------------------------------------------------------------------- /migration_challenge/pytorch_mnist/util/__init__.py: -------------------------------------------------------------------------------- 1 | from . import draw 2 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/__init__.py: -------------------------------------------------------------------------------- 1 | """CDK source code for workshop stack""" 2 | -------------------------------------------------------------------------------- /.infrastructure/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black==24.3.0 2 | cfn-lint==0.87 3 | pytest==6.2.5 4 | -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/util/__init__.py: -------------------------------------------------------------------------------- 1 | from . import reporting 2 | from . import data 3 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user/fn_user/requirements.txt: -------------------------------------------------------------------------------- 1 | # Nothing extra required beyond helper layer 2 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/domain/fn_domain/requirements.txt: -------------------------------------------------------------------------------- 1 | # Nothing extra required beyond helper layer 2 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/fn_studio_lcconfig/requirements.txt: -------------------------------------------------------------------------------- 1 | # Nothing extra required beyond helper layer 2 | -------------------------------------------------------------------------------- /.infrastructure/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | requires-python = ">= 3.9" 3 | 4 | [tool.black] 5 | extend-exclude = "^/(cdk\\.out|setup\\.py)" 6 | line-length = 100 7 | -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/canvas-01-launch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/canvas-01-launch.png -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/canvas-02-datasets-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/canvas-02-datasets-list.png -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/canvas-05-config-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/canvas-05-config-model.png 
-------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/feature-store-features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/feature-store-features.png -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/model-registry-compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/model-registry-compare.png -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/canvas-03-data-selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/canvas-03-data-selection.png -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/img/canvas-04-select-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sagemaker-101-workshop/HEAD/builtin_algorithm_hpo_tabular/img/canvas-04-select-dataset.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files (same convention across exercises) 2 | data/ 3 | **.tmp.* 4 | 5 | # Operating systems 6 | .DS_Store 7 | 8 | # JavaScript 9 | node_modules/ 10 | 11 | # Python 12 | .ipynb_checkpoints 13 | __pycache__ 14 | **.pyc 15 | .venv/ 16 | 17 | # CDK & SAM 18 | .aws-sam 19 | cdk.out/ 20 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/cr_lambda_common/requirements.txt: -------------------------------------------------------------------------------- 1 | # Studio user settings incl `StudioWebPortal` (for domain `force_studio_classic`) and 2 | # `JupyterLabAppSettings` (for user) require an upgrade to the Lambda default versions of boto3 / 3 | # botocore: 4 | boto3>=1.34.33 5 | botocore>=1.34.33 6 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user_setup/fn_user_setup/requirements.txt: -------------------------------------------------------------------------------- 1 | # GitPython provides Python bindings for git *assuming you already have the git binaries installed* 2 | # - We've handled this via a 3rd party Lambda Layer, but you could instead consider using a 3 | # PyPI package like 'lambda-git' which bundles binaries. 
4 | gitpython>=3.1,<4 5 | -------------------------------------------------------------------------------- /.infrastructure/source.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem The sole purpose of this script is to make the command 4 | rem 5 | rem source .venv/bin/activate 6 | rem 7 | rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows. 8 | rem On Windows, this command just runs this batch file (the argument is ignored). 9 | rem 10 | rem Now we don't need to document a Windows command for activating a virtualenv. 11 | 12 | echo Executing .venv\Scripts\activate.bat for you 13 | .venv\Scripts\activate.bat 14 | -------------------------------------------------------------------------------- /custom_script_demos/keras_nlp/util/lab-widgets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Example for installing IPyWidgets extension from a SageMaker Lifecycle Configuration script 3 | sudo -u ec2-user -i <<EOF -------------------------------------------------------------------------------- /.infrastructure/requirements.txt: -------------------------------------------------------------------------------- 1 | # >=2.109.0 for Python 3.12 Lambda runtime, smstudio.domain to be able to set the 2 | # "StudioWebPortal" user setting to force classic Studio experience 3 | # >=2.140 to try and avoid 'Package @aws-sdk/client-cognito-identity-provider does not exist.' on 4 | # AwsCustomResource (this version includes fix for related failure to upgrade AWS SDK) 5 | # See: https://github.com/aws/aws-cdk/issues/30067 6 | aws-cdk-lib==2.158.0 7 | aws-cdk.aws-lambda-python-alpha==2.158.0-alpha.0 8 | cdk-nag==2.28 9 | constructs>=10.0.0,<11.0.0 10 | upsert-slr>=1.0.2,<2 11 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/nbi-onstart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Install extension for interactive canvas drawing: 5 | # ipywidgets is already present on al2-v2 NBIs. Pin versions to avoid reinstallations 6 | sudo -u ec2-user -i <<'EOF' 7 | source /home/ec2-user/anaconda3/bin/activate JupyterSystemEnv 8 | JUPYTERSERVER_VER=`pip show jupyter-server | grep 'Version:' | sed 's/Version: //'` 9 | IPYWIDGETS_VER=`pip show ipywidgets | grep 'Version:' | sed 's/Version: //'` 10 | pip install \ 11 | jupyter-server==$JUPYTERSERVER_VER \ 12 | ipywidgets==$IPYWIDGETS_VER \ 13 | 'ipycanvas<0.13' 14 | source /home/ec2-user/anaconda3/bin/deactivate 15 | EOF 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | -------------------------------------------------------------------------------- /migration_challenge/keras_mnist/src/main.py: -------------------------------------------------------------------------------- 1 | """CNN-based image classification on SageMaker with TensorFlow and Keras 2 | 3 | (Complete me with help from Local Notebook.ipynb, and the NLP example's src/main.py!) 4 | """ 5 | 6 | # Dependencies: 7 | import argparse 8 | # TODO: Others? 9 | 10 | def parse_args(): 11 | # TODO: Standard pattern for loading parameters in from SageMaker 12 | 13 | # TODO: Other function definitions, if you'd like to break up your code into functions? 14 | 15 | # Training script: 16 | if __name__ == "__main__": 17 | # Load arguments from CLI / environment variables: 18 | args, unknown = parse_args() 19 | 20 | # TODO: Load images from container filesystem into training / test data sets? 21 | 22 | # TODO: Create the Keras model? 23 | 24 | # Fit the Keras model: 25 | model.fit( 26 | ? 27 | ) 28 | 29 | # TODO: Evaluate model quality and log metrics? 30 | 31 | # TODO: Save outputs (trained model) to specified folder? 32 | model.save( 33 | ? 34 | ) 35 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/cr_lambda_common/sagemaker_util.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Shared utilities for CloudFormation Custom Resources working with SageMaker""" 4 | # Python Built-Ins: 5 | import logging 6 | import time 7 | from typing import Callable, TypeVar 8 | 9 | # External Dependencies: 10 | from botocore.exceptions import ClientError 11 | 12 | 13 | logger = logging.getLogger("sagemaker_util") 14 | TResponse = TypeVar("TResponse") 15 | 16 | 17 | def retry_if_already_updating(fn: Callable[[], TResponse], delay_secs: float = 10) -> TResponse: 18 | """Retry `fn` every `delay_secs` if it fails because a SageMaker Domain is already updating""" 19 | while True: 20 | try: 21 | return fn() 22 | except ClientError as err: 23 | if "is already being updated" in err.response["Error"]["Message"]: 24 | logger.info("Domain already updating - waiting to retry...") 25 | time.sleep(delay_secs) 26 | continue 27 | else: 28 | raise err 29 | -------------------------------------------------------------------------------- /migration_challenge/pytorch_mnist/src/main.py: -------------------------------------------------------------------------------- 1 | """CNN-based image classification on SageMaker with PyTorch 2 | 3 | (Complete me with help from Local Notebook.ipynb, and the NLP example's src/main.py!) 4 | """ 5 | 6 | # Dependencies: 7 | import argparse 8 | # TODO: Others? 9 | 10 | def parse_args(): 11 | # TODO: Standard pattern for loading parameters in from SageMaker 12 | 13 | def model_fn(model_dir): 14 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 15 | model = torch.jit.load(os.path.join(model_dir, 'model.pth')) 16 | return model 17 | 18 | # TODO: Other function definitions, if you'd like to break up your code into functions? 
19 | 20 | # Training script: 21 | if __name__ == "__main__": 22 | # TODO: Load arguments from CLI / environment variables? 23 | args, _ = parse_args() 24 | 25 | # TODO: Load images from container filesystem into training / test data sets? 26 | 27 | # TODO: Load dataset into a PyTorch Data Loader with correct batch size 28 | 29 | # TODO: Fit the PyTorch model? 30 | model = ? 31 | 32 | # TODO: Save outputs (trained model) to specified folder? -------------------------------------------------------------------------------- /.infrastructure/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sagemaker-101-workshop", 3 | "version": "0.1.0", 4 | "description": "CDK infrastructure for Amazon SageMaker 101 workshop", 5 | "main": "index.js", 6 | "directories": { 7 | "test": "tests" 8 | }, 9 | "scripts": { 10 | "cdk:bootstrap": "cdk bootstrap", 11 | "deploy": "npm run login:ecrpublic && cdk deploy --all", 12 | "destroy": "cdk destroy --all", 13 | "lint:cfn": "cfn-lint cfn_bootstrap.yaml", 14 | "lint:python": "black ./cdk_src", 15 | "lint": "npm run lint:cfn && npm run lint:python", 16 | "login:ecrpublic": "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws", 17 | "scan:cfn": "cfn_nag_scan --input-path cfn_bootstrap.yaml", 18 | "test": "echo \"Error: no test specified\" && exit 1" 19 | }, 20 | "keywords": [ 21 | "Workshop", 22 | "SageMaker", 23 | "AWS" 24 | ], 25 | "author": "Amazon Web Services", 26 | "license": "MIT-0", 27 | "private": true, 28 | "dependencies": { 29 | "aws-cdk": "2.158.0" 30 | }, 31 | "engines": { 32 | "node": ">=20" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /migration_challenge/keras_mnist/README.md: -------------------------------------------------------------------------------- 1 | # SageMaker Migration Exercise (TensorFlow) 2 | 3 | In this exercise, you'll migrate an [example notebook](Local%20Notebook.ipynb) (fitting a Keras CNN model on the MNIST Digits sample dataset) into the SageMaker data science workflow. 
4 | 5 | **To get started, clone this repository into a SageMaker Notebook instance (any instance type will do) and fire up the [Instructions.ipynb](Instructions.ipynb) notebook!** 6 | 7 | 8 | ## Prerequisites 9 | 10 | This practice exercise is intended to be delivered with in-person support, and assumes you: 11 | 12 | - Have had a high-level introduction to the SageMaker workflow, and: 13 | - Are familiar with using the AWS Console to access Amazon SageMaker and Amazon S3 14 | - Are familiar with configuring SageMaker Notebook Instance Execution Roles with appropriate Amazon S3 access 15 | 16 | If that doesn't sound like you, you might prefer to check out: 17 | 18 | - The official [Introductory Amazon SageMaker Tutorial](https://aws.amazon.com/getting-started/tutorials/build-train-deploy-machine-learning-model-sagemaker/) 19 | - The ["Get Started with the Amazon SageMaker Console"](https://docs.aws.amazon.com/sagemaker/latest/dg/gs-console.html) page in the [Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html) 20 | -------------------------------------------------------------------------------- /migration_challenge/pytorch_mnist/README.md: -------------------------------------------------------------------------------- 1 | # SageMaker Migration Exercise (PyTorch) 2 | 3 | In this exercise, you'll migrate an [example notebook](Local%20Notebook.ipynb) (fitting a PyTorch CNN model on the MNIST Digits sample dataset) into the SageMaker data science workflow. 4 | 5 | **To get started, clone this repository into a SageMaker Notebook instance (any instance type will do) and fire up the [Instructions.ipynb](Instructions.ipynb) notebook!** 6 | 7 | 8 | ## Prerequisites 9 | 10 | This practice exercise is intended to be delivered with in-person support, and assumes you: 11 | 12 | - Have had a high-level introduction to the SageMaker workflow, and: 13 | - Are familiar with using the AWS Console to access Amazon SageMaker and Amazon S3 14 | - Are familiar with configuring SageMaker Notebook Instance Execution Roles with appropriate Amazon S3 access 15 | 16 | If that doesn't sound like you, you might prefer to check out: 17 | 18 | - The official [Introductory Amazon SageMaker Tutorial](https://aws.amazon.com/getting-started/tutorials/build-train-deploy-machine-learning-model-sagemaker/) 19 | - The ["Get Started with the Amazon SageMaker Console"](https://docs.aws.amazon.com/sagemaker/latest/dg/gs-console.html) page in the [Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html) 20 | -------------------------------------------------------------------------------- /migration_challenge/sklearn_cls/src/main.py: -------------------------------------------------------------------------------- 1 | """SageMaker combined training/inference script for Scikit Learn random forest classifier""" 2 | # TODO: Add any other libraries you need below 3 | # Python Built-Ins: 4 | import argparse 5 | import os 6 | 7 | # External Dependencies: 8 | import joblib # Utilities for saving and re-loading models 9 | 10 | 11 | # Helper Functions 12 | 13 | 14 | # Main training script block: 15 | if __name__ == "__main__": 16 | # Parse input parameters from command line and environment variables: 17 | print("Parsing training arguments") 18 | parser = argparse.ArgumentParser() 19 | 20 | # TODO: Load RandomForest hyperparameters 21 | # TODO: Find data, model, and output directories from CLI/env vars 22 | 23 | args, _ = parser.parse_known_args() 24 | 25 | # 
TODO: Parse class names to Id mappings: 26 | 27 | # TODO: Load your data (both training and test) from container filesystem 28 | # (split into training and test datasets and identify correct features/labels) 29 | 30 | # TODO: Fit the random forest model 31 | 32 | # TODO: Save the model to the location specified by args.model_dir, using the joblib 33 | 34 | 35 | # TODO: Function to load the trained model at inference time 36 | 37 | 38 | # TODO: (Bonus!) Custom inference output_fn to return string labels instead of numeric class IDs 39 | -------------------------------------------------------------------------------- /.infrastructure/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sagemaker-101-workshop", 3 | "version": "0.1.0", 4 | "lockfileVersion": 3, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "sagemaker-101-workshop", 9 | "version": "0.1.0", 10 | "license": "MIT-0", 11 | "dependencies": { 12 | "aws-cdk": "2.158.0" 13 | }, 14 | "engines": { 15 | "node": ">=20" 16 | } 17 | }, 18 | "node_modules/aws-cdk": { 19 | "version": "2.158.0", 20 | "resolved": "https://registry.npmjs.org/aws-cdk/-/aws-cdk-2.158.0.tgz", 21 | "integrity": "sha512-UcrxBG02RACrnTvfuyZiTuOz8gqOpnqjCMTdVmdpExv5qk9hddhtRAubNaC4xleHuNJnvskYqqVW+Y3Abh6zGQ==", 22 | "bin": { 23 | "cdk": "bin/cdk" 24 | }, 25 | "engines": { 26 | "node": ">= 14.15.0" 27 | }, 28 | "optionalDependencies": { 29 | "fsevents": "2.3.2" 30 | } 31 | }, 32 | "node_modules/fsevents": { 33 | "version": "2.3.2", 34 | "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", 35 | "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", 36 | "hasInstallScript": true, 37 | "optional": true, 38 | "os": [ 39 | "darwin" 40 | ], 41 | "engines": { 42 | "node": "^8.16.0 || ^10.6.0 || >=11.0.0" 43 | } 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/cdk_stack.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | """CDK stack for AWS workshop with Amazon SageMaker""" 4 | # Python Built-Ins: 5 | from typing import Optional 6 | 7 | # External Dependencies: 8 | from aws_cdk import Stack 9 | from constructs import Construct 10 | from aws_cdk import aws_ec2 11 | 12 | # Local Dependencies: 13 | from .smstudio import WorkshopSageMakerEnvironment 14 | 15 | 16 | class WorkshopStack(Stack): 17 | def __init__( 18 | self, 19 | scope: Construct, 20 | construct_id: str, 21 | sagemaker_code_checkout: Optional[str] = None, 22 | sagemaker_code_repo: Optional[str] = None, 23 | ) -> None: 24 | super().__init__(scope, construct_id) 25 | 26 | # Shared VPC: 27 | vpc = aws_ec2.Vpc(self, "Vpc") 28 | 29 | # Deploy SageMaker Studio environment: 30 | sagemaker_env = WorkshopSageMakerEnvironment( 31 | self, 32 | "SageMakerEnvironment", 33 | vpc=vpc, 34 | code_checkout=sagemaker_code_checkout, 35 | code_repo=sagemaker_code_repo, 36 | create_nbi=False, # Don't create a 'Notebook Instance' (save costs, use Studio) 37 | domain_name="WorkshopDomain", 38 | instance_type="ml.t3.large", 39 | studio_classic=False, # Keep SMStudio classic disabled (save costs) 40 | ) 41 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/studio-jupyterlab-onstart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #### Clone sample code for labs 4 | # For new-style SMStudio we can't use EFS mounts to initialize user content, so have to use 5 | # this LCC. Repo name (and possibly branch config) below is populated by CDK. 6 | # `|| true` to swallow any errors (e.g. if folder already exists) - `set +e` doesn't work 7 | git clone {{CODE_REPO}} || true 8 | 9 | #### Docker installation (for SageMaker Local Mode) 10 | # As per: https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository 11 | # Add Docker's official GPG key: 12 | sudo apt-get update 13 | sudo apt-get -y install ca-certificates curl 14 | sudo install -m 0755 -d /etc/apt/keyrings 15 | sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc 16 | sudo chmod a+r /etc/apt/keyrings/docker.asc 17 | # Add the repository to Apt sources: 18 | echo \ 19 | "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ 20 | $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ 21 | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 22 | sudo apt-get update 23 | 24 | sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin 25 | 26 | #### JupyterLab extensions / etc 27 | # MNIST exercises require ipycanvas 28 | pip install "ipycanvas>=0.12,<0.14" 29 | restart-jupyter-server 30 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/config_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: MIT-0 4 | """Utilities for configuring the stack (e.g. 
environment variable parsing) 5 | """ 6 | # Python Built-Ins: 7 | import os 8 | from typing import Optional 9 | 10 | 11 | def bool_env_var(env_var_name: str, default: Optional[bool] = None) -> bool: 12 | """Parse a boolean environment variable 13 | 14 | Raises 15 | ------ 16 | ValueError : 17 | If environment variable `env_var_name` is not found and no `default` is specified, or if the 18 | raw value string could not be interpreted as a boolean. 19 | 20 | Returns 21 | ------- 22 | parsed : 23 | True if the env var has values such as `1`, `true`, `y`, `yes` (case-insensitive). False if 24 | opposite values `0`, `false`, `n`, `no` or empty string. 25 | """ 26 | raw = os.environ.get(env_var_name) 27 | if raw is None: 28 | if default is None: 29 | raise ValueError(f"Mandatory boolean env var '{env_var_name}' not found") 30 | return default 31 | raw = raw.lower() 32 | if raw in ("1", "true", "y", "yes"): 33 | return True 34 | elif raw in ("", "0", "false", "n", "no"): 35 | return False 36 | else: 37 | raise ValueError( 38 | "Couldn't interpret env var '%s' as boolean. Got: '%s'" % (env_var_name, raw) 39 | ) 40 | -------------------------------------------------------------------------------- /.infrastructure/cdk_app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | # SPDX-License-Identifier: MIT-0 4 | """Main AWS CDK entry point for the workshop infrastructure 5 | """ 6 | # Python Built-Ins: 7 | import json 8 | import os 9 | 10 | # External Dependencies: 11 | import aws_cdk as cdk 12 | from cdk_nag import AwsSolutionsChecks # (Optional stack security checks) 13 | 14 | # Local Dependencies: 15 | from cdk_src.cdk_stack import WorkshopStack 16 | from cdk_src.config_utils import bool_env_var 17 | 18 | # Top-level configurations are loaded from environment variables at the point `cdk synth` or 19 | # `cdk deploy` is run (or you can override here): 20 | config = { 21 | # cdk_nag is a useful tool for auditing configuration security, but can sometimes be noisy: 22 | "cdk_nag": bool_env_var("CDK_NAG", default=False), 23 | "sagemaker_code_checkout": os.environ.get("SAGEMAKER_CODE_CHECKOUT"), 24 | "sagemaker_code_repo": os.environ.get( 25 | "SAGEMAKER_CODE_REPO", 26 | "https://github.com/aws-samples/sagemaker-101-workshop", 27 | ), 28 | } 29 | 30 | app = cdk.App() 31 | print(f"Preparing stack with configuration:\n{json.dumps(config, indent=2)}") 32 | llm_eval_wkshp_stack = WorkshopStack( 33 | app, 34 | "WorkshopStack", 35 | **{k: v for k, v in config.items() if k != "cdk_nag"}, 36 | ) 37 | 38 | if config["cdk_nag"]: 39 | print("Adding cdk_nag checks") 40 | cdk.Aspects.of(app).add(AwsSolutionsChecks()) 41 | else: 42 | print("Skipping cdk_nag checks") 43 | 44 | app.synth() 45 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user_setup/fn_user_setup/main.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Custom CloudFormation Resource for loading content to a SageMaker Studio user 4 | 5 | Updating or deleting this resource does not currently do anything. Errors in the setup process are 6 | also ignored (typically don't want to roll back the whole stack just because we couldn't clone a 7 | repo - as users can always do it manually!) 
8 | 9 | For input CloudFormation resource properties, see `StudioUserSetupResourceProperties` in base.py. 10 | 11 | CloudFormation Return Values 12 | ---------------------------- 13 | Direct .Ref : string 14 | SageMaker user profile name 15 | """ 16 | # Python Built-Ins: 17 | import logging 18 | 19 | logging.getLogger().setLevel(logging.INFO) # Set log level for AWS Lambda *BEFORE* other imports 20 | 21 | # Local Dependencies: 22 | from base import StudioUserSetupResourceProperties 23 | from cfn import CustomResourceEvent, CustomResourceRequestType 24 | import content 25 | import smprojects 26 | 27 | logger = logging.getLogger("main") 28 | 29 | 30 | def lambda_handler(event_raw: dict, context: dict): 31 | logger.info(event_raw) 32 | event = CustomResourceEvent(event_raw, StudioUserSetupResourceProperties) 33 | if event.request_type == CustomResourceRequestType.create: 34 | try: 35 | smprojects.on_create_update(event) 36 | except: 37 | logging.exception("Failed to set up user for SageMaker Projects") 38 | return content.handle_create(event, context) 39 | elif event.request_type == CustomResourceRequestType.update: 40 | try: 41 | smprojects.on_create_update(event) 42 | except: 43 | logging.exception("Failed to set up user for SageMaker Projects") 44 | return content.handle_update(event, context) 45 | elif event.request_type == CustomResourceRequestType.delete: 46 | return content.handle_delete(event, context) 47 | else: 48 | raise ValueError(f"Unsupported CFn RequestType '{event_raw['RequestType']}'") 49 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/cr_lambda_common.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Shared Lambda constructs to help with SageMaker Studio CDK""" 4 | # Python Built-Ins: 5 | import os 6 | from typing import Any, Dict, Sequence 7 | 8 | # External Dependencies: 9 | from aws_cdk import RemovalPolicy 10 | from aws_cdk.aws_lambda import Architecture, Runtime 11 | from aws_cdk.aws_lambda_python_alpha import BundlingOptions, PythonLayerVersion 12 | from constructs import Construct 13 | 14 | LAYER_CODE_PATH = os.path.join(os.path.dirname(__file__), "cr_lambda_common") 15 | 16 | 17 | class SMCustomResourceHelperLayer(PythonLayerVersion): 18 | """Lambda layer with helper functions/classes for SageMaker CloudFormation Custom Resources 19 | 20 | It works like a regular aws_cdk.aws_lambda_python_alpha.PythonLayerVersion, but the code 21 | location is already specified for you. 
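    A minimal usage sketch (the construct IDs, entry path and runtime below are illustrative
    assumptions, not values taken from this stack):

        layer = SMCustomResourceHelperLayer(self, "CrHelperLayer")
        handler = PythonFunction(
            self,
            "MyCustomResourceFn",
            entry="path/to/fn_code",  # hypothetical Lambda code folder
            runtime=Runtime.PYTHON_3_12,
            layers=[layer],  # makes the cfn.py / sagemaker_util.py helpers importable
        )
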
You probably don't need to specify 22 | """ 23 | 24 | def __init__( 25 | self, 26 | scope: Construct, 27 | id: str, 28 | *, 29 | bundling: BundlingOptions | Dict[str, Any] | None = None, 30 | compatible_architectures: Sequence[Architecture] | None = None, 31 | compatible_runtimes: Sequence[Runtime] | None = None, 32 | description: str | None = ( 33 | "Helper functions & classes for SageMaker CloudFormation custom resources" 34 | ), 35 | layer_version_name: str | None = None, 36 | license: str | None = None, 37 | removal_policy: RemovalPolicy | None = None, 38 | ) -> None: 39 | super().__init__( 40 | scope, 41 | id, 42 | entry=LAYER_CODE_PATH, 43 | bundling=bundling, 44 | compatible_architectures=compatible_architectures, 45 | compatible_runtimes=[ 46 | Runtime.PYTHON_3_8, 47 | Runtime.PYTHON_3_9, 48 | Runtime.PYTHON_3_10, 49 | Runtime.PYTHON_3_11, 50 | Runtime.PYTHON_3_12, 51 | ], 52 | description=description, 53 | layer_version_name=layer_version_name, 54 | license=license, 55 | removal_policy=removal_policy, 56 | ) 57 | -------------------------------------------------------------------------------- /.infrastructure/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 cdk_app.py", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "requirements*.txt", 11 | "source.bat", 12 | "**/__init__.py", 13 | "python/__pycache__", 14 | "tests" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 19 | "@aws-cdk/core:checkSecretUsage": true, 20 | "@aws-cdk/core:target-partitions": [ 21 | "aws", 22 | "aws-cn" 23 | ], 24 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 25 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 26 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 27 | "@aws-cdk/aws-iam:minimizePolicies": true, 28 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 29 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 30 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 31 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 32 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 33 | "@aws-cdk/core:enablePartitionLiterals": true, 34 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 35 | "@aws-cdk/aws-iam:standardizedServicePrincipals": true, 36 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 37 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 38 | "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 39 | "@aws-cdk/aws-route53-patters:useCertificate": true, 40 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 41 | "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 42 | "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, 43 | "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, 44 | "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, 45 | "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, 46 | "@aws-cdk/aws-redshift:columnId": true, 47 | "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, 48 | "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, 49 | "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, 50 | "@aws-cdk/aws-kms:aliasNameRef": true, 51 | "@aws-cdk/core:includePrefixInUniqueNameGeneration": true 52 | } 53 | } 54 | 
-------------------------------------------------------------------------------- /.simple.cf.yaml: -------------------------------------------------------------------------------- 1 | # This CloudFormation template provides a basic SageMaker Notebook Instance setup for you to try out 2 | # the workshop. The permissions are probably more generous than you'd want to grant in a production 3 | # account! 4 | AWSTemplateFormatVersion: '2010-09-09' 5 | Resources: 6 | SageMakerIamRole: 7 | Type: 'AWS::IAM::Role' 8 | Properties: 9 | AssumeRolePolicyDocument: 10 | Version: '2012-10-17' 11 | Statement: 12 | - 13 | Effect: Allow 14 | Principal: 15 | Service: sagemaker.amazonaws.com 16 | Action: sts:AssumeRole 17 | Path: / 18 | ManagedPolicyArns: 19 | - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess' 20 | - 'arn:aws:iam::aws:policy/AmazonS3FullAccess' 21 | 22 | # SageMaker notebook 23 | NotebookConfig: 24 | Type: 'AWS::SageMaker::NotebookInstanceLifecycleConfig' 25 | Properties: 26 | NotebookInstanceLifecycleConfigName: !Sub '${AWS::StackName}-LifecycleConfig' 27 | OnStart: 28 | - Content: 29 | Fn::Base64: !Sub | 30 | #!/bin/bash 31 | set -e 32 | 33 | # Install extension for interactive canvas drawing: 34 | # ipywidgets is already present on al2-v2 NBIs. Pin versions to avoid reinstallations 35 | sudo -u ec2-user -i <<'EOF' 36 | source /home/ec2-user/anaconda3/bin/activate JupyterSystemEnv 37 | JUPYTERSERVER_VER=`pip show jupyter-server | grep 'Version:' | sed 's/Version: //'` 38 | IPYWIDGETS_VER=`pip show ipywidgets | grep 'Version:' | sed 's/Version: //'` 39 | pip install \ 40 | jupyter-server==$JUPYTERSERVER_VER \ 41 | ipywidgets==$IPYWIDGETS_VER \ 42 | 'ipycanvas<0.13' 43 | source /home/ec2-user/anaconda3/bin/deactivate 44 | EOF 45 | 46 | NotebookInstance: 47 | Type: 'AWS::SageMaker::NotebookInstance' 48 | Properties: 49 | InstanceType: ml.t3.medium 50 | LifecycleConfigName: !GetAtt NotebookConfig.NotebookInstanceLifecycleConfigName 51 | # Otherwise it gets some garbage name by default: 52 | NotebookInstanceName: !Sub '${AWS::StackName}-Notebook' 53 | RoleArn: !GetAtt SageMakerIamRole.Arn 54 | VolumeSizeInGB: 20 55 | PlatformIdentifier: notebook-al2-v2 56 | DefaultCodeRepository: https://github.com/aws-samples/sagemaker-101-workshop 57 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/studio-classic-onstart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu 3 | 4 | echo "Checking conda environments" 5 | if conda info --envs | grep ^studio; then 6 | # Standard on JLv3 image at time of writing 7 | CONDA_ENV=studio 8 | else 9 | # Standard on JLv1 image at time of writing 10 | exit 0 11 | fi 12 | echo "Activating conda env $CONDA_ENV" 13 | source activate $CONDA_ENV 14 | 15 | BOTO3_VER=`pip show boto3 | grep 'Version:' | sed 's/Version: //'` 16 | BOTOCORE_VER=`pip show botocore | grep 'Version:' | sed 's/Version: //'` 17 | JUPYTERSERVER_VER=`pip show jupyter-server | grep 'Version:' | sed 's/Version: //'` 18 | 19 | echo "Installing CodeWhisperer, jupyterlab-lsp, language tools, canvas widget" 20 | pip install amazon-codewhisperer-jupyterlab-ext \ 21 | jupyterlab-lsp \ 22 | 'python-lsp-server[flake8,mccabe,pycodestyle,pydocstyle,pyflakes,pylint,rope]' \ 23 | jupyterlab-spellchecker \ 24 | jupyterlab-code-formatter black isort \ 25 | jupyterlab-s3-browser \ 26 | boto3==$BOTO3_VER \ 27 | botocore==$BOTOCORE_VER \ 28 | jupyter-server==$JUPYTERSERVER_VER \ 29 | 
'ipycanvas<0.13' 30 | # bash-language-server v5+ requires Node v16+ (not yet available): 31 | jlpm add --dev bash-language-server@"<5.0.0" dockerfile-language-server-nodejs 32 | 33 | # CodeWhisperer should be specifically enabled: 34 | jupyter server extension enable amazon_codewhisperer_jupyterlab_ext 35 | 36 | CMP_CONFIG_DIR=.jupyter/lab/user-settings/@krassowski/jupyterlab-lsp/ 37 | CMP_CONFIG_FILE=completion.jupyterlab-settings 38 | CMP_CONFIG_PATH="$CMP_CONFIG_DIR/$CMP_CONFIG_FILE" 39 | if test -f $CMP_CONFIG_PATH; then 40 | echo "jupyterlab-lsp config file already exists: Skipping default config setup" 41 | else 42 | echo "Setting continuous hinting to enabled by default" 43 | mkdir -p $CMP_CONFIG_DIR 44 | echo '{ "continuousHinting": true }' > $CMP_CONFIG_PATH 45 | fi 46 | 47 | FMT_CONFIG_DIR=~/.jupyter/lab/user-settings/@ryantam626/jupyterlab_code_formatter 48 | FMT_CONFIG_FILE=settings.jupyterlab-settings 49 | FMT_CONFIG_PATH="$FMT_CONFIG_DIR/$FMT_CONFIG_FILE" 50 | if test -f $FMT_CONFIG_PATH; then 51 | echo "jupyterlab-code-formatter config file already exists: Skipping default config setup" 52 | else 53 | echo "Configuring jupyterlab-code-formatter format on save and line width" 54 | mkdir -p $FMT_CONFIG_DIR 55 | # Could turn on "formatOnSave": true here, but would raise error messages for partial nbks 56 | cat > $FMT_CONFIG_PATH < ~/.config/pycodestyle < /dev/null 2>&1 -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. 
Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user_setup/fn_user_setup/smprojects.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Custom CloudFormation Resource for SageMaker Projects setup 4 | 5 | See `.base.StudioUserSetupResourceProperties` for CloudFormation input Properties, and main.py 6 | docstring for CloudFormation return values. 7 | 8 | This sub-resource handles granting (existing) SMStudio user profiles permission to view and launch 9 | SageMaker Project Templates, from CloudFormation. 
10 | """ 11 | # Python Built-Ins: 12 | from logging import getLogger 13 | 14 | # External Dependencies: 15 | import boto3 # AWS SDK for Python 16 | 17 | # Local Dependencies: 18 | from base import StudioUserSetupResourceProperties 19 | from cfn import CustomResourceEvent 20 | 21 | 22 | scclient = boto3.client("servicecatalog") 23 | smclient = boto3.client("sagemaker") 24 | logger = getLogger("smprojects") 25 | 26 | 27 | def enable_sm_projects_for_role(studio_role_arn: str) -> None: 28 | """Enable SageMaker Projects for a SageMaker Execution Role 29 | This function assumes you've already run Boto SageMaker 30 | enable_sagemaker_servicecatalog_portfolio() for the account as a whole 31 | """ 32 | portfolios_resp = scclient.list_accepted_portfolio_shares() 33 | 34 | portfolio_ids = set() 35 | for portfolio in portfolios_resp["PortfolioDetails"]: 36 | if portfolio["ProviderName"] == "Amazon SageMaker": 37 | portfolio_ids.add(portfolio["Id"]) 38 | 39 | logger.info(f"Adding {len(portfolio_ids)} SageMaker SC portfolios to role {studio_role_arn}") 40 | for portfolio_id in portfolio_ids: 41 | scclient.associate_principal_with_portfolio( 42 | PortfolioId=portfolio_id, PrincipalARN=studio_role_arn, PrincipalType="IAM" 43 | ) 44 | 45 | 46 | def disable_sm_projects_for_role(studio_role_arn: str) -> None: 47 | """Enable SageMaker Projects for a SageMaker Execution Role 48 | This function assumes you've already run Boto SageMaker 49 | enable_sagemaker_servicecatalog_portfolio() for the account as a whole 50 | """ 51 | portfolios_resp = scclient.list_accepted_portfolio_shares() 52 | 53 | portfolio_ids = set() 54 | for portfolio in portfolios_resp["PortfolioDetails"]: 55 | if portfolio["ProviderName"] == "Amazon SageMaker": 56 | portfolio_ids.add(portfolio["Id"]) 57 | 58 | logger.info( 59 | f"Removing {len(portfolio_ids)} SageMaker SC portfolios from role {studio_role_arn}" 60 | ) 61 | for portfolio_id in portfolio_ids: 62 | scclient.disassociate_principal_from_portfolio( 63 | PortfolioId=portfolio_id, 64 | PrincipalARN=studio_role_arn, 65 | ) 66 | 67 | 68 | def get_user_profile_role_arn(domain_id: str, user_profile_name: str) -> str: 69 | user_desc = smclient.describe_user_profile( 70 | DomainId=domain_id, UserProfileName=user_profile_name 71 | ) 72 | return user_desc["UserSettings"]["ExecutionRole"] 73 | 74 | 75 | def on_create_update(event: CustomResourceEvent[StudioUserSetupResourceProperties]) -> bool: 76 | logger.info("**Received create/update request") 77 | if event.props.enable_projects: 78 | logger.info("**Setting up SageMaker projects for user") 79 | role_arn = get_user_profile_role_arn(event.props.domain_id, event.props.user_profile_name) 80 | enable_sm_projects_for_role(role_arn) 81 | return True 82 | else: 83 | logger.info("**Skipping removing SM Projects from user") 84 | return False 85 | -------------------------------------------------------------------------------- /custom_script_demos/keras_nlp/src/main.py: -------------------------------------------------------------------------------- 1 | """CNN-based text classification on SageMaker with TensorFlow and Keras""" 2 | 3 | # Python Built-Ins: 4 | import argparse 5 | import os 6 | 7 | # External Dependencies: 8 | import numpy as np 9 | import tensorflow as tf 10 | from tensorflow.keras.layers import Conv1D, Dense, Dropout, Embedding, Flatten, MaxPooling1D 11 | from tensorflow.keras.models import Sequential 12 | 13 | ###### Helper functions ############ 14 | def load_training_data(base_dir): 15 | X_train = np.load(os.path.join(base_dir, 
"train_X.npy")) 16 | y_train = np.load(os.path.join(base_dir, "train_Y.npy")) 17 | return X_train, y_train 18 | 19 | def load_testing_data(base_dir): 20 | X_test = np.load(os.path.join(base_dir, "test_X.npy")) 21 | y_test = np.load(os.path.join(base_dir, "test_Y.npy")) 22 | return X_test, y_test 23 | 24 | def load_embeddings(base_dir): 25 | embedding_matrix = np.load(os.path.join(base_dir, "docs-embedding-matrix.npy")) 26 | return embedding_matrix 27 | 28 | def parse_args(): 29 | """Acquire hyperparameters and directory locations passed by SageMaker""" 30 | parser = argparse.ArgumentParser() 31 | 32 | # Hyperparameters sent by the client are passed as command-line arguments to the script. 33 | parser.add_argument("--epochs", type=int, default=1) 34 | parser.add_argument("--learning_rate", type=float, default=0.001) 35 | parser.add_argument("--num_classes", type=int, default=4) 36 | parser.add_argument("--max_seq_len", type=int, default=40) 37 | 38 | # Data, model, and output directories 39 | parser.add_argument("--output-data-dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR")) 40 | parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR")) 41 | parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN")) 42 | parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TEST")) 43 | parser.add_argument("--embeddings", type=str, default=os.environ.get("SM_CHANNEL_EMBEDDINGS")) 44 | 45 | return parser.parse_known_args() 46 | 47 | ###### Main application ############ 48 | if __name__ == "__main__": 49 | 50 | ###### Parse input arguments ############ 51 | args, unknown = parse_args() 52 | print(args) 53 | 54 | ###### Load data from input channels ############ 55 | X_train, y_train = load_training_data(args.train) 56 | X_test, y_test = load_testing_data(args.test) 57 | embedding_matrix = load_embeddings(args.embeddings) 58 | 59 | 60 | ###### Setup model architecture ############ 61 | model = Sequential() 62 | model.add(Embedding( 63 | embedding_matrix.shape[0], # Final vocabulary size 64 | embedding_matrix.shape[1], # Word vector dimensions 65 | weights=[embedding_matrix], 66 | input_length=args.max_seq_len, 67 | trainable=False, 68 | name="embed", 69 | )) 70 | model.add(Conv1D(filters=128, kernel_size=3, activation="relu", name="conv_1")) 71 | model.add(MaxPooling1D(pool_size=5, name="maxpool_1")) 72 | model.add(Flatten(name="flat_1")) 73 | model.add(Dropout(0.3, name="dropout_1")) 74 | model.add(Dense(128, activation="relu", name="dense_1")) 75 | model.add(Dense(args.num_classes, activation="softmax", name="out_1")) 76 | 77 | ###### Compile the model ############ 78 | optimizer = tf.keras.optimizers.RMSprop(learning_rate=args.learning_rate) 79 | model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["acc"]) 80 | 81 | model.summary() 82 | 83 | print("Training model") 84 | model.fit(X_train, y_train, batch_size=16, epochs=args.epochs, verbose=2) 85 | print("Evaluating model") 86 | # TODO: Better differentiate train vs val loss in logs 87 | scores = model.evaluate(X_test, y_test, verbose=2) 88 | print( 89 | "Validation results: " 90 | + "; ".join(map( 91 | lambda i: f"{model.metrics_names[i]}={scores[i]:.5f}", range(len(model.metrics_names)) 92 | )) 93 | ) 94 | 95 | 96 | ###### Save Keras model for TensorFlow Serving ############ 97 | print(f"------ save model to {os.path.join(args.model_dir, 'model/1/')}") 98 | model.save(os.path.join(args.model_dir, "model/1")) 99 | 
-------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/cr_lambda_common/cfn.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Types/classes for working with CloudFormation Custom Resource events in Python Lambda functions 4 | 5 | TODO: Assess `aws-lambda-powertools` and/or `crhelper` instead 6 | 7 | https://docs.powertools.aws.dev/lambda/python/ 8 | https://github.com/aws-cloudformation/custom-resource-helper 9 | """ 10 | # Python Built-Ins: 11 | from enum import Enum 12 | from logging import getLogger 13 | from typing import Generic, Optional, Type, TypeVar, Union 14 | 15 | logger = getLogger("cfn") 16 | 17 | 18 | class CustomResourceRequestType(str, Enum): 19 | "Enumeration of CloudFormation event 'RequestType's received by a custom resource" 20 | create = "Create" 21 | update = "Update" 22 | delete = "Delete" 23 | 24 | 25 | def parse_cfn_boolean(raw: Union[bool, str], var_name: Optional[str] = None) -> bool: 26 | """Parse a boolean value from (potentially stringified/text) CloudFormation event properties 27 | 28 | Common text values like 'true', 'yes', etc are supported. Raises a ValueError if the raw 29 | value is `None` or cannot be interpreted as boolean. 30 | 31 | Parameters 32 | ---------- 33 | raw : 34 | The raw value from CloudFormation, which might be a string 35 | var_name : 36 | Optional name of the variable to be parsed (only used for error messages) 37 | """ 38 | if isinstance(raw, bool): 39 | return raw 40 | if isinstance(raw, str): 41 | if raw in ("1", "t", "true", "y", "yes"): 42 | return True 43 | elif raw in ("0", "f", "false", "n", "no"): 44 | return False 45 | else: 46 | raise ValueError( 47 | f"Invalid {(var_name + ' ') if var_name else ''}string value '{raw}' (expected boolean)" 48 | ) 49 | else: 50 | raise ValueError( 51 | f"Invalid {(var_name + ' ') if var_name else ''}value type '{type(raw)}' (expected boolean)" 52 | ) 53 | 54 | 55 | TResourceProps = TypeVar("TResourceProps") 56 | 57 | 58 | class CustomResourceEvent(Generic[TResourceProps]): 59 | """Class to parse a CFn Custom Resource event 60 | 61 | This is a generic class: TResourceProps should be a class that can be initialized with the 62 | dict of CloudFormation resource properties for your specific custom resource - and raises an 63 | exception if the properties are invalid. 64 | """ 65 | 66 | physical_id: Optional[str] 67 | props: Optional[TResourceProps] 68 | old_props: Optional[TResourceProps] 69 | request_type: CustomResourceRequestType 70 | resource_type: str 71 | 72 | def __init__(self, event: dict, PropertiesClass: Type[TResourceProps]): 73 | """Create a CustomResourceEvent 74 | 75 | Parameters 76 | ---------- 77 | event : 78 | Raw event dict from AWS Lambda 79 | PropertiesClass : 80 | Python class that should be created for the resource properties. Your class will be 81 | instantiated with one constructor argument - the raw properties dictionary. If this 82 | is an 'Update' event, another instance will be created from the OldResourceProperties. 83 | If the OldResourceProperties cannot be parsed, an exception will be logged but not 84 | raised. 
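        Example
        -------
        A minimal handler sketch (`MyProps` is a hypothetical properties class, not part of this
        module):

            class MyProps:
                def __init__(self, props: dict):
                    # Raise (e.g. KeyError) here if the CloudFormation properties are invalid:
                    self.bucket_name = props["BucketName"]

            def lambda_handler(event_raw: dict, context):
                event = CustomResourceEvent(event_raw, MyProps)
                if event.request_type == CustomResourceRequestType.create:
                    ...  # create the resource using event.props.bucket_name
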
85 | """ 86 | self.physical_id = event.get("PhysicalResourceId") 87 | self.request_type = CustomResourceRequestType(event["RequestType"]) 88 | self.resource_type = event["ResourceType"] 89 | resource_properties = event.get("ResourceProperties") 90 | if resource_properties: 91 | self.props = PropertiesClass(resource_properties) 92 | else: 93 | self.props = None 94 | # Only present for 'Update' requests: 95 | old_resource_properties = event.get("OldResourceProperties") 96 | if old_resource_properties: 97 | try: 98 | self.old_props = PropertiesClass(old_resource_properties) 99 | except Exception: 100 | logger.exception("Failed to parse OldResourceProperties of Update event") 101 | self.old_props = None 102 | else: 103 | self.old_props = None 104 | -------------------------------------------------------------------------------- /custom_script_demos/pytorch_nlp/util/preprocessing.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | # Python Built-Ins: 4 | import gzip 5 | import os 6 | import shutil 7 | import subprocess 8 | import tarfile 9 | import time 10 | from typing import Optional 11 | 12 | # External Dependencies: 13 | import numpy as np 14 | from sklearn import preprocessing 15 | import torchtext 16 | 17 | 18 | def wait_for_file_stable(path: str, stable_secs: int=60, poll_secs: Optional[int]=None) -> bool: 19 | """Wait for a file to become stable (not recently modified) & return existence 20 | 21 | Returns False if file does not exist. Raises FileNotFoundError if file deleted during polling. 22 | 23 | When running through the two notebooks at the same time in parallel, this helps to minimize any 24 | errors caused by initiating multiple downloads/extractions/etc on the same file in parallel. 25 | """ 26 | if not poll_secs: 27 | poll_secs = stable_secs / 4 28 | try: 29 | init_stat = os.stat(path) 30 | except FileNotFoundError: 31 | return False 32 | 33 | if (time.time() - init_stat.st_mtime) < stable_secs: 34 | print(f"Waiting for file to stabilize... {path}") 35 | while (time.time() - os.stat(path).st_mtime) < stable_secs: 36 | time.sleep(poll_secs) 37 | print("File ready") 38 | 39 | return True 40 | 41 | def dummy_encode_labels(df,label): 42 | encoder = preprocessing.LabelEncoder() 43 | encoded_y = encoder.fit_transform(df[label].values) 44 | num_classes = len(encoder.classes_) 45 | # convert integers to dummy variables (i.e. one hot encoded) 46 | dummy_y = np.eye(num_classes, dtype="float32")[encoded_y] 47 | return dummy_y, encoder.classes_ 48 | 49 | 50 | def tokenize_and_pad_docs(df, columns, max_length=40): 51 | docs = df[columns].values 52 | 53 | t = torchtext.data.Field( 54 | lower = True, 55 | tokenize = "basic_english", 56 | fix_length = max_length 57 | ) 58 | docs = list(map(t.preprocess, docs)) 59 | padded_docs = t.pad(docs) 60 | t.build_vocab(padded_docs) 61 | print(f"Vocabulary size: {len(t.vocab)}") 62 | numericalized_docs = [] 63 | for d in padded_docs: 64 | temp = [] 65 | for c in d: 66 | temp.append(t.vocab.stoi[c]) 67 | numericalized_docs.append(temp) 68 | print(f"Number of headlines: {len(numericalized_docs)}") 69 | return np.array(numericalized_docs), t 70 | 71 | 72 | def get_word_embeddings(t, folder, lang="en"): 73 | """Download pre-trained word vectors and construct an embedding matrix for tokenizer `t` 74 | 75 | Any tokens in `t` not found in the embedding vectors are mapped to all-zeros. 
76 | """ 77 | vecs_url = f"https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.{lang}.300.vec.gz" 78 | vecs_gz_filename = vecs_url.rpartition("/")[2] 79 | os.makedirs(folder, exist_ok=True) 80 | vecs_gz_filepath = os.path.join(folder, vecs_gz_filename) 81 | 82 | tokenizer_vocab_size = len(t.vocab) 83 | 84 | if wait_for_file_stable(vecs_gz_filepath): 85 | print("Using existing embeddings file") 86 | else: 87 | print("Downloading word vectors...") 88 | subprocess.run( 89 | [" ".join(["curl", vecs_url, "-o", vecs_gz_filepath])], check=True, shell=True 90 | ) 91 | 92 | print("Loading into memory...") 93 | embeddings_index = dict() 94 | with gzip.open(vecs_gz_filepath, "rt") as zipf: 95 | firstline = zipf.readline() 96 | emb_vocab_size, emb_d = firstline.split(" ") 97 | emb_vocab_size = int(emb_vocab_size) 98 | emb_d = int(emb_d) 99 | for line in zipf: 100 | values = line.split() 101 | word = values[0] 102 | # Only load subset of the embeddings recognised by the tokenizer: 103 | if word in t.vocab.stoi: 104 | coefs = np.asarray(values[1:], dtype="float32") 105 | embeddings_index[word] = coefs 106 | print("Loaded {} of {} word vectors for tokenizer vocabulary length {}".format( 107 | len(embeddings_index), 108 | emb_vocab_size, 109 | tokenizer_vocab_size, 110 | )) 111 | 112 | # create a weight matrix for words in training docs 113 | embedding_matrix = np.zeros((tokenizer_vocab_size, emb_d)) 114 | for word, i in t.vocab.stoi.items(): 115 | embedding_vector = embeddings_index.get(word) 116 | if embedding_vector is not None: 117 | embedding_matrix[i] = embedding_vector 118 | 119 | return embedding_matrix 120 | -------------------------------------------------------------------------------- /custom_script_demos/keras_nlp/util/preprocessing.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | # Python Built-Ins: 4 | import gzip 5 | import os 6 | import shutil 7 | import subprocess 8 | import tarfile 9 | import time 10 | from typing import Optional 11 | 12 | # External Dependencies: 13 | import numpy as np 14 | from sklearn import preprocessing 15 | import tensorflow as tf 16 | from tensorflow.keras.preprocessing.text import Tokenizer 17 | from tensorflow.keras.preprocessing.sequence import pad_sequences 18 | 19 | 20 | def wait_for_file_stable(path: str, stable_secs: int=60, poll_secs: Optional[int]=None) -> bool: 21 | """Wait for a file to become stable (not recently modified) & return existence 22 | 23 | Returns False if file does not exist. Raises FileNotFoundError if file deleted during polling. 24 | 25 | When running through the two notebooks at the same time in parallel, this helps to minimize any 26 | errors caused by initiating multiple downloads/extractions/etc on the same file in parallel. 27 | """ 28 | if not poll_secs: 29 | poll_secs = stable_secs / 4 30 | try: 31 | init_stat = os.stat(path) 32 | except FileNotFoundError: 33 | return False 34 | 35 | if (time.time() - init_stat.st_mtime) < stable_secs: 36 | print(f"Waiting for file to stabilize... {path}") 37 | while (time.time() - os.stat(path).st_mtime) < stable_secs: 38 | time.sleep(poll_secs) 39 | print("File ready") 40 | 41 | return True 42 | 43 | 44 | def dummy_encode_labels(df,label): 45 | encoder = preprocessing.LabelEncoder() 46 | encoded_y = encoder.fit_transform(df[label].values) 47 | num_classes = len(encoder.classes_) 48 | # convert integers to dummy variables (i.e. 
one hot encoded) 49 | dummy_y = np.eye(num_classes, dtype="float32")[encoded_y] 50 | return dummy_y, encoder.classes_ 51 | 52 | 53 | def tokenize_and_pad_docs(df, columns, max_length=40): 54 | docs = df[columns].values 55 | # prepare tokenizer 56 | t = Tokenizer() 57 | t.fit_on_texts(docs) 58 | vocab_size = len(t.word_index) + 1 59 | # integer encode the documents 60 | encoded_docs = t.texts_to_sequences(docs) 61 | print(f"Vocabulary size: {vocab_size}") 62 | print("Padding docs to max_length={} (truncating {} docs)".format( 63 | max_length, 64 | sum(1 for doc in encoded_docs if len(doc) > max_length), 65 | )) 66 | padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding="post") 67 | print(f"Number of headlines: {len(padded_docs)}") 68 | return padded_docs, t 69 | 70 | 71 | def get_word_embeddings(t, folder, lang="en"): 72 | """Download pre-trained word vectors and construct an embedding matrix for tokenizer `t` 73 | 74 | Any tokens in `t` not found in the embedding vectors are mapped to all-zeros. 75 | """ 76 | vecs_url = f"https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.{lang}.300.vec.gz" 77 | vecs_gz_filename = vecs_url.rpartition("/")[2] 78 | os.makedirs(folder, exist_ok=True) 79 | vecs_gz_filepath = os.path.join(folder, vecs_gz_filename) 80 | 81 | # Tokenizer.num_words is nullable, and there's an OOV token, so: 82 | tokenizer_vocab_size = len(t.word_index) + 1 83 | 84 | if wait_for_file_stable(vecs_gz_filepath): 85 | print("Using existing embeddings file") 86 | else: 87 | print("Downloading word vectors...") 88 | subprocess.run( 89 | [" ".join(["curl", vecs_url, "-o", vecs_gz_filepath])], check=True, shell=True 90 | ) 91 | 92 | print("Loading into memory...") 93 | embeddings_index = dict() 94 | with gzip.open(vecs_gz_filepath, "rt") as zipf: 95 | firstline = zipf.readline() 96 | emb_vocab_size, emb_d = firstline.split(" ") 97 | emb_vocab_size = int(emb_vocab_size) 98 | emb_d = int(emb_d) 99 | for line in zipf: 100 | values = line.split() 101 | word = values[0] 102 | # Only load subset of the embeddings recognised by the tokenizer: 103 | if word in t.word_index: 104 | coefs = np.asarray(values[1:], dtype="float32") 105 | embeddings_index[word] = coefs 106 | print("Loaded {} of {} word vectors for tokenizer vocabulary length {}".format( 107 | len(embeddings_index), 108 | emb_vocab_size, 109 | tokenizer_vocab_size, 110 | )) 111 | 112 | # create a weight matrix for words in training docs 113 | embedding_matrix = np.zeros((tokenizer_vocab_size, emb_d)) 114 | for word, i in t.word_index.items(): 115 | embedding_vector = embeddings_index.get(word) 116 | if embedding_vector is not None: 117 | embedding_matrix[i] = embedding_vector 118 | 119 | return embedding_matrix 120 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user_setup/fn_user_setup/base.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Shared (CloudFormation resource property) definitions""" 4 | # Python Built-Ins: 5 | from __future__ import annotations 6 | import json 7 | from typing import Optional, Union 8 | 9 | 10 | class StudioUserSetupResourceProperties: 11 | """Parser for CloudFormation resource properties for this Custom Resource 12 | 13 | Resource Properties 14 | ------------------- 15 | DomainId: str 16 | ID of the (already existing) target SageMaker Studio domain. 
17 | HomeEfsFileSystemUid : Union[str, int] 18 | EFS user ID (numeric) of the target SageMaker Studio user. You can get this from the 19 | SageMaker DescribeUserProfile API. 20 | UserProfileName : str 21 | Name of the target SageMaker Studio user profile. 22 | TargetPath : Optional[str] 23 | Path (relative to Studio home folder) where the content should be loaded. If not set, this 24 | will default to the repository name or source file name. Trying to escape the Studio home 25 | folder with '../' is not supported and may have unintended consequences (including possibly 26 | writing to other users' folders). 27 | GitRepository : Optional[str] 28 | (Required if using git) A `git clone`able URL. 29 | GitCheckout : Optional[str] 30 | (Only used if `GitRepository` is set) A `git checkout`able name (e.g. branch name) in your 31 | target repository. If not provided, the cloned repository will remain on the default 32 | branch. 33 | ContentS3Uri : Optional[str] 34 | s3://doc-example-bucket/path URI for fetching the content. Currently only an individual 35 | object is supported (not folder prefix). 36 | AuthenticateS3 : Optional[bool] 37 | (Only if using `ContentS3Uri`) Set true to authenticate S3 requests with this Lambda's IAM 38 | identity. By default (false), requests will be anonymous/unsigned - which is appropriate 39 | for public buckets such as sample data and the AWS Open Data Registry. 40 | ExtractContent (bool, optional): 41 | (Only if using `ContentS3Uri`) Set true to unzip the content after download. By default 42 | (false), the object will simply be downloaded as-is. Tarballs and other archive formats 43 | apart from zip files are not currently supported. 44 | """ 45 | 46 | # Common parameters: 47 | domain_id: str 48 | home_efs_file_system_uid: Union[str, int] 49 | user_profile_name: str 50 | target_path: Optional[str] 51 | # Parameters for Git content: 52 | git_repository: Optional[str] 53 | git_checkout: Optional[str] 54 | # Parameters for S3 content: 55 | content_s3_uri: Optional[str] 56 | authenticate_s3: bool 57 | extract_content: bool 58 | # Parameters for SageMaker projects: 59 | enable_projects: bool 60 | 61 | def __init__(self, resource_properties: dict): 62 | self.domain_id = resource_properties["DomainId"] 63 | self.home_efs_file_system_uid = resource_properties["HomeEfsFileSystemUid"] 64 | self.user_profile_name = resource_properties["UserProfileName"] 65 | self.target_path = resource_properties.get("TargetPath") 66 | 67 | # Git content: 68 | self.git_checkout = resource_properties.get("GitCheckout") 69 | self.git_repository = resource_properties.get("GitRepository") 70 | 71 | # S3 content: 72 | self.authenticate_s3 = resource_properties.get("AuthenticateS3", False) 73 | self.content_s3_uri = resource_properties.get("ContentS3Uri") 74 | self.extract_content = resource_properties.get("ExtractContent", False) 75 | 76 | # SageMaker projects: 77 | self.enable_projects = resource_properties.get("EnableProjects", False) 78 | 79 | # Validations: 80 | if self.git_repository and self.content_s3_uri: 81 | raise ValueError( 82 | "Cannot set both GitRepository and ContentS3Uri: Create a separate custom " 83 | "resource instance for your git and S3 content items" 84 | ) 85 | if not (self.git_repository or self.content_s3_uri): 86 | raise ValueError( 87 | "Must set either GitRepository (git content) or ContentS3Uri (S3 content)" 88 | ) 89 | 90 | def __str__(self): 91 | dict_val = { 92 | "DomainId": self.domain_id, 93 | "HomeEfsFileSystemUid": self.home_efs_file_system_uid, 94 | 
"UserProfileName": self.user_profile_name, 95 | } 96 | if self.target_path: 97 | dict_val["TargetPath"] = self.target_path 98 | if self.git_checkout: 99 | dict_val["GitCheckout"] = self.git_checkout 100 | if self.git_repository: 101 | dict_val["GitRepository"] = self.git_repository 102 | if self.content_s3_uri: 103 | dict_val["ContentS3Uri"] = self.content_s3_uri 104 | if self.authenticate_s3: 105 | dict_val["AuthenticateS3"] = self.authenticate_s3 106 | if self.extract_content: 107 | dict_val["ExtractContent"] = self.extract_content 108 | return json.dumps(dict_val) 109 | 110 | @classmethod 111 | def from_str(cls, str_val) -> StudioUserSetupResourceProperties: 112 | return cls(json.loads(str_val)) 113 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/domain/fn_domain/vpctools.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Utilities for analyzing VPCs for use with SageMaker Studio""" 4 | # Python Built-Ins: 5 | import ipaddress 6 | import logging 7 | from typing import Tuple, Union 8 | 9 | # External Dependencies: 10 | import boto3 11 | 12 | logger = logging.getLogger("vpctools") 13 | ec2 = boto3.client("ec2") 14 | 15 | 16 | def get_studio_efs_security_group_ids( 17 | studio_domain_id: str, vpc_id: str 18 | ) -> Tuple[Union[str, None], Union[str, None]]: 19 | """Retrieve the security groups you need for [inbound, outbound] comms with SMStudio EFS filesystem 20 | 21 | Returns 22 | ------- 23 | inbound : Union[str, None] 24 | Security Group ID for inbound connection from SMStudio filesystem, or None if could not be found 25 | outbound : str 26 | Secrity Group ID for outbound connection to SMStudio filesystem, or None if could nom be found 27 | 28 | Raises 29 | ------ 30 | ValueError : 31 | If multiple potential SGs are found for either inbound or outbound connection (suggests duplication 32 | or otherwise erroneous SMStudio/VPC setup). 33 | Other : 34 | As per boto3 EC2 describe_security_groups() 35 | """ 36 | inbound_sg_name = f"security-group-for-inbound-nfs-{studio_domain_id}" 37 | outbound_sg_name = f"security-group-for-outbound-nfs-{studio_domain_id}" 38 | nfs_sgs = ec2.describe_security_groups( 39 | Filters=[ 40 | {"Name": "vpc-id", "Values": [vpc_id]}, 41 | {"Name": "group-name", "Values": [inbound_sg_name, outbound_sg_name]}, 42 | ], 43 | )["SecurityGroups"] 44 | inbound_sgs = list( 45 | filter( 46 | lambda sg: sg["GroupName"] == inbound_sg_name, 47 | nfs_sgs, 48 | ) 49 | ) 50 | n_inbound_sgs = len(inbound_sgs) 51 | outbound_sgs = list( 52 | filter( 53 | lambda sg: sg["GroupName"] == outbound_sg_name, 54 | nfs_sgs, 55 | ) 56 | ) 57 | n_outbound_sgs = len(outbound_sgs) 58 | if n_inbound_sgs > 1 or n_outbound_sgs > 1: 59 | raise ValueError( 60 | "Found duplicate EFS security groups for SMStudio {}: Got {} inbound, {} outbound".format( 61 | studio_domain_id, 62 | n_inbound_sgs, 63 | n_outbound_sgs, 64 | ) 65 | ) 66 | return ( 67 | inbound_sgs[0]["GroupId"] if n_inbound_sgs else None, 68 | outbound_sgs[0]["GroupId"] if n_outbound_sgs else None, 69 | ) 70 | 71 | 72 | def propose_subnet(vpc_id, new_subnet_prefixlen=26): 73 | """Propose a valid configuration for a new (IPv4) subnet to add to the VPC for CF stack purposes. 
74 | 75 | Parameters 76 | ---------- 77 | vpc_id : str 78 | ID of the VPC to propose a subnet for 79 | new_subnet_prefixlen : int (optional) 80 | CIDR mask length in bits for requested *new* subnet to propose. Defaults to 26 bits (64 IPs) 81 | """ 82 | logger.info(f"Proposing admin subnet for VPC {vpc_id}...") 83 | # Get VPC info: 84 | vpc_list = ec2.describe_vpcs( 85 | Filters=[{"Name": "vpc-id", "Values": [vpc_id]}], 86 | )["Vpcs"] 87 | if not len(vpc_list): 88 | raise ValueError(f"VPC ID {vpc_id} not found") 89 | vpc_description = vpc_list[0] 90 | existing_subnets = ec2.describe_subnets( 91 | Filters=[{"Name": "vpc-id", "Values": [vpc_id]}], 92 | )["Subnets"] 93 | 94 | # Load CIDRs of provided VPC and existing subnets with Python ipaddress library: 95 | logger.info(f"Parsing existing CIDRs...") 96 | vpc_net = ipaddress.ip_network(vpc_description["CidrBlock"]) 97 | existing_nets = list( 98 | map( 99 | lambda subnet: ipaddress.ip_network(subnet["CidrBlock"]), 100 | existing_subnets, 101 | ) 102 | ) 103 | 104 | # Validate existing configuration: 105 | # (Could probably skip this since we just retrieved fresh data, but might help to prevent any weird 106 | # errors manifesting as harder-to-interpret issues further down) 107 | for subnet in existing_nets: 108 | if not subnet.subnet_of(vpc_net): 109 | raise ValueError(f"Listed 'subnet' {subnet} is not inside VPC {vpc_net}") 110 | for checknet in existing_nets: 111 | if checknet != subnet and subnet.overlaps(checknet): 112 | raise ValueError(f"Listed subnets {subnet} and {checknet} overlap") 113 | 114 | # Calculate remaining vacant ranges: 115 | logger.info(f"Calculating remaining vacant ranges...") 116 | available_nets = [vpc_net] 117 | for subnet in existing_nets: 118 | next_available = [] 119 | for vacancy in available_nets: 120 | if vacancy.subnet_of(subnet): 121 | # This gap is fully contained by `subnet` 122 | continue 123 | try: 124 | # Preserve the list of subranges in `vacancy` after excluding `subnet`: 125 | next_available += list(vacancy.address_exclude(subnet)) 126 | except ValueError: 127 | # This `vacancy` does not contain `subnet`: 128 | next_available.append(vacancy) 129 | available_nets = next_available 130 | available_nets.sort() 131 | 132 | # Select the first available subnet of requested size: 133 | try: 134 | parent = next( 135 | filter( 136 | lambda n: n.prefixlen <= new_subnet_prefixlen, 137 | available_nets, 138 | ) 139 | ) 140 | except StopIteration: 141 | raise ValueError(f"No vacant subnets of requested size /{new_subnet_prefixlen} left in VPC") 142 | 143 | if parent.prefixlen == new_subnet_prefixlen: 144 | proposed_net = parent 145 | else: 146 | diff = new_subnet_prefixlen - parent.prefixlen 147 | proposed_net = next(parent.subnets(diff)) 148 | 149 | return {"CidrBlock": str(proposed_net)} 150 | -------------------------------------------------------------------------------- /custom_script_demos/pytorch_nlp/src/main.py: -------------------------------------------------------------------------------- 1 | """CNN-based text classification on SageMaker with PyTorch""" 2 | 3 | # Python Built-Ins: 4 | import argparse 5 | import os 6 | import io 7 | import logging 8 | import sys 9 | 10 | # External Dependencies: 11 | import numpy as np 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | import torch.optim as optim 16 | from torch.utils.data import DataLoader 17 | 18 | # Configure log level & destination for running nicely in SageMaker: 19 | logger = logging.getLogger(__name__) 20 | 
logger.setLevel(logging.DEBUG) 21 | logger.addHandler(logging.StreamHandler(sys.stdout)) 22 | 23 | 24 | class Net(nn.Module): 25 | """Custom PyTorch model definition: A basic 1D CNN for text""" 26 | 27 | def __init__(self, vocab_size=400000, emb_dim=300, num_classes=4): 28 | super(Net, self).__init__() 29 | self.embedding = nn.Embedding(vocab_size, emb_dim) 30 | self.conv1 = nn.Conv1d(emb_dim, 128, kernel_size=3) 31 | self.max_pool1d = nn.MaxPool1d(5) 32 | self.flatten1 = nn.Flatten() 33 | self.dropout1 = nn.Dropout(p=0.3) 34 | self.fc1 = nn.Linear(896, 128) 35 | self.fc2 = nn.Linear(128, num_classes) 36 | 37 | def forward(self, x): 38 | x = self.embedding(x) 39 | x = torch.transpose(x,1,2) 40 | x = self.flatten1(self.max_pool1d(self.conv1(x))) 41 | x = self.dropout1(x) 42 | x = F.relu(self.fc1(x)) 43 | x = self.fc2(x) 44 | return F.softmax(x, dim=-1) 45 | 46 | 47 | class Dataset(torch.utils.data.Dataset): 48 | """Custom PyTorch dataset for text classification""" 49 | 50 | def __init__(self, data: np.array, labels: np.array): 51 | "Initialization" 52 | self.labels = labels 53 | self.data = data 54 | 55 | def __len__(self): 56 | "Denotes the total number of samples" 57 | return len(self.data) 58 | 59 | def __getitem__(self, index): 60 | # Load data and get label 61 | X = torch.as_tensor(self.data[index]).long() 62 | y = torch.as_tensor(self.labels[index]) 63 | return X, y 64 | 65 | 66 | def load_training_data(base_dir): 67 | X_train = np.load(os.path.join(base_dir, "train_X.npy")) 68 | y_train = np.load(os.path.join(base_dir, "train_Y.npy")) 69 | return DataLoader(Dataset(X_train, y_train), batch_size=16) 70 | 71 | 72 | def load_testing_data(base_dir): 73 | X_test = np.load(os.path.join(base_dir, "test_X.npy")) 74 | y_test = np.load(os.path.join(base_dir, "test_Y.npy")) 75 | return DataLoader(Dataset(X_test, y_test), batch_size=1) 76 | 77 | 78 | def load_embeddings(base_dir): 79 | embedding_matrix = np.load(os.path.join(base_dir, "docs-embedding-matrix.npy")) 80 | return embedding_matrix 81 | 82 | 83 | def parse_args(): 84 | """Acquire hyperparameters and directory locations passed by SageMaker""" 85 | parser = argparse.ArgumentParser() 86 | 87 | # Hyperparameters sent by the client are passed as command-line arguments to the script. 
88 | parser.add_argument("--epochs", type=int, default=1) 89 | parser.add_argument("--learning_rate", type=float, default=0.001) 90 | parser.add_argument("--num_classes", type=int, default=4) 91 | parser.add_argument("--max_seq_len", type=int, default=40) 92 | 93 | # Data, model, and output directories 94 | parser.add_argument("--output-data-dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR")) 95 | parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR")) 96 | parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN")) 97 | parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TEST")) 98 | parser.add_argument("--embeddings", type=str, default=os.environ.get("SM_CHANNEL_EMBEDDINGS")) 99 | 100 | return parser.parse_known_args() 101 | 102 | 103 | def test(model, test_loader, device): 104 | model.eval() 105 | test_loss = 0.0 106 | correct = 0 107 | with torch.no_grad(): 108 | for data, target in test_loader: 109 | data, target = data.to(device), target.to(device) 110 | output = model(data) 111 | test_loss += F.binary_cross_entropy(output, target, reduction="sum").item() 112 | pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability 113 | target_index = target.max(1, keepdim=True)[1] 114 | correct += pred.eq(target_index).sum().item() 115 | 116 | test_loss /= len(test_loader.dataset) # Average loss over dataset samples 117 | print(f"val_loss: {test_loss:.4f}, val_acc: {correct/len(test_loader.dataset):.4f}") 118 | 119 | 120 | def train(args): 121 | ###### Load data from input channels ############ 122 | train_loader = load_training_data(args.train) 123 | test_loader = load_testing_data(args.test) 124 | embedding_matrix = load_embeddings(args.embeddings) 125 | 126 | ###### Setup model architecture ############ 127 | model = Net( 128 | vocab_size=embedding_matrix.shape[0], 129 | emb_dim=embedding_matrix.shape[1], 130 | num_classes=args.num_classes, 131 | ) 132 | model.embedding.weight = torch.nn.parameter.Parameter(torch.FloatTensor(embedding_matrix), False) 133 | device = torch.device("cpu") 134 | if torch.cuda.is_available(): 135 | device = torch.device("cuda") 136 | model.to(device) 137 | optimizer = optim.RMSprop(model.parameters(), lr=args.learning_rate) 138 | 139 | for epoch in range(1, args.epochs + 1): 140 | model.train() 141 | running_loss = 0.0 142 | n_batches = 0 143 | for batch_idx, (X_train, y_train) in enumerate(train_loader, 1): 144 | data, target = X_train.to(device), y_train.to(device) 145 | optimizer.zero_grad() 146 | output = model(data) 147 | loss = F.binary_cross_entropy(output, target) 148 | loss.backward() 149 | optimizer.step() 150 | running_loss += loss.item() 151 | n_batches += 1 152 | print(f"epoch: {epoch}, train_loss: {running_loss / n_batches:.6f}") # (Avg over batches) 153 | print("Evaluating model") 154 | test(model, test_loader, device) 155 | save_model(model, args.model_dir, args.max_seq_len) 156 | 157 | 158 | def save_model(model, model_dir, max_seq_len): 159 | path = os.path.join(model_dir, "model.pth") 160 | x = torch.randint(0, 10, (1, max_seq_len)) 161 | model = model.cpu() 162 | model.eval() 163 | m = torch.jit.trace(model, x) 164 | torch.jit.save(m, path) 165 | 166 | 167 | def model_fn(model_dir): 168 | """Customized model loading function for inference 169 | 170 | https://sagemaker.readthedocs.io/en/stable/frameworks/pytorch/using_pytorch.html#load-a-model 171 | """ 172 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 173 | 
model = torch.jit.load(os.path.join(model_dir, "model.pth")).to(device) 174 | return model 175 | 176 | ###### Main application ############ 177 | if __name__ == "__main__": 178 | 179 | ###### Parse input arguments ############ 180 | args, unknown = parse_args() 181 | 182 | train(args) 183 | -------------------------------------------------------------------------------- /.infrastructure/README.md: -------------------------------------------------------------------------------- 1 | # Workshop infrastructure 2 | 3 | This project provides infrastructure-as-code to deploy an [Amazon SageMaker Studio Domain](https://docs.aws.amazon.com/sagemaker/latest/dg/sm-domain.html) pre-configured and ready to use in a guided workshop setting (in case you don't have one already). 4 | 5 | 6 | ## Architecture overview 7 | 8 | This infrastructure, including the optional SageMaker Studio Domain deployment, is implemented in and deployed through [AWS CDK for Python](https://aws.amazon.com/cdk/). Since deploying CDK code requires setting up a development environment (as detailed below), we also provide a directly-deployable ["bootstrap" CloudFormation template](cfn_bootstrap.yaml) which fetches this repository and runs the CDK deployment via [AWS CodeBuild](https://aws.amazon.com/codebuild/). 9 | 10 | > ⚠️ **Note:** The above CloudFormation template creates an AWS CodeBuild Project with broad IAM permissions to deploy the solution on your behalf. It's not recommended for use in production environments where [least-privilege principles](https://aws.amazon.com/blogs/security/techniques-for-writing-least-privilege-iam-policies/) should be followed. 11 | 12 | For a detailed list of other security configurations you might want to optimize before using the stack in production, you can enable [cdk-nag](https://github.com/cdklabs/cdk-nag) by running the build with the `CDK_NAG=true` environment variable or editing the defaults in [cdk_app.py](cdk_app.py). You don't need to request stack deployment to complete this analysis: running `npx cdk synth` would show the same error list. 13 | 14 | 15 | ## Development environment pre-requisites 16 | 17 | To customize and deploy from source code, you'll need: 18 | 19 | - [NodeJS](https://nodejs.org/en) installed 20 | - The minimum required version is specified in the [package.json](package.json) `engines` field and the canonical development version is specified in [.nvmrc](.nvmrc) 21 | - If you work across multiple projects and need to manage multiple parallel versions of NodeJS on your system, you may want to install it via [NVM](https://github.com/nvm-sh/nvm) or [NVM-Windows](https://github.com/coreybutler/nvm-windows) 22 | - [Python](https://www.python.org/) 23 | - The minimum required version is specified in [pyproject.toml](pyproject.toml) and the canonical development version is specified in [.python-version](.python-version) 24 | - If you work across multiple projects and need to manage multiple parallel versions of Python on your system, you may want to install it via [pyenv](https://github.com/pyenv/pyenv) or [pyenv for Windows](https://github.com/pyenv-win/pyenv-win) 25 | - The [AWS CLI](https://aws.amazon.com/cli/) installed and [configured / logged in](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) with access to your AWS Account 26 | - [Docker Desktop](https://www.docker.com/products/docker-desktop/) (or a suitable alternative) installed for building (and optionally testing locally) container images.
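As a quick sanity check of the pre-requisites above, you can run the following from a Bash-like shell (the commands are illustrative - compare the reported versions against the files referenced above):

```sh
$ node --version              # Should satisfy the `engines` range in package.json
$ python3 --version           # Should match .python-version
$ aws sts get-caller-identity # Should return the AWS account you intend to deploy to
$ docker info                 # Should succeed if Docker (or your alternative) is running
```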
27 | 28 | 29 | ## Getting started 30 | 31 | The following commands assume you're working in a terminal from the same directory as this README. 32 | 33 | These examples use `$` to indicate the prompt of a Bash/POSIX-like shell (e.g. on macOS or Linux), and `%` to indicate a Windows-like shell. You only need to type the commands **after** the prompt! 34 | 35 | ### Install & activate 36 | 37 | If you haven't already, consider setting your `AWS_REGION` and `AWS_DEFAULT_REGION` environment variables to your target AWS Region for deployment: 38 | 39 | ```sh 40 | $ export AWS_REGION="us-west-2" 41 | $ export AWS_DEFAULT_REGION="us-west-2" 42 | ``` 43 | 44 | **IF** you're using NVM and/or pyenv, first activate the target versions of NodeJS and/or Python: 45 | 46 | ```sh 47 | $ nvm use # Should discover version from .nvmrc file 48 | $ pyenv local # Should discover version from .python-version file 49 | ``` 50 | 51 | Install the [CDK Toolkit CLI](https://docs.aws.amazon.com/cdk/v2/guide/cli.html) and any other NodeJS dependencies from [package.json](package.json) **locally** in the project: 52 | 53 | ```sh 54 | $ npm install # Will enable locally-versioned `npx cdk deploy` rather than global `cdk` CLI 55 | ``` 56 | 57 | The initialization process [should *automatically* create](https://docs.aws.amazon.com/cdk/v2/guide/work-with-cdk-python.html) a Python virtualenv when you first run e.g. `npx cdk synth`, if you have the `virtualenv` package installed - but if you prefer to create one manually you can run: 58 | 59 | ```sh 60 | $ python3 -m venv .venv 61 | ``` 62 | 63 | After the init process completes and the virtualenv is created, you can use the following 64 | step to activate your virtualenv (from Bash/POSIX-like shells): 65 | 66 | ```sh 67 | $ source .venv/bin/activate 68 | ``` 69 | 70 | If you are on a Windows platform, you would activate the virtualenv like this: 71 | 72 | ``` 73 | % .venv\Scripts\activate.bat 74 | ``` 75 | 76 | Once the virtualenv is activated, you can install the required dependencies: 77 | 78 | ``` 79 | (.venv) $ pip install -r requirements.txt 80 | ``` 81 | 82 | (If you need to add any dependencies, simply add them to your requirements.txt and re-run this installation in your Python virtual environment) 83 | 84 | 85 | ### Synthesizing and deploying with CDK 86 | 87 | Once your AWS CLI is configured, virtual environment activated and dependencies installed, you should be able to use the CDK application. If you haven't already deployed CDK-based infrastructure in your AWS Account & Region, first [bootstrap](https://docs.aws.amazon.com/cdk/v2/guide/cli.html#cli-bootstrap) your environment by running: 88 | 89 | ```sh 90 | $ npm run cdk:bootstrap 91 | ``` 92 | 93 | Then, you should be able to directly synthesize and deploy this project by running: 94 | 95 | ```sh 96 | $ npm run deploy 97 | 98 | # Or optionally to suppress approval prompts: 99 | $ npm run deploy -- --require-approval never 100 | ``` 101 | 102 | To delete your deployed stacks, you can run: 103 | 104 | ```sh 105 | $ npm run destroy 106 | 107 | # Or optionally to suppress approval prompts: 108 | $ npm run destroy -- --force 109 | ``` 110 | 111 | The NPM `deploy` script (and others) are defined in the `scripts` field of [package.json](package.json) and run inside the NPM context, so they have access to the locally-installed version of the `cdk` CLI. The `--` separates arguments for NPM from those that should be passed through to the underlying script.
The `app` field of [cdk.json](cdk.json) defines the entry-point command for `cdk` commands. 112 | 113 | You can also run CDK commands directly via [npx](https://docs.npmjs.com/cli/v7/commands/npx) if you prefer - for example to **just synthesize** the CloudFormation template(s) instead of also deploying them: 114 | 115 | ```sh 116 | $ npx cdk synth --all # Note no extra '--' required here 117 | ``` 118 | 119 | See the [CDK Toolkit CLI docs](https://docs.aws.amazon.com/cdk/v2/guide/cli.html) for other useful commands you can run (but add the `npx` prefix!). 120 | 121 | 122 | ## Re-configuring the stack 123 | 124 | [cdk_app.py](cdk_app.py) accepts some configuration parameters as environment variables. [cfn_bootstrap.yaml](cfn_bootstrap.yaml) uses these same environment variables to pass CloudFormation stack parameters through to the CDK build & deployment process. 125 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user/fn_user/main.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Custom CloudFormation Resource for a SageMaker Studio User Profile 4 | 5 | See `StudioUserResourceProperties` for expected CloudFormation resource properties. 6 | 7 | CloudFormation Return Values 8 | ---------------------------- 9 | Direct .Ref : 10 | Name of the created SageMaker Studio user profile 11 | UserProfileName : 12 | Name of the created SageMaker Studio user profile 13 | HomeEfsFileSystemUid : 14 | Home EFS File System POSIX user ID allocated for the created SageMaker Studio user (the UID 15 | they'll appear as when mounting the Studio Domain EFS). 16 | """ 17 | # Python Built-Ins: 18 | from __future__ import annotations 19 | import json 20 | import logging 21 | import time 22 | 23 | logging.getLogger().setLevel(logging.INFO) # Set log level for AWS Lambda *BEFORE* other imports 24 | 25 | # External Dependencies: 26 | import boto3 27 | 28 | # Local Dependencies: 29 | from cfn import CustomResourceEvent, CustomResourceRequestType 30 | 31 | logger = logging.getLogger("main") 32 | smclient = boto3.client("sagemaker") 33 | 34 | 35 | class StudioUserResourceProperties: 36 | """Parser for CloudFormation resource properties for this Custom Resource 37 | 38 | Resource Properties 39 | ------------------- 40 | 41 | DomainId : str 42 | (Required) SageMaker Studio Domain ID to create the profile on. 43 | UserProfileName : str 44 | (Required) Domain-unique name to give the user profile (update requires replacement). 45 | UserSettings : dict 46 | Optional user settings object to apply to the user profile. Default `{}`. 
47 | """ 48 | 49 | domain_id: str 50 | user_profile_name: str 51 | user_settings: dict 52 | 53 | def __init__(self, resource_properties: dict): 54 | self.domain_id = resource_properties["DomainId"] 55 | self.user_profile_name = resource_properties["UserProfileName"] 56 | self.user_settings = resource_properties.get("UserSettings", {}) 57 | 58 | def __str__(self): 59 | dict_val = { 60 | "DomainId": self.domain_id, 61 | "UserProfileName": self.user_profile_name, 62 | "UserSettings": self.user_settings, 63 | } 64 | return json.dumps(dict_val) 65 | 66 | @classmethod 67 | def from_str(cls, str_val) -> StudioUserResourceProperties: 68 | return cls(json.loads(str_val)) 69 | 70 | 71 | def lambda_handler(event_raw: dict, context: dict): 72 | """Main entry point for (CDK) Custom Resource Lambda""" 73 | logger.info(event_raw) 74 | event = CustomResourceEvent(event_raw, StudioUserResourceProperties) 75 | if event.request_type == CustomResourceRequestType.create: 76 | return handle_create(event, context) 77 | elif event.request_type == CustomResourceRequestType.update: 78 | return handle_update(event, context) 79 | elif event.request_type == CustomResourceRequestType.delete: 80 | return handle_delete(event, context) 81 | else: 82 | raise ValueError(f"Unsupported CFn RequestType '{event_raw['RequestType']}'") 83 | 84 | 85 | def handle_create(event: CustomResourceEvent[StudioUserResourceProperties], context): 86 | logging.info("**Received create request") 87 | 88 | logging.info("**Creating user profile") 89 | result = create_user_profile(event.props) 90 | # TODO: Do we need to wait for completion? 91 | response = { 92 | "UserProfileName": result["UserProfileName"], 93 | "HomeEfsFileSystemUid": result["HomeEfsFileSystemUid"], 94 | } 95 | print(response) 96 | return { 97 | "PhysicalResourceId": result["UserProfileName"], 98 | "Data": response, 99 | } 100 | 101 | 102 | def handle_delete(event: CustomResourceEvent[StudioUserResourceProperties], context): 103 | logging.info("**Received delete event") 104 | domain_id = event.props.domain_id 105 | try: 106 | smclient.describe_user_profile(DomainId=domain_id, UserProfileName=event.physical_id) 107 | except smclient.exceptions.ResourceNotFound: 108 | # Not found -> Treat as deletion successful 109 | return {"PhysicalResourceId": event.physical_id, "Data": {}} 110 | delete_user_profile(domain_id, event.physical_id) 111 | return {"PhysicalResourceId": event.physical_id, "Data": {}} 112 | 113 | 114 | def handle_update(event: CustomResourceEvent[StudioUserResourceProperties], context): 115 | logging.info("**Received update event") 116 | update_user_profile( 117 | domain_id=event.props.domain_id, 118 | user_profile_name=event.physical_id, 119 | user_settings=event.props.user_settings, 120 | ) 121 | return {"PhysicalResourceId": event.physical_id, "Data": {}} 122 | 123 | 124 | def create_user_profile(config: StudioUserResourceProperties): 125 | domain_id = config.domain_id 126 | user_profile_name = config.user_profile_name 127 | 128 | response = smclient.create_user_profile( 129 | DomainId=domain_id, 130 | UserProfileName=user_profile_name, 131 | UserSettings=config.user_settings, 132 | ) 133 | created = False 134 | time.sleep(0.2) 135 | while not created: 136 | response = smclient.describe_user_profile( 137 | DomainId=domain_id, UserProfileName=user_profile_name 138 | ) 139 | status_lower = response["Status"].lower() 140 | if status_lower == "inservice": 141 | created = True 142 | break 143 | elif "failed" in status_lower: 144 | raise ValueError( 145 | f"User 
'{user_profile_name}' entered Failed state during creation (domain {domain_id})", 146 | ) 147 | time.sleep(5) 148 | 149 | logging.info("**SageMaker user profile created successfully: %s (domain %s)", user_profile_name, domain_id) 150 | return response 151 | 152 | 153 | def delete_user_profile(domain_id: str, user_profile_name: str): 154 | response = smclient.delete_user_profile( 155 | DomainId=domain_id, 156 | UserProfileName=user_profile_name, 157 | ) 158 | deleted = False 159 | time.sleep(0.2) 160 | while not deleted: 161 | try: 162 | response = smclient.describe_user_profile( 163 | DomainId=domain_id, UserProfileName=user_profile_name 164 | ) 165 | status_lower = response["Status"].lower() 166 | if "failed" in status_lower: 167 | raise ValueError( 168 | f"User '{user_profile_name}' entered Failed state during deletion (domain {domain_id})", 169 | ) 170 | elif "deleting" not in status_lower: 171 | raise ValueError( 172 | f"User '{user_profile_name}' no longer 'Deleting' but not deleted (domain {domain_id})", 173 | ) 174 | except smclient.exceptions.ResourceNotFound: 175 | logging.info("Deleted user %s from domain %s", user_profile_name, domain_id) 176 | deleted = True 177 | break 178 | time.sleep(5) 179 | return response 180 | 181 | 182 | def update_user_profile(domain_id: str, user_profile_name: str, user_settings: dict): 183 | response = smclient.update_user_profile( 184 | DomainId=domain_id, 185 | UserProfileName=user_profile_name, 186 | UserSettings=user_settings, 187 | ) 188 | updated = False 189 | time.sleep(0.2) 190 | while not updated: 191 | response = smclient.describe_user_profile( 192 | DomainId=domain_id, UserProfileName=user_profile_name 193 | ) 194 | status_lower = response["Status"].lower() 195 | if status_lower == "inservice": 196 | updated = True 197 | break 198 | elif "failed" in status_lower: 199 | raise ValueError( 200 | f"User '{user_profile_name}' entered Failed state during update (domain {domain_id})", 201 | ) 202 | time.sleep(5) 203 | return response 204 | -------------------------------------------------------------------------------- /custom_script_demos/huggingface_nlp/scripts/train.py: -------------------------------------------------------------------------------- 1 | """Transformer-based text classification on SageMaker with Hugging Face""" 2 | 3 | # Python Built-Ins: 4 | import argparse 5 | import logging 6 | import os 7 | import sys 8 | from typing import List, Optional 9 | 10 | # External Dependencies: 11 | import datasets 12 | #from datasets import disable_progress_bar as disable_datasets_progress_bar 13 | from transformers import ( 14 | AutoModelForSequenceClassification, 15 | Trainer, 16 | TrainingArguments, 17 | AutoTokenizer, 18 | DataCollatorWithPadding, 19 | ) 20 | from sklearn.metrics import accuracy_score, precision_recall_fscore_support 21 | 22 | # Set up logging: 23 | logging.basicConfig( 24 | level=logging.getLevelName("INFO"), 25 | handlers=[logging.StreamHandler(sys.stdout)], 26 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 27 | ) 28 | logger = logging.getLogger(__name__) 29 | datasets.disable_progress_bar() # Too noisy on conventional log streams 30 | 31 | # Factoring your code out into smaller helper functions can help with debugging: 32 | 33 | 34 | def parse_args(): 35 | """Parse hyperparameters and data args from CLI arguments and environment variables""" 36 | parser = argparse.ArgumentParser() 37 | 38 | # hyperparameters sent by the client are passed as command-line arguments to the script.
39 | parser.add_argument("--model_id", type=str, required=True) 40 | parser.add_argument("--class_names", type=lambda s: s.split(","), required=True) 41 | parser.add_argument("--learning_rate", type=float, default=5e-5) 42 | parser.add_argument("--warmup_steps", type=int, default=500) 43 | parser.add_argument("--epochs", type=int, default=3) 44 | parser.add_argument("--train_max_steps", type=int, default=-1) 45 | parser.add_argument("--train_batch_size", type=int, default=32) 46 | parser.add_argument("--eval_batch_size", type=int, default=64) 47 | parser.add_argument("--fp16", type=int, default=1) 48 | 49 | # Data, model, and output folders are set by combination of CLI args and env vars: 50 | parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN")) 51 | parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TEST")) 52 | parser.add_argument("--model_dir", type=str, default=os.environ.get("SM_MODEL_DIR")) 53 | parser.add_argument("--output_data_dir", type=str, default=os.environ.get("SM_OUTPUT_DATA_DIR")) 54 | # parser.add_argument("--n_gpus", type=int, default=os.environ.get("SM_NUM_GPUS")) 55 | 56 | args, _ = parser.parse_known_args() 57 | return args 58 | 59 | 60 | def compute_metrics(pred): 61 | labels = pred.label_ids 62 | preds = pred.predictions.argmax(-1) 63 | precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="micro") 64 | acc = accuracy_score(labels, preds) 65 | return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall} 66 | 67 | 68 | def get_model(model_id: str, class_names: List[str]) -> ( 69 | AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding 70 | ): 71 | """Set up tokenizer, model, data_collator from job parameters""" 72 | tokenizer = AutoTokenizer.from_pretrained(model_id) 73 | 74 | model = AutoModelForSequenceClassification.from_pretrained( 75 | model_id, num_labels=len(class_names) 76 | ) 77 | model.config.label2id = {name: ix for ix, name in enumerate(class_names)} 78 | model.config.id2label = {ix: name for ix, name in enumerate(class_names)} 79 | 80 | data_collator = DataCollatorWithPadding(tokenizer=tokenizer) 81 | 82 | return tokenizer, model, data_collator 83 | 84 | 85 | def load_datasets(tokenizer: AutoTokenizer, train_dir: str, test_dir: Optional[str] = None) -> ( 86 | datasets.Dataset, Optional[datasets.Dataset] 87 | ): 88 | """Load and pre-process training (+ validation?) 
dataset(s)""" 89 | 90 | def preprocess(batch): 91 | """Tokenize and pre-process raw examples for training/validation""" 92 | result = tokenizer(batch["title"], truncation=True) 93 | result["label"] = batch["category"] 94 | return result 95 | 96 | 97 | raw_train_dataset = datasets.load_dataset( 98 | "csv", 99 | data_files=[os.path.join(train_dir, f) for f in os.listdir(train_dir)], 100 | column_names=["category", "title", "content"], 101 | split=datasets.Split.ALL, 102 | ) 103 | train_dataset = raw_train_dataset.map( 104 | preprocess, batched=True, batch_size=1000, remove_columns=raw_train_dataset.column_names 105 | ) 106 | logger.info(f"Loaded train_dataset length is: {len(train_dataset)}") 107 | if test_dir: 108 | # test channel is optional: 109 | raw_test_dataset = datasets.load_dataset( 110 | "csv", 111 | data_files=[os.path.join(test_dir, f) for f in os.listdir(test_dir)], 112 | column_names=["category", "title", "content"], 113 | split=datasets.Split.ALL, 114 | ) 115 | test_dataset = raw_test_dataset.map( 116 | preprocess, batched=True, batch_size=1000, remove_columns=raw_test_dataset.column_names 117 | ) 118 | logger.info(f"Loaded test_dataset length is: {len(test_dataset)}") 119 | else: 120 | test_dataset = None 121 | logger.info("No test_dataset provided") 122 | return train_dataset, test_dataset 123 | 124 | 125 | # Only run this main block if running as a script (e.g. in training), not when imported as a module 126 | # (which would be the case if used at inference): 127 | if __name__ == "__main__": 128 | # Load job parameters: 129 | args = parse_args() 130 | training_args = TrainingArguments( 131 | max_steps=args.train_max_steps, 132 | num_train_epochs=args.epochs, 133 | per_device_train_batch_size=args.train_batch_size, 134 | per_device_eval_batch_size=args.eval_batch_size, 135 | fp16=bool(args.fp16), 136 | evaluation_strategy="epoch", 137 | save_strategy="epoch", 138 | load_best_model_at_end=True, 139 | metric_for_best_model="f1", 140 | learning_rate=args.learning_rate, 141 | warmup_steps=args.warmup_steps, 142 | disable_tqdm=True, # Interactive progress bars too noisy on conventional log streams 143 | # You could save checkpoints & logs under args.output_data_dir to upload them, but it 144 | # increases job run time by a few minutes: 145 | output_dir="/tmp/transformers/checkpoints", 146 | logging_dir="/tmp/transformers/logs", 147 | ) 148 | 149 | # Load tokenizer/model/collator: 150 | tokenizer, model, collator = get_model(model_id=args.model_id, class_names=args.class_names) 151 | 152 | # Load and pre-process the dataset: 153 | train_dataset, test_dataset = load_datasets( 154 | tokenizer=tokenizer, 155 | train_dir=args.train, 156 | test_dir=args.test, 157 | ) 158 | 159 | # Create Trainer instance 160 | trainer = Trainer( 161 | model=model, 162 | args=training_args, 163 | compute_metrics=compute_metrics, 164 | train_dataset=train_dataset, 165 | eval_dataset=test_dataset, 166 | tokenizer=tokenizer, 167 | data_collator=collator, 168 | ) 169 | 170 | # Train the model 171 | trainer.train() 172 | 173 | # Save the model output 174 | trainer.save_model(args.model_dir) 175 | 176 | # Evaluate the final model and save a report, if test dataset provided: 177 | if test_dataset: 178 | eval_result = trainer.evaluate(eval_dataset=test_dataset) 179 | # The 'output' folder will also (separately from model) get uploaded to S3 by SageMaker: 180 | if args.output_data_dir: 181 | os.makedirs(args.output_data_dir, exist_ok=True) 182 | with open(os.path.join(args.output_data_dir, "eval_results.txt"), 
"w") as writer: 183 | print("***** Eval results *****") 184 | for key, value in sorted(eval_result.items()): 185 | writer.write(f"{key} = {value}\n") 186 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/iam.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """AWS CDK constructs for IAM roles in Amazon SageMaker workshops 4 | """ 5 | # Python Built-Ins: 6 | from typing import Mapping, Optional, Sequence 7 | 8 | # External Dependencies: 9 | from aws_cdk import Duration 10 | import aws_cdk.aws_iam as iam 11 | from aws_cdk.aws_iam import IManagedPolicy, IPrincipal, PolicyDocument 12 | from constructs import Construct 13 | 14 | 15 | class WorkshopSageMakerExecutionRole(iam.Role): 16 | """An IAM role set up for Amazon SageMaker execution in workshops 17 | 18 | This construct sets permissive permissions by default and is not recommended for production use 19 | """ 20 | 21 | def __init__( 22 | self, 23 | scope: Construct, 24 | id: str, 25 | *, 26 | assumed_by_extra: Optional[IPrincipal] = None, 27 | description: Optional[str] = None, 28 | enable_bedrock: bool = True, 29 | enable_codewhisperer: bool = True, 30 | enable_glueis: bool = True, 31 | enable_iamfullaccess: bool = False, 32 | enable_s3fullaccess: bool = True, 33 | enable_sagemakerfullaccess: bool = True, 34 | external_ids: Optional[Sequence[str]] = None, 35 | extras_inline_policy_name: str = "WorkshopExtras", 36 | inline_policies: Optional[Mapping[str, PolicyDocument]] = None, 37 | managed_policies: Optional[Sequence[IManagedPolicy]] = None, 38 | max_session_duration: Optional[Duration] = None, 39 | path: Optional[str] = None, 40 | permissions_boundary: Optional[IManagedPolicy] = None, 41 | role_name: Optional[str] = None, 42 | ) -> None: 43 | """Create a WorkshopSageMakerExecutionRole 44 | 45 | Parameters are generally as per CDK iam.Role, but with customized default values. 46 | 47 | Parameters 48 | ---------- 49 | scope : 50 | CDK construct scope 51 | id : 52 | CDK construct ID 53 | assumed_by_extra : 54 | Optionally provide an extra Principal this role should trust. SageMaker and (if 55 | `enable_glueis` is set) AWS Glue principals will already be trusted: You only need to 56 | set this parameter if needing to add an additional principal. 57 | description : 58 | A description of the role 59 | enable_bedrock : 60 | This construct will grant bedrock:* permissions in an inline policy by default. Set 61 | False to prevent this. 62 | enable_codewhisperer : 63 | This construct will grant the codewhisperer:GenerateRecommendations permission in an 64 | inline policy by default. Set False to prevent this. 65 | enable_glueis : 66 | This construct will trust the AWS Glue service and apply the AWS Managed 67 | AwsGlueSessionUserRestrictedServiceRole by default, for using Glue Interactive Sessions 68 | within SageMaker Studio notebooks. Set False to prevent this. 69 | enable_iamfullaccess : 70 | You can attach the AWS Managed IAMFullAccess policy to your role by setting this to 71 | `True`... But since this is a very broad permission, it's `False` by default. 72 | enable_s3fullaccess : 73 | By default, this construct will append the AmazonS3FullAccess AWS Managed Policy to 74 | your `managed_policies`. Set False to prevent this. 
75 | enable_sagemakerfullaccess : 76 | By default, this construct will append the AmazonSageMakerFullAccess AWS Managed Policy 77 | to your `managed_policies`. Set False to prevent this. 78 | external_ids : 79 | A list of external IDs that are allowed to assume the role 80 | extras_inline_policy_name : 81 | The name to use for the auto-generated Inline Policy of extra permissions for 82 | SageMaker workshops. 83 | inline_policies : 84 | Inline policies to attach to the role 85 | managed_policies : 86 | By default, we'll apply AWS policies AmazonSageMakerFullAccess, AmazonS3FullAccess, 87 | AwsGlueSessionUserRestrictedServiceRole, and IAMFullAccess. You only need to set this 88 | parameter if you want to override this. 89 | max_session_duration : 90 | The maximum session duration for the role 91 | path : 92 | The path for the role 93 | permissions_boundary : 94 | The permissions boundary for the role 95 | role_name : 96 | The name of the role 97 | """ 98 | principals = [iam.ServicePrincipal("sagemaker.amazonaws.com")] 99 | extra_managed_policies = [] 100 | inline_policy_statements = [] 101 | 102 | # Parse required extra principals/policies/statements from the config options: 103 | if enable_bedrock: 104 | inline_policy_statements.append( 105 | iam.PolicyStatement(actions=["bedrock:*"], resources=["*"], sid="BedrockAccess") 106 | ) 107 | if enable_codewhisperer: 108 | inline_policy_statements.append( 109 | iam.PolicyStatement( 110 | actions=["codewhisperer:GenerateRecommendations"], 111 | resources=["*"], 112 | sid="CodeWhispererPermissions", 113 | ) 114 | ) 115 | if enable_glueis: 116 | principals.append(iam.ServicePrincipal("glue.amazonaws.com")) 117 | extra_managed_policies.append( 118 | iam.ManagedPolicy.from_aws_managed_policy_name( 119 | "service-role/AwsGlueSessionUserRestrictedServiceRole" 120 | ) 121 | ) 122 | inline_policy_statements.append( 123 | # TODO: Scope this down better 124 | iam.PolicyStatement( 125 | actions=["iam:GetRole", "iam:PassRole", "sts:GetCallerIdentity"], 126 | resources=["*"], 127 | sid="GlueSessionsIAMPerms", 128 | ) 129 | ) 130 | if enable_iamfullaccess: 131 | extra_managed_policies.append( 132 | iam.ManagedPolicy.from_aws_managed_policy_name("IAMFullAccess") 133 | ) 134 | if enable_s3fullaccess: 135 | extra_managed_policies.append( 136 | iam.ManagedPolicy.from_aws_managed_policy_name("AmazonS3FullAccess") 137 | ) 138 | if enable_sagemakerfullaccess: 139 | extra_managed_policies.append( 140 | iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess") 141 | ) 142 | 143 | # Apply the extras to the core iam.Role arguments: 144 | if assumed_by_extra: 145 | principals.append(assumed_by_extra) 146 | assumed_by = iam.CompositePrincipal(*principals) 147 | if len(extra_managed_policies): 148 | if not managed_policies: 149 | managed_policies = [] 150 | managed_policies = [*managed_policies, *extra_managed_policies] 151 | if len(inline_policy_statements): 152 | if not inline_policies: 153 | inline_policies = {} 154 | if extras_inline_policy_name in inline_policies: 155 | inline_policies[extras_inline_policy_name].add_statements(inline_policy_statements) 156 | else: 157 | inline_policies[extras_inline_policy_name] = iam.PolicyDocument( 158 | statements=inline_policy_statements, 159 | ) 160 | 161 | # Call iam.Role with the updated args: 162 | super().__init__( 163 | scope, 164 | id, 165 | assumed_by=assumed_by, 166 | description=description, 167 | external_ids=external_ids, 168 | inline_policies=inline_policies, 169 | managed_policies=managed_policies, 
170 | max_session_duration=max_session_duration, 171 | path=path, 172 | permissions_boundary=permissions_boundary, 173 | role_name=role_name, 174 | ) 175 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Getting Started with "Amazon SageMaker 101" 2 | 3 | This repository accompanies a hands-on training event to introduce data scientists (and ML-ready developers / technical leaders) to core model training and deployment workflows with [Amazon SageMaker](https://aws.amazon.com/sagemaker/). 4 | 5 | Like a "101" course in [the academic sense](https://en.wikipedia.org/wiki/101_(topic)), this will likely **not** be the simplest introduction to SageMaker you can find; nor the fastest way to get started with advanced features like [optimized SageMaker Distributed training](https://docs.aws.amazon.com/sagemaker/latest/dg/distributed-training.html) or [SageMaker Clarify for bias and explainability analyses](https://aws.amazon.com/sagemaker/clarify/). 6 | 7 | Instead, these exercises are chosen to demonstrate some core build/train/deploy patterns that we've found help new users to first get productive with SageMaker - and to later understand how the more advanced features fit in. 8 | 9 | ## Agenda 10 | 11 | An interactive walkthrough of the content with screenshots is available at: 12 | 13 | > **[https://sagemaker-101-workshop.workshop.aws/](https://sagemaker-101-workshop.workshop.aws/)** 14 | 15 | Sessions in suggested order: 16 | 17 | 1. [builtin_algorithm_hpo_tabular](builtin_algorithm_hpo_tabular): Explore some **pre-built algorithms** and tools for tabular data, including [SageMaker Canvas](https://aws.amazon.com/sagemaker/canvas/), [SageMaker AutoML APIs](https://docs.aws.amazon.com/sagemaker/latest/dg/use-auto-ml.html), the [XGBoost built-in algorithm](https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html), and [automatic hyperparameter tuning](https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning.html) 18 | - This module also includes a quick initial look at [SageMaker Feature Store](https://docs.aws.amazon.com/sagemaker/latest/dg/feature-store.html), [SageMaker Model Registry](https://docs.aws.amazon.com/sagemaker/latest/dg/model-registry.html), and the [AutoGluon built-in algorithm](https://docs.aws.amazon.com/sagemaker/latest/dg/autogluon-tabular.html) - but you don't need to dive deep on these topics. 19 | 1. [custom_script_demos](custom_script_demos): See how you can train and deploy your own models on SageMaker with **custom Python scripts** and the pre-built framework containers 20 | - (Optional) Start with [sklearn_reg](custom_script_demos/sklearn_reg) for an introduction if you're new to deep learning but familiar with Scikit-Learn 21 | - See [huggingface_nlp](custom_script_demos/huggingface_nlp) (preferred) for a side-by-side comparison of in-notebook versus on-SageMaker model training and inference for text classification - or alternatively the custom CNN-based [keras_nlp](custom_script_demos/keras_nlp) or [pytorch_nlp](custom_script_demos/pytorch_nlp) examples. 22 | 1. 
[migration_challenge](migration_challenge): **Apply** what you learned to port an in-notebook workflow to a SageMaker training job + endpoint deployment on your own
23 | - Choose the [sklearn_cls](migration_challenge/sklearn_cls), [keras_mnist](migration_challenge/keras_mnist) or [pytorch_mnist](migration_challenge/pytorch_mnist) challenge, depending on which ML framework you're most comfortable with.
24 |
25 |
26 | ## Deploying in Your Own Account
27 |
28 | The recommended way to explore these exercises is through Amazon SageMaker AI Studio - and you can deploy the [**template in .infrastructure/cfn_bootstrap.yaml**](.infrastructure/cfn_bootstrap.yaml) from the [AWS CloudFormation Console](https://console.aws.amazon.com/cloudformation/home) to get started with the same environment configuration we use for AWS-guided deliveries of this workshop.
29 |
30 | > ⚠️ Our `.infrastructure` is optimized for getting started easily with SageMaker Studio, but is not recommended for use in production environments!
31 |
32 | You can also [read more about how to onboard to SageMaker Studio](https://docs.aws.amazon.com/sagemaker/latest/dg/gs-studio-onboard.html) in the SageMaker AI Developer Guide, and learn [how SageMaker Studio Notebooks are different from Notebook Instances](https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-comparison.html). A more basic Notebook Instance-based CloudFormation stack is also available in [.simple.cf.yaml](.simple.cf.yaml), but some features of the labs will not be available.
33 |
34 | Depending on your setup, you may be asked to **choose a kernel** when opening some notebooks. There should be guidance at the top of each notebook on suggested kernel types, but if you can't find any, `Data Science 3.0 (Python 3)` (on Studio) or `conda_python3` (on Notebook Instances) are likely good options.
35 |
36 | ### Setting up widgets and code completion (JupyterLab extensions)
37 |
38 | Some of the examples depend on [ipywidgets](https://ipywidgets.readthedocs.io/en/latest/) and [ipycanvas](https://ipycanvas.readthedocs.io/en/latest/) for interactive inference demo widgets (but do provide code-only alternatives).
39 |
40 | We also usually enable some additional JupyterLab extensions powered by [jupyterlab-lsp](https://github.com/jupyter-lsp/jupyterlab-lsp#readme) and [jupyterlab-s3-browser](https://github.com/IBM/jupyterlab-s3-browser#readme) to improve user experience. You can find more information about these extensions in [this AWS ML blog post](https://aws.amazon.com/blogs/machine-learning/amazon-sagemaker-studio-and-sagemaker-notebook-instance-now-come-with-jupyterlab-3-notebooks-to-boost-developer-productivity/).
41 |
42 | `ipywidgets` should be available by default on SageMaker Studio, but was not pre-installed on Notebook Instances when we last tested. The other extensions require installation.
43 |
44 | To see how we automate these extra setup steps for AWS-run events, you can refer to the **lifecycle configuration scripts** in our CloudFormation templates. For a [Notebook Instance LCC](https://docs.amazonaws.cn/en_us/sagemaker/latest/dg/notebook-lifecycle-config.html), see the `AWS::SageMaker::NotebookInstanceLifecycleConfig` in [.simple.cf.yaml](.simple.cf.yaml). For a [SageMaker Studio LCC](https://docs.amazonaws.cn/en_us/sagemaker/latest/dg/studio-lcc-create.html), see the `Custom::StudioLifecycleConfig` in [.infrastructure/template.sam.yaml](.infrastructure/template.sam.yaml).
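If you're not sure whether your environment already has the widget libraries, here's a quick (purely illustrative) check you can run in a notebook cell before starting the interactive demos - the only assumption is the two package names:

```python
# Check that the widget libraries used by the interactive demos are importable:
import importlib.util

for pkg in ("ipywidgets", "ipycanvas"):
    if importlib.util.find_spec(pkg) is None:
        print(f"{pkg} is not installed - try `%pip install {pkg}` and restart the kernel")
    else:
        print(f"{pkg} is available")
```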
45 | 46 | 47 | ## Security 48 | 49 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 50 | 51 | 52 | ## License 53 | 54 | This library is licensed under the MIT-0 License. See the LICENSE file. 55 | 56 | 57 | ## Further Reading 58 | 59 | One major focus of this workshop is how SageMaker helps us right-size and segregate compute resources for different ML tasks, without sacrificing (and ideally accelerating!) data scientist productivity. For more information on this topic, see this post on the AWS Machine Learning Blog: [Right-sizing resources and avoiding unnecessary costs in Amazon SageMaker](https://aws.amazon.com/blogs/machine-learning/right-sizing-resources-and-avoiding-unnecessary-costs-in-amazon-sagemaker/) 60 | 61 | For a workshop that starts with a similar migration-based approach, but dives further into automated pipelines and CI/CD, check out [aws-samples/amazon-sagemaker-from-idea-to-production](https://github.com/aws-samples/amazon-sagemaker-from-idea-to-production). 62 | 63 | As you continue to explore Amazon SageMaker, you'll also find many more useful resources in: 64 | 65 | - The official **[Amazon SageMaker Examples repository](https://github.com/aws/amazon-sagemaker-examples)**: with a broad range of code samples covering SageMaker use cases from beginner to expert. 66 | - The **[documentation](https://sagemaker.readthedocs.io/en/stable/)** (and maybe even the [source code](https://github.com/aws/sagemaker-python-sdk)) for the **SageMaker Python SDK**: The high-level, open-source [PyPI library](https://pypi.org/project/sagemaker/) we use when we `import sagemaker`. 67 | - The **[Amazon SageMaker Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html)**: documenting the SageMaker service itself. 68 | 69 | More advanced users may also find it helpful to refer to: 70 | 71 | - The **[boto3 reference for SageMaker](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html)** and the **[SageMaker API reference](https://docs.aws.amazon.com/sagemaker/latest/APIReference/Welcome.html)**: in case you have use cases for SageMaker where you want (or need) to use low-level APIs directly, instead of through the `sagemaker` library. 72 | - The **[AWS Deep Learning Containers](https://github.com/aws/deep-learning-containers)** and **[SageMaker Scikit-Learn Containers](https://github.com/aws/sagemaker-scikit-learn-container)** **source code**: For a deeper understanding of the framework container environments. 73 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user_setup/fn_user_setup/content.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """Custom CloudFormation Resource for loading content to a SageMaker Studio user 4 | 5 | See `.base.StudioUserSetupResourceProperties` for CloudFormation input Properties, and main.py 6 | docstring for CloudFormation return values. 7 | 8 | This sub-resource either clones a (public) git repository or downloads content from Amazon S3, into 9 | a SageMaker Studio user's home folder on create. Updating and Deleting the resource currently do 10 | nothing as it's designed for one-off account setup. 
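For illustration (`GitRepository` and `ContentS3Uri` are the property names referenced by the code
below; the other names are inferred from the attribute names in
`.base.StudioUserSetupResourceProperties`, and all values are placeholders), a resource using this
handler might be configured with properties along these lines::

    {
        "DomainId": "d-xxxxxxxxxxxx",
        "UserProfileName": "workshop-user",
        "HomeEfsFileSystemUid": "200005",
        "GitRepository": "https://github.com/example-org/example-repo.git",
        "TargetPath": "example-repo",
    }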
11 | """ 12 | # Python Built-Ins: 13 | import logging 14 | import os 15 | import traceback 16 | from typing import Optional, Union 17 | import zipfile 18 | 19 | # External Dependencies: 20 | import boto3 21 | from botocore import UNSIGNED 22 | from botocore.config import Config 23 | from git import Repo 24 | 25 | # Local Dependencies: 26 | from base import StudioUserSetupResourceProperties 27 | from cfn import CustomResourceEvent 28 | 29 | anons3config = Config(signature_version=UNSIGNED) 30 | smclient = boto3.client("sagemaker") 31 | 32 | 33 | def handle_create(event: CustomResourceEvent[StudioUserSetupResourceProperties], context): 34 | """Handle a resource creation Lambda event from CloudFormation""" 35 | logging.info("**Received create request") 36 | logging.info("**Setting up user content") 37 | try: 38 | # Check home folder exists and is assigned to correct EFS owner: 39 | home_folder = ensure_home_dir(event.props.home_efs_file_system_uid) 40 | 41 | # Now ready to clone in Git content (or whatever else...) 42 | if event.props.git_repository: 43 | output_content_path = clone_git_repository( 44 | home_folder, 45 | event.props.git_repository, 46 | event.props.target_path, 47 | event.props.git_checkout, 48 | ) 49 | elif event.props.content_s3_uri: 50 | output_content_path = copy_s3_content( 51 | home_folder, 52 | event.props.content_s3_uri, 53 | event.props.target_path, 54 | event.props.extract_content, 55 | event.props.authenticate_s3, 56 | ) 57 | else: 58 | logging.warning("Neither GitRepository nor ContentS3Uri set - nothing to create") 59 | 60 | # Remember to set ownership/permissions for all the stuff we just created, to give the 61 | # user write access: 62 | chown_recursive(output_content_path, uid=event.props.home_efs_file_system_uid) 63 | print("All done") 64 | except Exception as e: 65 | # Don't bring the entire CF stack down just because we couldn't copy a repo: 66 | print("IGNORING CONTENT SETUP ERROR") 67 | traceback.print_exc() 68 | 69 | logging.info("**SageMaker Studio user '%s' set up successfully", event.props.user_profile_name) 70 | return { 71 | "PhysicalResourceId": event.props.user_profile_name, 72 | "Data": {"UserProfileName": event.props.user_profile_name}, 73 | } 74 | 75 | 76 | def handle_delete(event: CustomResourceEvent[StudioUserSetupResourceProperties], context): 77 | """Handle a resource deletion Lambda event from CloudFormation (a no-op for this resource)""" 78 | logging.info("**Received delete event") 79 | # Since this is a no-op, there's no point strictly parsing the props (risking failures): 80 | logging.info( 81 | "**Deleting user setup is a no-op: user '%s' on domain '%s", 82 | event.physical_id, 83 | event.props.domain_id, 84 | ) 85 | return {"PhysicalResourceId": event.physical_id, "Data": {}} 86 | 87 | 88 | def handle_update(event: CustomResourceEvent[StudioUserSetupResourceProperties], context): 89 | """Handle a resource update Lambda event from CloudFormation (a no-op for this resource)""" 90 | logging.info("**Received update event") 91 | # Since this is a no-op, there's no point strictly parsing the props (risking failures): 92 | logging.info( 93 | "**Updating user setup is a no-op: user '%s' on domain '%s", 94 | event.physical_id, 95 | event.props.domain_id, 96 | ) 97 | return {"PhysicalResourceId": event.physical_id, "Data": {}} 98 | 99 | 100 | def ensure_home_dir(efs_uid: Union[int, str]) -> str: 101 | """Check the EFS home folder for the given user ID exists with correct ownership 102 | 103 | The root of the EFS contains folders named for each 
user UID, but these may not be created 104 | before the user has first logged in (could os.listdir("/mnt/efs") to check). 105 | """ 106 | print("Creating/checking home folder...") 107 | home_folder = f"/mnt/efs/{efs_uid}" 108 | os.makedirs(home_folder, exist_ok=True) 109 | # Set correct ownership permissions for this folder straight away, in case a later process errors out 110 | os.chown(home_folder, int(efs_uid), -1) 111 | return home_folder 112 | 113 | 114 | def clone_git_repository( 115 | base_folder: str, git_repo: str, as_folder: Optional[str] = None, checkout: Optional[str] = None 116 | ) -> str: 117 | """Clone a git repository into `base_folder/as_folder` and optionally check out `checkout` 118 | 119 | DOES NOT CONFIGURE FILE OWNERSHIP PERMISSIONS! Run chown_recursive if required. 120 | """ 121 | print(f"Cloning code... {git_repo}") 122 | if not as_folder: 123 | # Infer target folder name from repo URL if not specified: 124 | as_folder = git_repo.rpartition("/")[2] 125 | if as_folder.lower().endswith(".git"): 126 | as_folder = as_folder[: -len(".git")] 127 | target_folder = os.path.join(base_folder, as_folder) 128 | repo = Repo.clone_from(git_repo, target_folder) 129 | if checkout: 130 | print(f"Checking out '{checkout}'...") 131 | repo.git.checkout(checkout) 132 | else: 133 | print("No specific checkout branch/commit specified - keeping default") 134 | return target_folder 135 | 136 | 137 | def copy_s3_content( 138 | base_folder: str, 139 | content_s3uri: str, 140 | target_path: Optional[str] = None, 141 | extract: Optional[bool] = False, 142 | authenticate_s3: Optional[bool] = False, 143 | ) -> str: 144 | """Download content from Amazon S3 to `base_folder/target_path` 145 | 146 | DOES NOT CONFIGURE FILE OWNERSHIP PERMISSIONS! Run chown_recursive if required. 
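For example (illustrative values only - the EFS path and bucket are placeholders), downloading and
extracting a zip archive into a user's home directory::

    copy_s3_content(
        "/mnt/efs/200005",
        "s3://example-bucket/workshop-content.zip",
        extract=True,
    )
    # Returns "/mnt/efs/200005/workshop-content.zip", which now holds the extracted files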
147 | """ 148 | if not content_s3uri.lower().startswith("s3://"): 149 | raise ValueError("Content URI must start with 's3://'") 150 | bucket_name, _, key_prefix = content_s3uri[len("s3://") :].partition("/") 151 | 152 | # Set up S3 client as anonymous or authenticated, depending on resource config: 153 | s3 = boto3.resource("s3", config=(None if authenticate_s3 else anons3config)) 154 | s3client = boto3.client("s3", config=(None if authenticate_s3 else anons3config)) 155 | 156 | # Check if the provided content URI is a valid object (vs folder/prefix): 157 | bucket = s3.Bucket(bucket_name) 158 | print(f"Checking s3://{bucket_name}/{key_prefix}") 159 | try: 160 | content_type = bucket.Object(key_prefix).content_type 161 | if content_type and content_type.lower() == "application/x-directory": 162 | is_object = False 163 | else: 164 | is_object = True 165 | except s3client.exceptions.ClientError as err: 166 | if err.response["Error"]["Code"] == "404": 167 | is_object = False 168 | else: 169 | raise err 170 | 171 | if is_object: 172 | if target_path is None: 173 | target_path = os.path.basename(key_prefix) 174 | full_target_path = os.path.join(base_folder, target_path) 175 | print(f"Downloading {content_s3uri}") 176 | bucket.download_file(key_prefix, full_target_path) 177 | 178 | if not extract: 179 | return full_target_path 180 | # Otherwise, extract compressed file: 181 | # A file without a dot/extension will produce ("", "", "wholename"): 182 | basename, _, file_ext = key_prefix.rpartition(".") 183 | file_ext = file_ext.lower() 184 | extract_path = full_target_path + "-tmp" 185 | if file_ext == "zip" or not basename: 186 | # (Assume zip for files with no extension if extract specified) 187 | print(f"Extracting to {extract_path}") 188 | with zipfile.ZipFile(full_target_path, "r") as zip_ref: 189 | zip_ref.extractall(extract_path) 190 | else: 191 | raise NotImplementedError(f"File extension '{file_ext}' not supported for extraction") 192 | print(f"Replacing compressed {full_target_path} with {extract_path}") 193 | os.remove(full_target_path) 194 | os.rename(extract_path, full_target_path) 195 | return full_target_path 196 | 197 | # Otherwise looks like a folder 198 | raise NotImplementedError( 199 | f"Object not found and prefix/folder download not yet supported: ${content_s3uri}" 200 | ) 201 | 202 | 203 | def chown_recursive(path: str, uid: Union[str, int] = -1, gid: Union[str, int] = -1): 204 | """Workaround for os.chown() not having a recursive option for folders""" 205 | uid = int(uid) 206 | gid = int(gid) 207 | if os.path.isfile(path): 208 | os.chown(path, uid, gid) 209 | else: 210 | for dirpath, dirnames, filenames in os.walk(path): 211 | os.chown(dirpath, uid, gid) 212 | for filename in filenames: 213 | os.chown(os.path.join(dirpath, filename), uid, gid) 214 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/region_config.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | """CDK constructs for cross-regional configuration mapping of SageMaker resources 4 | """ 5 | # Python Built-Ins: 6 | from typing import Optional, Tuple 7 | 8 | # External Dependencies: 9 | from aws_cdk import CfnMapping 10 | from constructs import Construct 11 | 12 | 13 | STUDIO_APP_ARNS_BY_REGION = { 14 | "us-east-1": { 15 | "datascience": "arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0", 16 | "datascience2": "arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-38", 17 | "datascience3": "arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-310-v1", 18 | "jlabv3": "arn:aws:sagemaker:us-east-1:081325390199:image/jupyter-server-3", 19 | }, 20 | "us-east-2": { 21 | "datascience": "arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0", 22 | "datascience2": "arn:aws:sagemaker:us-east-2:429704687514:image/sagemaker-data-science-38", 23 | "datascience3": "arn:aws:sagemaker:us-east-2:429704687514:image/sagemaker-data-science-310-v1", 24 | "jlabv3": "arn:aws:sagemaker:us-east-2:429704687514:image/jupyter-server-3", 25 | }, 26 | "us-west-1": { 27 | "datascience": "arn:aws:sagemaker:us-west-1:742091327244:image/datascience-1.0", 28 | "datascience2": "arn:aws:sagemaker:us-west-1:742091327244:image/sagemaker-data-science-38", 29 | "datascience3": "arn:aws:sagemaker:us-west-1:742091327244:image/sagemaker-data-science-310-v1", 30 | "jlabv3": "arn:aws:sagemaker:us-west-1:742091327244:image/jupyter-server-3", 31 | }, 32 | "us-west-2": { 33 | "datascience": "arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0", 34 | "datascience2": "arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-38", 35 | "datascience3": "arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1", 36 | "jlabv3": "arn:aws:sagemaker:us-west-2:236514542706:image/jupyter-server-3", 37 | }, 38 | "af-south-1": { 39 | "datascience": "arn:aws:sagemaker:af-south-1:559312083959:image/datascience-1.0", 40 | "datascience2": "arn:aws:sagemaker:af-south-1:559312083959:image/sagemaker-data-science-38", 41 | "datascience3": "arn:aws:sagemaker:af-south-1:559312083959:image/sagemaker-data-science-310-v1", 42 | "jlabv3": "arn:aws:sagemaker:af-south-1:559312083959:image/jupyter-server-3", 43 | }, 44 | "ap-east-1": { 45 | "datascience": "arn:aws:sagemaker:ap-east-1:493642496378:image/datascience-1.0", 46 | "datascience2": "arn:aws:sagemaker:ap-east-1:493642496378:image/sagemaker-data-science-38", 47 | "datascience3": "arn:aws:sagemaker:ap-east-1:493642496378:image/sagemaker-data-science-310-v1", 48 | "jlabv3": "arn:aws:sagemaker:ap-east-1:493642496378:image/jupyter-server-3", 49 | }, 50 | "ap-south-1": { 51 | "datascience": "arn:aws:sagemaker:ap-south-1:394103062818:image/datascience-1.0", 52 | "datascience2": "arn:aws:sagemaker:ap-south-1:394103062818:image/sagemaker-data-science-38", 53 | "datascience3": "arn:aws:sagemaker:ap-south-1:394103062818:image/sagemaker-data-science-310-v1", 54 | "jlabv3": "arn:aws:sagemaker:ap-south-1:394103062818:image/jupyter-server-3", 55 | }, 56 | "ap-northeast-2": { 57 | "datascience": "arn:aws:sagemaker:ap-northeast-2:806072073708:image/datascience-1.0", 58 | "datascience2": "arn:aws:sagemaker:ap-northeast-2:806072073708:image/sagemaker-data-science-38", 59 | "datascience3": "arn:aws:sagemaker:ap-northeast-2:806072073708:image/sagemaker-data-science-310-v1", 60 | "jlabv3": "arn:aws:sagemaker:ap-northeast-2:806072073708:image/jupyter-server-3", 61 | }, 62 | "ap-southeast-1": 
{ 63 | "datascience": "arn:aws:sagemaker:ap-southeast-1:492261229750:image/datascience-1.0", 64 | "datascience2": "arn:aws:sagemaker:ap-southeast-1:492261229750:image/sagemaker-data-science-38", 65 | "datascience3": "arn:aws:sagemaker:ap-southeast-1:492261229750:image/sagemaker-data-science-310-v1", 66 | "jlabv3": "arn:aws:sagemaker:ap-southeast-1:492261229750:image/jupyter-server-3", 67 | }, 68 | "ap-southeast-2": { 69 | "datascience": "arn:aws:sagemaker:ap-southeast-2:452832661640:image/datascience-1.0", 70 | "datascience2": "arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-38", 71 | "datascience3": "arn:aws:sagemaker:ap-southeast-2:452832661640:image/sagemaker-data-science-310-v1", 72 | "jlabv3": "arn:aws:sagemaker:ap-southeast-2:452832661640:image/jupyter-server-3", 73 | }, 74 | "ap-southeast-3": { 75 | "datascience": "arn:aws:sagemaker:ap-southeast-3:276181064229:image/datascience-1.0", 76 | "datascience2": "arn:aws:sagemaker:ap-southeast-3:276181064229:image/sagemaker-data-science-38", 77 | "datascience3": "arn:aws:sagemaker:ap-southeast-3:276181064229:image/sagemaker-data-science-310-v1", 78 | "jlabv3": "arn:aws:sagemaker:ap-southeast-3:276181064229:image/jupyter-server-3", 79 | }, 80 | "ap-northeast-1": { 81 | "datascience": "arn:aws:sagemaker:ap-northeast-1:102112518831:image/datascience-1.0", 82 | "datascience2": "arn:aws:sagemaker:ap-northeast-1:102112518831:image/sagemaker-data-science-38", 83 | "datascience3": "arn:aws:sagemaker:ap-northeast-1:102112518831:image/sagemaker-data-science-310-v1", 84 | "jlabv3": "arn:aws:sagemaker:ap-northeast-1:102112518831:image/jupyter-server-3", 85 | }, 86 | # TODO: ap-northeast-2 and ap-northeast-3 if available? 87 | "ca-central-1": { 88 | "datascience": "arn:aws:sagemaker:ca-central-1:310906938811:image/datascience-1.0", 89 | "datascience2": "arn:aws:sagemaker:ca-central-1:310906938811:image/sagemaker-data-science-38", 90 | "datascience3": "arn:aws:sagemaker:ca-central-1:310906938811:image/sagemaker-data-science-310-v1", 91 | "jlabv3": "arn:aws:sagemaker:ca-central-1:310906938811:image/jupyter-server-3", 92 | }, 93 | "eu-central-1": { 94 | "datascience": "arn:aws:sagemaker:eu-central-1:936697816551:image/datascience-1.0", 95 | "datascience2": "arn:aws:sagemaker:eu-central-1:936697816551:image/sagemaker-data-science-38", 96 | "datascience3": "arn:aws:sagemaker:eu-central-1:936697816551:image/sagemaker-data-science-310-v1", 97 | "jlabv3": "arn:aws:sagemaker:eu-central-1:936697816551:image/jupyter-server-3", 98 | }, 99 | # TODO: eu-central-2 if available? 
100 | "eu-west-1": { 101 | "datascience": "arn:aws:sagemaker:eu-west-1:470317259841:image/datascience-1.0", 102 | "datascience2": "arn:aws:sagemaker:eu-west-1:470317259841:image/sagemaker-data-science-38", 103 | "datascience3": "arn:aws:sagemaker:eu-west-1:470317259841:image/sagemaker-data-science-310-v1", 104 | "jlabv3": "arn:aws:sagemaker:eu-west-1:470317259841:image/jupyter-server-3", 105 | }, 106 | "eu-west-2": { 107 | "datascience": "arn:aws:sagemaker:eu-west-2:712779665605:image/datascience-1.0", 108 | "datascience2": "arn:aws:sagemaker:eu-west-2:712779665605:image/sagemaker-data-science-38", 109 | "datascience3": "arn:aws:sagemaker:eu-west-2:712779665605:image/sagemaker-data-science-310-v1", 110 | "jlabv3": "arn:aws:sagemaker:eu-west-2:712779665605:image/jupyter-server-3", 111 | }, 112 | "eu-west-3": { 113 | "datascience": "arn:aws:sagemaker:eu-west-3:615547856133:image/datascience-1.0", 114 | "datascience2": "arn:aws:sagemaker:eu-west-3:615547856133:image/sagemaker-data-science-38", 115 | "datascience3": "arn:aws:sagemaker:eu-west-3:615547856133:image/sagemaker-data-science-310-v1", 116 | "jlabv3": "arn:aws:sagemaker:eu-west-3:615547856133:image/jupyter-server-3", 117 | }, 118 | "eu-north-1": { 119 | "datascience": "arn:aws:sagemaker:eu-north-1:243637512696:image/datascience-1.0", 120 | "datascience2": "arn:aws:sagemaker:eu-north-1:243637512696:image/sagemaker-data-science-38", 121 | "datascience3": "arn:aws:sagemaker:eu-north-1:243637512696:image/sagemaker-data-science-310-v1", 122 | "jlabv3": "arn:aws:sagemaker:eu-north-1:243637512696:image/jupyter-server-3", 123 | }, 124 | "eu-south-1": { 125 | "datascience": "arn:aws:sagemaker:eu-south-1:592751261982:image/datascience-1.0", 126 | "datascience2": "arn:aws:sagemaker:eu-south-1:592751261982:image/sagemaker-data-science-38", 127 | "datascience3": "arn:aws:sagemaker:eu-south-1:592751261982:image/sagemaker-data-science-310-v1", 128 | "jlabv3": "arn:aws:sagemaker:eu-south-1:592751261982:image/jupyter-server-3", 129 | }, 130 | # TODO: me-central-1 and me-south-1 if available? 
131 | "sa-east-1": { 132 | "datascience": "arn:aws:sagemaker:sa-east-1:782484402741:image/datascience-1.0", 133 | "datascience2": "arn:aws:sagemaker:sa-east-1:782484402741:image/sagemaker-data-science-38", 134 | "datascience3": "arn:aws:sagemaker:sa-east-1:782484402741:image/sagemaker-data-science-310-v1", 135 | "jlabv3": "arn:aws:sagemaker:sa-east-1:782484402741:image/jupyter-server-3", 136 | }, 137 | } 138 | 139 | 140 | class CfnSageMakerAppsByRegionMapping(CfnMapping): 141 | """Construct for a CloudFormation Mapping of common SMStudio app ARNs by region""" 142 | 143 | def __init__( 144 | self, 145 | scope: Construct, 146 | id: str, 147 | *, 148 | lazy: Optional[bool] = None, 149 | ) -> None: 150 | super().__init__(scope, id, lazy=lazy, mapping=STUDIO_APP_ARNS_BY_REGION) 151 | 152 | @property 153 | def supported_regions(self) -> Tuple[str]: 154 | """Alphabetically sorted list of all regions supported in the map""" 155 | return tuple(sorted(STUDIO_APP_ARNS_BY_REGION.keys())) 156 | 157 | @property 158 | def supported_apps(self) -> Tuple[str]: 159 | """Alphabetically sorted list of all Studio app names supported in the map""" 160 | return next(tuple(sorted(vals)) for _, vals in STUDIO_APP_ARNS_BY_REGION) 161 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/fn_studio_lcconfig/main.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """CDK Custom Resource Lambda for a SageMaker Studio Lifecycle Configuration Script 4 | 5 | See `StudioLCCResourceProperties` for expected CloudFormation resource properties. 6 | 7 | CloudFormation Return Values 8 | ---------------------------- 9 | Direct .Ref : 10 | ARN of the created lifecycle configuration script 11 | AppType : 12 | As per resource properties .AppType 13 | Name : 14 | As per resource properties .Name 15 | """ 16 | # Python Built-Ins: 17 | from __future__ import annotations 18 | import json 19 | import logging 20 | import time 21 | from typing import Optional 22 | 23 | logging.getLogger().setLevel(logging.INFO) # Set log level for AWS Lambda *BEFORE* other imports 24 | 25 | # External Dependencies: 26 | import boto3 27 | 28 | # Local Dependencies 29 | from cfn import CustomResourceEvent, CustomResourceRequestType 30 | from sagemaker_util import retry_if_already_updating 31 | 32 | logger = logging.getLogger("main") 33 | smclient = boto3.client("sagemaker") 34 | 35 | 36 | class StudioLCCResourceProperties: 37 | """Parser for CloudFormation resource properties for this Custom Resource 38 | 39 | Resource Properties 40 | ------------------- 41 | 42 | AppType : str 43 | (Required) 'JupyterLab' or 'CodeEditor' for new-style (2024+) SMStudio Spaces, or else 44 | 'JupyterServer' or 'KernelGateway' for SageMaker Studio Classic. 45 | Name : str 46 | (Required) Name of the lifecycle config script to create 47 | Content : str 48 | (Required) Base64-encoded script content, similar to the usage of 49 | `Properties.OnStart[].Content` in AWS::SageMaker::NotebookInstanceLifecycleConfig 50 | Tags : Optional[List[Dict['Key': str, 'Value': str]]] 51 | Optional AWS resource tags 52 | DomainId : Optional[str] 53 | Optional SageMaker Studio Domain ID to associate the script to. (You usually need to attach 54 | the script to a domain if you want to use it!). 
55 | """ 56 | 57 | app_type: str 58 | content: str 59 | name: str 60 | domain_id: Optional[str] 61 | tags: Optional[dict] 62 | 63 | def __init__(self, resource_properties: dict): 64 | self.app_type = resource_properties["AppType"] 65 | self.content = resource_properties["Content"] 66 | self.name = resource_properties["Name"] 67 | self.domain_id = resource_properties.get("DomainId") 68 | self.tags = resource_properties.get("Tags", []) 69 | 70 | def __str__(self): 71 | dict_val = { 72 | "AppType": self.app_type, 73 | "Content": self.content, 74 | "Name": self.name, 75 | "Tags": self.tags, 76 | } 77 | if self.domain_id: 78 | dict_val["DomainId"] = self.domain_id 79 | return json.dumps(dict_val) 80 | 81 | @classmethod 82 | def from_str(cls, str_val) -> StudioLCCResourceProperties: 83 | return cls(json.loads(str_val)) 84 | 85 | 86 | def lambda_handler(event_raw: dict, context: dict): 87 | """Main entry point for (CDK) Custom Resource Lambda""" 88 | logger.info(event_raw) 89 | event = CustomResourceEvent(event_raw, StudioLCCResourceProperties) 90 | if event.request_type == CustomResourceRequestType.create: 91 | return handle_create(event, context) 92 | elif event.request_type == CustomResourceRequestType.update: 93 | return handle_update(event, context) 94 | elif event.request_type == CustomResourceRequestType.delete: 95 | return handle_delete(event, context) 96 | else: 97 | raise ValueError(f"Unsupported CFn RequestType '{event_raw['RequestType']}'") 98 | 99 | 100 | def handle_create(event: CustomResourceEvent[StudioLCCResourceProperties], context: dict): 101 | logger.info("**Received create request") 102 | 103 | logger.info("**Creating lifecycle config script") 104 | resp = smclient.create_studio_lifecycle_config( 105 | StudioLifecycleConfigName=event.props.name, 106 | StudioLifecycleConfigContent=event.props.content, 107 | StudioLifecycleConfigAppType=event.props.app_type, 108 | Tags=event.props.tags or [], 109 | ) 110 | script_arn = resp["StudioLifecycleConfigArn"] 111 | domain_id = event.props.domain_id 112 | if domain_id is not None: 113 | try: 114 | attach_lcc_to_domain( 115 | domain_id=domain_id, 116 | script_arn=script_arn, 117 | app_type=event.props.app_type, 118 | ) 119 | except Exception as e: 120 | # If creation succeeded but attachment failed, send explicit fail response to try and 121 | # make sure the physical resource ID is set correctly and therefore enable rollback of 122 | # the resource: 123 | logger.exception("Failed to attach LCC to SM domain") 124 | raise e 125 | 126 | return { 127 | "PhysicalResourceId": script_arn, 128 | "Data": { 129 | "AppType": event.props.app_type, 130 | "Name": event.props.name, 131 | }, 132 | } 133 | 134 | 135 | def handle_delete(event: CustomResourceEvent[StudioLCCResourceProperties], context: dict): 136 | logger.info("**Received delete event") 137 | lcc_id = event.physical_id 138 | lcc_name = lcc_id.rpartition("/")[2] 139 | 140 | domain_id = event.props.domain_id 141 | app_type = event.props.app_type 142 | if domain_id is not None and app_type is not None: 143 | try: 144 | remove_lcc_from_domain(domain_id=domain_id, script_arn=lcc_id, app_type=app_type) 145 | except: 146 | logger.exception("Failed to detach LCC from domain - trying to delete LCC anyway...") 147 | 148 | try: 149 | logger.info(f"Deleting lifecycle config script {lcc_name}") 150 | smclient.delete_studio_lifecycle_config(StudioLifecycleConfigName=lcc_name) 151 | except smclient.exceptions.ResourceNotFound: 152 | pass 153 | 154 | # Already does not exist -> deletion success 155 | 
return {
156 | "PhysicalResourceId": lcc_id,
157 | "Data": {},
158 | }
159 |
160 |
161 | def handle_update(event: CustomResourceEvent[StudioLCCResourceProperties], context: dict):
162 | logger.info("**Received update event")
163 |
164 | script_location_modified = not (
165 | (event.props.name == event.old_props.name)
166 | and (event.props.app_type == event.old_props.app_type)
167 | )
168 | script_modified = script_location_modified or not (
169 | (event.props.content == event.old_props.content)
170 | )
171 | new_domain = event.props.domain_id
172 | old_domain = event.old_props.domain_id
173 |
174 | if old_domain and (script_location_modified or (new_domain != old_domain)):
175 | remove_lcc_from_domain(
176 | domain_id=old_domain,
177 | script_arn=event.physical_id,
178 | app_type=event.old_props.app_type,
179 | )
180 |
181 | if script_modified:
182 | # For any modification we have to replace the script:
183 | try:
184 | old_name = event.old_props.name
185 | logger.info(f"Deleting lifecycle config script {old_name}")
186 | smclient.delete_studio_lifecycle_config(StudioLifecycleConfigName=old_name)
187 | except smclient.exceptions.ResourceNotFound:
188 | pass
189 | resp = smclient.create_studio_lifecycle_config(
190 | StudioLifecycleConfigName=event.props.name,
191 | StudioLifecycleConfigContent=event.props.content,
192 | StudioLifecycleConfigAppType=event.props.app_type,
193 | Tags=event.props.tags or [],
194 | )
195 |
196 | if new_domain and (script_location_modified or (new_domain != old_domain)):
197 | attach_lcc_to_domain(
198 | domain_id=new_domain,
199 | script_arn=resp["StudioLifecycleConfigArn"] if script_modified else event.physical_id,
200 | app_type=event.props.app_type,
201 | )
202 |
203 | return {
204 | "PhysicalResourceId": resp["StudioLifecycleConfigArn"] if script_modified else event.physical_id,
205 | "Data": {
206 | "AppType": event.props.app_type,
207 | "Name": event.props.name,
208 | },
209 | }
210 |
211 |
212 | def attach_lcc_to_domain(domain_id: str, script_arn: str, app_type: str):
213 | domain_desc = smclient.describe_domain(DomainId=domain_id)
214 |
215 | default_settings = domain_desc["DefaultUserSettings"]
216 |
217 | app_settings_field = f"{app_type}AppSettings" # e.g. "JupyterServerAppSettings"
218 | if not default_settings.get(app_settings_field):
219 | default_settings[app_settings_field] = {}
220 | if not default_settings[app_settings_field].get("LifecycleConfigArns"):
221 | default_settings[app_settings_field]["LifecycleConfigArns"] = []
222 |
223 | default_scripts = default_settings[app_settings_field]["LifecycleConfigArns"]
224 | if script_arn not in default_scripts:
225 | logger.info(f"Adding script to domain:\n{script_arn}")
226 | default_scripts.append(script_arn)
227 | retry_if_already_updating(
228 | lambda: smclient.update_domain(
229 | DomainId=domain_id,
230 | DefaultUserSettings=default_settings,
231 | ),
232 | )
233 | time.sleep(10)
234 | else:
235 | logger.info(f"Script already default on domain:\n{script_arn}")
236 |
237 |
238 | def remove_lcc_from_domain(domain_id: str, script_arn: str, app_type: str):
239 | domain_desc = smclient.describe_domain(DomainId=domain_id)
240 |
241 | default_settings = domain_desc["DefaultUserSettings"]
242 |
243 | app_settings_field = f"{app_type}AppSettings" # e.g.
"JupyterServerAppSettings" 244 | if not default_settings.get(app_settings_field): 245 | default_settings[app_settings_field] = {} 246 | if not default_settings[app_settings_field].get("LifecycleConfigArns"): 247 | default_settings[app_settings_field]["LifecycleConfigArns"] = [] 248 | 249 | default_scripts = default_settings[app_settings_field]["LifecycleConfigArns"] 250 | if script_arn in default_scripts: 251 | logger.info(f"Removing script from domain:\n{script_arn}") 252 | default_scripts.remove(script_arn) 253 | retry_if_already_updating( 254 | lambda: smclient.update_domain( 255 | DomainId=domain_id, 256 | DefaultUserSettings=default_settings, 257 | ), 258 | ) 259 | time.sleep(10) 260 | else: 261 | logger.info("Script already deleted from domain:\n{script_arn}") 262 | -------------------------------------------------------------------------------- /migration_challenge/keras_mnist/util/draw.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """An ipycanvas-based interactive widget for drawing PIL-compatible doodles in JupyterLab 4 | """ 5 | 6 | # Python Built-Ins: 7 | from math import floor 8 | from typing import Tuple, Union 9 | 10 | # External Dependenices: 11 | import numpy as np 12 | from ipycanvas import Canvas, hold_canvas 13 | from IPython.display import display 14 | from ipywidgets import HTML, Button, Layout, Output, VBox 15 | from matplotlib.colors import to_hex, to_rgb 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | class ValidatedColor: 20 | """Canvas expects different color repr from PIL/image, so this class stores both""" 21 | 22 | hexa: str 23 | np_8bit: np.ndarray 24 | 25 | def __init__(self, color: Union[Tuple[float], np.ndarray]): 26 | self.set_color(color) 27 | 28 | def set_color(self, color: Union[Tuple[float], np.ndarray]): 29 | """Use this method to update all stored representations at once""" 30 | self.hexa = to_hex(color) 31 | self.np_8bit = (255 * np.array(to_rgb(color))).astype(int) 32 | 33 | 34 | class PixelDrawCanvas: 35 | """JupyterLab widget to interactively draw on a canvas and export the pixel data to Python 36 | 37 | This widget maintains a buffer of pixel values and draws individual pixel rects to canvas (in 38 | batches, at least) to canvas on each mouse event... More toy/demo than an optimized design! 39 | 40 | Usage 41 | ----- 42 | After creating the PixelDrawCanvas you can either call `.display()` to directly display it in 43 | the notebook, or access the `.widget` property if you want to embed the UI it in another 44 | ipywidgets widget. 45 | 46 | Draw on the canvas by clicking and dragging, or press the "Clear" button to start again. 47 | 48 | You can read the 0-255, 3-channel (height, width, 3) pixel data numpy array from `.data`. 49 | `matplotlib.pyplot.imshow(data)` should confirm that what you see in the widget matches this. 50 | 51 | You can also programmatically `.clear()` the drawing from Python if you like. 52 | """ 53 | 54 | def __init__( 55 | self, 56 | width: int = 28, 57 | height: int = 28, 58 | color_bg: Tuple[float, float, float] = (0, 0, 0), 59 | color_fg: Tuple[float, float, float] = (1.0, 1.0, 1.0), 60 | pen_size: int = 3, 61 | title_html: str = "
Draw a digit!
", 62 | ): 63 | """Create a PixelDrawCanvas""" 64 | self.col_bg = ValidatedColor(color_bg) 65 | self.col_fg = ValidatedColor(color_fg) 66 | 67 | # -- Create individual widget components: 68 | self.canvas = Canvas(width=width, height=height, image_smoothing_enabled=False) 69 | # (Without explicit canvas.layout width, VBox/HBox fills full available width) 70 | self.canvas.layout.height = f"{max(200, min(1000, height))}px" 71 | self.canvas.layout.width = f"{max(200, min(1000, width))}px" 72 | self.canvas.image_smoothing_enabled = False 73 | self._clear_button = Button( 74 | description="Clear", 75 | icon="eraser", 76 | tooltip="Clear the drawing to a blank image", 77 | ) 78 | self._console = Output( 79 | layout=Layout( 80 | max_height="140px", 81 | overflow_y="auto", 82 | ) 83 | ) 84 | self._title = HTML(title_html) 85 | 86 | # -- Initialize state: 87 | self.is_drawing = False 88 | # (Temporary data __init__ to be overridden by clear() shortly:) 89 | self.data = np.zeros((height, width, 3)) 90 | self.set_pen(pen_size=pen_size) 91 | 92 | # -- Set up listeners: 93 | # Wrap widget event listener member functions so they have access to this `self` instance 94 | # when called and are also able to `print()` to the console output if needed. 95 | @self._console.capture() 96 | def on_mouse_down(*args, **kwargs): 97 | return self._on_mouse_down(*args, **kwargs) 98 | 99 | @self._console.capture() 100 | def on_mouse_move(*args, **kwargs): 101 | return self._on_mouse_move(*args, **kwargs) 102 | 103 | @self._console.capture() 104 | def on_mouse_out(*args, **kwargs): 105 | return self._on_mouse_out(*args, **kwargs) 106 | 107 | @self._console.capture() 108 | def on_mouse_up(*args, **kwargs): 109 | return self._on_mouse_up(*args, **kwargs) 110 | 111 | @self._console.capture() 112 | def on_clear_click(*args, **kwargs): 113 | return self.clear() 114 | 115 | self.canvas.on_mouse_down(on_mouse_down) 116 | self.canvas.on_mouse_move(on_mouse_move) 117 | self.canvas.on_mouse_out(on_mouse_out) 118 | self.canvas.on_mouse_up(on_mouse_up) 119 | self._clear_button.on_click(on_clear_click) 120 | 121 | # Set up composite view with the different widget components: 122 | self.widget = VBox( 123 | [self._title, self._clear_button, self.canvas, self._console], 124 | width=f"{width}px", 125 | ) 126 | 127 | # Finally initialize to clear state ready to use: 128 | with self._console: 129 | self.clear() 130 | 131 | def clear(self): 132 | """Clear the drawing""" 133 | height = self.canvas.height 134 | width = self.canvas.width 135 | with hold_canvas(self.canvas): 136 | self.canvas.clear() 137 | self.canvas.fill_style = self.col_bg.hexa 138 | self.canvas.fill_rect(0, 0, width, height) 139 | self.canvas.fill_style = self.col_fg.hexa 140 | self.data = np.tile(self.col_bg.np_8bit, (height, width, 1)) 141 | print("Cleared drawing") 142 | 143 | def draw_from_buffer(self): 144 | """Draw the contents of the .data buffer to the canvas 145 | 146 | This reproduces steps from clear() instead of calling it internally, to avoid flicker. Only 147 | pixels of the current col_fg in the buffer will be drawn (doesn't support changing col_fg 148 | dynamically or drawing multiple colors). 
149 | """ 150 | height = self.canvas.height 151 | width = self.canvas.width 152 | fg_mask = (self.data == np.expand_dims(self.col_fg.np_8bit, (0, 1))).all(-1) 153 | with hold_canvas(self.canvas): 154 | self.canvas.clear() 155 | self.canvas.fill_style = self.col_bg.hexa 156 | self.canvas.fill_rect(0, 0, width, height) 157 | self.canvas.fill_style = self.col_fg.hexa 158 | fg_coords = np.argwhere(fg_mask) # N entries of (x, y) pairs 159 | self.canvas.fill_rects(fg_coords[:, 1], fg_coords[:, 0], 1, 1) 160 | 161 | def display(self): 162 | """Display the widget (in a Jupyter/Lab notebook)""" 163 | display(self.widget) 164 | 165 | def _on_mouse_down(self, x, y): 166 | self.is_drawing = True 167 | self.paint(x, y) 168 | 169 | def _on_mouse_move(self, x, y): 170 | if self.is_drawing: 171 | self.paint(x, y) 172 | 173 | def _on_mouse_out(self, x, y): 174 | """Re-draw from data buffer on each mouse-out in case anything weird happened""" 175 | self.is_drawing = False 176 | self.draw_from_buffer() 177 | 178 | def _on_mouse_up(self, x, y): 179 | self.is_drawing = False 180 | 181 | def set_pen(self, pen_size: int = 15) -> np.ndarray: 182 | """Set up the pen/brush (define pen_mask matrix) 183 | 184 | We pre-calculate and store a boolean `.pen_mask` matrix for the requested brush size (and 185 | assumed circular shape). If you wanted, you could set other whacky shapes by replacing your 186 | own boolean matrix (True where the pen marks, False where it doesn't). 187 | 188 | Returns 189 | ------- 190 | pen_mask : 191 | The same boolean 2D matrix this function saves to `self.pen_mask`. 192 | """ 193 | # No sense re-inventing the "pixellated circle" wheel, so use PIL: 194 | mask_img = Image.new("1", (pen_size, pen_size)) 195 | draw = ImageDraw.Draw(mask_img) 196 | draw.ellipse((0, 0, pen_size - 1, pen_size - 1), fill="white") 197 | self.pen_mask = np.array(mask_img) # (pen_size, pen_size) boolean array 198 | return self.pen_mask 199 | 200 | def paint(self, x, y): 201 | """Mark the given location with the current pen""" 202 | # Truncate the current pen mask if required (if location is close to edge of image): 203 | x_floor = floor(x) 204 | y_floor = floor(y) 205 | 206 | pen_mask = self.pen_mask 207 | x_maskstart = floor(x - (pen_mask.shape[1] / 2)) 208 | if x_maskstart < 0: 209 | pen_mask = pen_mask[:, -x_maskstart:] # Truncate left of pen 210 | x_maskstart = 0 211 | x_pixelsafter = self.data.shape[1] - (x_maskstart + pen_mask.shape[1]) 212 | if x_pixelsafter < 0: 213 | pen_mask = pen_mask[:, :x_pixelsafter] # Truncate right of pen 214 | x_pixelsafter = 0 215 | 216 | y_maskstart = floor(y - (pen_mask.shape[0] / 2)) 217 | if y_maskstart < 0: 218 | pen_mask = pen_mask[-y_maskstart:, :] # Truncate top of pen 219 | y_maskstart = 0 220 | y_pixelsafter = self.data.shape[0] - (y_maskstart + pen_mask.shape[0]) 221 | if y_pixelsafter < 0: 222 | pen_mask = pen_mask[:y_pixelsafter, :] # Truncate bottom of pen 223 | y_pixelsafter = 0 224 | 225 | x_maskend = x_maskstart + pen_mask.shape[1] 226 | y_maskend = y_maskstart + pen_mask.shape[0] 227 | 228 | # Check which pixels will be actually updated to avoid drawing unnecessary canvas rects: 229 | new_fg_pixels_offset = np.argwhere( 230 | pen_mask 231 | & ( 232 | self.data[ 233 | y_maskstart:(y_maskstart + pen_mask.shape[0]), 234 | x_maskstart:(x_maskstart + pen_mask.shape[1]), 235 | :, 236 | ] 237 | != np.expand_dims(self.col_fg.np_8bit, (0, 1)) 238 | ).all(-1) 239 | ) 240 | 241 | # Update the data buffer: 242 | full_mask = np.zeros_like(self.data) 243 | 
full_mask[y_maskstart:y_maskend, x_maskstart:x_maskend, :] = np.expand_dims(pen_mask, -1) 244 | self.data = np.where(full_mask, self.col_fg.np_8bit, self.data) 245 | 246 | # Draw the canvas updates: 247 | with hold_canvas(self.canvas): 248 | self.canvas.fill_style = self.col_fg.hexa 249 | self.canvas.fill_rects( 250 | new_fg_pixels_offset[:, 1] + x_maskstart, 251 | new_fg_pixels_offset[:, 0] + y_maskstart, 252 | 1, 253 | 1, 254 | ) 255 | self.canvas.fill_rect(x_floor, y_floor, 1, 1) 256 | -------------------------------------------------------------------------------- /migration_challenge/pytorch_mnist/util/draw.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """An ipycanvas-based interactive widget for drawing PIL-compatible doodles in JupyterLab 4 | """ 5 | 6 | # Python Built-Ins: 7 | from math import floor 8 | from typing import Tuple, Union 9 | 10 | # External Dependenices: 11 | import numpy as np 12 | from ipycanvas import Canvas, hold_canvas 13 | from IPython.display import display 14 | from ipywidgets import HTML, Button, Layout, Output, VBox 15 | from matplotlib.colors import to_hex, to_rgb 16 | from PIL import Image, ImageDraw 17 | 18 | 19 | class ValidatedColor: 20 | """Canvas expects different color repr from PIL/image, so this class stores both""" 21 | 22 | hexa: str 23 | np_8bit: np.ndarray 24 | 25 | def __init__(self, color: Union[Tuple[float], np.ndarray]): 26 | self.set_color(color) 27 | 28 | def set_color(self, color: Union[Tuple[float], np.ndarray]): 29 | """Use this method to update all stored representations at once""" 30 | self.hexa = to_hex(color) 31 | self.np_8bit = (255 * np.array(to_rgb(color))).astype(int) 32 | 33 | 34 | class PixelDrawCanvas: 35 | """JupyterLab widget to interactively draw on a canvas and export the pixel data to Python 36 | 37 | This widget maintains a buffer of pixel values and draws individual pixel rects to canvas (in 38 | batches, at least) to canvas on each mouse event... More toy/demo than an optimized design! 39 | 40 | Usage 41 | ----- 42 | After creating the PixelDrawCanvas you can either call `.display()` to directly display it in 43 | the notebook, or access the `.widget` property if you want to embed the UI it in another 44 | ipywidgets widget. 45 | 46 | Draw on the canvas by clicking and dragging, or press the "Clear" button to start again. 47 | 48 | You can read the 0-255, 3-channel (height, width, 3) pixel data numpy array from `.data`. 49 | `matplotlib.pyplot.imshow(data)` should confirm that what you see in the widget matches this. 50 | 51 | You can also programmatically `.clear()` the drawing from Python if you like. 52 | """ 53 | 54 | def __init__( 55 | self, 56 | width: int = 28, 57 | height: int = 28, 58 | color_bg: Tuple[float, float, float] = (0, 0, 0), 59 | color_fg: Tuple[float, float, float] = (1.0, 1.0, 1.0), 60 | pen_size: int = 3, 61 | title_html: str = "
Draw a digit!
", 62 | ): 63 | """Create a PixelDrawCanvas""" 64 | self.col_bg = ValidatedColor(color_bg) 65 | self.col_fg = ValidatedColor(color_fg) 66 | 67 | # -- Create individual widget components: 68 | self.canvas = Canvas(width=width, height=height, image_smoothing_enabled=False) 69 | # (Without explicit canvas.layout width, VBox/HBox fills full available width) 70 | self.canvas.layout.height = f"{max(200, min(1000, height))}px" 71 | self.canvas.layout.width = f"{max(200, min(1000, width))}px" 72 | self.canvas.image_smoothing_enabled = False 73 | self._clear_button = Button( 74 | description="Clear", 75 | icon="eraser", 76 | tooltip="Clear the drawing to a blank image", 77 | ) 78 | self._console = Output( 79 | layout=Layout( 80 | max_height="140px", 81 | overflow_y="auto", 82 | ) 83 | ) 84 | self._title = HTML(title_html) 85 | 86 | # -- Initialize state: 87 | self.is_drawing = False 88 | # (Temporary data __init__ to be overridden by clear() shortly:) 89 | self.data = np.zeros((height, width, 3)) 90 | self.set_pen(pen_size=pen_size) 91 | 92 | # -- Set up listeners: 93 | # Wrap widget event listener member functions so they have access to this `self` instance 94 | # when called and are also able to `print()` to the console output if needed. 95 | @self._console.capture() 96 | def on_mouse_down(*args, **kwargs): 97 | return self._on_mouse_down(*args, **kwargs) 98 | 99 | @self._console.capture() 100 | def on_mouse_move(*args, **kwargs): 101 | return self._on_mouse_move(*args, **kwargs) 102 | 103 | @self._console.capture() 104 | def on_mouse_out(*args, **kwargs): 105 | return self._on_mouse_out(*args, **kwargs) 106 | 107 | @self._console.capture() 108 | def on_mouse_up(*args, **kwargs): 109 | return self._on_mouse_up(*args, **kwargs) 110 | 111 | @self._console.capture() 112 | def on_clear_click(*args, **kwargs): 113 | return self.clear() 114 | 115 | self.canvas.on_mouse_down(on_mouse_down) 116 | self.canvas.on_mouse_move(on_mouse_move) 117 | self.canvas.on_mouse_out(on_mouse_out) 118 | self.canvas.on_mouse_up(on_mouse_up) 119 | self._clear_button.on_click(on_clear_click) 120 | 121 | # Set up composite view with the different widget components: 122 | self.widget = VBox( 123 | [self._title, self._clear_button, self.canvas, self._console], 124 | width=f"{width}px", 125 | ) 126 | 127 | # Finally initialize to clear state ready to use: 128 | with self._console: 129 | self.clear() 130 | 131 | def clear(self): 132 | """Clear the drawing""" 133 | height = self.canvas.height 134 | width = self.canvas.width 135 | with hold_canvas(self.canvas): 136 | self.canvas.clear() 137 | self.canvas.fill_style = self.col_bg.hexa 138 | self.canvas.fill_rect(0, 0, width, height) 139 | self.canvas.fill_style = self.col_fg.hexa 140 | self.data = np.tile(self.col_bg.np_8bit, (height, width, 1)) 141 | print("Cleared drawing") 142 | 143 | def draw_from_buffer(self): 144 | """Draw the contents of the .data buffer to the canvas 145 | 146 | This reproduces steps from clear() instead of calling it internally, to avoid flicker. Only 147 | pixels of the current col_fg in the buffer will be drawn (doesn't support changing col_fg 148 | dynamically or drawing multiple colors). 
149 | """ 150 | height = self.canvas.height 151 | width = self.canvas.width 152 | fg_mask = (self.data == np.expand_dims(self.col_fg.np_8bit, (0, 1))).all(-1) 153 | with hold_canvas(self.canvas): 154 | self.canvas.clear() 155 | self.canvas.fill_style = self.col_bg.hexa 156 | self.canvas.fill_rect(0, 0, width, height) 157 | self.canvas.fill_style = self.col_fg.hexa 158 | fg_coords = np.argwhere(fg_mask) # N entries of (x, y) pairs 159 | self.canvas.fill_rects(fg_coords[:, 1], fg_coords[:, 0], 1, 1) 160 | 161 | def display(self): 162 | """Display the widget (in a Jupyter/Lab notebook)""" 163 | display(self.widget) 164 | 165 | def _on_mouse_down(self, x, y): 166 | self.is_drawing = True 167 | self.paint(x, y) 168 | 169 | def _on_mouse_move(self, x, y): 170 | if self.is_drawing: 171 | self.paint(x, y) 172 | 173 | def _on_mouse_out(self, x, y): 174 | """Re-draw from data buffer on each mouse-out in case anything weird happened""" 175 | self.is_drawing = False 176 | self.draw_from_buffer() 177 | 178 | def _on_mouse_up(self, x, y): 179 | self.is_drawing = False 180 | 181 | def set_pen(self, pen_size: int = 15) -> np.ndarray: 182 | """Set up the pen/brush (define pen_mask matrix) 183 | 184 | We pre-calculate and store a boolean `.pen_mask` matrix for the requested brush size (and 185 | assumed circular shape). If you wanted, you could set other whacky shapes by replacing your 186 | own boolean matrix (True where the pen marks, False where it doesn't). 187 | 188 | Returns 189 | ------- 190 | pen_mask : 191 | The same boolean 2D matrix this function saves to `self.pen_mask`. 192 | """ 193 | # No sense re-inventing the "pixellated circle" wheel, so use PIL: 194 | mask_img = Image.new("1", (pen_size, pen_size)) 195 | draw = ImageDraw.Draw(mask_img) 196 | draw.ellipse((0, 0, pen_size - 1, pen_size - 1), fill="white") 197 | self.pen_mask = np.array(mask_img) # (pen_size, pen_size) boolean array 198 | return self.pen_mask 199 | 200 | def paint(self, x, y): 201 | """Mark the given location with the current pen""" 202 | # Truncate the current pen mask if required (if location is close to edge of image): 203 | x_floor = floor(x) 204 | y_floor = floor(y) 205 | 206 | pen_mask = self.pen_mask 207 | x_maskstart = floor(x - (pen_mask.shape[1] / 2)) 208 | if x_maskstart < 0: 209 | pen_mask = pen_mask[:, -x_maskstart:] # Truncate left of pen 210 | x_maskstart = 0 211 | x_pixelsafter = self.data.shape[1] - (x_maskstart + pen_mask.shape[1]) 212 | if x_pixelsafter < 0: 213 | pen_mask = pen_mask[:, :x_pixelsafter] # Truncate right of pen 214 | x_pixelsafter = 0 215 | 216 | y_maskstart = floor(y - (pen_mask.shape[0] / 2)) 217 | if y_maskstart < 0: 218 | pen_mask = pen_mask[-y_maskstart:, :] # Truncate top of pen 219 | y_maskstart = 0 220 | y_pixelsafter = self.data.shape[0] - (y_maskstart + pen_mask.shape[0]) 221 | if y_pixelsafter < 0: 222 | pen_mask = pen_mask[:y_pixelsafter, :] # Truncate bottom of pen 223 | y_pixelsafter = 0 224 | 225 | x_maskend = x_maskstart + pen_mask.shape[1] 226 | y_maskend = y_maskstart + pen_mask.shape[0] 227 | 228 | # Check which pixels will be actually updated to avoid drawing unnecessary canvas rects: 229 | new_fg_pixels_offset = np.argwhere( 230 | pen_mask 231 | & ( 232 | self.data[ 233 | y_maskstart:(y_maskstart + pen_mask.shape[0]), 234 | x_maskstart:(x_maskstart + pen_mask.shape[1]), 235 | :, 236 | ] 237 | != np.expand_dims(self.col_fg.np_8bit, (0, 1)) 238 | ).all(-1) 239 | ) 240 | 241 | # Update the data buffer: 242 | full_mask = np.zeros_like(self.data) 243 | 
full_mask[y_maskstart:y_maskend, x_maskstart:x_maskend, :] = np.expand_dims(pen_mask, -1) 244 | self.data = np.where(full_mask, self.col_fg.np_8bit, self.data) 245 | 246 | # Draw the canvas updates: 247 | with hold_canvas(self.canvas): 248 | self.canvas.fill_style = self.col_fg.hexa 249 | self.canvas.fill_rects( 250 | new_fg_pixels_offset[:, 1] + x_maskstart, 251 | new_fg_pixels_offset[:, 0] + y_maskstart, 252 | 1, 253 | 1, 254 | ) 255 | self.canvas.fill_rect(x_floor, y_floor, 1, 1) 256 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/user/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """AWS CDK constructs for creating SageMaker Studio Users with advanced configuration options 4 | """ 5 | # Python Built-Ins: 6 | import os 7 | from typing import Any, Dict, Optional, Sequence, Union 8 | 9 | # External Dependencies: 10 | from aws_cdk import CustomResource, Duration, RemovalPolicy, Stack 11 | import aws_cdk.aws_ec2 as aws_ec2 12 | import aws_cdk.aws_iam as aws_iam 13 | import aws_cdk.aws_kms as aws_kms 14 | from aws_cdk.aws_lambda import ILayerVersion, Runtime as LambdaRuntime 15 | from aws_cdk.aws_lambda_python_alpha import PythonFunction 16 | import aws_cdk.aws_logs as aws_logs 17 | import aws_cdk.custom_resources as cr 18 | from constructs import Construct 19 | 20 | # Local Dependencies: 21 | from ..region_config import CfnSageMakerAppsByRegionMapping 22 | 23 | 24 | LAMBDA_PATH = os.path.join(os.path.dirname(__file__), "fn_user") 25 | 26 | 27 | class SMStudioUserCustomResourceProvider(cr.Provider): 28 | """Provider (AWS Lambda) for a CFn Custom Resource for SMStudio User Profile 29 | 30 | If you're only creating one LCC in your stack, you probably don't need to create this 31 | explicitly: Just use `SageMakerStudioUser` direct. 32 | """ 33 | 34 | def __init__( 35 | self, 36 | scope: Construct, 37 | id: str, 38 | smcr_helper_layer: ILayerVersion, 39 | *, 40 | eligible_domain_execution_role_arns: Optional[str] = None, 41 | log_retention: Optional[aws_logs.RetentionDays] = None, 42 | provider_function_env_encryption: Optional[aws_kms.IKey] = None, 43 | provider_function_name: Optional[str] = None, 44 | role: Optional[aws_iam.IRole] = None, 45 | security_groups: Optional[Sequence[aws_ec2.ISecurityGroup]] = None, 46 | total_timeout: Optional[Duration] = None, 47 | vpc: Optional[aws_ec2.IVpc] = None, 48 | vpc_subnets: Optional[Union[aws_ec2.SubnetSelection, Dict[str, Any]]] = None, 49 | ) -> None: 50 | """Create a SMStudioUserCustomResourceProvider 51 | 52 | Most parameters are as per parent aws_cdk.custom_resources.Provider, with the below 53 | exceptions: 54 | 55 | Parameters 56 | ---------- 57 | smcr_helper_layer : 58 | Shared Lambda layer with helper functions for SageMaker custom resources (see 59 | `cr_lambda_common`) 60 | eligible_domain_execution_role_arns : 61 | Set this optional ARN pattern to restrict the iam:PassRole permissions of the provider 62 | to a particular SageMaker Execution Role or wildcard pattern. By default (`None`), the 63 | provider will be created with permission to create Domains using any IAM Role 64 | role : 65 | By default, we'll create a role with required SageMaker and IAM accesses. If you 66 | provide your own role, you'll need to ensure these permissions are set up. 
This role is 67 | used for the Custom Resource event handler function, not the CDK CR framework function. 68 | """ 69 | if not role: 70 | role = aws_iam.Role( 71 | scope, 72 | "SMUserProviderRole", 73 | assumed_by=aws_iam.ServicePrincipal("lambda.amazonaws.com"), 74 | description=( 75 | "Execution role for CFN Custom Resource Lambda providing SageMaker Studio " 76 | "User Profiles" 77 | ), 78 | inline_policies={ 79 | "SageMakerLCCAdmin": aws_iam.PolicyDocument( 80 | statements=[ 81 | aws_iam.PolicyStatement( 82 | actions=[ 83 | "sagemaker:CreateUserProfile", 84 | "sagemaker:DeleteUserProfile", 85 | "sagemaker:DescribeUserProfile", 86 | ], 87 | resources=["*"], 88 | ), 89 | aws_iam.PolicyStatement( 90 | actions=["iam:PassRole"], 91 | resources=[eligible_domain_execution_role_arns or "*"], 92 | ), 93 | ], 94 | ), 95 | }, 96 | managed_policies=[ 97 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 98 | "service-role/AWSLambdaBasicExecutionRole", 99 | ), 100 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 101 | "AWSXRayDaemonWriteAccess", 102 | ), 103 | ], 104 | ) 105 | if not smcr_helper_layer: 106 | raise ValueError("smcr_helper_layer is required") 107 | on_event_handler = PythonFunction( 108 | scope, 109 | "SMUserEventHandler", 110 | description=("CFn custom resource handler to create SageMaker Studio User Profiles"), 111 | entry=LAMBDA_PATH, 112 | environment_encryption=provider_function_env_encryption, 113 | index="main.py", 114 | handler="lambda_handler", 115 | layers=[smcr_helper_layer], 116 | memory_size=128, 117 | role=role, 118 | runtime=LambdaRuntime.PYTHON_3_12, 119 | timeout=Duration.minutes(10), # Can take some time to wait for create/delete 120 | vpc=vpc, 121 | vpc_subnets=vpc_subnets, 122 | ) 123 | super().__init__( 124 | scope, 125 | id, 126 | on_event_handler=on_event_handler, 127 | log_retention=log_retention, 128 | provider_function_env_encryption=provider_function_env_encryption, 129 | provider_function_name=provider_function_name, 130 | security_groups=security_groups, 131 | total_timeout=total_timeout, 132 | vpc=vpc, 133 | vpc_subnets=vpc_subnets, 134 | ) 135 | 136 | 137 | class SageMakerStudioUser(CustomResource): 138 | """AWS CDK Construct for a SageMaker Studio User Profile with additional features 139 | 140 | Unlike the CDK's built-in construct for a SMStudio User, this construct is backed by a Custom 141 | Resource Lambda and: 142 | - Exposes the EFS POSIX user ID mapped for the created SageMaker Studio user profile 143 | """ 144 | 145 | def __init__( 146 | self, 147 | scope: Construct, 148 | id: str, 149 | app_arn_map: CfnSageMakerAppsByRegionMapping, 150 | domain_id: str, 151 | name: str, 152 | role_arn: str, # TODO: Support default role creation? 153 | *, 154 | lcc_classic_arn: Optional[str] = None, 155 | lcc_jupyterlab_arn: Optional[str] = None, 156 | provider: Optional[SMStudioUserCustomResourceProvider] = None, 157 | removal_policy: Optional[RemovalPolicy] = None, 158 | resource_type: str = "Custom::SageMakerStudioUserProfile", 159 | smcr_helper_layer: Optional[ILayerVersion] = None, 160 | ) -> None: 161 | """Create a SageMakerStudioUser 162 | 163 | Parameters 164 | ---------- 165 | app_arn_map : 166 | CFn mapping by AWS Region containing "jlabv3" default (classic) SageMaker Studio 167 | JupyterServer app image. See `..smstudio.region_config.STUDIO_APP_ARNS_BY_REGION`.
168 | domain_id : 169 | SageMaker Studio Domain ID to create the User Profile in 170 | name : 171 | (Domain-unique) name of the user profile 172 | role_arn : 173 | ARN of the SageMaker execution role to assign the user (which dictates their 174 | permissions once logged in to the notebook environment) 175 | lcc_classic_arn : 176 | Optional JupyterServer (classic) LifeCycle Configuration Script to enable for the user. 177 | lcc_jupyterlab_arn : 178 | Optional (new-style) JupyterLab space LifeCycle Configuration Script to enable for the 179 | user. 180 | removal_policy : 181 | Optional CDK `RemovalPolicy` to apply to the created user profile custom resource 182 | (e.g. `RemovalPolicy.DESTROY`). 183 | provider : 184 | Optional `SMStudioUserCustomResourceProvider` if you'd like to customize provider 185 | configuration or re-use the Custom Resource Lambda across multiple users in your CDK app 186 | smcr_helper_layer : 187 | (Required if `provider` is not set) Shared Lambda layer with helper functions for 188 | SageMaker custom resources (see `cr_lambda_common`). 189 | """ 190 | if not domain_id: 191 | raise ValueError("You must provide a SageMaker Studio domain_id") 192 | if not name: 193 | raise ValueError("You must provide a Domain-unique user profile name") 194 | if not provider: 195 | provider = SMStudioUserCustomResourceProvider( 196 | scope, "StudioUserProvider", smcr_helper_layer=smcr_helper_layer 197 | ) 198 | 199 | resource_props = { 200 | "DomainId": domain_id, 201 | "UserProfileName": name, 202 | "UserSettings": { 203 | "ExecutionRole": role_arn, 204 | # Set new-style JupyterLab space defaults: 205 | "JupyterLabAppSettings": { 206 | "DefaultResourceSpec": { 207 | # TODO: Is this necessary or can we omit it? 208 | "InstanceType": "ml.t3.medium", 209 | }, 210 | }, 211 | # Set classic JupyterLabv3 default and attach the lifecycle configuration script: 212 | "JupyterServerAppSettings": { 213 | "DefaultResourceSpec": { 214 | "SageMakerImageArn": app_arn_map.find_in_map( 215 | Stack.of(scope).region, "jlabv3" 216 | ), 217 | "InstanceType": "system", 218 | }, 219 | }, 220 | }, 221 | } 222 | if lcc_classic_arn: 223 | resource_props["UserSettings"]["JupyterServerAppSettings"]["DefaultResourceSpec"][ 224 | "LifecycleConfigArn" 225 | ] = lcc_classic_arn 226 | if lcc_jupyterlab_arn: 227 | resource_props["UserSettings"]["JupyterLabAppSettings"]["DefaultResourceSpec"][ 228 | "LifecycleConfigArn" 229 | ] = lcc_jupyterlab_arn 230 | 231 | super().__init__( 232 | scope, 233 | id, 234 | service_token=provider.service_token, 235 | # pascal_case_properties=None, 236 | properties=resource_props, 237 | removal_policy=removal_policy, 238 | resource_type=resource_type, 239 | ) 240 | 241 | @property 242 | def home_efs_file_system_uid(self): 243 | return self.get_att("HomeEfsFileSystemUid") 244 | 245 | @property 246 | def name(self): 247 | return self.ref 248 | -------------------------------------------------------------------------------- /builtin_algorithm_hpo_tabular/util/data.py: -------------------------------------------------------------------------------- 1 | # Python Built-Ins: 2 | from io import BytesIO 3 | import os 4 | from time import sleep 5 | from typing import Callable, Dict, Iterable, Optional 6 | from urllib.request import urlopen 7 | from zipfile import ZipFile 8 | 9 | # External Dependencies: 10 | import botocore 11 | import numpy as np 12 | import pandas as pd 13 | import sagemaker 14 | from sagemaker.feature_store.feature_definition import FeatureDefinition 15 | from
sagemaker.feature_store.feature_group import FeatureGroup, FeatureParameter 16 | 17 | 18 | def fetch_sample_data( 19 | zip_url: str = "https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip", 20 | local_folder: str = "data", 21 | target_file: str = "bank-additional/bank-additional-full.csv", 22 | ) -> str: 23 | """Fetch the raw sample dataset, download and extract it locally, and return the local file path 24 | """ 25 | target_file_path = os.path.join(local_folder, target_file) 26 | 27 | if os.path.isdir(local_folder) and os.path.isfile(target_file_path): 28 | print(f"Skipping download - file already exists {target_file_path}") 29 | else: 30 | print(f"Downloading zip data...\n{zip_url}") 31 | with urlopen(zip_url) as resp: 32 | with ZipFile(BytesIO(resp.read())) as zip_file: 33 | print(f"Extracting to {local_folder}...") 34 | zip_file.extractall(local_folder) 35 | 36 | return target_file_path 37 | 38 | 39 | 40 | def transform_df(df: pd.DataFrame) -> pd.DataFrame: 41 | # Indicator variable to capture when pdays takes a value of 999 42 | df["no_previous_contact"] = np.where(df["pdays"] == 999, 1, 0) 43 | 44 | # Indicator for individuals not actively employed 45 | df["not_working"] = np.where( 46 | np.in1d(df["job"], ["student", "retired", "unemployed"]), 1, 0 47 | ) 48 | 49 | # df = pd.get_dummies(df) # Convert categorical variables to sets of indicators 50 | 51 | # Replace "y_no" and "y_yes" with a single label column, and bring it to the front: 52 | # df_model_data = pd.concat( 53 | # [ 54 | # df_model_data["y_yes"].rename("y"), 55 | # df_model_data.drop(["y_no", "y_yes"], axis=1), 56 | # ], 57 | # axis=1, 58 | # ) 59 | 60 | # Encode 'y' to numeric so AutoGluon-Tabular predictions can be mapped to labels: 61 | assert "yes" in df["y"].unique(), "Expected 'y' column to contain 'yes' and 'no'" 62 | df["y"] = df["y"].apply(lambda y: int(y == "yes")) 63 | 64 | # Move 'y' to front: 65 | df = df.loc[:, ["y"] + [col for col in df.columns if col != "y"]] 66 | 67 | # Add record identifier and event timestamp fields required for SageMaker Feature Store: 68 | df["customer_id"] = df.index.to_series().apply(lambda num: f"C-{num:08}") 69 | df["event_time"] = (pd.Timestamp.utcnow() - pd.DateOffset(years=1)).timestamp() 70 | 71 | return df 72 | 73 | 74 | def load_sample_data( 75 | raw_file_path: str, 76 | fg_s3_uri: str, 77 | ignore_cols: Iterable[str] = ( 78 | "duration", "emp.var.rate", "cons.price.idx", "cons.conf.idx", "euribor3m", "nr.employed" 79 | ), 80 | transform_fn: Callable[[pd.DataFrame], pd.DataFrame] = transform_df, 81 | feature_group_name: str = "sm101-direct-marketing", 82 | feature_group_description: str = ( 83 | "Demo Bank Marketing dataset for 'SageMaker 101' workshop, based on " 84 | "http://archive.ics.uci.edu/ml/datasets/Bank+Marketing" 85 | # "Demo Bank marketing dataset for 'SageMaker 101' introductory workshop.\n\n" 86 | # "This is a transformed version of the 'Bank Marketing' UCI dataset for research. Please " 87 | # "cite: S. Moro, P. Cortez and P. Rita. A Data-Driven Approach to Predict the Success of " 88 | # "Bank Telemarketing. 
Decision Support Systems, In press, " 89 | # "http://dx.doi.org/10.1016/j.dss.2014.03.001\n\n" 90 | # "Data description at: http://archive.ics.uci.edu/ml/datasets/Bank+Marketing" 91 | ), 92 | feature_descriptions: Dict[str, str] = { 93 | "customer_id": ( 94 | "Unique customer identifier (dummy added for purpose of SageMaker Feature Store)" 95 | ), 96 | "event_time": "Event/update timestamp (dummy added for purpose of SageMaker Feature Store)", 97 | "y": ( 98 | "Has the client subscribed a term deposit? (binary: 0/1). This is the target variable " 99 | "for our direct marketing example." 100 | ), 101 | ## Bank client data: 102 | "age": "Client's age in years", 103 | "job": ( 104 | 'Type of job (categorical: "admin.","blue-collar","entrepreneur","housemaid",' 105 | '"management","retired","self-employed","services","student","technician","unemployed",' 106 | '"unknown")' 107 | ), 108 | "marital": ( 109 | 'Marital status (categorical: "divorced","married","single","unknown"; note: ' 110 | '"divorced" means divorced or widowed)' 111 | ), 112 | "education": ( 113 | 'Highest education (categorical: "basic.4y","basic.6y","basic.9y","high.school",' 114 | '"illiterate","professional.course","university.degree","unknown")' 115 | ), 116 | "default": 'Has credit in default? (categorical: "no","yes","unknown")', 117 | "housing": 'Has housing loan? (categorical: "no","yes","unknown")', 118 | "loan": 'Has personal loan? (categorical: "no","yes","unknown")', 119 | ## Related with last contact of current campaign: 120 | "contact": 'Contact communication type (categorical: "cellular","telephone")', 121 | "day_of_week": 'Last contact day of the week (categorical: "mon","tue","wed","thu","fri")', 122 | # "duration": ( 123 | # 'Last contact duration, in seconds (numeric). Important note: this attribute highly ' 124 | # 'affects the output target (e.g., if duration=0 then y="no"). Yet, the duration is not ' 125 | # 'known before a call is performed. Also, after the end of the call y is obviously ' 126 | # 'known. Thus, this input should only be included for benchmark purposes and should be ' 127 | # 'discarded if the intention is to have a realistic predictive model.' 
128 | # ), 129 | ## Other attributes: 130 | "campaign": ( 131 | "Number of contacts performed during this campaign and for this client (numeric, " 132 | "includes last contact)" 133 | ), 134 | "pdays": ( 135 | "Number of days that passed by after the client was last contacted from a previous " 136 | "campaign (numeric; 999 means client was not previously contacted)" 137 | ), 138 | "previous": ( 139 | "Number of contacts performed before this campaign and for this client (numeric)" 140 | ), 141 | "poutcome": ( 142 | 'Outcome of the previous marketing campaign (categorical: "failure","nonexistent",' 143 | '"success")' 144 | ), 145 | ## Social and economic context attributes: 146 | # "emp.var.rate": "Employment variation rate - quarterly indicator (numeric)", 147 | # "cons.price.idx": "Consumer price index - monthly indicator (numeric)", 148 | # "cons.conf.idx": "Consumer confidence index - monthly indicator (numeric)", 149 | # "euribor3m": "EURIBOR 3 month rate - daily indicator (numeric)", 150 | # "nr.employed": "Number of employees - quarterly indicator (numeric)", 151 | ## Synthetics from transform_fn: 152 | "no_previous_contact": ( 153 | "Boolean indicator for clients not previously contacted (pdays=999)" 154 | ), 155 | "not_working": "Boolean indicator for individuals not actively employed", 156 | }, 157 | feature_parameters: Dict[str, Dict[str, str]] = { 158 | "Source": { 159 | "bank-client": ["age", "job", "marital", "education", "default", "housing", "loan"], 160 | "last-contact": ["contact", "day_of_week"], 161 | "other": ["campaign", "pdays", "previous", "poutcome"], 162 | "subscriptions": ["y"], 163 | "transforms": ["no_previous_contact", "not_working"], 164 | }, 165 | }, 166 | fg_record_identifier_field: str = "customer_id", 167 | fg_event_timestamp_field: str = "event_time", 168 | sagemaker_session: Optional[sagemaker.Session] = None, 169 | ) -> None: 170 | print(f"Loading {raw_file_path}...") 171 | df = pd.read_csv(raw_file_path) 172 | print("Transforming dataframe...") 173 | df.drop(columns=[col for col in ignore_cols], inplace=True) 174 | df = transform_fn(df) 175 | 176 | print(f"Setting up SageMaker Feature Store feature group: {feature_group_name}") 177 | if not sagemaker_session: 178 | sagemaker_session = sagemaker.Session() 179 | feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=sagemaker_session) 180 | 181 | # Pandas defaults string fields to 'object' dtype, which FS type inference doesn't like: 182 | for col in df: 183 | if pd.api.types.is_object_dtype(df[col].dtype): 184 | df[col] = df[col].astype(pd.StringDtype()) 185 | 186 | #print(df.info()) 187 | feature_group.load_feature_definitions(data_frame=df) 188 | 189 | feature_group.create( 190 | s3_uri=fg_s3_uri, 191 | record_identifier_name=fg_record_identifier_field, 192 | event_time_feature_name=fg_event_timestamp_field, 193 | role_arn=sagemaker.get_execution_role(sagemaker_session), 194 | enable_online_store=True, 195 | description=feature_group_description, 196 | ) 197 | wait_for_fg_creation(feature_group) 198 | 199 | ingestion_manager = feature_group.ingest(data_frame=df, max_processes=16, wait=False) 200 | 201 | print("Configuring feature metadata...") 202 | update_meta_calls = {} 203 | for feature_name, desc in feature_descriptions.items(): 204 | update_meta_calls[feature_name] = {"description": desc} 205 | for param_name, spec in feature_parameters.items(): 206 | for param_value, features in spec.items(): 207 | for feature_name in features: 208 | if feature_name not in update_meta_calls: 209 
| update_meta_calls[feature_name] = {} 210 | feature_spec = update_meta_calls[feature_name] 211 | if param_value is None: 212 | if "parameter_removals" not in feature_spec: 213 | feature_spec["parameter_removals"] = [param_name] 214 | else: 215 | feature_spec["parameter_removals"].append(param_name) 216 | else: 217 | if "parameter_additions" not in feature_spec: 218 | feature_spec["parameter_additions"] = [ 219 | FeatureParameter(key=param_name, value=param_value), 220 | ] 221 | else: 222 | feature_spec["parameter_additions"].append( 223 | FeatureParameter(key=param_name, value=param_value), 224 | ) 225 | for feature_name, feature_spec in update_meta_calls.items(): 226 | feature_group.update_feature_metadata(feature_name, **feature_spec) 227 | sleep(2) 228 | 229 | print("Ingesting data to SageMaker Feature Store...") 230 | ingestion_manager.wait() 231 | ingest_timestamp = pd.Timestamp.now() 232 | 233 | 234 | print("Waiting for propagation to offline Feature Store...") 235 | ingest_wait_period = pd.DateOffset( 236 | minutes=5, # Technically can take 15mins, but who has time for that 237 | ) 238 | sleep(((ingest_timestamp + ingest_wait_period) - pd.Timestamp.now()).seconds) 239 | 240 | print("Done!") 241 | return feature_group_name 242 | 243 | 244 | def describe_fg_if_exists(feature_group: FeatureGroup) -> Optional[dict]: 245 | try: 246 | return feature_group.describe() 247 | except botocore.exceptions.ClientError as e: 248 | if "Not Found" in e.response["Error"]["Message"]: 249 | return None 250 | else: 251 | raise e 252 | 253 | 254 | def wait_for_fg_creation(feature_group): 255 | status = feature_group.describe().get("FeatureGroupStatus") 256 | print( 257 | f"Waiting for creation of Feature Group {feature_group.name} (Initial status {status})", 258 | end="", 259 | ) 260 | while status == "Creating": 261 | print(".", end="") 262 | sleep(5) 263 | status = feature_group.describe().get("FeatureGroupStatus") 264 | print() 265 | if status != "Created": 266 | raise RuntimeError(f"Failed to create feature group {feature_group.name}: {status}") 267 | print(f"Feature Group {feature_group.name} successfully created.") -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/lcc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | """AWS CDK constructs for LifeCycle Configurations in Amazon SageMaker 4 | """ 5 | # Python Built-Ins: 6 | import os 7 | from typing import Any, Dict, Optional, Sequence, TextIO, Union 8 | 9 | # External Dependencies: 10 | from aws_cdk import CustomResource, Duration, Fn, RemovalPolicy, Stack 11 | import aws_cdk.aws_ec2 as aws_ec2 12 | import aws_cdk.aws_iam as aws_iam 13 | import aws_cdk.aws_kms as aws_kms 14 | from aws_cdk.aws_lambda import ILayerVersion, Runtime as LambdaRuntime 15 | from aws_cdk.aws_lambda_python_alpha import PythonFunction 16 | import aws_cdk.aws_logs as aws_logs 17 | import aws_cdk.aws_sagemaker as sagemaker_cdk 18 | import aws_cdk.custom_resources as cr 19 | from constructs import Construct 20 | 21 | 22 | CR_LAMBDA_PATH = os.path.join(os.path.dirname(__file__), "fn_studio_lcconfig") 23 | 24 | 25 | class SageMakerNotebookLifecycleConfig(Construct): 26 | """AWS CDK Construct for a SageMaker Notebook Instance Lifecycle Configuration Script 27 | 28 | See also 29 | -------- 30 | https://docs.aws.amazon.com/sagemaker/latest/dg/notebook-lifecycle-config.html 31 | https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-sagemaker-notebookinstancelifecycleconfig.html 32 | """ 33 | 34 | cfn_construct: sagemaker_cdk.CfnNotebookInstanceLifecycleConfig 35 | 36 | def __init__( 37 | self, 38 | scope: Construct, 39 | id: str, 40 | *args, 41 | name: Optional[str] = None, 42 | on_create_script: Optional[Union[str, TextIO]] = None, 43 | on_start_script: Optional[Union[str, TextIO]] = None, 44 | ) -> None: 45 | """Create a SageMakerNotebookLifecycleConfig 46 | 47 | Parameters 48 | ---------- 49 | name : 50 | If not provided, a default will be generated based on the stack name 51 | on_create_script : 52 | The text of the shell script you'd like to run on Notebook Instance creation (one-off), 53 | or an open file handle from which the script may be `.read()`. This script may contain 54 | placeholder variables to be filled in by `Fn::Sub`. 55 | on_start_script : 56 | The text of the shell script you'd like to run on Notebook Instance start (every time), 57 | or an open file handle from which the script may be `.read()`. This script may contain 58 | placeholder variables to be filled in by `Fn::Sub`. 59 | """ 60 | super().__init__(scope, id) 61 | stack = Stack.of(self) 62 | 63 | self.to_string() 64 | if name is None: 65 | # TODO: How to get fully qualified construct name? 66 | name = f"{stack.stack_name}-LCC" 67 | 68 | self.cfn_construct = sagemaker_cdk.CfnNotebookInstanceLifecycleConfig( 69 | self, 70 | id, 71 | notebook_instance_lifecycle_config_name=name, # (Name prop is mandatory) 72 | on_create=( 73 | [self._script_to_lcc_hook_property(on_create_script)] if on_create_script else None 74 | ), 75 | on_start=( 76 | [self._script_to_lcc_hook_property(on_start_script)] if on_start_script else None 77 | ), 78 | ) 79 | 80 | @staticmethod 81 | def _script_to_lcc_hook_property(script: Union[str, TextIO], enable_substitution: bool = True): 82 | """Convert a LCC script (string or file handle) to a CFn LCC hook property 83 | 84 | Parameters 85 | ---------- 86 | script : 87 | String content of the shell script, or an open file from which the content may be 88 | `.read()` 89 | enable_substitution : 90 | Whether to pass the script content through CloudFormation Fn::Sub variable resolution. 91 | Default True. 
92 | """ 93 | content = script if isinstance(script, str) else script.read() 94 | 95 | if enable_substitution: 96 | content = Fn.sub(content) 97 | return ( 98 | sagemaker_cdk.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty( 99 | content=Fn.base64(content) 100 | ) 101 | ) 102 | # return { 103 | # "content": Fn.base64(content) 104 | # } 105 | 106 | @property 107 | def name(self) -> str: 108 | return self.cfn_construct.attr_notebook_instance_lifecycle_config_name 109 | 110 | 111 | class SMStudioLCCCustomResourceProvider(cr.Provider): 112 | """Provider (AWS Lambda) for a CFn Custom Resource for SMStudio Lifecycle Configuration 113 | 114 | If you're only creating one LCC in your stack, you probably don't need to create this 115 | explicitly: Just use `SageMakerStudioLifecycleConfig` direct. 116 | """ 117 | 118 | def __init__( 119 | self, 120 | scope: Construct, 121 | id: str, 122 | smcr_helper_layer: ILayerVersion, 123 | *, 124 | eligible_domain_execution_role_arns: Optional[str] = None, 125 | log_retention: Optional[aws_logs.RetentionDays] = None, 126 | provider_function_env_encryption: Optional[aws_kms.IKey] = None, 127 | provider_function_name: Optional[str] = None, 128 | role: Optional[aws_iam.IRole] = None, 129 | security_groups: Optional[Sequence[aws_ec2.ISecurityGroup]] = None, 130 | total_timeout: Optional[Duration] = None, 131 | vpc: Optional[aws_ec2.IVpc] = None, 132 | vpc_subnets: Optional[Union[aws_ec2.SubnetSelection, Dict[str, Any]]] = None, 133 | ) -> None: 134 | """Create a SMStudioLCCCustomResourceProvider 135 | 136 | Most parameters are as per parent aws_cdk.custom_resources.Provider, with the below 137 | exceptions: 138 | 139 | Parameters 140 | ---------- 141 | eligible_domain_execution_role_arns : 142 | Set this optional ARN pattern to restrict the iam:PassRole permissions of the provider 143 | to a particular SageMaker Execution Role or wildcard pattern. By default (`None`), the 144 | provider will be created with permission to create Domains using any IAM Role 145 | role : 146 | By default, we'll create a role with required SageMaker and IAM accesses. If you 147 | provide your own role, you'll need to ensure these permissions are set up. This role is 148 | used for the Custom Resource event handler function, not the CDK CR framework function. 
149 | smcr_helper_layer : 150 | Shared Lambda layer with helper functions for SageMaker custom resources (see 151 | `cr_lambda_common`) 152 | """ 153 | if not role: 154 | role = aws_iam.Role( 155 | scope, 156 | "Role", 157 | assumed_by=aws_iam.ServicePrincipal("lambda.amazonaws.com"), 158 | description=( 159 | "Execution role for CFN Custom Resource Lambda providing SageMaker Studio " 160 | "Lifecycle Configuration Scripts" 161 | ), 162 | inline_policies={ 163 | "SageMakerLCCAdmin": aws_iam.PolicyDocument( 164 | statements=[ 165 | aws_iam.PolicyStatement( 166 | actions=[ 167 | "sagemaker:CreateStudioLifecycleConfig", 168 | "sagemaker:DeleteStudioLifecycleConfig", 169 | "sagemaker:DescribeDomain", 170 | "sagemaker:UpdateDomain", 171 | ], 172 | resources=["*"], 173 | ), 174 | aws_iam.PolicyStatement( 175 | actions=["iam:PassRole"], 176 | resources=[eligible_domain_execution_role_arns or "*"], 177 | ), 178 | ], 179 | ), 180 | }, 181 | managed_policies=[ 182 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 183 | "service-role/AWSLambdaBasicExecutionRole", 184 | ), 185 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 186 | "AWSXRayDaemonWriteAccess", 187 | ), 188 | ], 189 | ) 190 | if not smcr_helper_layer: 191 | raise ValueError("smcr_helper_layer is required") 192 | on_event_handler = PythonFunction( 193 | scope, 194 | "EventHandler", 195 | description=( 196 | "CFn custom resource handler to create SageMaker Studio Lifecycle Configurations" 197 | ), 198 | entry=CR_LAMBDA_PATH, 199 | environment_encryption=provider_function_env_encryption, 200 | index="main.py", 201 | handler="lambda_handler", 202 | layers=[smcr_helper_layer], 203 | memory_size=128, 204 | role=role, 205 | runtime=LambdaRuntime.PYTHON_3_12, 206 | security_groups=security_groups, 207 | timeout=Duration.minutes(10), # Can take a while if it has to wait for updating domain 208 | vpc=vpc, 209 | vpc_subnets=vpc_subnets, 210 | ) 211 | super().__init__( 212 | scope, 213 | id, 214 | on_event_handler=on_event_handler, 215 | log_retention=log_retention, 216 | provider_function_env_encryption=provider_function_env_encryption, 217 | provider_function_name=provider_function_name, 218 | # TODO: Add support for `role` without circular dependency 219 | # role=role, 220 | security_groups=security_groups, 221 | total_timeout=total_timeout, 222 | vpc=vpc, 223 | vpc_subnets=vpc_subnets, 224 | ) 225 | 226 | 227 | class SageMakerStudioLifecycleConfig(CustomResource): 228 | """AWS CDK Construct for a SageMaker Studio Lifecycle Configuration Script""" 229 | 230 | def __init__( 231 | self, 232 | scope: Construct, 233 | id: str, 234 | content: Union[str, TextIO], 235 | *, 236 | app_type: str = "JupyterServer", 237 | domain_id: Optional[str] = None, 238 | enable_content_substitution: bool = True, 239 | name: Optional[str] = None, 240 | provider: Optional[SMStudioLCCCustomResourceProvider] = None, 241 | removal_policy: Optional[RemovalPolicy] = None, 242 | resource_type: str = "Custom::SageMakerStudioLifecycleConfiguration", 243 | smcr_helper_layer: Optional[ILayerVersion] = None, 244 | ) -> None: 245 | """Create a SageMakerStudioLifecycleConfig 246 | 247 | Parameters 248 | ---------- 249 | app_type : 250 | SageMaker Studio App Type e.g. "JupyterServer" or "KernelGateway" 251 | domain_id : 252 | SageMaker Studio Domain ID to associate the LCC to (will not be associated, if not set) 253 | enable_content_substitution : 254 | Set `True` to enable CloudFormation `!Sub` substitution on the provided script content, 255 | or `False` to disable. 
256 | name : 257 | (Account+region unique) name of the LifeCycle Configuration script to create 258 | removal_policy : 259 | Optional CDK `RemovalPolicy` to apply to the created Lifecycle Configuration resource 260 | provider : 261 | Optional `SMStudioLCCCustomResourceProvider` if you'd like to customize provider 262 | configuration or re-use the Custom Resource Lambda across multiple LCCs in your CDK app 263 | smcr_helper_layer : 264 | (Required if `provider` is not set) Shared Lambda layer with helper functions for 265 | SageMaker custom resources (see `cr_lambda_common`). 266 | """ 267 | if not isinstance(content, str): 268 | content = content.read() 269 | if enable_content_substitution: 270 | content = Fn.sub(content) 271 | if not provider: 272 | provider = SMStudioLCCCustomResourceProvider( 273 | scope, "StudioLCCProvider", smcr_helper_layer=smcr_helper_layer 274 | ) 275 | if not name: 276 | raise NotImplementedError("TODO: generate a name by default!") 277 | 278 | props = {"AppType": app_type, "Name": name, "Content": Fn.base64(content)} 279 | if domain_id: 280 | props["DomainId"] = domain_id 281 | 282 | super().__init__( 283 | scope, 284 | id, 285 | service_token=provider.service_token, 286 | # pascal_case_properties=None, 287 | properties=props, 288 | removal_policy=removal_policy, 289 | resource_type=resource_type, 290 | ) 291 | 292 | @property 293 | def arn(self): 294 | return self.ref 295 | -------------------------------------------------------------------------------- /.infrastructure/cdk_src/smstudio/domain/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | """AWS CDK constructs for creating SageMaker Domains with advanced configuration options 4 | """ 5 | # Python Built-Ins: 6 | import os 7 | from typing import Any, Dict, List, Optional, Sequence, Union 8 | 9 | # External Dependencies: 10 | from aws_cdk import CustomResource, Duration, RemovalPolicy 11 | import aws_cdk.aws_ec2 as aws_ec2 12 | import aws_cdk.aws_iam as aws_iam 13 | from aws_cdk.aws_lambda import ILayerVersion 14 | import aws_cdk.aws_kms as aws_kms 15 | from aws_cdk.aws_lambda import Runtime as LambdaRuntime 16 | from aws_cdk.aws_lambda_python_alpha import PythonFunction 17 | import aws_cdk.aws_logs as aws_logs 18 | import aws_cdk.custom_resources as cr 19 | from constructs import Construct 20 | 21 | 22 | LAMBDA_PATH = os.path.join(os.path.dirname(__file__), "fn_domain") 23 | 24 | 25 | class SMStudioDomainCustomResourceProvider(cr.Provider): 26 | """Provider (AWS Lambda) for a CFn Custom Resource for SMStudio Domain 27 | 28 | If you're only creating one Domain in your stack, you probably don't need to create this 29 | explicitly: Just use `SageMakerStudioDomain` direct.
30 | """ 31 | 32 | def __init__( 33 | self, 34 | scope: Construct, 35 | id: str, 36 | smcr_helper_layer: ILayerVersion, 37 | *, 38 | eligible_domain_execution_role_arns: Optional[str] = None, 39 | log_retention: Optional[aws_logs.RetentionDays] = None, 40 | provider_function_env_encryption: Optional[aws_kms.IKey] = None, 41 | provider_function_name: Optional[str] = None, 42 | role: Optional[aws_iam.IRole] = None, 43 | security_groups: Optional[Sequence[aws_ec2.ISecurityGroup]] = None, 44 | total_timeout: Optional[Duration] = None, 45 | vpc: Optional[aws_ec2.IVpc] = None, 46 | vpc_subnets: Optional[Union[aws_ec2.SubnetSelection, Dict[str, Any]]] = None, 47 | ) -> None: 48 | """Create a SMStudioDomainCustomResourceProvider 49 | 50 | Most parameters are as per parent aws_cdk.custom_resources.Provider, with the below 51 | exceptions: 52 | 53 | Parameters 54 | ---------- 55 | smcr_helper_layer : 56 | Shared Lambda layer with helper functions for SageMaker custom resources (see 57 | `cr_lambda_common`) 58 | eligible_domain_execution_role_arns : 59 | Set this optional ARN pattern to restrict the iam:PassRole permissions of the provider 60 | to a particular SageMaker Execution Role or wildcard pattern. By default (`None`), the 61 | provider will be created with permission to create Domains using any IAM Role 62 | role : 63 | By default, we'll create a role with required SageMaker, VPC, and IAM accesses. If you 64 | provide your own role, you'll need to ensure these permissions are set up. This role is 65 | used for the Custom Resource event handler function, not the CDK CR framework function. 66 | """ 67 | if not role: 68 | role = aws_iam.Role( 69 | scope, 70 | "SMDomainProviderRole", 71 | assumed_by=aws_iam.ServicePrincipal("lambda.amazonaws.com"), 72 | description=( 73 | "Execution role for CFN Custom Resource Lambda providing SageMaker Studio " 74 | "Domains" 75 | ), 76 | inline_policies={ 77 | "SageMakerDomainAdmin": aws_iam.PolicyDocument( 78 | statements=[ 79 | aws_iam.PolicyStatement( 80 | actions=[ 81 | "ec2:DescribeSecurityGroups", 82 | "ec2:DescribeSubnets", 83 | "ec2:DescribeVpcs", 84 | # IAM access to create service roles if not already existing: 85 | # (e.g. 'AWSServiceRoleForAmazonSageMakerNotebooks') 86 | "iam:CreateServiceLinkedRole", 87 | "iam:DeleteServiceLinkedRole", 88 | "iam:ListRoles", 89 | "sagemaker:CreateDomain", 90 | "sagemaker:DeleteDomain", 91 | "sagemaker:DescribeDomain", 92 | # TODO: Any other service catalog / IAM / etc permissions needed? 
93 | "sagemaker:EnableSagemakerServicecatalogPortfolio", 94 | "sagemaker:UpdateDomain", 95 | # For enabling SageMaker Project Templates: 96 | "servicecatalog:AcceptPortfolioShare", 97 | "servicecatalog:AssociatePrincipalWithPortfolio", 98 | "servicecatalog:ListAcceptedPortfolioShares", 99 | ], 100 | resources=["*"], 101 | ), 102 | aws_iam.PolicyStatement( 103 | actions=["iam:PassRole"], 104 | resources=[eligible_domain_execution_role_arns or "*"], 105 | ), 106 | ], 107 | ), 108 | }, 109 | managed_policies=[ 110 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 111 | "service-role/AWSLambdaBasicExecutionRole", 112 | ), 113 | aws_iam.ManagedPolicy.from_aws_managed_policy_name( 114 | "AWSXRayDaemonWriteAccess", 115 | ), 116 | ], 117 | ) 118 | if not smcr_helper_layer: 119 | raise ValueError("smcr_helper_layer is required") 120 | on_event_handler = PythonFunction( 121 | scope, 122 | "SMDomainEventHandler", 123 | description=("CFn custom resource handler to create SageMaker Studio Domains"), 124 | entry=LAMBDA_PATH, 125 | environment_encryption=provider_function_env_encryption, 126 | index="main.py", 127 | handler="lambda_handler", 128 | layers=[smcr_helper_layer], 129 | memory_size=128, 130 | role=role, 131 | runtime=LambdaRuntime.PYTHON_3_12, 132 | timeout=Duration.seconds(895), # Needs to wait for domain so can take a while 133 | vpc=vpc, 134 | vpc_subnets=vpc_subnets, 135 | ) 136 | super().__init__( 137 | scope, 138 | id, 139 | on_event_handler=on_event_handler, 140 | # is_complete_handler=is_complete_handler, 141 | log_retention=log_retention, 142 | provider_function_env_encryption=provider_function_env_encryption, 143 | provider_function_name=provider_function_name, 144 | # query_interval=query_interval, 145 | # TODO: Add support for `role` without circular dependency 146 | # role=role, 147 | security_groups=security_groups, 148 | total_timeout=total_timeout, 149 | vpc=vpc, 150 | vpc_subnets=vpc_subnets, 151 | ) 152 | 153 | 154 | class SageMakerStudioDomain(CustomResource): 155 | """AWS CDK Construct for a SageMaker Studio Domain with additional features 156 | 157 | Unlike the CDK's built-in construct for a SMStudio Domain, this construct is backed by a Custom 158 | Resource Lambda and: 159 | - Defaults to the Default VPC (or else the first available VPC) in the account automatically, 160 | if a VPC is not specified. 161 | - Defaults to all default subnets (or else all available subnets in the VPC) if VPC subnets are 162 | not specified. 163 | - Optionally proposes a new small IPv4 CIDR for administrative tasks (e.g. EFS), compatible 164 | with the seleted VPC, at deploy time if `propose_admin_subnet` is set to `True`. (This is 165 | not so useful in CDK because of how constructs deal with VPC, but can be useful for SAM). 
166 | - Optionally enables SageMaker Projects (SageMaker Service Catalog portfolio) 167 | """ 168 | 169 | _propose_admin_subnet: bool 170 | 171 | def __init__( 172 | self, 173 | scope: Construct, 174 | id: str, 175 | *, 176 | default_space_settings: Optional[dict] = None, 177 | default_user_settings: Optional[dict] = None, 178 | enable_docker_access: bool = True, 179 | enable_projects: bool = True, 180 | name: Optional[str] = None, 181 | propose_admin_subnet: bool = False, 182 | provider: Optional[SMStudioDomainCustomResourceProvider] = None, 183 | removal_policy: Optional[RemovalPolicy] = None, 184 | resource_type: str = "Custom::SageMakerStudioDomain", 185 | smcr_helper_layer: Optional[ILayerVersion] = None, 186 | subnet_ids: Optional[List[str]] = None, 187 | use_vpc_internet: bool = False, 188 | vpc_id: Optional[str] = None, 189 | ) -> None: 190 | """Create a SageMakerStudioDomain 191 | 192 | Parameters 193 | ---------- 194 | default_space_settings : 195 | Dictionary as per SageMaker CreateDomain/UpdateDomain API 196 | default_user_settings : 197 | Dictionary as per SageMaker CreateDomain/UpdateDomain API 198 | enable_docker_access : 199 | Enable docker access within Studio (Does not *install* docker by itself) 200 | name : 201 | Name for the SageMaker Studio Domain to create (must be unique in account+region) 202 | propose_admin_subnet : 203 | Whether to propose a new administrative subnet IPv4 CIDR at deploy-time 204 | provider : 205 | Optional `SMStudioDomainCustomResourceProvider` if you'd like to customize provider 206 | configuration or re-use the Custom Resource Lambda across multiple Domains in your CDK 207 | app 208 | smcr_helper_layer : 209 | (Required if `provider` is not set) Shared Lambda layer with helper functions for 210 | SageMaker custom resources (see `cr_lambda_common`). 
211 | use_vpc_internet : 212 | Whether spaces in the SageMaker Studio Domain should use the VPC (True) or direct 213 | connections (False) to access the internet 214 | """ 215 | if not provider: 216 | provider = SMStudioDomainCustomResourceProvider( 217 | scope, "StudioDomainProvider", smcr_helper_layer=smcr_helper_layer 218 | ) 219 | if not name: 220 | raise NotImplementedError("TODO: generate a name by default!") 221 | 222 | self._propose_admin_subnet = propose_admin_subnet 223 | resource_props = { 224 | "DomainName": name, 225 | "DomainSettings": { 226 | "DockerSettings": { 227 | "EnableDockerAccess": "ENABLED" if enable_docker_access else "DISABLED", 228 | }, 229 | }, 230 | "AppNetworkAccessType": "VpcOnly" if use_vpc_internet else "PublicInternetOnly", 231 | "EnableProjects": enable_projects, 232 | "ProposeAdminSubnet": propose_admin_subnet, 233 | } 234 | if default_space_settings: 235 | resource_props["DefaultSpaceSettings"] = default_space_settings 236 | if default_user_settings: 237 | resource_props["DefaultUserSettings"] = default_user_settings 238 | if subnet_ids: 239 | resource_props["SubnetIds"] = subnet_ids 240 | if vpc_id: 241 | resource_props["VpcId"] = vpc_id 242 | 243 | super().__init__( 244 | scope, 245 | id, 246 | service_token=provider.service_token, 247 | # pascal_case_properties=None, 248 | properties=resource_props, 249 | removal_policy=removal_policy, 250 | resource_type=resource_type, 251 | ) 252 | 253 | @property 254 | def domain_id(self) -> str: 255 | return self.get_att_string("DomainId") 256 | 257 | @property 258 | def domain_name(self) -> str: 259 | return self.get_att_string("DomainName") 260 | 261 | @property 262 | def home_efs_filesystem_id(self) -> str: 263 | return self.get_att_string("HomeEfsFileSystemId") 264 | 265 | @property 266 | def subnet_ids(self) -> str: 267 | """Returns *comma-separated string* of subnet IDs 268 | 269 | TODO: Refer to underlying subnets construct instead? 270 | """ 271 | return self.get_att_string("SubnetIds") 272 | 273 | @property 274 | def url(self) -> str: 275 | return self.get_att_string("Url") 276 | 277 | @property 278 | def vpc_id(self) -> str: 279 | return self.get_att_string("VpcId") 280 | 281 | @property 282 | def proposed_admin_subnet_cidr(self) -> str: 283 | """Deploy-time-generated IPv4 CIDR of the proposed administrative subnet""" 284 | if self._propose_admin_subnet: 285 | return self.get_att_string("ProposedAdminSubnetCidr") 286 | raise ValueError( 287 | "ProposedAdminSubnetCidr attr not available if property propose_admin_subnet=False" 288 | ) 289 | 290 | @property 291 | def inbound_efs_security_group_id(self) -> str: 292 | return self.get_att_string("InboundEFSSecurityGroupId") 293 | 294 | @property 295 | def outbound_efs_security_group_id(self) -> str: 296 | return self.get_att_string("OutboundEFSSecurityGroupId") 297 | --------------------------------------------------------------------------------
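
Usage sketch (illustrative only, not a file in this repository): the domain, lcc, and user constructs above are designed to share one helper Lambda layer built from `cr_lambda_common` and to chain their outputs together - the domain's ID into the LCC and user profile, and the LCC's ARN into the user profile. The construct IDs, layer entry path, execution role ARN, script choice, `app_type` value, and the `app_arn_map` argument (a `CfnSageMakerAppsByRegionMapping` built elsewhere from `region_config`) below are assumptions for the example, not values taken from this repository.

# Illustrative sketch only - identifiers below are assumptions, not repository code.
from aws_cdk import Stack
from aws_cdk.aws_lambda_python_alpha import PythonLayerVersion
from constructs import Construct

from cdk_src.smstudio.domain import SageMakerStudioDomain
from cdk_src.smstudio.lcc import SageMakerStudioLifecycleConfig
from cdk_src.smstudio.user import SageMakerStudioUser


class ExampleStudioStack(Stack):
    """Hypothetical stack wiring a Domain, an LCC, and a User Profile together"""

    def __init__(self, scope: Construct, construct_id: str, sm_execution_role_arn: str, app_arn_map, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # One helper layer shared by all three custom resource providers (see cr_lambda_common):
        smcr_helper_layer = PythonLayerVersion(
            self, "SMCRHelperLayer", entry="cdk_src/smstudio/cr_lambda_common"
        )

        # Domain: falls back to the default VPC/subnets because none are specified here
        domain = SageMakerStudioDomain(
            self,
            "StudioDomain",
            name="example-domain",
            default_user_settings={"ExecutionRole": sm_execution_role_arn},
            smcr_helper_layer=smcr_helper_layer,
        )

        # Lifecycle config: script content is read from file and passed through Fn::Sub
        with open("cdk_src/smstudio/lcc/studio-jupyterlab-onstart.sh") as f:
            lcc = SageMakerStudioLifecycleConfig(
                self,
                "OnStartLCC",
                content=f,
                app_type="JupyterLab",
                domain_id=domain.domain_id,
                name="example-onstart",
                smcr_helper_layer=smcr_helper_layer,
            )

        # User profile: attaches the LCC to new-style JupyterLab spaces for this user
        SageMakerStudioUser(
            self,
            "ExampleUser",
            app_arn_map=app_arn_map,
            domain_id=domain.domain_id,
            name="example-user",
            role_arn=sm_execution_role_arn,
            lcc_jupyterlab_arn=lcc.arn,
            smcr_helper_layer=smcr_helper_layer,
        )

If a stack creates several LCCs or user profiles, the corresponding provider class can be instantiated once and passed via the `provider` argument so the handler Lambda functions are not duplicated.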
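
Similarly, a brief sketch of how the Feature Store helpers in builtin_algorithm_hpo_tabular/util/data.py might be driven from the workshop notebook, assuming the working directory is builtin_algorithm_hpo_tabular and a SageMaker execution role is available; the S3 prefix is an assumption. Note that `load_sample_data` returns the feature group name even though it is currently annotated `-> None`.

# Illustrative sketch only - the bucket prefix and notebook context are assumptions.
import sagemaker

from util import data

session = sagemaker.Session()
bucket = session.default_bucket()  # Any bucket the execution role can write to would do

raw_csv_path = data.fetch_sample_data()  # Downloads and extracts the sample CSV under ./data
feature_group_name = data.load_sample_data(
    raw_file_path=raw_csv_path,
    fg_s3_uri=f"s3://{bucket}/sm101/feature-store",  # Offline store location (assumed prefix)
    sagemaker_session=session,
)
print(f"Feature group ready: {feature_group_name}")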