├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── WINDOWS_INSTALLATION.md ├── blog └── pytorch_cnn_cifar10 │ ├── daemon.json │ ├── pytorch_local_mode_cifar10.ipynb │ ├── setup.sh │ ├── source │ └── cifar10.py │ └── utils_cifar.py ├── catboost_bring_your_own_container_local_training_and_serving ├── build_and_push.sh ├── catboost_bring_your_own_container_local_training_and_serving.py └── container │ ├── Dockerfile │ ├── ReadMe.md │ └── catboost_regressor │ ├── nginx.conf │ ├── predictor.py │ ├── serve │ ├── train │ └── wsgi.py ├── catboost_bring_your_own_container_local_training_toolkit ├── catboost_bring_your_own_container_local_training_toolkit.py ├── code │ └── california_housing_train.py └── container │ └── Dockerfile ├── catboost_scikit_learn_script_mode_local_training_and_serving ├── catboost_scikit_learn_script_mode_local_training_and_serving.py ├── code │ ├── catboost_train_deploy.py │ └── requirements.txt └── requirements.txt ├── dask_bring_your_own_container_local_processing ├── container │ ├── Dockerfile │ ├── dask_config │ │ └── dask.yaml │ └── program │ │ └── bootstrap.py ├── dask_bring_your_own_container_local_processing.py └── processing_script.py ├── deep_java_library_bring_your_own_container_serving_local_mode ├── .gitignore ├── container │ ├── Dockerfile │ ├── build.gradle │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── settings.gradle │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── example │ │ │ │ └── sagemaker │ │ │ │ └── djl │ │ │ │ └── serving │ │ │ │ ├── SagemakerDjlServingApplication.java │ │ │ │ └── ServingController.java │ │ └── resources │ │ │ └── application.properties │ │ └── test │ │ └── java │ │ └── com │ │ └── example │ │ └── sagemaker │ │ └── djl │ │ └── serving │ │ └── SagemakerDjlServingApplicationTests.java └── deep_java_library_bring_your_own_container_serving_local_mode.py ├── delta_lake_bring_your_own_container_local_training_and_serving ├── container │ ├── Dockerfile │ ├── ReadMe.md │ └── catboost_regressor │ │ ├── nginx.conf │ │ ├── predictor.py │ │ ├── serve │ │ ├── train │ │ └── wsgi.py └── delta_lake_bring_your_own_container_local_training_and_serving.py ├── delta_sharing_bring_your_own_container_local_processing ├── container │ └── Dockerfile ├── delta_sharing_bring_your_own_container_local_processing.py ├── processing_script.py └── profile │ └── open-datasets.share ├── delta_sharing_scikit_learn_local_training_and_serving ├── code │ ├── requirements.txt │ └── scikit_boston_housing.py ├── delta_sharing_scikit_learn_local_training_and_serving.py ├── profile │ └── open-datasets.share └── requirements.txt ├── general_pipeline_local_debug ├── Readme.md └── sagemaker-pipelines-local-mode-debug.ipynb ├── gensim_with_word2vec_model_artifacts_local_serving ├── code │ ├── inference.py │ └── requirements.txt ├── gensim_with_word2vec_model_artifacts_local_serving.py └── requirements.txt ├── hdbscan_bring_your_own_container_local_training ├── container │ ├── Dockerfile │ └── hdbscan │ │ └── train.py └── hdbscan_bring_your_own_container_local_training.py ├── huggingface_hebert_sentiment_analysis_local_serving ├── code │ ├── inference.py │ └── requirements.txt ├── hebert_model.py ├── huggingface_hebert_sentiment_analysis_local_serving.py └── requirements.txt ├── img ├── activate_specific_conda_environment.png ├── aws_deep_learning_containers.png ├── aws_ml.png ├── create_sagemaker_local_notebook.png ├── 
debug_and_resume_program.png ├── debug_waiting_for_process_connection.png ├── debug_your_application.png ├── debug_your_application_2.png ├── docker.png ├── icons.actions.startDebugger.svg ├── inference_success.png ├── install_requirements_txt.png ├── list_conda_envs.png ├── local_machine.png ├── map_container_code_to_your_project.png ├── map_container_code_to_your_project_fix.png ├── map_container_code_to_your_project_fixed.png ├── new.png ├── open_terminal.png ├── open_tf_training_and_serving.png ├── output_tf_training_and_serving.png ├── py_remote_debug.png ├── pycharm_sagemaker_local_processing_jobs.png ├── pycharm_sagemaker_local_serving.png ├── pycharm_sagemaker_local_tf2_debug_create_conf.png ├── pycharm_sagemaker_local_training.png ├── pydevd_pycharm_install.png ├── python_interpreter_final_add_venv.png ├── python_interpreter_initial_add_venv.png ├── python_interpreter_save_new_venv.png ├── run_tf_training_and_serving.png ├── set_breakpoint.png ├── start_run_python.png ├── windows_error_01.png ├── windows_error_02.png ├── windows_image_01.png └── windows_image_02.png ├── lightgbm_bring_your_own_container_local_training_and_serving ├── build_and_push.sh ├── container │ ├── Dockerfile │ ├── ReadMe.md │ └── lightgbm_regression │ │ ├── nginx.conf │ │ ├── predictor.py │ │ ├── serve │ │ ├── train │ │ └── wsgi.py ├── data │ ├── test │ │ └── boston_test.csv │ ├── train │ │ └── boston_train.csv │ └── validation │ │ └── boston_validation.csv └── lightgbm_bring_your_own_container_local_training_and_serving.py ├── prophet_bring_your_own_container_local_training_and_serving ├── container │ ├── Dockerfile │ └── prophet │ │ ├── nginx.conf │ │ ├── predictor.py │ │ ├── serve │ │ ├── train │ │ └── wsgi.py ├── data │ └── avocado_daily.csv └── prophet_bring_your_own_container_local_training_and_serving.py ├── pytorch_extend_dlc_container_ofa_local_serving ├── code │ └── inference.py ├── container │ └── Dockerfile ├── pytorch_extend_dlc_container_ofa_local_serving.py └── test_image.jpg ├── pytorch_graviton_script_mode_local_model_inference ├── pytorch_graviton_script_mode_local_model_inference.py └── requirements.txt ├── pytorch_nlp_script_mode_local_model_inference ├── code │ ├── inference.py │ └── requirements.txt ├── data │ └── test_data.csv └── pytorch_nlp_script_mode_local_model_inference.py ├── pytorch_script_mode_local_model_inference ├── data │ └── README.md ├── pytorch_script_mode_local_model_inference.py └── utils_cifar.py ├── pytorch_script_mode_local_training_and_serving ├── code │ ├── cifar10_pytorch.py │ └── requirements.txt ├── data │ └── README.md ├── pytorch_script_mode_local_training_and_serving.py └── utils_cifar.py ├── pytorch_wandb_script_mode_local_training ├── code │ ├── mnist.py │ └── requirements.txt ├── data │ └── README.md └── pytorch_wandb_script_mode_local_training.py ├── pytorch_yolov5_local_model_inference ├── code │ ├── inference.py │ └── requirements.txt ├── pytorch_yolov5_local_model_inference.py └── requirements.txt ├── requirements.txt ├── sagemaker_studio_docker_cli_install ├── README.md ├── sagemaker-debian-bullseye-cli-install.sh ├── sagemaker-distribution-docker-cli-install.sh ├── sagemaker-ubuntu-focal-docker-cli-install.sh └── sagemaker-ubuntu-jammy-docker-cli-install.sh ├── scikit_learn_bring_your_own_container_and_own_model_local_serving ├── build_and_push.sh ├── container │ ├── Dockerfile │ ├── ReadMe.md │ └── sklearn_rf_regressor │ │ ├── nginx.conf │ │ ├── predictor.py │ │ ├── serve │ │ └── wsgi.py └── 
scikit_learn_bring_your_own_container_and_own_model_local_serving.py ├── scikit_learn_bring_your_own_container_local_processing ├── container │ └── Dockerfile ├── input_data │ ├── README.md │ ├── sample_file_1.txt │ ├── sample_file_2.txt │ └── sample_file_3.txt ├── processing_script.py └── scikit_learn_bring_your_own_container_local_processing.py ├── scikit_learn_bring_your_own_model_local_serving ├── code │ ├── inference.py │ └── requirements.txt ├── requirements.txt └── scikit_learn_bring_your_own_model_local_serving.py ├── scikit_learn_graviton_bring_your_own_container_local_training_and_serving ├── build_and_push.sh ├── container │ ├── Dockerfile │ ├── ReadMe.md │ ├── linear_regressor │ │ ├── nginx.conf │ │ ├── predictor.py │ │ ├── serve │ │ ├── train │ │ └── wsgi.py │ └── ml-dependencies.yml └── scikit_learn_graviton_bring_your_own_container_local_training_and_serving.py ├── scikit_learn_local_processing ├── SKLearnProcessor_local_processing.py ├── input_data │ ├── README.md │ ├── sample_file_1.txt │ ├── sample_file_2.txt │ └── sample_file_3.txt └── processing_script.py ├── scikit_learn_nltk_local_processing ├── FrameworkProcessor_nltk_local_processing.py ├── dependencies │ └── requirements.txt ├── input_data │ ├── README.md │ ├── sample_file_1.txt │ ├── sample_file_2.txt │ └── sample_file_3.txt └── processing_script.py ├── scikit_learn_script_mode_local_serving_multiple_models_with_one_invocation ├── code │ └── inference.py ├── requirements.txt └── scikit_learn_script_mode_local_serving_multiple_models_with_one_invocation.py ├── scikit_learn_script_mode_local_serving_no_model_artifact ├── code │ └── inference.py ├── requirements.txt └── scikit_learn_script_mode_local_serving_no_model_artifact.py ├── scikit_learn_script_mode_local_training_and_serving ├── code │ └── scikit_learn_california.py ├── requirements.txt └── scikit_learn_script_mode_local_training_and_serving.py ├── snowflake_bring_your_own_container_local_training ├── code │ └── predictive_maintenance_classification.py ├── container │ ├── Dockerfile │ └── requirements.txt └── snowflake_bring_your_own_container_local_training.py ├── tensorflow_bring_your_own_california_housing_local_serving_without_tfs ├── container │ ├── Dockerfile │ └── california_housing │ │ ├── __init__.py │ │ ├── nginx.conf │ │ ├── predictor.py │ │ ├── serve │ │ └── wsgi.py └── tensorflow_bring_your_own_california_housing_local_serving_without_tfs.py ├── tensorflow_bring_your_own_california_housing_local_training_and_batch_transform ├── container │ ├── Dockerfile │ └── california_housing │ │ ├── __init__.py │ │ ├── nginx.conf │ │ ├── predictor.py │ │ ├── serve │ │ ├── train │ │ └── wsgi.py └── tensorflow_bring_your_own_california_housing_local_training_and_batch_transform.py ├── tensorflow_bring_your_own_california_housing_local_training_and_serving ├── container │ ├── Dockerfile │ └── california_housing │ │ ├── __init__.py │ │ ├── nginx.conf │ │ ├── serve │ │ └── train └── tensorflow_bring_your_own_california_housing_local_training_and_serving.py ├── tensorflow_bring_your_own_california_housing_local_training_toolkit ├── code │ └── california_housing_tf2.py ├── container │ ├── Dockerfile │ └── requirements.txt └── tensorflow_bring_your_own_california_housing_local_training_toolkit.py ├── tensorflow_bring_your_own_california_housing_mms_local_serving ├── container │ ├── Dockerfile │ ├── dockerd-entrypoint.py │ └── model_handler.py └── tensorflow_bring_your_own_california_housing_mms_local_serving.py ├── 
tensorflow_extend_dlc_california_housing_local_training ├── code │ └── california_housing_tf2.py ├── container │ └── Dockerfile └── tensorflow_extend_dlc_california_housing_local_training.py ├── tensorflow_graviton_bring_your_own_california_housing_local_training ├── container │ ├── Dockerfile │ ├── california_housing │ │ └── train │ └── ml-dependencies.yml └── tensorflow_graviton_bring_your_own_california_housing_local_training.py ├── tensorflow_graviton_bring_your_own_california_housing_local_training_toolkit ├── code │ └── california_housing_tf2.py ├── container │ ├── Dockerfile │ └── ml-dependencies.yml └── tensorflow_graviton_bring_your_own_california_housing_local_training_toolkit.py ├── tensorflow_graviton_script_mode_local_model_inference ├── requirements.txt └── tensorflow_graviton_script_mode_local_model_inference.py ├── tensorflow_script_mode_california_housing_local_training_and_batch_transform ├── code │ └── california_housing_tf2.py ├── requirements.txt └── tensorflow_script_mode_california_housing_local_training_and_batch_transform.py ├── tensorflow_script_mode_california_housing_local_training_and_serving ├── code │ └── california_housing_tf2.py ├── requirements.txt └── tensorflow_script_mode_california_housing_local_training_and_serving.py ├── tensorflow_script_mode_debug_local_training ├── README.md ├── data │ └── README.md ├── requirements.txt ├── source_dir │ ├── mnist_tf2.py │ └── requirements.txt └── tensorflow_script_mode_debug_local_training.py ├── tensorflow_script_mode_local_gpu_training_resnet50 ├── data │ ├── training │ │ └── README.md │ └── validation │ │ └── README.md ├── requirements.txt ├── source_dir │ └── cifar10_tf2.py └── tensorflow_script_mode_local_training_resnet50.py ├── tensorflow_script_mode_local_model_inference ├── code │ ├── inference.py │ └── requirements.txt ├── instances.json ├── requirements.txt └── tensorflow_script_mode_local_model_inference.py ├── tensorflow_script_mode_local_model_inference_file ├── code │ ├── inference.py │ └── requirements.txt ├── requirements.txt └── tensorflow_script_mode_local_model_inference_file.py ├── tensorflow_script_mode_local_training_and_serving ├── code │ ├── mnist_tf2.py │ └── requirements.txt ├── data │ └── README.md ├── requirements.txt └── tensorflow_script_mode_local_training_and_serving.py ├── xgboost_script_mode_local_serving_no_compressed_model ├── code │ └── inference.py ├── model │ └── my-xgboost-model ├── requirements.txt └── xgboost_script_mode_local_serving_no_compressed_model.py └── xgboost_script_mode_local_training_and_serving ├── code ├── abalone.py └── inference.py ├── data └── train │ └── abalone ├── requirements.txt └── xgboost_script_mode_local_training_and_serving.py /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /WINDOWS_INSTALLATION.md: -------------------------------------------------------------------------------- 1 | # Use SageMaker Local Mode on Windows 2 | 3 | ## Default Windows paths will cause an error! 4 | SageMaker Local Mode will not work on Windows unless you install WSL 2 and then a Linux distro (Ubuntu is the default). 5 | 6 | If you try to run the examples in this repo, you'll eventually get a `TypeError: object of type 'int' has no len()` error after the training job completes. 7 | ![Error training in Windows - exception](img/windows_error_01.png) 8 | 9 | The problem is caused by failures to write the model to the temporary folders that SageMaker Local Mode creates for the training job, and is related to Windows directory structure and permissions. 10 | 11 | ![Error training in Windows - directory structure](img/windows_error_02.png) 12 | 13 | ## Installation Instructions for Windows and Visual Studio Code - using Ubuntu paths 14 | 15 | 1. Install [Docker Desktop for Windows](https://docs.docker.com/desktop/install/windows-install/). 16 | 2. Install [Linux on Windows with WSL](https://learn.microsoft.com/en-us/windows/wsl/install). We assume you install the default Ubuntu distro. 17 | 3. Once installed, you can search for Ubuntu in your Windows search bar. 18 | 4. Once Ubuntu has finished its initial setup, you will need to create a username and password (this does not need to match your Windows user credentials). 19 | 5. Finally, it’s always good practice to install the latest updates. Execute `sudo apt update`, entering your password and pressing Y when prompted. 20 | 6. Clone this GitHub repository: `git clone https://github.com/aws-samples/amazon-sagemaker-local-mode` 21 | 7. Open Visual Studio Code and install the [WSL Extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-wsl). 22 | ![install the WSL Extension](img/windows_image_01.png) 23 | 8. In Visual Studio Code, choose `File` -> `Open Folder` and open the `amazon-sagemaker-local-mode` folder on Ubuntu that you cloned in the previous step. 24 | 9. Once you run a sample, you'll see that the temporary folders SageMaker Local Mode creates for the training job now use the Linux format, via WSL 2. You should see `[WSL:Ubuntu]` in the title bar of the Visual Studio Code window. 
25 | ![Running a sample with WSL2](img/windows_image_02.png) 26 | 27 | -------------------------------------------------------------------------------- /blog/pytorch_cnn_cifar10/daemon.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "default-runtime": "nvidia", 4 | "runtimes": { 5 | "nvidia": { 6 | "path": "/usr/bin/nvidia-container-runtime", 7 | "runtimeArgs": [] 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /blog/pytorch_cnn_cifar10/utils_cifar.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torchvision.transforms as transforms 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') 8 | 9 | 10 | def _get_transform(): 11 | return transforms.Compose( 12 | [transforms.ToTensor(), 13 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 14 | 15 | 16 | def get_train_data_loader(): 17 | transform = _get_transform() 18 | trainset = torchvision.datasets.CIFAR10(root='./data', train=True, 19 | download=True, transform=transform) 20 | return torch.utils.data.DataLoader(trainset, batch_size=4, 21 | shuffle=True, num_workers=2) 22 | 23 | 24 | def get_test_data_loader(): 25 | transform = _get_transform() 26 | testset = torchvision.datasets.CIFAR10(root='./data', train=False, 27 | download=True, transform=transform) 28 | return torch.utils.data.DataLoader(testset, batch_size=4, 29 | shuffle=False, num_workers=2) 30 | 31 | 32 | # function to show an image 33 | def imshow(img): 34 | img = img / 2 + 0.5 # unnormalize 35 | npimg = img.numpy() 36 | plt.imshow(np.transpose(npimg, (1, 2, 0))) -------------------------------------------------------------------------------- /catboost_bring_your_own_container_local_training_and_serving/build_and_push.sh: -------------------------------------------------------------------------------- 1 | 2 | # The name of our algorithm 3 | algorithm_name=sagemaker-catboost-regressor 4 | 5 | cd container 6 | 7 | chmod +x catboost_regressor/train 8 | chmod +x catboost_regressor/serve 9 | 10 | account=$(aws sts get-caller-identity --query Account --output text) 11 | 12 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 13 | region=$(aws configure get region) 14 | region=${region:-us-west-2} 15 | 16 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest" 17 | 18 | # If the repository doesn't exist in ECR, create it. 19 | aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1 20 | 21 | if [ $? -ne 0 ] 22 | then 23 | aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null 24 | fi 25 | 26 | # Get the login command from ECR and execute it directly 27 | aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname} 28 | 29 | # Build the docker image locally with the image name and then push it to ECR 30 | # with the full name. 31 | 32 | docker build -t ${algorithm_name} . 
33 | docker tag ${algorithm_name} ${fullname} 34 | 35 | docker push ${fullname} 36 | -------------------------------------------------------------------------------- /catboost_bring_your_own_container_local_training_and_serving/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build an image that can do training and inference in SageMaker 2 | # This is a Python 3 image that uses the nginx, gunicorn, flask stack 3 | # for serving inferences in a stable way. 4 | 5 | FROM python:3.7-slim-buster 6 | 7 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 8 | wget \ 9 | nginx \ 10 | ca-certificates 11 | 12 | RUN pip install numpy==1.16.2 scipy==1.2.1 catboost pandas flask gevent gunicorn 13 | 14 | # Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard 15 | # output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE 16 | # keeps Python from writing the .pyc files which are unnecessary in this case. We also update 17 | # PATH so that the train and serve programs are found when the container is invoked. 18 | 19 | ENV PYTHONUNBUFFERED=TRUE 20 | ENV PYTHONDONTWRITEBYTECODE=TRUE 21 | ENV PATH="/opt/program:${PATH}" 22 | 23 | # Set up the program in the image 24 | COPY catboost_regressor /opt/program 25 | WORKDIR /opt/program 26 | 27 | -------------------------------------------------------------------------------- /catboost_bring_your_own_container_local_training_and_serving/container/catboost_regressor/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | proxy_read_timeout 1200s; 27 | 28 | location ~ ^/(ping|invocations) { 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | proxy_set_header Host $http_host; 31 | proxy_redirect off; 32 | proxy_pass http://gunicorn; 33 | } 34 | 35 | location / { 36 | return 404 "{}"; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /catboost_bring_your_own_container_local_training_and_serving/container/catboost_regressor/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | cpu_count = multiprocessing.cpu_count() 24 | 25 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 26 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 27 | 28 | def sigterm_handler(nginx_pid, gunicorn_pid): 29 | try: 30 | os.kill(nginx_pid, signal.SIGQUIT) 31 | except OSError: 32 | pass 33 | try: 34 | os.kill(gunicorn_pid, signal.SIGTERM) 35 | except OSError: 36 | pass 37 | 38 | sys.exit(0) 39 | 40 | def start_server(): 41 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 42 | 43 | 44 | # link the log streams to stdout/err so they will be logged to the container logs 45 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 46 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 47 | 48 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 49 | gunicorn = subprocess.Popen(['gunicorn', 50 | '--timeout', str(model_server_timeout), 51 | '-k', 'gevent', 52 | '-b', 'unix:/tmp/gunicorn.sock', 53 | '-w', str(model_server_workers), 54 | 'wsgi:app']) 55 | 56 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 57 | 58 | # If either subprocess exits, so do we. 59 | pids = set([nginx.pid, gunicorn.pid]) 60 | while True: 61 | pid, _ = os.wait() 62 | if pid in pids: 63 | break 64 | 65 | sigterm_handler(nginx.pid, gunicorn.pid) 66 | print('Inference server exiting') 67 | 68 | # The main routine just invokes the start function. 69 | 70 | if __name__ == '__main__': 71 | start_server() 72 | -------------------------------------------------------------------------------- /catboost_bring_your_own_container_local_training_and_serving/container/catboost_regressor/wsgi.py: -------------------------------------------------------------------------------- 1 | import predictor as myapp 2 | 3 | # This is just a simple wrapper for gunicorn to find your app. 4 | # If you want to change the algorithm file, simply change "predictor" above to the 5 | # new file. 
6 | 7 | app = myapp.app 8 | -------------------------------------------------------------------------------- /catboost_bring_your_own_container_local_training_toolkit/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build an image that can do training in SageMaker with SageMaker Training Toolkit 2 | 3 | FROM python:3.10 4 | 5 | RUN pip install --upgrade pip 6 | RUN pip install catboost pandas 7 | RUN pip install sagemaker-training 8 | -------------------------------------------------------------------------------- /catboost_scikit_learn_script_mode_local_training_and_serving/code/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | catboost==0.26 4 | -------------------------------------------------------------------------------- /catboost_scikit_learn_script_mode_local_training_and_serving/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /dask_bring_your_own_container_local_processing/container/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:4.7.12 2 | 3 | ENV PYTHONHASHSEED 0 4 | ENV PYTHONIOENCODING UTF-8 5 | 6 | # Install required Python packages for Dask 7 | RUN conda install --yes dask distributed dask-ml boto3 8 | 9 | # Install additional Python packages 10 | RUN conda install aiohttp boto3 11 | 12 | # Dumb init 13 | RUN wget -O /usr/local/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.0/dumb-init_1.2.0_amd64 14 | RUN chmod +x /usr/local/bin/dumb-init 15 | 16 | RUN mkdir /opt/app /etc/dask 17 | COPY dask_config/dask.yaml /etc/dask/ 18 | 19 | # Set up bootstrapping program and Dask configuration 20 | COPY program /opt/program 21 | RUN chmod +x /opt/program/bootstrap.py 22 | 23 | ENTRYPOINT ["/opt/program/bootstrap.py"] -------------------------------------------------------------------------------- /dask_bring_your_own_container_local_processing/container/dask_config/dask.yaml: -------------------------------------------------------------------------------- 1 | logging: 2 | distributed: info 3 | bokeh: critical 4 | tornado: critical 5 | 6 | scheduler: 7 | work-stealing: True 8 | allowed-failures: 10 9 | 10 | admin: 11 | log-format: '%(name)s - %(levelname)s - %(message)s' -------------------------------------------------------------------------------- /dask_bring_your_own_container_local_processing/dask_bring_your_own_container_local_processing.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that runs a Dask Processing job on a JSON fetched from a web site. 2 | # The output of the processing will be the total number of files found in the JSON. 3 | # This implementation will work on your *local computer*. 4 | # 5 | # Prerequisites: 6 | # 1. Install required Python packages: 7 | # pip install boto3 sagemaker pandas scikit-learn 8 | # pip install 'sagemaker[local]' 9 | # 2. Docker Desktop has to be installed on your computer, and running. 10 | # 3. Open terminal and run the following commands: 11 | # docker build -t sagemaker-dask-processing-local container/. 
12 | ######################################################################################################################## 13 | 14 | from sagemaker.local import LocalSession 15 | from sagemaker.processing import ScriptProcessor, ProcessingOutput 16 | import boto3 17 | 18 | 19 | s3 = boto3.client('s3') 20 | sagemaker_session = LocalSession() 21 | sagemaker_session.config = {'local': {'local_code': True}} 22 | 23 | # For local training a dummy role will be sufficient 24 | role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 25 | 26 | dask_processor = ScriptProcessor(command=["/opt/program/bootstrap.py"], 27 | image_uri='sagemaker-dask-processing-local', 28 | role=role, 29 | instance_count=1, 30 | instance_type='local') 31 | 32 | dask_processor.run(code='processing_script.py', 33 | outputs=[ProcessingOutput( 34 | output_name='filenames_processed_data', 35 | source='/opt/ml/processing/processed_data/')], 36 | arguments=['site_uri', 'https://archive.analytics.mybinder.org/index.jsonl'] 37 | ) 38 | 39 | preprocessing_job_description = dask_processor.jobs[-1].describe() 40 | output_config = preprocessing_job_description['ProcessingOutputConfig'] 41 | 42 | print(output_config) 43 | 44 | for output in output_config['Outputs']: 45 | if output['OutputName'] == 'filenames_processed_data': 46 | filenames_processed_data_file = output['S3Output']['S3Uri'] 47 | bucket = filenames_processed_data_file.split("/")[:3][2] 48 | output_file_name = '/'.join(filenames_processed_data_file.split("/")[3:])+"/filenames_in_json.txt" 49 | 50 | print(f'Opening processing output file: {"s3://"+bucket+"/"+output_file_name}') 51 | data = s3.get_object(Bucket=bucket, Key=output_file_name) 52 | contents = data['Body'].read() 53 | print('Processing output file content\n-----------\n') 54 | print(contents.decode("utf-8")) -------------------------------------------------------------------------------- /dask_bring_your_own_container_local_processing/processing_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | from dask.distributed import Client 5 | import dask.bag as db 6 | import json 7 | 8 | 9 | processed_data_path = '/opt/ml/processing/processed_data' 10 | 11 | 12 | def main(): 13 | print("Processing Started") 14 | 15 | # Convert command line args into a map of args 16 | args_iter = iter(sys.argv[1:]) 17 | args = dict(zip(args_iter, args_iter)) 18 | scheduler_ip = sys.argv[-1] 19 | print(f"scheduler_ip: {scheduler_ip}") 20 | 21 | # Start the Dask cluster client 22 | try: 23 | print("initiating client") 24 | client = Client("tcp://{ip}:8786".format(ip=scheduler_ip)) 25 | print("Cluster information: {}".format(client)) 26 | except Exception as err: 27 | logging.exception(err) 28 | 29 | print(f"Received arguments {args}") 30 | 31 | if "site_uri" in args: 32 | print(f"Processing web site JSON: {args['site_uri']}") 33 | filenames = (db.read_text(args['site_uri']) 34 | .map(json.loads) 35 | .pluck('name') 36 | .compute()) 37 | 38 | filenames = ['https://archive.analytics.mybinder.org/' + fn for fn in filenames] 39 | print(f"Total filenames: {len(filenames)}") 40 | print(f"Sample filenames found: {filenames[:5]}") 41 | 42 | output_file = os.path.join(processed_data_path, "filenames_in_json.txt") 43 | print(f'Writing output file: {output_file}') 44 | with open(output_file, 'w') as outfile: 45 | outfile.write(json.dumps(filenames)) 46 | else: 47 | print("No `site_uri` parameter - doing 
nothing") 48 | 49 | print("Processing Complete") 50 | 51 | print(client) 52 | sys.exit(os.EX_OK) 53 | 54 | if __name__ == "__main__": 55 | main() -------------------------------------------------------------------------------- /deep_java_library_bring_your_own_container_serving_local_mode/.gitignore: -------------------------------------------------------------------------------- 1 | HELP.md 2 | .gradle 3 | build/ 4 | !gradle/wrapper/gradle-wrapper.jar 5 | !**/src/main/**/build/ 6 | !**/src/test/**/build/ 7 | 8 | ### STS ### 9 | .apt_generated 10 | .classpath 11 | .factorypath 12 | .project 13 | .settings 14 | .springBeans 15 | .sts4-cache 16 | bin/ 17 | !**/src/main/**/bin/ 18 | !**/src/test/**/bin/ 19 | 20 | ### IntelliJ IDEA ### 21 | .idea 22 | *.iws 23 | *.iml 24 | *.ipr 25 | out/ 26 | !**/src/main/**/out/ 27 | !**/src/test/**/out/ 28 | 29 | ### NetBeans ### 30 | /nbproject/private/ 31 | /nbbuild/ 32 | /dist/ 33 | /nbdist/ 34 | /.nb-gradle/ 35 | 36 | ### VS Code ### 37 | .vscode/ 38 | -------------------------------------------------------------------------------- /deep_java_library_bring_your_own_container_serving_local_mode/container/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:11 2 | 3 | COPY src/ src/ 4 | COPY gradle/ gradle/ 5 | COPY build.gradle settings.gradle gradlew ./ 6 | 7 | RUN ./gradlew assemble 8 | 9 | ENTRYPOINT ["java","-jar","./build/libs/sagemaker-djl-serving-0.0.1-SNAPSHOT.jar"] -------------------------------------------------------------------------------- /deep_java_library_bring_your_own_container_serving_local_mode/container/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'org.springframework.boot' version '2.6.6' 3 | id 'io.spring.dependency-management' version '1.0.11.RELEASE' 4 | id 'java' 5 | } 6 | 7 | group = 'com.example' 8 | version = '0.0.1-SNAPSHOT' 9 | sourceCompatibility = '11' 10 | 11 | configurations { 12 | compileOnly { 13 | extendsFrom annotationProcessor 14 | } 15 | } 16 | 17 | repositories { 18 | mavenCentral() 19 | } 20 | 21 | dependencies { 22 | implementation 'org.springframework.boot:spring-boot-starter-web' 23 | implementation platform("ai.djl:api:0.16.0") 24 | 25 | implementation "ai.djl.pytorch:pytorch-engine:0.16.0" 26 | 27 | compileOnly 'org.projectlombok:lombok' 28 | annotationProcessor 'org.projectlombok:lombok' 29 | testImplementation 'org.springframework.boot:spring-boot-starter-test' 30 | } 31 | 32 | tasks.named('test') { 33 | useJUnitPlatform() 34 | } 35 | -------------------------------------------------------------------------------- /deep_java_library_bring_your_own_container_serving_local_mode/container/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/deep_java_library_bring_your_own_container_serving_local_mode/container/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /deep_java_library_bring_your_own_container_serving_local_mode/container/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.4.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | 
zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /deep_java_library_bring_your_own_container_serving_local_mode/container/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name='sagemaker-djl-serving' 2 | -------------------------------------------------------------------------------- /deep_java_library_bring_your_own_container_serving_local_mode/container/src/main/java/com/example/sagemaker/djl/serving/SagemakerDjlServingApplication.java: -------------------------------------------------------------------------------- 1 | package com.example.sagemaker.djl.serving; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class SagemakerDjlServingApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(SagemakerDjlServingApplication.class, args); 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /deep_java_library_bring_your_own_container_serving_local_mode/container/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /deep_java_library_bring_your_own_container_serving_local_mode/container/src/test/java/com/example/sagemaker/djl/serving/SagemakerDjlServingApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.example.sagemaker.djl.serving; 2 | 3 | import org.junit.jupiter.api.Test; 4 | import org.springframework.boot.test.context.SpringBootTest; 5 | 6 | @SpringBootTest 7 | class SagemakerDjlServingApplicationTests { 8 | 9 | @Test 10 | void contextLoads() { 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /deep_java_library_bring_your_own_container_serving_local_mode/deep_java_library_bring_your_own_container_serving_local_mode.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that performs inference with Deep Java Library (DJL). 2 | # Example was referenced from: https://docs.djl.ai/jupyter/load_pytorch_model.html 3 | # This implementation will work on your local computer. 4 | # 5 | # Prerequisites: 6 | # 1. Install required Python packages: 7 | # pip install boto3 sagemaker pandas scikit-learn 8 | # pip install 'sagemaker[local]' 9 | # 2. Docker Desktop has to be installed on your computer, and running. 10 | # 3. Open terminal and run the following commands: 11 | # docker build -t sagemaker-djl-serving-local ./container/. 
12 | ######################################################################################################################## 13 | 14 | import os 15 | 16 | from sagemaker import Model, LocalSession, Predictor 17 | from sagemaker.deserializers import JSONDeserializer 18 | from sagemaker.serializers import JSONSerializer 19 | 20 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 21 | 22 | sagemaker_session = LocalSession() 23 | sagemaker_session.config = {'local': {'local_code': True}} 24 | 25 | 26 | def main(): 27 | 28 | image = 'sagemaker-djl-serving-local' 29 | endpoint_name = "my-local-endpoint" 30 | 31 | role = DUMMY_IAM_ROLE 32 | 33 | model = Model( 34 | image_uri=image, 35 | role=role, 36 | model_data="s3://aws-ml-blog/artifacts/deep-java-library-bring-your-own-container-serving/model.tar.gz", 37 | ) 38 | 39 | print('Deploying endpoint in local mode') 40 | print( 41 | 'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 42 | 43 | model.deploy( 44 | initial_instance_count=1, 45 | instance_type='local', 46 | endpoint_name=endpoint_name 47 | ) 48 | 49 | predictor = Predictor(endpoint_name=endpoint_name, 50 | sagemaker_session=sagemaker_session, 51 | serializer=JSONSerializer(), 52 | deserializer=JSONDeserializer()) 53 | 54 | predictions = predictor.predict("https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg") 55 | print(f'predictions: {predictions}') 56 | 57 | print('About to delete the endpoint to stop paying (if in cloud mode).') 58 | predictor.delete_endpoint() 59 | 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /delta_lake_bring_your_own_container_local_training_and_serving/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build an image that can do training and inference in SageMaker 2 | 3 | FROM openjdk:8-jre-slim 4 | 5 | RUN apt-get update 6 | RUN apt-get install -y python3 python3-setuptools python3-pip python-dev python3-dev 7 | 8 | RUN apt-get install -y --no-install-recommends \ 9 | wget \ 10 | nginx \ 11 | ca-certificates 12 | 13 | RUN pip3 install catboost pandas flask gevent gunicorn pyspark==3.2.0 delta-spark 14 | 15 | # Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard 16 | # output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE 17 | # keeps Python from writing the .pyc files which are unnecessary in this case. We also update 18 | # PATH so that the train and serve programs are found when the container is invoked. 
19 | 20 | ENV PYTHONUNBUFFERED=TRUE 21 | ENV PYTHONDONTWRITEBYTECODE=TRUE 22 | ENV PATH="/opt/program:${PATH}" 23 | 24 | # Set up the program in the image 25 | COPY catboost_regressor /opt/program 26 | WORKDIR /opt/program 27 | 28 | -------------------------------------------------------------------------------- /delta_lake_bring_your_own_container_local_training_and_serving/container/catboost_regressor/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | proxy_read_timeout 1200s; 27 | 28 | location ~ ^/(ping|invocations) { 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | proxy_set_header Host $http_host; 31 | proxy_redirect off; 32 | proxy_pass http://gunicorn; 33 | } 34 | 35 | location / { 36 | return 404 "{}"; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /delta_lake_bring_your_own_container_local_training_and_serving/container/catboost_regressor/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | cpu_count = multiprocessing.cpu_count() 24 | 25 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 26 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 27 | 28 | def sigterm_handler(nginx_pid, gunicorn_pid): 29 | try: 30 | os.kill(nginx_pid, signal.SIGQUIT) 31 | except OSError: 32 | pass 33 | try: 34 | os.kill(gunicorn_pid, signal.SIGTERM) 35 | except OSError: 36 | pass 37 | 38 | sys.exit(0) 39 | 40 | def start_server(): 41 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 42 | 43 | 44 | # link the log streams to stdout/err so they will be logged to the container logs 45 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 46 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 47 | 48 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 49 | gunicorn = subprocess.Popen(['gunicorn', 50 | '--timeout', str(model_server_timeout), 51 | '-k', 'gevent', 52 | '-b', 'unix:/tmp/gunicorn.sock', 53 | '-w', str(model_server_workers), 54 | 'wsgi:app']) 55 | 56 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 57 | 58 | # If either subprocess exits, so do we. 59 | pids = set([nginx.pid, gunicorn.pid]) 60 | while True: 61 | pid, _ = os.wait() 62 | if pid in pids: 63 | break 64 | 65 | sigterm_handler(nginx.pid, gunicorn.pid) 66 | print('Inference server exiting') 67 | 68 | # The main routine just invokes the start function. 69 | 70 | if __name__ == '__main__': 71 | start_server() 72 | -------------------------------------------------------------------------------- /delta_lake_bring_your_own_container_local_training_and_serving/container/catboost_regressor/wsgi.py: -------------------------------------------------------------------------------- 1 | import predictor as myapp 2 | 3 | # This is just a simple wrapper for gunicorn to find your app. 4 | # If you want to change the algorithm file, simply change "predictor" above to the 5 | # new file. 6 | 7 | app = myapp.app 8 | -------------------------------------------------------------------------------- /delta_lake_bring_your_own_container_local_training_and_serving/delta_lake_bring_your_own_container_local_training_and_serving.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that trains a simple CatBoost Regressor tree model 2 | # on the california-housing dataset fetched from Delta Lake, directly from S3, and then performs inference. 3 | # This implementation will work on your *local computer*. 4 | # 5 | # Prerequisites: 6 | # 1. Install required Python packages: 7 | # pip install boto3 sagemaker pandas scikit-learn 8 | # pip install 'sagemaker[local]' 9 | # 2. Docker Desktop has to be installed on your computer, and running. 10 | # 3. Open terminal and run the following commands: 11 | # docker build -t sagemaker-delta-lake-training-local container/. 
12 | ######################################################################################################################## 13 | 14 | import pandas as pd 15 | from sagemaker.estimator import Estimator 16 | from sagemaker.local import LocalSession 17 | from sagemaker.predictor import csv_serializer 18 | 19 | sagemaker_session = LocalSession() 20 | sagemaker_session.config = {'local': {'local_code': True}} 21 | 22 | # For local training a dummy role will be sufficient 23 | role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 24 | 25 | image = 'sagemaker-delta-lake-training-local' 26 | 27 | print('Starting model training.') 28 | local_regressor = Estimator( 29 | image, 30 | role, 31 | instance_count=1, 32 | instance_type="local") 33 | 34 | train_location = "s3://aws-ml-blog/artifacts/delta-lake-bring-your-own-container/delta-table/california-housing/" 35 | local_regressor.fit({'train':train_location}, logs=True) 36 | 37 | print('Deploying endpoint in local mode') 38 | predictor = local_regressor.deploy(1, 'local', serializer=csv_serializer) 39 | 40 | payload = "-122.230003,37.880001,41.0,880.0,129.0,322.0,126.0,8.3252" 41 | predicted = predictor.predict(payload).decode('utf-8') 42 | print(f'Prediction: {predicted}') 43 | 44 | print('About to delete the endpoint to stop paying (if in cloud mode).') 45 | predictor.delete_endpoint() 46 | -------------------------------------------------------------------------------- /delta_sharing_bring_your_own_container_local_processing/container/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim-buster 2 | 3 | # Install scikit-learn and pandas 4 | RUN pip3 install pandas==0.25.3 scikit-learn==0.21.3 delta-sharing 5 | 6 | ENTRYPOINT ["python3"] -------------------------------------------------------------------------------- /delta_sharing_bring_your_own_container_local_processing/delta_sharing_bring_your_own_container_local_processing.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that runs a simple scikit-learn processing on data fetched from Delta Lake, using Delta Sharing. 2 | # The output of the processing will be total_cases per location. 3 | # This implementation will work on your *local computer*. 4 | # 5 | # Prerequisites: 6 | # 1. Install required Python packages: 7 | # pip install boto3 sagemaker pandas scikit-learn 8 | # pip install 'sagemaker[local]' 9 | # 2. Docker Desktop has to be installed on your computer, and running. 10 | # 3. Open terminal and run the following commands: 11 | # docker build -t sagemaker-delta-sharing-processing-local container/. 
12 | ######################################################################################################################## 13 | 14 | from sagemaker.local import LocalSession 15 | from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput 16 | import boto3 17 | 18 | 19 | s3 = boto3.client('s3') 20 | sagemaker_session = LocalSession() 21 | sagemaker_session.config = {'local': {'local_code': True}} 22 | 23 | # For local training a dummy role will be sufficient 24 | role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 25 | 26 | processor = ScriptProcessor(command=['python3'], 27 | image_uri='sagemaker-delta-sharing-processing-local', 28 | role=role, 29 | instance_count=1, 30 | instance_type='local') 31 | 32 | processor.run(code='processing_script.py', 33 | inputs=[ProcessingInput( 34 | source='./profile/', 35 | destination='/opt/ml/processing/profile/')], 36 | outputs=[ProcessingOutput( 37 | output_name='delta_lake_processed_data', 38 | source='/opt/ml/processing/processed_data/')] 39 | ) 40 | 41 | preprocessing_job_description = processor.jobs[-1].describe() 42 | output_config = preprocessing_job_description['ProcessingOutputConfig'] 43 | 44 | print(output_config) 45 | 46 | for output in output_config['Outputs']: 47 | if output['OutputName'] == 'delta_lake_processed_data': 48 | delta_lake_processed_data_file = output['S3Output']['S3Uri'] 49 | bucket = delta_lake_processed_data_file.split("/")[:3][2] 50 | output_file_name = '/'.join(delta_lake_processed_data_file.split("/")[3:])+"/total_cases_per_location.csv" 51 | 52 | print(f'Opening processing output file: {"s3://"+bucket+"/"+output_file_name}') 53 | data = s3.get_object(Bucket=bucket, Key=output_file_name) 54 | contents = data['Body'].read() 55 | print('Processing output file content\n-----------\n') 56 | print(contents.decode("utf-8")) -------------------------------------------------------------------------------- /delta_sharing_bring_your_own_container_local_processing/processing_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import delta_sharing 4 | 5 | 6 | profile_path = '/opt/ml/processing/profile/' 7 | processed_data_path = '/opt/ml/processing/processed_data' 8 | 9 | 10 | def main(): 11 | print("Processing Started") 12 | 13 | # Convert command line args into a map of args 14 | args_iter = iter(sys.argv[1:]) 15 | args = dict(zip(args_iter, args_iter)) 16 | 17 | print('Received arguments {}'.format(args)) 18 | 19 | profile_files = [os.path.join(profile_path, file) for file in os.listdir(profile_path)] 20 | if len(profile_files) == 0: 21 | raise ValueError( 22 | ( 23 | "There are no files in {}.\n" 24 | + "This usually indicates that the channel ({}) was incorrectly specified,\n" 25 | + "the data specification in S3 was incorrectly specified or the role specified\n" 26 | + "does not have permission to access the data." 
27 | ).format(profile_path) 28 | ) 29 | 30 | profile_file = profile_files[0] 31 | print(f'Found profile file: {profile_file}') 32 | 33 | # Create a SharingClient 34 | client = delta_sharing.SharingClient(profile_file) 35 | table_url = profile_file + "#delta_sharing.default.owid-covid-data" 36 | 37 | # Load the table as a Pandas DataFrame 38 | print('Loading owid-covid-data table from Delta Lake') 39 | data = delta_sharing.load_as_pandas(table_url) 40 | print(f'Data shape: {data.shape}') 41 | 42 | # Aggregate total_cases per location 43 | cases_per_location = data.groupby(['location'])['total_cases'].sum() 44 | print(f'cases_per_location\n{cases_per_location}\n') 45 | 46 | output_file = os.path.join(processed_data_path,'total_cases_per_location.csv') 47 | print(f'Writing output file: {output_file}') 48 | cases_per_location.to_csv(output_file) 49 | 50 | print("Processing Complete") 51 | 52 | if __name__ == "__main__": 53 | main() -------------------------------------------------------------------------------- /delta_sharing_bring_your_own_container_local_processing/profile/open-datasets.share: -------------------------------------------------------------------------------- 1 | { 2 | "shareCredentialsVersion": 1, 3 | "endpoint": "https://sharing.delta.io/delta-sharing/", 4 | "bearerToken": "faaie590d541265bcab1f2de9813274bf233" 5 | } -------------------------------------------------------------------------------- /delta_sharing_scikit_learn_local_training_and_serving/code/requirements.txt: -------------------------------------------------------------------------------- 1 | delta-sharing -------------------------------------------------------------------------------- /delta_sharing_scikit_learn_local_training_and_serving/delta_sharing_scikit_learn_local_training_and_serving.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that trains a simple scikit-learn model 2 | # on the boston-housing dataset fetched from Delta Lake, using Delta Sharing. 3 | # This implementation will work on your *local computer* or in the *AWS Cloud*. 4 | # 5 | # Delta Sharing: An Open Protocol for Secure Data Sharing 6 | # https://github.com/delta-io/delta-sharing 7 | # 8 | # Prerequisites: 9 | # 1. Install required Python packages: 10 | # `pip install -r requirements.txt` 11 | # 2. Docker Desktop installed and running on your computer: 12 | # `docker ps` 13 | # 3. You should have AWS credentials configured on your local machine 14 | # in order to be able to pull the docker image from ECR. 
15 | ############################################################################################### 16 | 17 | 18 | from sagemaker.sklearn import SKLearn 19 | 20 | 21 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 22 | 23 | 24 | def main(): 25 | 26 | print('Starting model training.') 27 | print('Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 28 | 29 | sklearn = SKLearn( 30 | entry_point="scikit_boston_housing.py", 31 | source_dir='code', 32 | framework_version="0.23-1", 33 | instance_type="local", 34 | role=DUMMY_IAM_ROLE 35 | ) 36 | 37 | delta_lake_profile_file = "file://./profile/open-datasets.share" 38 | 39 | sklearn.fit({"train": delta_lake_profile_file}) 40 | print('Completed model training') 41 | 42 | print('Deploying endpoint in local mode') 43 | predictor = sklearn.deploy(initial_instance_count=1, instance_type='local') 44 | 45 | test_sample = [[0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98]] 46 | prediction = predictor.predict(test_sample) 47 | print(f'Prediction: {prediction}') 48 | 49 | print('About to delete the endpoint to stop paying (if in cloud mode).') 50 | predictor.delete_endpoint(predictor.endpoint_name) 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /delta_sharing_scikit_learn_local_training_and_serving/profile/open-datasets.share: -------------------------------------------------------------------------------- 1 | { 2 | "shareCredentialsVersion": 1, 3 | "endpoint": "https://sharing.delta.io/delta-sharing/", 4 | "bearerToken": "faaie590d541265bcab1f2de9813274bf233" 5 | } -------------------------------------------------------------------------------- /delta_sharing_scikit_learn_local_training_and_serving/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /general_pipeline_local_debug/Readme.md: -------------------------------------------------------------------------------- 1 | See the blog post for instructions on 2 | [Debugging Python Code in Amazon SageMaker Locally Using Visual Studio Code and PyCharm: A Step-by-Step Guide](https://dev.to/arlind0xbb/debugging-python-code-in-amazon-sagemaker-locally-using-visual-studio-code-and-pycharm-a-step-by-step-guide-2cbc) 3 | -------------------------------------------------------------------------------- /gensim_with_word2vec_model_artifacts_local_serving/code/inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from gensim.models import KeyedVectors 4 | 5 | 6 | def input_fn(request_body, request_content_type): 7 | print(f"request_body: {request_body}") 8 | if request_content_type == "application/json": 9 | payload = json.loads(request_body) 10 | instances = payload["instances"] 11 | return instances 12 | else: 13 | raise Exception(f"{request_content_type} content type not supported") 14 | 15 | 16 | def predict_fn(instances, word_vectors): 17 | print(f"instances: {instances}") 18 | print("calling model") 19 | predictions = word_vectors.most_similar(positive=instances) 20 | return predictions 21 | 22 | 23 | def model_fn(model_dir): 24 | print("loading model from: {}".format(model_dir)) 25 | word_vectors = 
KeyedVectors.load_word2vec_format(os.path.join(model_dir, "vectors.txt"), binary=False) 26 | print(f'word vectors length: {len(word_vectors)}') 27 | return word_vectors 28 | -------------------------------------------------------------------------------- /gensim_with_word2vec_model_artifacts_local_serving/code/requirements.txt: -------------------------------------------------------------------------------- 1 | gensim==4.1.2 -------------------------------------------------------------------------------- /gensim_with_word2vec_model_artifacts_local_serving/gensim_with_word2vec_model_artifacts_local_serving.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that serves a Word2Vec model, trained with BlazingText algorithm with inference using gensim. 2 | # This implementation will work on your *local computer* or in the *AWS Cloud*. 3 | # 4 | # Prerequisites: 5 | # 1. Install required Python packages: 6 | # `pip install -r requirements.txt` 7 | # 2. Docker Desktop installed and running on your computer: 8 | # `docker ps` 9 | # 3. You should have AWS credentials configured on your local machine 10 | # in order to be able to pull the docker image from ECR. 11 | ############################################################################################### 12 | 13 | import boto3 14 | from sagemaker.deserializers import JSONDeserializer 15 | from sagemaker.serializers import JSONSerializer 16 | from sagemaker.sklearn import SKLearnModel 17 | 18 | 19 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 20 | s3 = boto3.client('s3') 21 | 22 | 23 | def main(): 24 | 25 | # Download a pre-trained model archive file 26 | print('Downloading a pre-trained model archive file') 27 | s3.download_file('aws-ml-blog', 'artifacts/word2vec_algorithm_model_artifacts/model.tar.gz', 'model.tar.gz') 28 | 29 | print('Deploying endpoint in local mode') 30 | print( 31 | 'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 32 | model = SKLearnModel( 33 | role=DUMMY_IAM_ROLE, 34 | model_data='file://./model.tar.gz', 35 | framework_version='0.23-1', 36 | py_version='py3', 37 | source_dir='code', 38 | entry_point='inference.py' 39 | ) 40 | 41 | print('Deploying endpoint in local mode') 42 | predictor = model.deploy(initial_instance_count=1, instance_type='local') 43 | 44 | payload = {"instances": ["dog","cat"]} 45 | predictor.serializer = JSONSerializer() 46 | predictor.deserializer = JSONDeserializer() 47 | predictions = predictor.predict(payload) 48 | print(f"Predictions: {predictions}") 49 | 50 | print('About to delete the endpoint to stop paying (if in cloud mode).') 51 | predictor.delete_endpoint(predictor.endpoint_name) 52 | 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /gensim_with_word2vec_model_artifacts_local_serving/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /hdbscan_bring_your_own_container_local_training/container/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:slim 2 | 3 | MAINTAINER Amazon AI 4 | 5 | RUN apt-get update \ 6 | && apt-get install -y 
--no-install-recommends \ 7 | gcc \ 8 | python3-dev 9 | 10 | RUN pip3 install --no-cache-dir -U \ 11 | numpy \ 12 | pandas \ 13 | hdbscan==0.8.27 14 | 15 | ENV PYTHONUNBUFFERED=TRUE 16 | ENV PYTHONDONTWRITEBYTECODE=TRUE 17 | ENV PATH="/opt/program:${PATH}" 18 | 19 | COPY hdbscan /opt/program 20 | WORKDIR /opt/ 21 | ENTRYPOINT ["train.py"] 22 | -------------------------------------------------------------------------------- /hdbscan_bring_your_own_container_local_training/hdbscan_bring_your_own_container_local_training.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that trains a simple HDBSCAN model. 2 | # This implementation will work on your *local computer*. 3 | # 4 | # Prerequisites: 5 | # 1. Install required Python packages: 6 | # pip install boto3 sagemaker pandas scikit-learn 7 | # pip install 'sagemaker[local]' 8 | # 2. Docker Desktop has to be installed on your computer, and running. 9 | # 3. Open terminal and run the following commands: 10 | # docker build -t sagemaker-hdbscan-local container/. 11 | ######################################################################################################################## 12 | 13 | import os 14 | import boto3 15 | import pickle 16 | import tarfile 17 | import pandas as pd 18 | from sagemaker.estimator import Estimator 19 | from sklearn.datasets import make_blobs 20 | 21 | 22 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 23 | local_train = './data/train/blobs.csv' 24 | s3 = boto3.resource('s3') 25 | 26 | 27 | def download_training_and_eval_data(): 28 | if os.path.isfile('./data/train/blobs.csv'): 29 | print('Training dataset exist. Skipping Download') 30 | else: 31 | print('Downloading training dataset') 32 | 33 | os.makedirs("./data", exist_ok=True) 34 | os.makedirs("./data/train", exist_ok=True) 35 | 36 | blobs, labels = make_blobs(n_samples=2000, n_features=10) 37 | train_data = pd.DataFrame(blobs) 38 | train_data.to_csv(local_train, header=None, index=False) 39 | 40 | print('Downloading completed') 41 | 42 | 43 | def main(): 44 | download_training_and_eval_data() 45 | 46 | print('Starting model training.') 47 | print('Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 48 | 49 | image = 'sagemaker-hdbscan-local' 50 | 51 | local_estimator = Estimator( 52 | image, 53 | DUMMY_IAM_ROLE, 54 | instance_count=1, 55 | instance_type="local", 56 | hyperparameters={ 57 | "min_cluster_size": 50, 58 | }) 59 | 60 | train_location = 'file://' + local_train 61 | 62 | local_estimator.fit({'train':train_location}) 63 | print('Completed model training') 64 | 65 | model_data = local_estimator.model_data 66 | print(model_data) 67 | 68 | 69 | if __name__ == "__main__": 70 | main() 71 | -------------------------------------------------------------------------------- /huggingface_hebert_sentiment_analysis_local_serving/code/inference.py: -------------------------------------------------------------------------------- 1 | import json 2 | from transformers import pipeline 3 | 4 | JSON_CONTENT_TYPE = 'application/json' 5 | 6 | 7 | def model_fn(model_dir): 8 | sentiment_analysis = pipeline( 9 | "sentiment-analysis", 10 | model=model_dir, 11 | tokenizer=model_dir, 12 | return_all_scores=True 13 | ) 14 | 15 | return sentiment_analysis 16 | 17 | 18 | def input_fn(serialized_input_data, content_type=JSON_CONTENT_TYPE): 19 | if content_type == 
JSON_CONTENT_TYPE: 20 | input_data = json.loads(serialized_input_data) 21 | return input_data 22 | 23 | else: 24 | raise Exception('Requested unsupported ContentType in Accept: ' + content_type) 25 | return 26 | 27 | 28 | def predict_fn(input_data, model): 29 | print('Input Data: {}'.format(input_data)) 30 | 31 | return model(input_data) 32 | 33 | 34 | def output_fn(prediction_output, accept=JSON_CONTENT_TYPE): 35 | if accept == JSON_CONTENT_TYPE: 36 | return json.dumps(prediction_output), accept 37 | 38 | raise Exception('Requested unsupported ContentType in Accept: ' + accept) -------------------------------------------------------------------------------- /huggingface_hebert_sentiment_analysis_local_serving/code/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.38.0 -------------------------------------------------------------------------------- /huggingface_hebert_sentiment_analysis_local_serving/hebert_model.py: -------------------------------------------------------------------------------- 1 | from transformers import pipeline 2 | 3 | print('Initializing Pipeline') 4 | sentiment_analysis = pipeline( 5 | "sentiment-analysis", 6 | model="avichr/heBERT_sentiment_analysis", 7 | tokenizer="avichr/heBERT_sentiment_analysis", 8 | return_all_scores = True 9 | ) 10 | 11 | print('Saving model and tokenizers files') 12 | sentiment_analysis.save_pretrained("./model") 13 | -------------------------------------------------------------------------------- /huggingface_hebert_sentiment_analysis_local_serving/huggingface_hebert_sentiment_analysis_local_serving.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that deploy a pre-trained PyTorch HeBERT model on Amazon SageMaker Endpoint. 2 | # This implementation will work on your *local computer*. 3 | # 4 | # Prerequisites: 5 | # 1. Create Python Virtual Environment: 6 | # `python3 -m venv env` 7 | # 2. Start the virtual environment 8 | # `source env/bin/activate` 9 | # 3. Install required Python packages: 10 | # `pip install -r requirements.txt` 11 | # 4. Run `hebert_model.py` to create the model: 12 | # `python hebert_model.py` 13 | # 5. Create `model.tar.gz` file for SageMaker to use: 14 | # `cd model && tar -czf ../model.tar.gz * && cd ..` 15 | # 6. Docker Desktop installed and running on your computer: 16 | # `docker ps` 17 | # 7. You should have AWS credentials configured on your local machine 18 | # in order to be able to pull the docker image from ECR. 
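# --- Illustrative sketch (not a file from this repository) -------------------------
# Before deploying the local endpoint, the handlers in code/inference.py can be
# exercised directly, in the same order the serving container calls them
# (model_fn -> input_fn -> predict_fn -> output_fn). Run this from the code/
# directory; "../model" is the directory produced by hebert_model.py in step 4 above.
import json

from inference import model_fn, input_fn, predict_fn, output_fn

model = model_fn("../model")
request_body = json.dumps("אני אוהב לעבוד באמזון")

data = input_fn(request_body, "application/json")
prediction = predict_fn(data, model)
body, content_type = output_fn(prediction, "application/json")
print(content_type, body)
# -----------------------------------------------------------------------------------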
19 | ############################################################################################## 20 | 21 | import sagemaker 22 | from sagemaker.local import LocalSession 23 | from sagemaker.pytorch.model import PyTorchModel 24 | 25 | 26 | def main(): 27 | sagemaker_session = LocalSession() 28 | sagemaker_session.config = {'local': {'local_code': True}} 29 | 30 | # For local training a dummy role will be sufficient 31 | role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 32 | 33 | print('Deploying local mode endpoint') 34 | print('Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 35 | 36 | pytorch_model = PyTorchModel(model_data='./model.tar.gz', 37 | role=role, 38 | framework_version="1.8", 39 | source_dir="code", 40 | py_version="py3", 41 | entry_point="inference.py") 42 | 43 | predictor = pytorch_model.deploy(initial_instance_count=1, instance_type='local') 44 | 45 | predictor.serializer = sagemaker.serializers.JSONSerializer() 46 | predictor.deserializer = sagemaker.deserializers.JSONDeserializer() 47 | 48 | result = predictor.predict("אני אוהב לעבוד באמזון") 49 | print('result: {}'.format(result)) 50 | 51 | predictor.delete_endpoint(predictor.endpoint) 52 | 53 | if __name__ == "__main__": 54 | main() -------------------------------------------------------------------------------- /huggingface_hebert_sentiment_analysis_local_serving/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | torch==2.2.0 6 | transformers==4.38.0 -------------------------------------------------------------------------------- /img/activate_specific_conda_environment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/activate_specific_conda_environment.png -------------------------------------------------------------------------------- /img/aws_deep_learning_containers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/aws_deep_learning_containers.png -------------------------------------------------------------------------------- /img/aws_ml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/aws_ml.png -------------------------------------------------------------------------------- /img/create_sagemaker_local_notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/create_sagemaker_local_notebook.png -------------------------------------------------------------------------------- /img/debug_and_resume_program.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/debug_and_resume_program.png -------------------------------------------------------------------------------- /img/debug_waiting_for_process_connection.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/debug_waiting_for_process_connection.png -------------------------------------------------------------------------------- /img/debug_your_application.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/debug_your_application.png -------------------------------------------------------------------------------- /img/debug_your_application_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/debug_your_application_2.png -------------------------------------------------------------------------------- /img/docker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/docker.png -------------------------------------------------------------------------------- /img/icons.actions.startDebugger.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /img/inference_success.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/inference_success.png -------------------------------------------------------------------------------- /img/install_requirements_txt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/install_requirements_txt.png -------------------------------------------------------------------------------- /img/list_conda_envs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/list_conda_envs.png -------------------------------------------------------------------------------- /img/local_machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/local_machine.png -------------------------------------------------------------------------------- /img/map_container_code_to_your_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/map_container_code_to_your_project.png -------------------------------------------------------------------------------- /img/map_container_code_to_your_project_fix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/map_container_code_to_your_project_fix.png 
-------------------------------------------------------------------------------- /img/map_container_code_to_your_project_fixed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/map_container_code_to_your_project_fixed.png -------------------------------------------------------------------------------- /img/new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/new.png -------------------------------------------------------------------------------- /img/open_terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/open_terminal.png -------------------------------------------------------------------------------- /img/open_tf_training_and_serving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/open_tf_training_and_serving.png -------------------------------------------------------------------------------- /img/output_tf_training_and_serving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/output_tf_training_and_serving.png -------------------------------------------------------------------------------- /img/py_remote_debug.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/py_remote_debug.png -------------------------------------------------------------------------------- /img/pycharm_sagemaker_local_processing_jobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/pycharm_sagemaker_local_processing_jobs.png -------------------------------------------------------------------------------- /img/pycharm_sagemaker_local_serving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/pycharm_sagemaker_local_serving.png -------------------------------------------------------------------------------- /img/pycharm_sagemaker_local_tf2_debug_create_conf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/pycharm_sagemaker_local_tf2_debug_create_conf.png -------------------------------------------------------------------------------- /img/pycharm_sagemaker_local_training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/pycharm_sagemaker_local_training.png 
-------------------------------------------------------------------------------- /img/pydevd_pycharm_install.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/pydevd_pycharm_install.png -------------------------------------------------------------------------------- /img/python_interpreter_final_add_venv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/python_interpreter_final_add_venv.png -------------------------------------------------------------------------------- /img/python_interpreter_initial_add_venv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/python_interpreter_initial_add_venv.png -------------------------------------------------------------------------------- /img/python_interpreter_save_new_venv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/python_interpreter_save_new_venv.png -------------------------------------------------------------------------------- /img/run_tf_training_and_serving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/run_tf_training_and_serving.png -------------------------------------------------------------------------------- /img/set_breakpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/set_breakpoint.png -------------------------------------------------------------------------------- /img/start_run_python.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/start_run_python.png -------------------------------------------------------------------------------- /img/windows_error_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/windows_error_01.png -------------------------------------------------------------------------------- /img/windows_error_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/windows_error_02.png -------------------------------------------------------------------------------- /img/windows_image_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/windows_image_01.png -------------------------------------------------------------------------------- /img/windows_image_02.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/img/windows_image_02.png -------------------------------------------------------------------------------- /lightgbm_bring_your_own_container_local_training_and_serving/build_and_push.sh: -------------------------------------------------------------------------------- 1 | 2 | # The name of our algorithm 3 | algorithm_name=sagemaker-lightgbm-regression 4 | 5 | cd container 6 | 7 | chmod +x lightgbm_regression/train 8 | chmod +x lightgbm_regression/serve 9 | 10 | account=$(aws sts get-caller-identity --query Account --output text) 11 | 12 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 13 | region=$(aws configure get region) 14 | region=${region:-us-west-2} 15 | 16 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest" 17 | 18 | # If the repository doesn't exist in ECR, create it. 19 | aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1 20 | 21 | if [ $? -ne 0 ] 22 | then 23 | aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null 24 | fi 25 | 26 | # Get the login command from ECR and execute it directly 27 | aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname} 28 | 29 | # Build the docker image locally with the image name and then push it to ECR 30 | # with the full name. 31 | 32 | docker build -t ${algorithm_name} . 33 | docker tag ${algorithm_name} ${fullname} 34 | 35 | docker push ${fullname} 36 | -------------------------------------------------------------------------------- /lightgbm_bring_your_own_container_local_training_and_serving/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build an image that can do training and inference in SageMaker 2 | # This is a Python 2 image that uses the nginx, gunicorn, flask stack 3 | # for serving inferences in a stable way. 
4 | 5 | FROM ubuntu:16.04 6 | 7 | MAINTAINER Amazon AI 8 | 9 | ARG CONDA_DIR=/opt/conda 10 | ENV PATH $CONDA_DIR/bin:$PATH 11 | 12 | RUN apt-get update && \ 13 | apt-get install -y --no-install-recommends \ 14 | ca-certificates \ 15 | cmake \ 16 | build-essential \ 17 | gcc \ 18 | g++ \ 19 | git \ 20 | nginx \ 21 | wget && \ 22 | # python environment 23 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ 24 | /bin/bash Miniconda3-latest-Linux-x86_64.sh -f -b -p $CONDA_DIR && \ 25 | export PATH="$CONDA_DIR/bin:$PATH" && \ 26 | conda config --set always_yes yes --set changeps1 no && \ 27 | # lightgbm 28 | conda install -q -y numpy scipy scikit-learn pandas flask gevent gunicorn && \ 29 | git clone --recursive --branch stable --depth 1 https://github.com/Microsoft/LightGBM && \ 30 | cd LightGBM/python-package && python setup.py install && \ 31 | # clean 32 | apt-get autoremove -y && apt-get clean && \ 33 | conda clean -a -y && \ 34 | rm -rf /usr/local/src/* 35 | 36 | 37 | ENV PYTHONUNBUFFERED=TRUE 38 | ENV PYTHONDONTWRITEBYTECODE=TRUE 39 | ENV PATH="/opt/program:${PATH}" 40 | 41 | # Set up the program in the image 42 | COPY lightgbm_regression /opt/program 43 | WORKDIR /opt/program 44 | 45 | -------------------------------------------------------------------------------- /lightgbm_bring_your_own_container_local_training_and_serving/container/lightgbm_regression/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | proxy_read_timeout 1200s; 27 | 28 | location ~ ^/(ping|invocations) { 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | proxy_set_header Host $http_host; 31 | proxy_redirect off; 32 | proxy_pass http://gunicorn; 33 | } 34 | 35 | location / { 36 | return 404 "{}"; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /lightgbm_bring_your_own_container_local_training_and_serving/container/lightgbm_regression/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
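# --- Illustrative sketch (not a file from this repository) -------------------------
# gunicorn loads wsgi:app, and wsgi.py simply re-exports the Flask app defined in
# lightgbm_regression/predictor.py (not reproduced in this excerpt). A minimal
# predictor satisfying the /ping and /invocations routes that nginx proxies to
# gunicorn could look roughly like this; the pickle filename and the CSV request
# format are assumptions:
import io
import os
import pickle

import flask
import pandas as pd

MODEL_PATH = os.path.join("/opt/ml/model", "lightgbm-model.pkl")  # assumed artifact name

app = flask.Flask(__name__)
model = None


def get_model():
    global model
    if model is None:
        with open(MODEL_PATH, "rb") as f:
            model = pickle.load(f)
    return model


@app.route("/ping", methods=["GET"])
def ping():
    # Report healthy only if the model artifact can be loaded
    try:
        get_model()
        status = 200
    except Exception:
        status = 404
    return flask.Response(response="\n", status=status, mimetype="application/json")


@app.route("/invocations", methods=["POST"])
def invocations():
    # Accept CSV rows of features and return one prediction per line
    data = pd.read_csv(io.StringIO(flask.request.data.decode("utf-8")), header=None)
    predictions = get_model().predict(data.values)
    body = "\n".join(str(p) for p in predictions)
    return flask.Response(response=body, status=200, mimetype="text/csv")
# -----------------------------------------------------------------------------------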
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | cpu_count = multiprocessing.cpu_count() 24 | 25 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 26 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 27 | 28 | def sigterm_handler(nginx_pid, gunicorn_pid): 29 | try: 30 | os.kill(nginx_pid, signal.SIGQUIT) 31 | except OSError: 32 | pass 33 | try: 34 | os.kill(gunicorn_pid, signal.SIGTERM) 35 | except OSError: 36 | pass 37 | 38 | sys.exit(0) 39 | 40 | def start_server(): 41 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 42 | 43 | 44 | # link the log streams to stdout/err so they will be logged to the container logs 45 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 46 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 47 | 48 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 49 | gunicorn = subprocess.Popen(['gunicorn', 50 | '--timeout', str(model_server_timeout), 51 | '-k', 'gevent', 52 | '-b', 'unix:/tmp/gunicorn.sock', 53 | '-w', str(model_server_workers), 54 | 'wsgi:app']) 55 | 56 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 57 | 58 | # If either subprocess exits, so do we. 59 | pids = set([nginx.pid, gunicorn.pid]) 60 | while True: 61 | pid, _ = os.wait() 62 | if pid in pids: 63 | break 64 | 65 | sigterm_handler(nginx.pid, gunicorn.pid) 66 | print('Inference server exiting') 67 | 68 | # The main routine just invokes the start function. 69 | 70 | if __name__ == '__main__': 71 | start_server() 72 | -------------------------------------------------------------------------------- /lightgbm_bring_your_own_container_local_training_and_serving/container/lightgbm_regression/wsgi.py: -------------------------------------------------------------------------------- 1 | import predictor as myapp 2 | 3 | # This is just a simple wrapper for gunicorn to find your app. 4 | # If you want to change the algorithm file, simply change "predictor" above to the 5 | # new file. 6 | 7 | app = myapp.app 8 | -------------------------------------------------------------------------------- /prophet_bring_your_own_container_local_training_and_serving/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build an image that can do training and inference in SageMaker 2 | # This is a Python 3 image that uses the nginx, gunicorn, flask stack 3 | # for serving inferences in a stable way. 
4 | 5 | FROM ubuntu:18.04 6 | 7 | MAINTAINER Amazon AI 8 | 9 | RUN apt-get -y update 10 | 11 | RUN apt-get install -y --no-install-recommends \ 12 | wget \ 13 | curl \ 14 | build-essential libssl-dev libffi-dev \ 15 | libxml2-dev libxslt1-dev zlib1g-dev \ 16 | nginx \ 17 | ca-certificates 18 | 19 | 20 | RUN apt-get install -y python3-pip python3-dev \ 21 | && cd /usr/local/bin \ 22 | && ln -s /usr/bin/python3 python \ 23 | && pip3 install --upgrade pip 24 | 25 | 26 | RUN pip --no-cache-dir install \ 27 | numpy \ 28 | scipy \ 29 | sklearn \ 30 | pandas \ 31 | flask \ 32 | gevent \ 33 | gunicorn \ 34 | pystan \ 35 | lunarcalendar \ 36 | convertdate \ 37 | holidays \ 38 | tqdm 39 | 40 | RUN pip --no-cache-dir install \ 41 | fbprophet==0.7.1 42 | 43 | ENV PYTHONUNBUFFERED=TRUE 44 | ENV PYTHONDONTWRITEBYTECODE=TRUE 45 | ENV PATH="/opt/program:${PATH}" 46 | 47 | # Set up the program in the image 48 | COPY prophet /opt/program 49 | WORKDIR /opt/program 50 | 51 | -------------------------------------------------------------------------------- /prophet_bring_your_own_container_local_training_and_serving/container/prophet/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | proxy_read_timeout 1200s; 27 | 28 | location ~ ^/(ping|invocations) { 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | proxy_set_header Host $http_host; 31 | proxy_redirect off; 32 | proxy_pass http://gunicorn; 33 | } 34 | 35 | location / { 36 | return 404 "{}"; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /prophet_bring_your_own_container_local_training_and_serving/container/prophet/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | cpu_count = multiprocessing.cpu_count() 24 | 25 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 26 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 27 | 28 | def sigterm_handler(nginx_pid, gunicorn_pid): 29 | try: 30 | os.kill(nginx_pid, signal.SIGQUIT) 31 | except OSError: 32 | pass 33 | try: 34 | os.kill(gunicorn_pid, signal.SIGTERM) 35 | except OSError: 36 | pass 37 | 38 | sys.exit(0) 39 | 40 | def start_server(): 41 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 42 | 43 | 44 | # link the log streams to stdout/err so they will be logged to the container logs 45 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 46 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 47 | 48 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 49 | gunicorn = subprocess.Popen(['gunicorn', 50 | '--timeout', str(model_server_timeout), 51 | '-k', 'gevent', 52 | '-b', 'unix:/tmp/gunicorn.sock', 53 | '-w', str(model_server_workers), 54 | 'wsgi:app']) 55 | 56 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 57 | 58 | # If either subprocess exits, so do we. 59 | pids = set([nginx.pid, gunicorn.pid]) 60 | while True: 61 | pid, _ = os.wait() 62 | if pid in pids: 63 | break 64 | 65 | sigterm_handler(nginx.pid, gunicorn.pid) 66 | print('Inference server exiting') 67 | 68 | # The main routine just invokes the start function. 69 | 70 | if __name__ == '__main__': 71 | start_server() 72 | -------------------------------------------------------------------------------- /prophet_bring_your_own_container_local_training_and_serving/container/prophet/wsgi.py: -------------------------------------------------------------------------------- 1 | import predictor as myapp 2 | 3 | # This is just a simple wrapper for gunicorn to find your app. 4 | # If you want to change the algorithm file, simply change "predictor" above to the 5 | # new file. 6 | 7 | app = myapp.app 8 | -------------------------------------------------------------------------------- /prophet_bring_your_own_container_local_training_and_serving/prophet_bring_your_own_container_local_training_and_serving.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that trains a simple Prophet Forecasting model, and then performs inference. 2 | # This implementation will work on your *local computer*. 3 | # 4 | # Prerequisites: 5 | # 1. Install required Python packages: 6 | # pip install boto3 sagemaker pandas scikit-learn 7 | # pip install 'sagemaker[local]' 8 | # 2. Docker Desktop has to be installed on your computer, and running. 9 | # 3. Docker Desktop has to be allocated with 6 CPUs and 8 GB of Memory. 10 | # 4. Open terminal and run the following commands: 11 | # docker build -t sagemaker-prophet-local container/. 
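# --- Illustrative sketch (not a file from this repository) -------------------------
# The container's /opt/program/train script for this Prophet example is not
# reproduced in this excerpt. Conceptually, a bring-your-own-container training
# script for this image reads the CSV from the SageMaker input channel, fits an
# fbprophet model and pickles it into the model directory -- roughly as in this
# sketch (the avocado CSV column names are assumptions):
import os
import pickle

import pandas as pd
from fbprophet import Prophet

input_dir = "/opt/ml/input/data/training"   # default channel name for fit('file://./data/')
model_dir = "/opt/ml/model"

df = pd.read_csv(os.path.join(input_dir, "avocado_daily.csv"))
df = df.rename(columns={"Date": "ds", "AveragePrice": "y"})  # assumed column names

model = Prophet()
model.fit(df[["ds", "y"]])

with open(os.path.join(model_dir, "prophet-model.pkl"), "wb") as f:
    pickle.dump(model, f)
# -----------------------------------------------------------------------------------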
12 | ######################################################################################################################## 13 | 14 | import pandas as pd 15 | from sagemaker.estimator import Estimator 16 | from sagemaker.local import LocalSession 17 | from sagemaker.predictor import csv_serializer 18 | 19 | sagemaker_session = LocalSession() 20 | sagemaker_session.config = {'local': {'local_code': True}} 21 | 22 | # For local training a dummy role will be sufficient 23 | role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 24 | 25 | image = 'sagemaker-prophet-local' 26 | print(image) 27 | 28 | local_tseries = Estimator( 29 | image, 30 | role, 31 | instance_count=1, 32 | instance_type="local") 33 | 34 | local_tseries.fit('file://./data/') 35 | 36 | local_predictor = local_tseries.deploy(1, 'local', serializer=csv_serializer) 37 | 38 | predicted = local_predictor.predict("30").decode('utf-8') 39 | print(predicted) 40 | 41 | local_predictor.delete_endpoint() 42 | -------------------------------------------------------------------------------- /pytorch_extend_dlc_container_ofa_local_serving/code/inference.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import io 3 | 4 | import numpy as np 5 | from PIL import Image 6 | from torchvision import transforms 7 | from transformers import OFATokenizer, OFAModel 8 | 9 | NPY_CONTENT_TYPE = 'application/x-npy' 10 | 11 | 12 | class OFAImageCaptionPredictor(object): 13 | def __init__(self, model_dir): 14 | self.model = OFAModel.from_pretrained(model_dir) 15 | self.tokenizer = OFATokenizer.from_pretrained(model_dir) 16 | 17 | def patch_resize_transform(self, image): 18 | mean = [0.5, 0.5, 0.5] 19 | std = [0.5, 0.5, 0.5] 20 | resolution = 256 21 | transform_func = transforms.Compose([ 22 | lambda image: image.convert("RGB"), 23 | transforms.Resize((resolution, resolution), interpolation=Image.BICUBIC), 24 | transforms.ToTensor(), 25 | transforms.Normalize(mean=mean, std=std) 26 | ]) 27 | return transform_func(image) 28 | 29 | def predict_caption(self, image): 30 | txt = " what does the image describe?" 
31 | inputs = self.tokenizer([txt], max_length=1024, return_tensors="pt")["input_ids"] 32 | patch_image = self.patch_resize_transform(image).unsqueeze(0) 33 | gen = self.model.generate(inputs, patch_images=patch_image, num_beams=4) 34 | ofa_caption = self.tokenizer.batch_decode(gen, skip_special_tokens=True) 35 | return ofa_caption 36 | 37 | 38 | def model_fn(model_dir): 39 | print(f'model_fn - model_dir: {model_dir}') 40 | 41 | for file in glob.glob(model_dir+'/*', recursive=True): 42 | print(file) 43 | 44 | predictor = OFAImageCaptionPredictor(model_dir) 45 | return predictor 46 | 47 | 48 | def input_fn(serialized_input_data, content_type=NPY_CONTENT_TYPE): 49 | print(f'input_fn - serialized_input_data length: {len(serialized_input_data)}, content_type: {content_type}') 50 | if content_type == NPY_CONTENT_TYPE: 51 | io_bytes_obj = io.BytesIO(serialized_input_data) 52 | npy_payload = np.load(io_bytes_obj) 53 | image = Image.fromarray(npy_payload) 54 | return image 55 | else: 56 | raise Exception('Requested unsupported ContentType in Accept: ' + content_type) 57 | return 58 | 59 | 60 | def predict_fn(image, predictor): 61 | print(f'predict_fn - image: {image}') 62 | print(f'predict_fn - image data length: {image}') 63 | result = predictor.predict_caption(image) 64 | print(f'predict_fn - result: {result}') 65 | return result 66 | -------------------------------------------------------------------------------- /pytorch_extend_dlc_container_ofa_local_serving/container/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG REGION=us-east-1 2 | 3 | # SageMaker PyTorch image 4 | FROM 763104351884.dkr.ecr.${REGION}.amazonaws.com/pytorch-inference:2.0.1-cpu-py310-ubuntu20.04-sagemaker 5 | 6 | RUN git clone --single-branch --branch feature/add_transformers https://github.com/OFA-Sys/OFA.git 7 | RUN pip install OFA/transformers/ 8 | 9 | ENV PATH="/opt/ml/code:${PATH}" 10 | 11 | # this environment variable is used by the SageMaker PyTorch container to determine our user code directory. 12 | ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code 13 | -------------------------------------------------------------------------------- /pytorch_extend_dlc_container_ofa_local_serving/pytorch_extend_dlc_container_ofa_local_serving.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that uses the OFA pretrained model to perform inference using a Docker image that 2 | # extends AWS DLC PyTorch. https://huggingface.co/OFA-Sys/OFA-tiny This implementation will work on your local computer. 3 | # 4 | # Prerequisites: 5 | # 1. Install required Python packages: 6 | # pip install boto3 sagemaker pandas scikit-learn Pillow 7 | # pip install 'sagemaker[local]' 8 | # 2. Docker Desktop has to be installed on your computer, and running. 9 | # 3. Open terminal and run the following commands: 10 | # aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-east-1.amazonaws.com 11 | # docker build -t sagemaker-ofa-pytorch-extended-local container/. 
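# --- Illustrative sketch (not a file from this repository) -------------------------
# This reproduces the application/x-npy round trip performed by the NumpySerializer
# on the client side and by input_fn inside the container, which is handy for
# sanity-checking the handlers without launching Docker:
import io

import numpy as np
from PIL import Image

img = np.asarray(Image.open("./test_image.jpg"))

buffer = io.BytesIO()
np.save(buffer, img)                        # what the serializer sends on the wire
serialized = buffer.getvalue()

restored = np.load(io.BytesIO(serialized))  # what input_fn does on the container side
assert (restored == img).all()
# -----------------------------------------------------------------------------------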
12 | ######################################################################################################################## 13 | 14 | import sagemaker 15 | from PIL import Image 16 | import numpy as np 17 | from sagemaker.pytorch import PyTorchModel 18 | from sagemaker.local import LocalSession 19 | import boto3 20 | 21 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 22 | LOCAL_SESSION = LocalSession() 23 | LOCAL_SESSION.config={'local': {'local_code': True}} # Ensure full code locality, see: https://sagemaker.readthedocs.io/en/stable/overview.html#local-mode 24 | 25 | 26 | def main(): 27 | 28 | image = 'sagemaker-ofa-pytorch-extended-local' 29 | 30 | print('Downloading model file from S3') 31 | s3 = boto3.client('s3') 32 | s3.download_file('aws-ml-blog', 'artifacts/pytorch-extend-dlc-container-ofa-tiny/model.tar.gz', 'model.tar.gz') 33 | print('Model downloaded') 34 | 35 | ofa_hf_model = PyTorchModel( 36 | source_dir="code", 37 | entry_point="inference.py", 38 | role=DUMMY_IAM_ROLE, 39 | model_data="file://model.tar.gz", 40 | image_uri=image, 41 | sagemaker_session=LOCAL_SESSION 42 | ) 43 | 44 | print('Deploying endpoint in local mode') 45 | predictor = ofa_hf_model.deploy( 46 | initial_instance_count=1, 47 | instance_type='local', 48 | serializer=sagemaker.serializers.NumpySerializer(), 49 | deserializer=sagemaker.deserializers.JSONDeserializer() 50 | ) 51 | 52 | img = Image.open("./test_image.jpg") 53 | payload = np.asarray(img) 54 | 55 | predictions = predictor.predict(payload) 56 | print(f'predictions: {predictions}') 57 | 58 | predictor.delete_endpoint(predictor.endpoint) 59 | 60 | if __name__ == "__main__": 61 | main() 62 | -------------------------------------------------------------------------------- /pytorch_extend_dlc_container_ofa_local_serving/test_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/pytorch_extend_dlc_container_ofa_local_serving/test_image.jpg -------------------------------------------------------------------------------- /pytorch_graviton_script_mode_local_model_inference/pytorch_graviton_script_mode_local_model_inference.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that inference with a pretrained PyTorch CIFAR-10 model using Graviton instance. 2 | # This implementation will work on your *ARM based local computer*. 3 | # 4 | # Prerequisites: 5 | # 1. Install required Python packages: 6 | # pip install boto3 sagemaker pandas matplotlib 7 | # pip install 'sagemaker[local]' 8 | # 2. Docker Desktop has to be installed on your computer, and running. 9 | # 3. You should have AWS credentials configured on your local machine 10 | # in order to be able to pull the docker image from ECR. 
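# --- Illustrative sketch (not a file from this repository) -------------------------
# The endpoint below is expected to return one score vector per image in the
# (4, 3, 32, 32) payload. Mapping those scores to CIFAR-10 class names (the same
# list used by utils_cifar.py elsewhere in this repository) looks roughly like
# this; the random array stands in for the real `predictions` response:
import numpy as np

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

predictions = np.random.randn(4, 10)   # stand-in for predictor.predict(payload)
labels = [classes[i] for i in np.asarray(predictions).argmax(axis=1)]
print(labels)
# -----------------------------------------------------------------------------------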
11 | ############################################################################################## 12 | 13 | import os 14 | 15 | import numpy as np 16 | from sagemaker.local import LocalSession 17 | from sagemaker.pytorch import PyTorchModel 18 | 19 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 20 | 21 | 22 | def main(): 23 | sagemaker_session = LocalSession() 24 | sagemaker_session.config = {'local': {'local_code': True}} 25 | 26 | # For local training a dummy role will be sufficient 27 | role = DUMMY_IAM_ROLE 28 | model_dir = 's3://aws-ml-blog/artifacts/pytorch-script-mode-local-model-inference/model.tar.gz' 29 | region = sagemaker_session.boto_region_name 30 | print(f'Region: {region}') 31 | 32 | model = PyTorchModel( 33 | role=role, 34 | model_data=model_dir, 35 | image_uri=f'763104351884.dkr.ecr.{region}.amazonaws.com/pytorch-inference-graviton:1.12.1-cpu-py38-ubuntu20.04-sagemaker', 36 | entry_point='inference.py' 37 | ) 38 | 39 | print('Deploying endpoint in local mode') 40 | print( 41 | 'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 42 | predictor = model.deploy( 43 | initial_instance_count=1, 44 | instance_type='local', 45 | ) 46 | 47 | print('Endpoint deployed in local mode') 48 | payload = np.random.randn(4, 3, 32, 32).astype(np.float32) 49 | 50 | predictions = predictor.predict(payload) 51 | print("predictions: {}".format(predictions)) 52 | 53 | print('About to delete the endpoint') 54 | predictor.delete_endpoint(predictor.endpoint) 55 | 56 | if __name__ == "__main__": 57 | main() -------------------------------------------------------------------------------- /pytorch_graviton_script_mode_local_model_inference/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /pytorch_nlp_script_mode_local_model_inference/code/inference.py: -------------------------------------------------------------------------------- 1 | from transformers import pipeline 2 | 3 | CSV_CONTENT_TYPE = 'text/csv' 4 | 5 | 6 | def model_fn(model_dir): 7 | sentiment_analysis = pipeline( 8 | "sentiment-analysis", 9 | model=model_dir, 10 | tokenizer=model_dir, 11 | return_all_scores=True 12 | ) 13 | return sentiment_analysis 14 | 15 | 16 | def input_fn(serialized_input_data, content_type=CSV_CONTENT_TYPE): 17 | if content_type == CSV_CONTENT_TYPE: 18 | input_data = serialized_input_data.splitlines() 19 | return input_data 20 | else: 21 | raise Exception('Requested unsupported ContentType in Accept: ' + content_type) 22 | return 23 | 24 | 25 | def predict_fn(input_data, model): 26 | return model(input_data) 27 | -------------------------------------------------------------------------------- /pytorch_nlp_script_mode_local_model_inference/code/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.38.0 2 | -------------------------------------------------------------------------------- /pytorch_nlp_script_mode_local_model_inference/data/test_data.csv: -------------------------------------------------------------------------------- 1 | "Never allow the same bug to bite you twice." 2 | "The best part of Amazon SageMaker is that it makes machine learning easy." 
3 | "Amazon SageMaker Inference Recommender helps you choose the best available compute instance and configuration to deploy machine learning models for optimal inference performance and cost." -------------------------------------------------------------------------------- /pytorch_nlp_script_mode_local_model_inference/pytorch_nlp_script_mode_local_model_inference.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that inference with a pretrained HuggingFace sentiment Analysis model. 2 | # This implementation will work on your *local computer*. 3 | # 4 | # Prerequisites: 5 | # 1. Install required Python packages: 6 | # pip install boto3 sagemaker pandas matplotlib torch torchvision 7 | # pip install 'sagemaker[local]' 8 | # 2. Docker Desktop has to be installed on your computer, and running. 9 | # 3. You should have AWS credentials configured on your local machine 10 | # in order to be able to pull the docker image from ECR. 11 | ############################################################################################## 12 | 13 | import pandas as pd 14 | from sagemaker.local import LocalSession 15 | from sagemaker.pytorch import PyTorchModel 16 | import sagemaker 17 | 18 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 19 | 20 | 21 | def main(): 22 | 23 | sagemaker_session = LocalSession() 24 | sagemaker_session.config = {'local': {'local_code': True}} 25 | 26 | # For local training a dummy role will be sufficient 27 | role = DUMMY_IAM_ROLE 28 | model_dir = 's3://aws-ml-blog/artifacts/pytorch-nlp-script-mode-local-model-inference/model.tar.gz' 29 | 30 | test_data = pd.read_csv('./data/test_data.csv', header=None) 31 | print(f'test_data: {test_data}') 32 | 33 | model = PyTorchModel( 34 | role=role, 35 | model_data=model_dir, 36 | framework_version='1.8', 37 | source_dir='code', 38 | py_version='py3', 39 | entry_point='inference.py' 40 | ) 41 | 42 | print('Deploying endpoint in local mode') 43 | print( 44 | 'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 45 | predictor = model.deploy( 46 | initial_instance_count=1, 47 | instance_type='local', 48 | ) 49 | 50 | predictor.serializer = sagemaker.serializers.CSVSerializer() 51 | predictor.deserializer = sagemaker.deserializers.CSVDeserializer() 52 | 53 | predictions = predictor.predict(test_data.to_csv(header=False, index=False)) 54 | print(f'predictions: {predictions}') 55 | 56 | predictor.delete_endpoint(predictor.endpoint) 57 | 58 | if __name__ == "__main__": 59 | main() -------------------------------------------------------------------------------- /pytorch_script_mode_local_model_inference/data/README.md: -------------------------------------------------------------------------------- 1 | # Data Folder -------------------------------------------------------------------------------- /pytorch_script_mode_local_model_inference/utils_cifar.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torchvision.transforms as transforms 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') 8 | 9 | 10 | def _get_transform(): 11 | return transforms.Compose( 12 | [transforms.ToTensor(), 13 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 14 | 15 | 16 | def 
get_train_data_loader(): 17 | transform = _get_transform() 18 | trainset = torchvision.datasets.CIFAR10(root='./data', train=True, 19 | download=True, transform=transform) 20 | return torch.utils.data.DataLoader(trainset, batch_size=4, 21 | shuffle=True, num_workers=2) 22 | 23 | 24 | def get_test_data_loader(download): 25 | transform = _get_transform() 26 | testset = torchvision.datasets.CIFAR10(root='./data', train=False, 27 | download=download, transform=transform) 28 | return torch.utils.data.DataLoader(testset, batch_size=4, 29 | shuffle=False, num_workers=2) 30 | 31 | 32 | # function to show an image 33 | def imshow(img): 34 | img = img / 2 + 0.5 # unnormalize 35 | npimg = img.numpy() 36 | plt.imshow(np.transpose(npimg, (1, 2, 0))) -------------------------------------------------------------------------------- /pytorch_script_mode_local_training_and_serving/code/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | boto3 3 | nltk -------------------------------------------------------------------------------- /pytorch_script_mode_local_training_and_serving/data/README.md: -------------------------------------------------------------------------------- 1 | # Data Folder -------------------------------------------------------------------------------- /pytorch_script_mode_local_training_and_serving/utils_cifar.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torchvision.transforms as transforms 4 | 5 | 6 | classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') 7 | 8 | 9 | def _get_transform(): 10 | return transforms.Compose( 11 | [transforms.ToTensor(), 12 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 13 | 14 | 15 | def get_train_data_loader(): 16 | transform = _get_transform() 17 | trainset = torchvision.datasets.CIFAR10(root='./data', train=True, 18 | download=True, transform=transform) 19 | return torch.utils.data.DataLoader(trainset, batch_size=4, 20 | shuffle=True, num_workers=2) 21 | 22 | 23 | def get_test_data_loader(download): 24 | transform = _get_transform() 25 | testset = torchvision.datasets.CIFAR10(root='./data', train=False, 26 | download=download, transform=transform) 27 | return torch.utils.data.DataLoader(testset, batch_size=4, 28 | shuffle=False, num_workers=2) 29 | -------------------------------------------------------------------------------- /pytorch_wandb_script_mode_local_training/code/requirements.txt: -------------------------------------------------------------------------------- 1 | wandb -------------------------------------------------------------------------------- /pytorch_wandb_script_mode_local_training/data/README.md: -------------------------------------------------------------------------------- 1 | # Data Folder -------------------------------------------------------------------------------- /pytorch_yolov5_local_model_inference/code/inference.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torch 3 | import json 4 | 5 | 6 | def model_fn(model_dir): 7 | model_path = os.path.join(model_dir,'yolov5s.pt') 8 | print(f'model_fn - model_path: {model_path}') 9 | model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_path) 10 | return model 11 | 12 | 13 | def input_fn(serialized_input_data, content_type): 14 | if content_type == 'application/json': 15 | print(f'input_fn - 
serialized_input_data: {serialized_input_data}') 16 | input_data = json.loads(serialized_input_data) 17 | return input_data 18 | else: 19 | raise Exception('Requested unsupported ContentType in Accept: ' + content_type) 20 | return 21 | 22 | 23 | def predict_fn(input_data, model): 24 | print(f'predict_fn - input_data: {input_data}') 25 | imgs = [input_data] 26 | results = model(imgs) 27 | df = results.pandas().xyxy[0] 28 | return(df.to_json(orient="split")) 29 | -------------------------------------------------------------------------------- /pytorch_yolov5_local_model_inference/code/requirements.txt: -------------------------------------------------------------------------------- 1 | seaborn -------------------------------------------------------------------------------- /pytorch_yolov5_local_model_inference/pytorch_yolov5_local_model_inference.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program for deploying a YOLOV5 pre-trained model to a SageMaker endpoint. 2 | # Inference is done with a URL of the image, which is the http payload for the SageMaker Endpoint. 3 | # This implementation will work on your *local computer*. 4 | # 5 | # This example is based on: https://github.com/aws/amazon-sagemaker-examples/blob/master/frameworks/tensorflow/get_started_mnist_deploy.ipynb 6 | # 7 | # Prerequisites: 8 | # 1. Install required Python packages: 9 | # `pip install -r requirements.txt` 10 | # 2. Docker Desktop installed and running on your computer: 11 | # `docker ps` 12 | # 3. You should have AWS credentials configured on your local machine 13 | # in order to be able to pull the docker image from ECR. 14 | ############################################################################################### 15 | from sagemaker.deserializers import JSONDeserializer 16 | from sagemaker.local import LocalSession 17 | from sagemaker.pytorch import PyTorchModel 18 | from sagemaker.serializers import JSONSerializer 19 | 20 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 21 | 22 | 23 | def main(): 24 | session = LocalSession() 25 | session.config = {'local': {'local_code': True}} 26 | 27 | role = DUMMY_IAM_ROLE 28 | model_dir = 's3://aws-ml-blog/artifacts/pytorch-yolov5-local-model-inference/model.tar.gz' 29 | 30 | model = PyTorchModel( 31 | entry_point='inference.py', 32 | source_dir = './code', 33 | role=role, 34 | model_data=model_dir, 35 | framework_version='2.1', 36 | py_version='py310' 37 | ) 38 | 39 | print('Deploying endpoint in local mode') 40 | print( 41 | 'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 42 | predictor = model.deploy( 43 | initial_instance_count=1, 44 | instance_type='local', 45 | ) 46 | 47 | print('Endpoint deployed in local mode') 48 | 49 | predictor.serializer = JSONSerializer() 50 | predictor.deserializer = JSONDeserializer() 51 | predictions = predictor.predict("https://ultralytics.com/images/zidane.jpg") 52 | print("predictions: {}".format(predictions)) 53 | 54 | print('About to delete the endpoint') 55 | predictor.delete_endpoint() 56 | 57 | 58 | if __name__ == "__main__": 59 | main() 60 | -------------------------------------------------------------------------------- /pytorch_yolov5_local_model_inference/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | 
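# --- Illustrative sketch (not a file from this repository) -------------------------
# predict_fn in pytorch_yolov5_local_model_inference/code/inference.py returns the
# detections as a pandas "split"-orient JSON document. Assuming the response comes
# back as that JSON string, it can be turned back into a DataFrame on the client;
# the literal below stands in for predictor.predict("https://ultralytics.com/images/zidane.jpg"):
import io

import pandas as pd

predictions = (
    '{"columns":["xmin","ymin","xmax","ymax","confidence","class","name"],'
    '"index":[0],"data":[[100.0,200.0,300.0,400.0,0.9,0,"person"]]}'
)

detections = pd.read_json(io.StringIO(predictions), orient="split")
print(detections[["name", "confidence"]])
# -----------------------------------------------------------------------------------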
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn 2 | numpy 3 | pandas 4 | pyaml 5 | sagemaker>=2.0.0,<3.0.0 6 | sagemaker[local] 7 | -------------------------------------------------------------------------------- /sagemaker_studio_docker_cli_install/README.md: -------------------------------------------------------------------------------- 1 | The instructions below describe how to install the Docker CLI and the Docker Compose plugin in each SageMaker Studio environment. Follow the specific instructions for the applicable [Studio Application Type](https://docs.aws.amazon.com/sagemaker/latest/dg/machine-learning-environments.html) / [Images](https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-available-images.html#notebooks-available-images-supported). These instructions adhere to [Studio platforms requirements](https://docs.aws.amazon.com/sagemaker/latest/dg/studio-updated-local.html#studio-updated-local-docker) for enabling Local Mode/Docker Access. 2 | 3 | * [**SageMaker Distribution Docker CLI Install Directions**](sagemaker-distribution-docker-cli-install.sh): This script provides instructions for Docker CLI Install in Studio JupyterLab/Studio Code Editor and Studio Classic [SageMaker Distribution Images](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-distribution.html) which are Ubuntu-Jammy based. Run `cat /etc/os-release` in the App Image terminal to verify the OS. 4 | * Applicable Studio AppType/Images: 5 | * JupyterLab 6 | * Code Editor 7 | * Amazon SageMaker Studio Classic [Kernel Gateway Applications] 8 | * Applicable Images: SageMaker Distribution v0 CPU, SageMaker Distribution v0 GPU, SageMaker Distribution v1 CPU, SageMaker Distribution v1 GPU. 9 | * [**SageMaker Classic - Debian-Bullseye Docker CLI Install Directions**](sagemaker-debian-bullseye-cli-install.sh): This script provides instructions for Docker CLI Install for Studio Classic SageMaker Images which are Debian-Bullseye based. Run `cat /etc/os-release` in the App Image terminal to verify the OS. 10 | * Applicable Studio AppTypes/Images: 11 | * Amazon SageMaker Studio Classic [Kernel Gateway Applications] 12 | * Applicable Images: Base Python 3.0, Base Python 2.0, Data Science 3.0, Data Science 2.0, SparkAnalytics 2.0, SparkAnalytics 1.0. 13 | * [**SageMaker Classic - Ubuntu-Focal Docker CLI Install Directions**](sagemaker-ubuntu-focal-docker-cli-install.sh): This script provides instructions for Docker CLI Install for Studio Classic SageMaker Images which are Ubuntu-Focal based. Run `cat /etc/os-release` in the App Image terminal to verify the OS. 14 | * Applicable Studio AppTypes/Images: 15 | * Amazon SageMaker Studio Classic [Kernel Gateway Applications] 16 | * Applicable Images: All currently supported Pytorch/Tensorflow Framework based Studio Images [here](https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-available-images.html#notebooks-available-images-supported).
17 | -------------------------------------------------------------------------------- /sagemaker_studio_docker_cli_install/sagemaker-debian-bullseye-cli-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | apt-get update 4 | apt-get install ca-certificates curl 5 | install -m 0755 -d /etc/apt/keyrings 6 | curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc 7 | chmod a+r /etc/apt/keyrings/docker.asc 8 | echo \ 9 | "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian \ 10 | $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ 11 | tee /etc/apt/sources.list.d/docker.list > /dev/null 12 | apt-get update 13 | 14 | # pick the latest patch from: 15 | # apt-cache madison docker-ce | awk '{ print $3 }' | grep -i 20.10 16 | VERSION_STRING=5:20.10.24~3-0~debian-bullseye 17 | apt-get install docker-ce-cli=$VERSION_STRING docker-compose-plugin -y 18 | 19 | # validate the Docker Client is able to access Docker Server at [unix:///docker/proxy.sock] 20 | docker version -------------------------------------------------------------------------------- /sagemaker_studio_docker_cli_install/sagemaker-distribution-docker-cli-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sudo apt-get -y install ca-certificates curl gnupg 4 | 5 | sudo install -m 0755 -d /etc/apt/keyrings 6 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg 7 | sudo chmod a+r /etc/apt/keyrings/docker.gpg 8 | echo \ 9 | "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ 10 | $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ 11 | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 12 | sudo apt-get -y update 13 | 14 | # pick the latest patch from: 15 | # apt-cache madison docker-ce | awk '{ print $3 }' | grep -i 20.10 16 | VERSION_STRING=5:20.10.24~3-0~ubuntu-jammy 17 | sudo apt-get install docker-ce-cli=$VERSION_STRING docker-compose-plugin -y 18 | 19 | # validate the Docker Client is able to access Docker Server at [unix:///docker/proxy.sock] 20 | docker version -------------------------------------------------------------------------------- /sagemaker_studio_docker_cli_install/sagemaker-ubuntu-focal-docker-cli-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | apt-get update 4 | apt-get install ca-certificates curl gnupg -y 5 | install -m 0755 -d /etc/apt/keyrings 6 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg 7 | chmod a+r /etc/apt/keyrings/docker.gpg 8 | 9 | echo \ 10 | "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ 11 | "$(. 
/etc/os-release && echo "$VERSION_CODENAME")" stable" | \ 12 | tee /etc/apt/sources.list.d/docker.list > /dev/null 13 | 14 | apt-get update 15 | 16 | # pick the latest patch from: 17 | # apt-cache madison docker-ce | awk '{ print $3 }' | grep -i 20.10 18 | VERSION_STRING=5:20.10.24~3-0~ubuntu-focal 19 | apt-get install docker-ce-cli=$VERSION_STRING docker-compose-plugin -y 20 | 21 | # validate the Docker Client is able to access Docker Server at [unix:///docker/proxy.sock] 22 | docker version -------------------------------------------------------------------------------- /sagemaker_studio_docker_cli_install/sagemaker-ubuntu-jammy-docker-cli-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # for a bit of documentation, that script is meant for jammy jellyfish, 4 | # if you want to use another version, set the VERSION_CODENAME environment 5 | # variable when running for another version, also it defaults the DOCKER_HOST 6 | # to the location of the socket but if sagemaker does evolve, you can again 7 | # just set that environment variable 8 | 9 | apt-get update 10 | apt-get install ca-certificates curl gnupg -y 11 | install -m 0755 -d /etc/apt/keyrings 12 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg 13 | chmod a+r /etc/apt/keyrings/docker.gpg 14 | 15 | echo \ 16 | "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ 17 | "$(. /etc/os-release && echo "${VERSION_CODENAME:-jammy}")" stable" | \ 18 | tee /etc/apt/sources.list.d/docker.list > /dev/null 19 | 20 | apt-get update 21 | 22 | # pick the latest patch from: 23 | # apt-cache madison docker-ce | awk '{ print $3 }' | grep -i 20.10 24 | VERSION_STRING=5:20.10.24~3-0~ubuntu-${VERSION_CODENAME:-jammy} 25 | apt-get install docker-ce-cli=$VERSION_STRING docker-compose-plugin -y 26 | 27 | # validate the Docker Client is able to access Docker Server at [unix:///docker/proxy.sock] 28 | 29 | if [ -z "${DOCKER_HOST}" ]; then 30 | export DOCKER_HOST="unix:///docker/proxy.sock" 31 | fi 32 | 33 | docker version 34 | -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_and_own_model_local_serving/build_and_push.sh: -------------------------------------------------------------------------------- 1 | 2 | # The name of our algorithm 3 | algorithm_name=sagemaker-sklearn-rf-regressor-local 4 | 5 | cd container 6 | 7 | chmod +x sklearn_rf_regressor/serve 8 | 9 | account=$(aws sts get-caller-identity --query Account --output text) 10 | 11 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 12 | region=$(aws configure get region) 13 | region=${region:-us-west-2} 14 | 15 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest" 16 | 17 | # If the repository doesn't exist in ECR, create it. 18 | aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1 19 | 20 | if [ $? -ne 0 ] 21 | then 22 | aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null 23 | fi 24 | 25 | # Get the login command from ECR and execute it directly 26 | aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname} 27 | 28 | # Build the docker image locally with the image name and then push it to ECR 29 | # with the full name. 30 | 31 | docker build -t ${algorithm_name} . 
32 | docker tag ${algorithm_name} ${fullname} 33 | 34 | docker push ${fullname} 35 | -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_and_own_model_local_serving/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build an image that can do training and inference in SageMaker 2 | # This is a Python 3 image that uses the nginx, gunicorn, flask stack 3 | # for serving inferences in a stable way. 4 | 5 | FROM python:3.7-slim-buster 6 | 7 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 8 | wget \ 9 | nginx \ 10 | ca-certificates 11 | 12 | RUN pip install numpy==1.16.2 scipy==1.2.1 scikit-learn==0.23.2 pandas flask gevent gunicorn 13 | 14 | # Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard 15 | # output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE 16 | # keeps Python from writing the .pyc files which are unnecessary in this case. We also update 17 | # PATH so that the train and serve programs are found when the container is invoked. 18 | 19 | ENV PYTHONUNBUFFERED=TRUE 20 | ENV PYTHONDONTWRITEBYTECODE=TRUE 21 | ENV PATH="/opt/program:${PATH}" 22 | 23 | # Set up the program in the image 24 | COPY sklearn_rf_regressor /opt/program 25 | WORKDIR /opt/program 26 | 27 | -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_and_own_model_local_serving/container/sklearn_rf_regressor/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | proxy_read_timeout 1200s; 27 | 28 | location ~ ^/(ping|invocations) { 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | proxy_set_header Host $http_host; 31 | proxy_redirect off; 32 | proxy_pass http://gunicorn; 33 | } 34 | 35 | location / { 36 | return 404 "{}"; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_and_own_model_local_serving/container/sklearn_rf_regressor/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | cpu_count = multiprocessing.cpu_count() 24 | 25 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 26 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 27 | 28 | def sigterm_handler(nginx_pid, gunicorn_pid): 29 | try: 30 | os.kill(nginx_pid, signal.SIGQUIT) 31 | except OSError: 32 | pass 33 | try: 34 | os.kill(gunicorn_pid, signal.SIGTERM) 35 | except OSError: 36 | pass 37 | 38 | sys.exit(0) 39 | 40 | def start_server(): 41 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 42 | 43 | 44 | # link the log streams to stdout/err so they will be logged to the container logs 45 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 46 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 47 | 48 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 49 | gunicorn = subprocess.Popen(['gunicorn', 50 | '--timeout', str(model_server_timeout), 51 | '-k', 'gevent', 52 | '-b', 'unix:/tmp/gunicorn.sock', 53 | '-w', str(model_server_workers), 54 | 'wsgi:app']) 55 | 56 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 57 | 58 | # If either subprocess exits, so do we. 59 | pids = set([nginx.pid, gunicorn.pid]) 60 | while True: 61 | pid, _ = os.wait() 62 | if pid in pids: 63 | break 64 | 65 | sigterm_handler(nginx.pid, gunicorn.pid) 66 | print('Inference server exiting') 67 | 68 | # The main routine just invokes the start function. 69 | 70 | if __name__ == '__main__': 71 | start_server() 72 | -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_and_own_model_local_serving/container/sklearn_rf_regressor/wsgi.py: -------------------------------------------------------------------------------- 1 | import predictor as myapp 2 | 3 | # This is just a simple wrapper for gunicorn to find your app. 4 | # If you want to change the algorithm file, simply change "predictor" above to the 5 | # new file. 
6 | 7 | app = myapp.app 8 | -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_local_processing/container/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim-buster 2 | 3 | # Install scikit-learn and pandas 4 | RUN pip3 install pandas==1.3.5 scikit-learn==1.0.2 5 | 6 | ENTRYPOINT ["python3"] 7 | -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_local_processing/input_data/README.md: -------------------------------------------------------------------------------- 1 | # Input Data Folder -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_local_processing/input_data/sample_file_1.txt: -------------------------------------------------------------------------------- 1 | This is the first sample file -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_local_processing/input_data/sample_file_2.txt: -------------------------------------------------------------------------------- 1 | This is the second sample file -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_local_processing/input_data/sample_file_3.txt: -------------------------------------------------------------------------------- 1 | This is the third sample file -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_local_processing/processing_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from datetime import datetime 4 | 5 | input_data_path = '/opt/ml/processing/input_data/' 6 | processed_data_path = '/opt/ml/processing/processed_data' 7 | 8 | 9 | def main(): 10 | print("Processing Started") 11 | 12 | # Convert command line args into a map of args 13 | args_iter = iter(sys.argv[1:]) 14 | args = dict(zip(args_iter, args_iter)) 15 | 16 | print('Received arguments {}'.format(args)) 17 | print('Reading input data from {}'.format(input_data_path)) 18 | 19 | print("Got Args: {}".format(args)) 20 | 21 | input_files = [file for file in os.listdir(input_data_path) if file.endswith('.' + 'txt')] 22 | print('Available input text files: {}'.format(input_files)) 23 | 24 | if args['job-type'] == 'word-count': 25 | print('Word Count Job Type Started') 26 | total_words = 0 27 | for input_file in input_files: 28 | file = open(os.path.join(input_data_path, input_file), 'r') 29 | data = file.read() 30 | words = len(data.split()) 31 | print('Detected {} words in {} file'.format(words, input_file)) 32 | total_words = total_words + words 33 | 34 | print('Total words in {} files detected: {}'.format(len(input_files), total_words)) 35 | else: 36 | print('{} job-type not supported! Doing Nothing'.format(args['job-type'])) 37 | 38 | output_file = os.path.join(processed_data_path, 'total_words_'+datetime.now().strftime("%d%m%Y_%H_%M_%S")+'.txt') 39 | print('Writing output file: {}'.format(output_file)) 40 | f = open(output_file, "a") 41 | f.write('Total Words: {}'.format(total_words)) 42 | f.close() 43 | 44 | output_files = [file for file in os.listdir(processed_data_path) if file.endswith('.' 
+ 'txt')] 45 | print('Available output text files: {}'.format(output_files)) 46 | 47 | print("Processing Complete") 48 | 49 | if __name__ == "__main__": 50 | main() -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_container_local_processing/scikit_learn_bring_your_own_container_local_processing.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that runs a simple scikit-learn processing based on a docker image you build. 2 | # This implementation will work on your *local computer*. 3 | # 4 | # Prerequisites: 5 | # 1. Install required Python packages: 6 | # pip install boto3 sagemaker pandas scikit-learn 7 | # pip install 'sagemaker[local]' 8 | # 2. Docker Desktop has to be installed on your computer, and running. 9 | # 3. Open terminal and run the following commands: 10 | # docker build -t sagemaker-scikit-learn-processing-local container/. 11 | ######################################################################################################################## 12 | 13 | from sagemaker.local import LocalSession 14 | from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput 15 | 16 | sagemaker_session = LocalSession() 17 | sagemaker_session.config = {'local': {'local_code': True}} 18 | 19 | # For local training a dummy role will be sufficient 20 | role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 21 | 22 | processor = ScriptProcessor(command=['python3'], 23 | image_uri='sagemaker-scikit-learn-processing-local', 24 | role=role, 25 | instance_count=1, 26 | instance_type='local') 27 | 28 | processor.run(code='processing_script.py', 29 | inputs=[ProcessingInput( 30 | source='./input_data/', 31 | destination='/opt/ml/processing/input_data/')], 32 | outputs=[ProcessingOutput( 33 | output_name='word_count_data', 34 | source='/opt/ml/processing/processed_data/')], 35 | arguments=['job-type', 'word-count'] 36 | ) 37 | 38 | preprocessing_job_description = processor.jobs[-1].describe() 39 | output_config = preprocessing_job_description['ProcessingOutputConfig'] 40 | 41 | print(output_config) 42 | 43 | for output in output_config['Outputs']: 44 | if output['OutputName'] == 'word_count_data': 45 | word_count_data_file = output['S3Output']['S3Uri'] 46 | 47 | print('Output file is located on: {}'.format(word_count_data_file)) -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_model_local_serving/code/inference.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import joblib 4 | 5 | def predict_fn(input_object, model): 6 | ######################################### 7 | # Do your custom preprocessing logic here 8 | ######################################### 9 | 10 | print("calling model") 11 | predictions = model.predict(input_object) 12 | return predictions 13 | 14 | 15 | def model_fn(model_dir): 16 | print("loading model.joblib from: {}".format(model_dir)) 17 | loaded_model = joblib.load(os.path.join(model_dir, "model.joblib")) 18 | return loaded_model 19 | -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_model_local_serving/code/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | requests 3 | nltk 
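Since `model_fn` and `predict_fn` in the inference.py just above are plain Python functions, they can be smoke-tested locally before an endpoint is deployed. A minimal sketch, under the assumption that it is run from the example's code/ directory and that a local folder (here named ./model_dir, an example path) already contains the model.joblib downloaded by the serving script that follows:

import numpy as np

import inference  # the handler module shown above (scikit_learn_bring_your_own_model_local_serving/code/inference.py)

model = inference.model_fn("./model_dir")  # example folder containing model.joblib
sample = np.zeros((1, 8))  # one row shaped like the 8 California Housing features
print(inference.predict_fn(sample, model))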
-------------------------------------------------------------------------------- /scikit_learn_bring_your_own_model_local_serving/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0,<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /scikit_learn_bring_your_own_model_local_serving/scikit_learn_bring_your_own_model_local_serving.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that serves a scikit-learn model pre-trained on the California Housing dataset. 2 | # This implementation will work on your *local computer* or in the *AWS Cloud*. 3 | # 4 | # Prerequisites: 5 | # 1. Install required Python packages: 6 | # `pip install -r requirements.txt` 7 | # 2. Docker Desktop installed and running on your computer: 8 | # `docker ps` 9 | # 3. You should have AWS credentials configured on your local machine 10 | # in order to be able to pull the docker image from ECR. 11 | ############################################################################################### 12 | 13 | import boto3 14 | import pandas as pd 15 | import tarfile 16 | 17 | from sagemaker.sklearn import SKLearnModel 18 | from sklearn.datasets import fetch_california_housing 19 | from sklearn.model_selection import train_test_split 20 | 21 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 22 | s3 = boto3.client('s3') 23 | 24 | 25 | def main(): 26 | 27 | # Prepare data for model inference - we use the California Housing dataset 28 | print('Preparing data for model inference') 29 | data = fetch_california_housing() 30 | X_train, X_test, y_train, y_test = train_test_split( 31 | data.data, data.target, test_size=0.25, random_state=42 32 | ) 33 | 34 | # we don't train a model, so we will need only the testing data 35 | testX = pd.DataFrame(X_test, columns=data.feature_names) 36 | 37 | # Download a pre-trained model file 38 | print('Downloading a pre-trained model file') 39 | s3.download_file('aws-ml-blog', 'artifacts/scikit_learn_bring_your_own_model/model.joblib', 'model.joblib') 40 | 41 | # Creating a model.tar.gz file 42 | tar = tarfile.open('model.tar.gz', 'w:gz') 43 | tar.add('model.joblib') 44 | tar.close() 45 | 46 | model = SKLearnModel( 47 | role=DUMMY_IAM_ROLE, 48 | model_data='file://./model.tar.gz', 49 | framework_version='0.23-1', 50 | py_version='py3', 51 | source_dir='code', 52 | entry_point='inference.py' 53 | ) 54 | 55 | print('Deploying endpoint in local mode') 56 | predictor = model.deploy(initial_instance_count=1, instance_type='local') 57 | 58 | predictions = predictor.predict(testX[data.feature_names].head(5)) 59 | print(f"Predictions: {predictions}") 60 | 61 | print('About to delete the endpoint to stop paying (if in cloud mode).') 62 | predictor.delete_endpoint() 63 | 64 | 65 | if __name__ == "__main__": 66 | main() 67 | -------------------------------------------------------------------------------- /scikit_learn_graviton_bring_your_own_container_local_training_and_serving/build_and_push.sh: -------------------------------------------------------------------------------- 1 | # The name of our algorithm 2 | algorithm_name=sagemaker-scikit-learn-graviton-regressor-local 3 | 4 | cd container 5 | 6 | chmod +x ./linear_regressor/train 7 | chmod +x ./linear_regressor/serve 8 | 9 | account=$(aws sts get-caller-identity
--query Account --output text) 10 | 11 | # Get the region defined in the current configuration (default to us-west-2 if none defined) 12 | region=$(aws configure get region) 13 | region=${region:-us-west-2} 14 | 15 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest" 16 | 17 | # If the repository doesn't exist in ECR, create it. 18 | aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1 19 | 20 | if [ $? -ne 0 ] 21 | then 22 | aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null 23 | fi 24 | 25 | # Get the login command from ECR and execute it directly 26 | aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname} 27 | 28 | # Build the docker image locally with the image name and then push it to ECR 29 | # with the full name. 30 | 31 | docker build -t ${algorithm_name} . 32 | docker tag ${algorithm_name} ${fullname} 33 | -------------------------------------------------------------------------------- /scikit_learn_graviton_bring_your_own_container_local_training_and_serving/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build a Graviton image that can do training and inference in SageMaker 2 | # This is a Python 3 image that uses the nginx, gunicorn, flask stack 3 | # for serving inferences in a stable way. 4 | 5 | FROM arm64v8/ubuntu 6 | 7 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 8 | wget \ 9 | nginx \ 10 | ca-certificates 11 | 12 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.10.3-Linux-aarch64.sh 13 | RUN chmod a+x Miniconda3-py39_4.10.3-Linux-aarch64.sh 14 | RUN bash Miniconda3-py39_4.10.3-Linux-aarch64.sh -b 15 | ENV PATH /root/miniconda3/bin/:$PATH 16 | 17 | COPY ml-dependencies.yml ./ 18 | RUN conda env create -f ml-dependencies.yml 19 | 20 | ENV PATH /root/miniconda3/envs/ml-dependencies/bin:$PATH 21 | 22 | # Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard 23 | # output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE 24 | # keeps Python from writing the .pyc files which are unnecessary in this case. We also update 25 | # PATH so that the train and serve programs are found when the container is invoked. 
26 | 27 | ENV PYTHONUNBUFFERED=TRUE 28 | ENV PYTHONDONTWRITEBYTECODE=TRUE 29 | ENV PATH="/opt/program:${PATH}" 30 | 31 | # Set up the program in the image 32 | COPY linear_regressor /opt/program 33 | WORKDIR /opt/program 34 | 35 | -------------------------------------------------------------------------------- /scikit_learn_graviton_bring_your_own_container_local_training_and_serving/container/linear_regressor/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | proxy_read_timeout 1200s; 27 | 28 | location ~ ^/(ping|invocations) { 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | proxy_set_header Host $http_host; 31 | proxy_redirect off; 32 | proxy_pass http://gunicorn; 33 | } 34 | 35 | location / { 36 | return 404 "{}"; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /scikit_learn_graviton_bring_your_own_container_local_training_and_serving/container/linear_regressor/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | cpu_count = multiprocessing.cpu_count() 24 | 25 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 26 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 27 | 28 | def sigterm_handler(nginx_pid, gunicorn_pid): 29 | try: 30 | os.kill(nginx_pid, signal.SIGQUIT) 31 | except OSError: 32 | pass 33 | try: 34 | os.kill(gunicorn_pid, signal.SIGTERM) 35 | except OSError: 36 | pass 37 | 38 | sys.exit(0) 39 | 40 | def start_server(): 41 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 42 | 43 | 44 | # link the log streams to stdout/err so they will be logged to the container logs 45 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 46 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 47 | 48 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 49 | gunicorn = subprocess.Popen(['gunicorn', 50 | '--timeout', str(model_server_timeout), 51 | '-k', 'gevent', 52 | '-b', 'unix:/tmp/gunicorn.sock', 53 | '-w', str(model_server_workers), 54 | 'wsgi:app']) 55 | 56 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 57 | 58 | # If either subprocess exits, so do we. 59 | pids = set([nginx.pid, gunicorn.pid]) 60 | while True: 61 | pid, _ = os.wait() 62 | if pid in pids: 63 | break 64 | 65 | sigterm_handler(nginx.pid, gunicorn.pid) 66 | print('Inference server exiting') 67 | 68 | # The main routine just invokes the start function. 69 | 70 | if __name__ == '__main__': 71 | start_server() 72 | -------------------------------------------------------------------------------- /scikit_learn_graviton_bring_your_own_container_local_training_and_serving/container/linear_regressor/wsgi.py: -------------------------------------------------------------------------------- 1 | import predictor as myapp 2 | 3 | # This is just a simple wrapper for gunicorn to find your app. 4 | # If you want to change the algorithm file, simply change "predictor" above to the 5 | # new file. 6 | 7 | app = myapp.app 8 | -------------------------------------------------------------------------------- /scikit_learn_graviton_bring_your_own_container_local_training_and_serving/container/ml-dependencies.yml: -------------------------------------------------------------------------------- 1 | name: ml-dependencies 2 | dependencies: 3 | - numpy 4 | - pandas 5 | - scikit-learn 6 | - flask 7 | - gevent 8 | - gunicorn -------------------------------------------------------------------------------- /scikit_learn_local_processing/SKLearnProcessor_local_processing.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that runs a simple scikit-learn processing using the SKLearnProcessor. 2 | # This implementation will work on your *local computer*. 3 | # 4 | # Prerequisites: 5 | # 1. 
Install required Python packages: 6 | # pip install boto3 sagemaker pandas scikit-learn 7 | # pip install 'sagemaker[local]' 8 | # 2. Docker Desktop installed and running on your computer: 9 | # `docker ps` 10 | # 3. You should have AWS credentials configured on your local machine 11 | # in order to be able to pull the docker image from ECR. 12 | ######################################################################################################################## 13 | 14 | from sagemaker.local import LocalSession 15 | from sagemaker.processing import ProcessingInput, ProcessingOutput 16 | from sagemaker.sklearn.processing import SKLearnProcessor 17 | 18 | sagemaker_session = LocalSession() 19 | sagemaker_session.config = {'local': {'local_code': True}} 20 | 21 | # For local training a dummy role will be sufficient 22 | role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 23 | 24 | processor = SKLearnProcessor(framework_version='0.20.0', 25 | instance_count=1, 26 | instance_type='local', 27 | role=role) 28 | 29 | print('Starting processing job.') 30 | print('Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 31 | processor.run(code='processing_script.py', 32 | inputs=[ProcessingInput( 33 | source='./input_data/', 34 | destination='/opt/ml/processing/input_data/')], 35 | outputs=[ProcessingOutput( 36 | output_name='word_count_data', 37 | source='/opt/ml/processing/processed_data/')], 38 | arguments=['job-type', 'word-count'] 39 | ) 40 | 41 | preprocessing_job_description = processor.jobs[-1].describe() 42 | output_config = preprocessing_job_description['ProcessingOutputConfig'] 43 | 44 | print(output_config) 45 | 46 | for output in output_config['Outputs']: 47 | if output['OutputName'] == 'word_count_data': 48 | word_count_data_file = output['S3Output']['S3Uri'] 49 | 50 | print('Output file is located on: {}'.format(word_count_data_file)) -------------------------------------------------------------------------------- /scikit_learn_local_processing/input_data/README.md: -------------------------------------------------------------------------------- 1 | # Input Data Folder -------------------------------------------------------------------------------- /scikit_learn_local_processing/input_data/sample_file_1.txt: -------------------------------------------------------------------------------- 1 | This is the first sample file -------------------------------------------------------------------------------- /scikit_learn_local_processing/input_data/sample_file_2.txt: -------------------------------------------------------------------------------- 1 | This is the second sample file -------------------------------------------------------------------------------- /scikit_learn_local_processing/input_data/sample_file_3.txt: -------------------------------------------------------------------------------- 1 | This is the third sample file -------------------------------------------------------------------------------- /scikit_learn_local_processing/processing_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from datetime import datetime 4 | 5 | input_data_path = '/opt/ml/processing/input_data/' 6 | processed_data_path = '/opt/ml/processing/processed_data' 7 | 8 | 9 | def main(): 10 | print("Processing Started") 11 | 12 | # Convert command line args into a map of args 13 | args_iter = iter(sys.argv[1:]) 14 | args 
= dict(zip(args_iter, args_iter)) 15 | 16 | print('Received arguments {}'.format(args)) 17 | print('Reading input data from {}'.format(input_data_path)) 18 | 19 | print("Got Args: {}".format(args)) 20 | 21 | input_files = [file for file in os.listdir(input_data_path) if file.endswith('.' + 'txt')] 22 | print('Available input text files: {}'.format(input_files)) 23 | 24 | if args['job-type'] == 'word-count': 25 | print('Word Count Job Type Started') 26 | total_words = 0 27 | for input_file in input_files: 28 | file = open(os.path.join(input_data_path, input_file), 'r') 29 | data = file.read() 30 | words = len(data.split()) 31 | print('Detected {} words in {} file'.format(words, input_file)) 32 | total_words = total_words + words 33 | 34 | print('Total words in {} files detected: {}'.format(len(input_files), total_words)) 35 | else: 36 | print('{} job-type not supported! Doing Nothing'.format(args['job-type'])) 37 | 38 | output_file = os.path.join(processed_data_path, 'total_words_'+datetime.now().strftime("%d%m%Y_%H_%M_%S")+'.txt') 39 | print('Writing output file: {}'.format(output_file)) 40 | f = open(output_file, "a") 41 | f.write('Total Words: {}'.format(total_words)) 42 | f.close() 43 | 44 | output_files = [file for file in os.listdir(processed_data_path) if file.endswith('.' + 'txt')] 45 | print('Available output text files: {}'.format(output_files)) 46 | 47 | print("Processing Complete") 48 | 49 | if __name__ == "__main__": 50 | main() -------------------------------------------------------------------------------- /scikit_learn_nltk_local_processing/FrameworkProcessor_nltk_local_processing.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that runs a simple scikit-learn processing using the FrameworkProcessor to perform 2 | # word_tokenize using nltk. nltk is installed using dependencies installed from requirements.txt file. 3 | # This implementation will work on your *local computer*. 4 | # 5 | # Prerequisites: 6 | # 1. Install required Python packages: 7 | # pip install boto3 sagemaker pandas scikit-learn 8 | # pip install 'sagemaker[local]' 9 | # 2. Docker Desktop installed and running on your computer: 10 | # `docker ps` 11 | # 3. You should have AWS credentials configured on your local machine 12 | # in order to be able to pull the docker image from ECR. 
13 | ######################################################################################################################## 14 | from sagemaker.local import LocalSession 15 | from sagemaker.processing import ProcessingInput, ProcessingOutput 16 | from sagemaker.processing import FrameworkProcessor 17 | from sagemaker.sklearn.estimator import SKLearn 18 | 19 | sagemaker_session = LocalSession() 20 | sagemaker_session.config = {'local': {'local_code': True}} 21 | 22 | # For local training a dummy role will be sufficient 23 | role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 24 | 25 | processor = FrameworkProcessor( 26 | estimator_cls=SKLearn, 27 | framework_version='1.2-1', 28 | instance_count=1, 29 | instance_type='local', 30 | role=role 31 | ) 32 | 33 | print('Starting processing job.') 34 | print('Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 35 | processor.run( 36 | code='processing_script.py', 37 | dependencies=['./dependencies/requirements.txt'], 38 | inputs=[ 39 | ProcessingInput( 40 | source='./input_data/', 41 | destination='/opt/ml/processing/input_data/') 42 | ], 43 | outputs=[ProcessingOutput( 44 | output_name='tokenized_words_data', 45 | source='/opt/ml/processing/processed_data/')], 46 | arguments=['job-type', 'word-tokenize'] 47 | ) 48 | 49 | preprocessing_job_description = processor.jobs[-1].describe() 50 | output_config = preprocessing_job_description['ProcessingOutputConfig'] 51 | 52 | print(output_config) 53 | 54 | for output in output_config['Outputs']: 55 | if output['OutputName'] == 'tokenized_words_data': 56 | tokenized_words_data_file = output['S3Output']['S3Uri'] 57 | 58 | print('Output file is located on: {}'.format(tokenized_words_data_file)) -------------------------------------------------------------------------------- /scikit_learn_nltk_local_processing/dependencies/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | nltk 3 | 4 | -------------------------------------------------------------------------------- /scikit_learn_nltk_local_processing/input_data/README.md: -------------------------------------------------------------------------------- 1 | # Input Data Folder -------------------------------------------------------------------------------- /scikit_learn_nltk_local_processing/input_data/sample_file_1.txt: -------------------------------------------------------------------------------- 1 | This is the first sample file -------------------------------------------------------------------------------- /scikit_learn_nltk_local_processing/input_data/sample_file_2.txt: -------------------------------------------------------------------------------- 1 | This is the second sample file -------------------------------------------------------------------------------- /scikit_learn_nltk_local_processing/input_data/sample_file_3.txt: -------------------------------------------------------------------------------- 1 | This is the third sample file -------------------------------------------------------------------------------- /scikit_learn_nltk_local_processing/processing_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from datetime import datetime 4 | import nltk 5 | nltk.download('punkt') 6 | from nltk.tokenize import word_tokenize 7 | 8 | input_data_path = '/opt/ml/processing/input_data/' 9 | 
processed_data_path = '/opt/ml/processing/processed_data' 10 | 11 | 12 | def main(): 13 | print("Processing Started") 14 | 15 | # Convert command line args into a map of args 16 | args_iter = iter(sys.argv[1:]) 17 | args = dict(zip(args_iter, args_iter)) 18 | 19 | print('Received arguments {}'.format(args)) 20 | print('Reading input data from {}'.format(input_data_path)) 21 | 22 | print("Got Args: {}".format(args)) 23 | 24 | input_files = [file for file in os.listdir(input_data_path) if file.endswith('.' + 'txt')] 25 | print('Available input text files: {}'.format(input_files)) 26 | 27 | if args['job-type'] == 'word-tokenize': 28 | print('Word Tokenize Job Type Started') 29 | all_tokenized_words = [] 30 | for input_file in input_files: 31 | file = open(os.path.join(input_data_path, input_file), 'r') 32 | data = file.read() 33 | tokenized_words = word_tokenize(data) 34 | print('Detected {} words in {} file'.format(tokenized_words, input_file)) 35 | all_tokenized_words.append(tokenized_words) 36 | else: 37 | print('{} job-type not supported! Doing Nothing'.format(args['job-type'])) 38 | 39 | output_file = os.path.join(processed_data_path, 'all_tokenized_words_'+datetime.now().strftime("%d%m%Y_%H_%M_%S")+'.txt') 40 | print('Writing output file: {}'.format(output_file)) 41 | f = open(output_file, "a") 42 | f.write('Tokenized Words: {}'.format(all_tokenized_words)) 43 | f.close() 44 | 45 | output_files = [file for file in os.listdir(processed_data_path) if file.endswith('.' + 'txt')] 46 | print('Available output text files: {}'.format(output_files)) 47 | 48 | print("Processing Complete") 49 | 50 | if __name__ == "__main__": 51 | main() -------------------------------------------------------------------------------- /scikit_learn_script_mode_local_serving_multiple_models_with_one_invocation/code/inference.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import logging 4 | import sys 5 | 6 | import joblib 7 | import numpy as np 8 | 9 | JSON_CONTENT_TYPE = 'application/json' 10 | 11 | logger = logging.getLogger(__name__) 12 | logger.setLevel(logging.DEBUG) 13 | logger.addHandler(logging.StreamHandler(sys.stdout)) 14 | 15 | 16 | def input_fn(serialized_input_data, content_type): 17 | logger.info(f'input_fn - serialized_input_data: {serialized_input_data}, content_type: {content_type}') 18 | 19 | if content_type == JSON_CONTENT_TYPE: 20 | payload = [np.array(json.loads(serialized_input_data))] 21 | return payload 22 | else: 23 | raise Exception('Requested unsupported ContentType in Accept: ' + content_type) 24 | return 25 | 26 | 27 | # Perform prediction on the deserialized object, with the loaded models, and returns the max result 28 | def predict_fn(input_object, models_list): 29 | logger.info("predict_fn") 30 | logger.info(f"predict_fn - input_object: {input_object}") 31 | 32 | max_prediction = 0 33 | for i in range(len(models_list)): 34 | model = models_list[i] 35 | prediction = model.predict(input_object) 36 | logger.info(f"predict_fn - result for model #{i}: {prediction}") 37 | if prediction > max_prediction: 38 | max_prediction = prediction 39 | 40 | logger.info(f"returning response: {max_prediction}") 41 | return max_prediction 42 | 43 | 44 | # Load the model files from model_dir 45 | def model_fn(model_dir): 46 | logger.info(f'model_fn - model_dir: {model_dir}') 47 | for file in glob.glob(model_dir + '/model_*', recursive=True): 48 | print(file) 49 | 50 | logger.info(f"model_fn - loading models from: {model_dir}") 51 | 
52 | models_list = [] 53 | for model_file in glob.glob(model_dir + '/model_*', recursive=True): 54 | print(f'Loading model file: {model_file}') 55 | loaded_model = joblib.load(model_file) 56 | models_list.append(loaded_model) 57 | 58 | logger.info(f"model_fn - models_list length: {len(models_list)}") 59 | 60 | return models_list 61 | -------------------------------------------------------------------------------- /scikit_learn_script_mode_local_serving_multiple_models_with_one_invocation/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0,<3.0.0 4 | sagemaker[local] 5 | boto3 6 | scikit-learn 7 | -------------------------------------------------------------------------------- /scikit_learn_script_mode_local_serving_no_model_artifact/code/inference.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import numpy as np 4 | 5 | logger = logging.getLogger(__name__) 6 | logger.setLevel(logging.DEBUG) 7 | logger.addHandler(logging.StreamHandler(sys.stdout)) 8 | 9 | # Perform prediction on the deserialized object, with the dummy loaded model 10 | def predict_fn(input_object, model): 11 | logger.info("predict_fn") 12 | logger.info(f"input_object: {input_object}") 13 | 14 | response = np.average(input_object) 15 | logger.info(f"returning response, average of inputs: {response}") 16 | 17 | return response 18 | 19 | # Dummy model_fn function 20 | def model_fn(model_dir): 21 | dummy_model = {} 22 | return dummy_model -------------------------------------------------------------------------------- /scikit_learn_script_mode_local_serving_no_model_artifact/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0,<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /scikit_learn_script_mode_local_serving_no_model_artifact/scikit_learn_script_mode_local_serving_no_model_artifact.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that uses the scikit-learn container to compute the average of its input. 2 | # This implementation will work on your *local computer* or in the *AWS Cloud*. 3 | # 4 | # Prerequisites: 5 | # 1. Install required Python packages: 6 | # `pip install -r requirements.txt` 7 | # 2. Docker Desktop installed and running on your computer: 8 | # `docker ps` 9 | # 3. You should have AWS credentials configured on your local machine 10 | # in order to be able to pull the docker image from ECR.
11 | ############################################################################################### 12 | import tarfile 13 | from pathlib import Path 14 | 15 | import numpy as np 16 | from sagemaker import LocalSession 17 | from sagemaker.sklearn import SKLearn, SKLearnModel 18 | 19 | 20 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 21 | sess = LocalSession() 22 | sess.config = {'local': {'local_code': True}} 23 | 24 | 25 | def do_inference_on_local_endpoint(predictor): 26 | print(f'\nStarting Inference on endpoint (local).') 27 | inputs = np.array([1, 2, 3, 4, 5]) 28 | print("Endpoint response: {}".format(predictor.predict(inputs))) 29 | 30 | 31 | def main(): 32 | sagemaker_session = LocalSession() 33 | sagemaker_session.config = {'local': {'local_code': True}} 34 | 35 | dummy_model_file = Path("dummy.model") 36 | dummy_model_file.touch() 37 | 38 | with tarfile.open("model.tar.gz", "w:gz") as tar: 39 | tar.add(dummy_model_file.as_posix()) 40 | 41 | # For local training a dummy role will be sufficient 42 | role = DUMMY_IAM_ROLE 43 | 44 | model = SKLearnModel( 45 | role=role, 46 | model_data='file://./model.tar.gz', 47 | framework_version='1.2-1', 48 | py_version='py3', 49 | source_dir='code', 50 | entry_point='inference.py', 51 | sagemaker_session=sess 52 | ) 53 | 54 | print('Deploying endpoint in local mode') 55 | print( 56 | 'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 57 | predictor = model.deploy( 58 | initial_instance_count=1, 59 | instance_type='local', 60 | ) 61 | 62 | do_inference_on_local_endpoint(predictor) 63 | 64 | print('About to delete the endpoint to stop paying (if in cloud mode).') 65 | predictor.delete_endpoint() 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /scikit_learn_script_mode_local_training_and_serving/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0,<3.0.0 4 | sagemaker[local] 5 | scikit-learn 6 | -------------------------------------------------------------------------------- /snowflake_bring_your_own_container_local_training/container/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | COPY requirements.txt requirements.txt 4 | RUN pip install -r requirements.txt -------------------------------------------------------------------------------- /snowflake_bring_your_own_container_local_training/container/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | numpy 3 | pandas 4 | scikit-learn 5 | sagemaker 6 | snowflake-snowpark-python 7 | snowflake-connector-python[pandas] 8 | sagemaker-training 9 | -------------------------------------------------------------------------------- /snowflake_bring_your_own_container_local_training/snowflake_bring_your_own_container_local_training.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that trains a simple scikit-learn machine predictive maintenance classification model 2 | # on the dataset fetched from Snowflake, using Snowpark Python package.
3 | # 4 | # Getting Started with Snowpark for Machine Learning on SageMaker: 5 | # - https://quickstarts.snowflake.com/guide/getting_started_with_snowpark_for_machine_learning_on_sagemaker/index.html 6 | # - https://github.com/Snowflake-Labs/sfguide-getting-started-snowpark-python-sagemaker 7 | # 8 | # To be able to securely store the database access credentials, we strongly recommend using AWS Secrets Manager with Snowflake connections: 9 | # - https://docs.aws.amazon.com/secretsmanager/latest/userguide/create_secret.html 10 | # - https://aws.amazon.com/blogs/big-data/simplify-snowflake-data-loading-and-processing-with-aws-glue-databrew/ 11 | # 12 | # This implementation will work on your local computer. 13 | # 14 | # Prerequisites: 15 | # 1. Install required Python packages: 16 | # pip install boto3 sagemaker pandas scikit-learn 17 | # pip install 'sagemaker[local]' 18 | # 2. Docker Desktop has to be installed on your computer, and running. 19 | # 3. Open terminal and run the following commands: 20 | # docker build -t sagemaker-scikit-learn-snowpark-local container/. 21 | ######################################################################################################################## 22 | 23 | from sagemaker.estimator import Estimator 24 | 25 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 26 | 27 | 28 | def main(): 29 | image = 'sagemaker-scikit-learn-snowpark-local' 30 | 31 | hyperparameters={ 32 | "secret-name": "dev/ml/snowflake", 33 | "region-name": "us-east-1" 34 | } 35 | 36 | print('Starting model training.') 37 | estimator = Estimator( 38 | image_uri=image, 39 | entry_point='predictive_maintenance_classification.py', 40 | source_dir='code', 41 | role=DUMMY_IAM_ROLE, 42 | instance_count=1, 43 | instance_type='local', 44 | hyperparameters=hyperparameters 45 | ) 46 | 47 | estimator.fit() 48 | print('Completed model training') 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_serving_without_tfs/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | # For more information on creating a Dockerfile 15 | # https://docs.docker.com/compose/gettingstarted/#step-2-create-a-dockerfile 16 | FROM tensorflow/tensorflow:2.5.0 17 | 18 | RUN apt-get update && apt-get install -y --no-install-recommends nginx curl 19 | 20 | RUN pip install pandas flask gevent gunicorn 21 | 22 | ENV PATH="/opt/program:${PATH}" 23 | 24 | # /opt/ml and all subdirectories are utilized by SageMaker, we use the /code subdirectory to store our user code. 
25 | COPY california_housing /opt/program 26 | WORKDIR /opt/program -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_serving_without_tfs/container/california_housing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/tensorflow_bring_your_own_california_housing_local_serving_without_tfs/container/california_housing/__init__.py -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_serving_without_tfs/container/california_housing/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | proxy_read_timeout 1200s; 27 | 28 | location ~ ^/(ping|invocations) { 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | proxy_set_header Host $http_host; 31 | proxy_redirect off; 32 | proxy_pass http://gunicorn; 33 | } 34 | 35 | location / { 36 | return 404 "{}"; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_serving_without_tfs/container/california_housing/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | cpu_count = multiprocessing.cpu_count() 24 | 25 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 26 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 27 | 28 | def sigterm_handler(nginx_pid, gunicorn_pid): 29 | try: 30 | os.kill(nginx_pid, signal.SIGQUIT) 31 | except OSError: 32 | pass 33 | try: 34 | os.kill(gunicorn_pid, signal.SIGTERM) 35 | except OSError: 36 | pass 37 | 38 | sys.exit(0) 39 | 40 | def start_server(): 41 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 42 | 43 | 44 | # link the log streams to stdout/err so they will be logged to the container logs 45 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 46 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 47 | 48 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 49 | gunicorn = subprocess.Popen(['gunicorn', 50 | '--timeout', str(model_server_timeout), 51 | '-k', 'gevent', 52 | '-b', 'unix:/tmp/gunicorn.sock', 53 | '-w', str(model_server_workers), 54 | 'wsgi:app']) 55 | 56 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 57 | 58 | # If either subprocess exits, so do we. 59 | pids = set([nginx.pid, gunicorn.pid]) 60 | while True: 61 | pid, _ = os.wait() 62 | if pid in pids: 63 | break 64 | 65 | sigterm_handler(nginx.pid, gunicorn.pid) 66 | print('Inference server exiting') 67 | 68 | # The main routine just invokes the start function. 69 | 70 | if __name__ == '__main__': 71 | start_server() 72 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_serving_without_tfs/container/california_housing/wsgi.py: -------------------------------------------------------------------------------- 1 | import predictor as myapp 2 | 3 | # This is just a simple wrapper for gunicorn to find your app. 4 | # If you want to change the algorithm file, simply change "predictor" above to the 5 | # new file. 6 | 7 | app = myapp.app 8 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_serving_without_tfs/tensorflow_bring_your_own_california_housing_local_serving_without_tfs.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program that shows how to serve a BYOC TensorFlow model with no TFS, and perform inference. 2 | # This implementation will work on your local computer. 3 | # 4 | # Prerequisites: 5 | # 1. Install required Python packages: 6 | # pip install boto3 sagemaker pandas scikit-learn 7 | # pip install 'sagemaker[local]' 8 | # 2. Docker Desktop has to be installed on your computer, and running. 9 | # 3. Open terminal and run the following commands: 10 | # docker build -t sagemaker-tensorflow2-no-tfs-local container/. 
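#    (this builds the local image referenced by image = 'sagemaker-tensorflow2-no-tfs-local' in main() below)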
11 | ######################################################################################################################## 12 | 13 | import numpy as np 14 | from sagemaker import Model, LocalSession, Predictor 15 | from sagemaker.deserializers import JSONDeserializer 16 | from sagemaker.serializers import JSONSerializer 17 | 18 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 19 | 20 | sagemaker_session = LocalSession() 21 | sagemaker_session.config = {'local': {'local_code': True}} 22 | 23 | 24 | def main(): 25 | 26 | image = 'sagemaker-tensorflow2-no-tfs-local' 27 | endpoint_name = "my-local-endpoint" 28 | 29 | role = DUMMY_IAM_ROLE 30 | model_dir = 's3://aws-ml-blog/artifacts/tensorflow-script-mode-no-tfs-inference/model.tar.gz' 31 | 32 | model = Model( 33 | image_uri=image, 34 | role=role, 35 | model_data=model_dir, 36 | ) 37 | 38 | print('Deploying endpoint in local mode') 39 | print( 40 | 'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 41 | 42 | endpoint = model.deploy( 43 | initial_instance_count=1, 44 | instance_type='local', 45 | endpoint_name=endpoint_name 46 | ) 47 | 48 | predictor = Predictor(endpoint_name=endpoint_name, 49 | sagemaker_session=sagemaker_session, 50 | serializer=JSONSerializer(), 51 | deserializer=JSONDeserializer()) 52 | 53 | data = {"instances": [[1.53250854, -2.03172922, 1.15884022, 0.38779065, 0.1527185, -0.03002725, -0.925089, 0.9848863]]} 54 | results = predictor.predict(data)['predictions'] 55 | 56 | flat_list = [float('%.1f' % (item)) for sublist in results for item in sublist] 57 | print('predictions: \t{}'.format(np.array(flat_list))) 58 | 59 | print('About to delete the endpoint to stop paying (if in cloud mode).') 60 | predictor.delete_endpoint() 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_and_batch_transform/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | # For more information on creating a Dockerfile 15 | # https://docs.docker.com/compose/gettingstarted/#step-2-create-a-dockerfile 16 | FROM tensorflow/tensorflow:2.8.0 17 | 18 | RUN apt-get update && apt-get install -y --no-install-recommends nginx curl 19 | 20 | RUN pip install pandas flask gevent gunicorn 21 | 22 | ENV PATH="/opt/program:${PATH}" 23 | 24 | # /opt/ml and all subdirectories are utilized by SageMaker, we use the /code subdirectory to store our user code. 
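# Note: the serving code is copied to /opt/program (on the PATH set above) rather than to a /code subdirectory.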
25 | COPY california_housing /opt/program 26 | WORKDIR /opt/program -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_and_batch_transform/container/california_housing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/tensorflow_bring_your_own_california_housing_local_training_and_batch_transform/container/california_housing/__init__.py -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_and_batch_transform/container/california_housing/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | proxy_read_timeout 1200s; 27 | 28 | location ~ ^/(ping|invocations) { 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | proxy_set_header Host $http_host; 31 | proxy_redirect off; 32 | proxy_pass http://gunicorn; 33 | } 34 | 35 | location / { 36 | return 404 "{}"; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_and_batch_transform/container/california_housing/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
6 | # 7 | # The flask server is specified to be the app object in wsgi.py 8 | # 9 | # We set the following parameters: 10 | # 11 | # Parameter Environment Variable Default Value 12 | # --------- -------------------- ------------- 13 | # number of workers MODEL_SERVER_WORKERS the number of CPU cores 14 | # timeout MODEL_SERVER_TIMEOUT 60 seconds 15 | 16 | from __future__ import print_function 17 | import multiprocessing 18 | import os 19 | import signal 20 | import subprocess 21 | import sys 22 | 23 | cpu_count = multiprocessing.cpu_count() 24 | 25 | model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60) 26 | model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count)) 27 | 28 | def sigterm_handler(nginx_pid, gunicorn_pid): 29 | try: 30 | os.kill(nginx_pid, signal.SIGQUIT) 31 | except OSError: 32 | pass 33 | try: 34 | os.kill(gunicorn_pid, signal.SIGTERM) 35 | except OSError: 36 | pass 37 | 38 | sys.exit(0) 39 | 40 | def start_server(): 41 | print('Starting the inference server with {} workers.'.format(model_server_workers)) 42 | 43 | 44 | # link the log streams to stdout/err so they will be logged to the container logs 45 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 46 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 47 | 48 | nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf']) 49 | gunicorn = subprocess.Popen(['gunicorn', 50 | '--timeout', str(model_server_timeout), 51 | '-k', 'gevent', 52 | '-b', 'unix:/tmp/gunicorn.sock', 53 | '-w', str(model_server_workers), 54 | 'wsgi:app']) 55 | 56 | signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid)) 57 | 58 | # If either subprocess exits, so do we. 59 | pids = set([nginx.pid, gunicorn.pid]) 60 | while True: 61 | pid, _ = os.wait() 62 | if pid in pids: 63 | break 64 | 65 | sigterm_handler(nginx.pid, gunicorn.pid) 66 | print('Inference server exiting') 67 | 68 | # The main routine just invokes the start function. 69 | 70 | if __name__ == '__main__': 71 | start_server() 72 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_and_batch_transform/container/california_housing/wsgi.py: -------------------------------------------------------------------------------- 1 | import predictor as myapp 2 | 3 | # This is just a simple wrapper for gunicorn to find your app. 4 | # If you want to change the algorithm file, simply change "predictor" above to the 5 | # new file. 6 | 7 | app = myapp.app 8 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_and_serving/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | 14 | # For more information on creating a Dockerfile 15 | # https://docs.docker.com/compose/gettingstarted/#step-2-create-a-dockerfile 16 | FROM tensorflow/tensorflow:2.5.0 17 | 18 | RUN apt-get update && apt-get install -y --no-install-recommends nginx curl 19 | 20 | # Download TensorFlow Serving 21 | # https://www.tensorflow.org/serving/setup#installing_the_modelserver 22 | RUN echo "deb [arch=amd64] http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | tee /etc/apt/sources.list.d/tensorflow-serving.list 23 | RUN curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add - 24 | RUN apt-get update && apt-get install tensorflow-model-server 25 | 26 | ENV PATH="/opt/ml/code:${PATH}" 27 | 28 | # /opt/ml and all subdirectories are utilized by SageMaker, we use the /code subdirectory to store our user code. 29 | COPY /california_housing /opt/ml/code 30 | WORKDIR /opt/ml/code -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_and_serving/container/california_housing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/tensorflow_bring_your_own_california_housing_local_training_and_serving/container/california_housing/__init__.py -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_and_serving/container/california_housing/nginx.conf: -------------------------------------------------------------------------------- 1 | events { 2 | # determines how many requests can simultaneously be served 3 | # https://www.digitalocean.com/community/tutorials/how-to-optimize-nginx-configuration 4 | # for more information 5 | worker_connections 2048; 6 | } 7 | 8 | http { 9 | server { 10 | # configures the server to listen to the port 8080 11 | # Amazon SageMaker sends inference requests to port 8080. 12 | # For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-code-container-response 13 | listen 8080 deferred; 14 | 15 | # redirects requests from SageMaker to TF Serving 16 | location /invocations { 17 | proxy_pass http://localhost:8501/v1/models/california_housing_model:predict; 18 | } 19 | 20 | # Used by SageMaker to confirm if server is alive. 21 | # https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests 22 | location /ping { 23 | return 200 "OK"; 24 | } 25 | } 26 | } -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_and_serving/container/california_housing/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"). You 6 | # may not use this file except in compliance with the License. A copy of 7 | # the License is located at 8 | # 9 | # http://aws.amazon.com/apache2.0/ 10 | # 11 | # or in the "license" file accompanying this file. 
This file is 12 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 13 | # ANY KIND, either express or implied. See the License for the specific 14 | # language governing permissions and limitations under the License. 15 | 16 | # This file implements the hosting solution, which just starts TensorFlow Model Serving. 17 | import subprocess 18 | 19 | 20 | def start_server(): 21 | print('Starting TensorFlow Serving.') 22 | 23 | # link the log streams to stdout/err so they will be logged to the container logs 24 | subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log']) 25 | subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log']) 26 | 27 | # start nginx server 28 | nginx = subprocess.Popen(['nginx', '-c', '/opt/ml/code/nginx.conf']) 29 | 30 | # start TensorFlow Serving 31 | # https://www.tensorflow.org/serving/api_rest#start_modelserver_with_the_rest_api_endpoint 32 | # SageMaker copies our model artifact from our Training Job into the /opt/ml/model. 33 | # https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-code-load-artifacts 34 | tf_model_server = subprocess.call(['tensorflow_model_server', 35 | '--rest_api_port=8501', 36 | '--model_name=california_housing_model', 37 | '--model_base_path=/opt/ml/model/']) 38 | 39 | 40 | # The main routine just invokes the start function. 41 | if __name__ == '__main__': 42 | start_server() 43 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_toolkit/code/california_housing_tf2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import os 4 | import tensorflow as tf 5 | 6 | def parse_args(): 7 | 8 | parser = argparse.ArgumentParser() 9 | 10 | # hyperparameters sent by the client are passed as command-line arguments to the script 11 | parser.add_argument('--epochs', type=int, default=1) 12 | parser.add_argument('--batch_size', type=int, default=64) 13 | parser.add_argument('--learning_rate', type=float, default=0.1) 14 | 15 | # data directories 16 | parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN')) 17 | parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST')) 18 | 19 | # model directory 20 | parser.add_argument('--sm-model-dir', type=str, default=os.environ.get('SM_MODEL_DIR')) 21 | 22 | return parser.parse_known_args() 23 | 24 | 25 | def get_train_data(train_dir): 26 | 27 | x_train = np.load(os.path.join(train_dir, 'x_train.npy')) 28 | y_train = np.load(os.path.join(train_dir, 'y_train.npy')) 29 | print('x train', x_train.shape,'y train', y_train.shape) 30 | 31 | return x_train, y_train 32 | 33 | 34 | def get_test_data(test_dir): 35 | 36 | x_test = np.load(os.path.join(test_dir, 'x_test.npy')) 37 | y_test = np.load(os.path.join(test_dir, 'y_test.npy')) 38 | print('x test', x_test.shape,'y test', y_test.shape) 39 | 40 | return x_test, y_test 41 | 42 | 43 | def get_model(): 44 | 45 | inputs = tf.keras.Input(shape=(8,)) 46 | hidden_1 = tf.keras.layers.Dense(8, activation='tanh')(inputs) 47 | hidden_2 = tf.keras.layers.Dense(4, activation='sigmoid')(hidden_1) 48 | outputs = tf.keras.layers.Dense(1)(hidden_2) 49 | return tf.keras.Model(inputs=inputs, outputs=outputs) 50 | 51 | 52 | if __name__ == "__main__": 53 | 54 | args, _ = parse_args() 55 | 56 | print('Training data location: {}'.format(args.train)) 57 | 
print('Test data location: {}'.format(args.test)) 58 | x_train, y_train = get_train_data(args.train) 59 | x_test, y_test = get_test_data(args.test) 60 | 61 | batch_size = args.batch_size 62 | epochs = args.epochs 63 | learning_rate = args.learning_rate 64 | print('batch_size = {}, epochs = {}, learning rate = {}'.format(batch_size, epochs, learning_rate)) 65 | 66 | 67 | model = get_model() 68 | optimizer = tf.keras.optimizers.SGD(learning_rate) 69 | model.compile(optimizer=optimizer, loss='mse') 70 | model.fit(x_train, 71 | y_train, 72 | batch_size=batch_size, 73 | epochs=epochs, 74 | validation_data=(x_test, y_test)) 75 | 76 | # evaluate on test set 77 | scores = model.evaluate(x_test, y_test, batch_size, verbose=2) 78 | print("\nTest MSE :", scores) 79 | 80 | # save model 81 | model.save(args.sm_model_dir + '/1') 82 | 83 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_toolkit/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | # For more information on creating a Dockerfile 15 | # https://docs.docker.com/compose/gettingstarted/#step-2-create-a-dockerfile 16 | FROM tensorflow/tensorflow:2.8.0 17 | 18 | COPY ./requirements.txt . 
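# Install the script's dependencies from requirements.txt, then add sagemaker-training (the SageMaker Training Toolkit) so this image can run user scripts in script mode.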
19 | 20 | RUN pip install --upgrade pip 21 | RUN pip install -r ./requirements.txt 22 | RUN pip install sagemaker-training -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_local_training_toolkit/container/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | nltk -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_mms_local_serving/container/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | # Set a docker label to advertise multi-model support on the container 4 | LABEL com.amazonaws.sagemaker.capabilities.multi-models=true 5 | # Set a docker label to enable container to use SAGEMAKER_BIND_TO_PORT environment variable if present 6 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true 7 | 8 | RUN apt-get update 9 | 10 | # Install necessary dependencies for MMS and SageMaker Inference Toolkit 11 | RUN apt-get -y install --no-install-recommends \ 12 | build-essential \ 13 | ca-certificates \ 14 | openjdk-8-jdk-headless \ 15 | python3-dev \ 16 | curl \ 17 | vim \ 18 | && rm -rf /var/lib/apt/lists/* \ 19 | && curl -O https://bootstrap.pypa.io/get-pip.py \ 20 | && python3 get-pip.py 21 | 22 | ## Install TensorFlow, MMS, and SageMaker Inference Toolkit to set up MMS 23 | RUN pip3 --no-cache-dir install tensorflow==2.8.0 \ 24 | multi-model-server \ 25 | sagemaker-inference \ 26 | retrying \ 27 | pandas \ 28 | protobuf==3.20.* 29 | 30 | # Copy entrypoint script to the image 31 | COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py 32 | RUN chmod +x /usr/local/bin/dockerd-entrypoint.py 33 | 34 | RUN mkdir -p /home/model-server/ 35 | 36 | # Copy the default custom service file to handle incoming data and inference requests 37 | COPY model_handler.py /home/model-server/model_handler.py 38 | 39 | # Define an entrypoint script for the docker image 40 | ENTRYPOINT ["python3", "/usr/local/bin/dockerd-entrypoint.py"] 41 | 42 | # Define command to be passed to the entrypoint 43 | CMD ["serve"] 44 | 45 | 46 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_mms_local_serving/container/dockerd-entrypoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shlex 3 | import subprocess 4 | import sys 5 | from subprocess import CalledProcessError 6 | 7 | from retrying import retry 8 | from sagemaker_inference import model_server 9 | 10 | 11 | def _retry_if_error(exception): 12 | return isinstance(exception, CalledProcessError or OSError) 13 | 14 | 15 | @retry(stop_max_delay=1000 * 50, retry_on_exception=_retry_if_error) 16 | def _start_mms(): 17 | # by default the number of workers per model is 1, but we can configure it through the 18 | # environment variable below if desired. 
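# (for example, uncomment the line below, or pass -e SAGEMAKER_MODEL_SERVER_WORKERS=2 when starting the container)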
19 | # os.environ['SAGEMAKER_MODEL_SERVER_WORKERS'] = '2' 20 | model_server.start_model_server(handler_service="/home/model-server/model_handler.py:handle") 21 | 22 | 23 | def main(): 24 | if sys.argv[1] == "serve": 25 | _start_mms() 26 | else: 27 | subprocess.check_call(shlex.split(" ".join(sys.argv[1:]))) 28 | 29 | # prevent docker exit 30 | subprocess.call(["tail", "-f", "/dev/null"]) 31 | 32 | 33 | main() 34 | -------------------------------------------------------------------------------- /tensorflow_bring_your_own_california_housing_mms_local_serving/container/model_handler.py: -------------------------------------------------------------------------------- 1 | """ 2 | ModelHandler defines an example model handler for load and inference requests for TensorFlow CPU models 3 | """ 4 | import json 5 | import os 6 | 7 | import pandas as pd 8 | import tensorflow as tf 9 | 10 | 11 | class ModelHandler(object): 12 | """ 13 | A sample Model handler implementation. 14 | """ 15 | 16 | def __init__(self): 17 | self.initialized = False 18 | self.tf_model = None 19 | self.shapes = None 20 | 21 | 22 | def initialize(self, context): 23 | """ 24 | Initialize model. This will be called during model loading time 25 | :param context: Initial context contains model server system properties. 26 | :return: 27 | """ 28 | print('initialize') 29 | self.initialized = True 30 | properties = context.system_properties 31 | # Contains the url parameter passed to the load request 32 | model_dir = properties.get("model_dir") 33 | print(f'model_dir: {model_dir}') 34 | 35 | for currentpath, folders, files in os.walk(model_dir): 36 | print(currentpath, folders, files) 37 | 38 | gpu_id = properties.get("gpu_id") 39 | print(f'gpu_id: {gpu_id}') 40 | 41 | self.tf_model = tf.keras.models.load_model(model_dir+'/1/') 42 | print('Model Loaded') 43 | 44 | 45 | def handle(self, data, context): 46 | """ 47 | Call preprocess, inference and post-process functions 48 | :param data: input data 49 | :param context: mms context 50 | """ 51 | print(f'handle') 52 | 53 | payload = data[0]["body"].decode() 54 | instances = json.loads(payload)["instances"] 55 | print(f"instances: {instances}") 56 | payload = pd.DataFrame(data=instances) 57 | print('Invoked with {} records'.format(payload.shape[0])) 58 | 59 | predictions = self.tf_model.predict(payload) 60 | 61 | # Convert from numpy back to JSON 62 | predictions_lists = predictions.tolist() 63 | print(f"Returning {len(predictions_lists)} predictions") 64 | result = [[{"predictions": predictions_lists}]] 65 | return result 66 | 67 | _service = ModelHandler() 68 | 69 | 70 | def handle(data, context): 71 | if not _service.initialized: 72 | _service.initialize(context) 73 | 74 | if data is None: 75 | return None 76 | 77 | return _service.handle(data, context) 78 | -------------------------------------------------------------------------------- /tensorflow_extend_dlc_california_housing_local_training/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. 
This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | 14 | # For more information on creating a Dockerfile 15 | # https://docs.docker.com/compose/gettingstarted/#step-2-create-a-dockerfile 16 | ARG REGION=us-east-1 17 | 18 | # SageMaker TensorFlow image 19 | FROM 763104351884.dkr.ecr.$REGION.amazonaws.com/tensorflow-training:2.8-cpu-py39 20 | 21 | # We will use the following packages in the training script. 22 | RUN pip3 install nltk gensim 23 | 24 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /tensorflow_graviton_bring_your_own_california_housing_local_training/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build a Graviton image that can do training and inference in SageMaker 2 | # This is a Python 3 image that uses the nginx, gunicorn, flask stack 3 | # for serving inferences in a stable way. 4 | 5 | FROM arm64v8/ubuntu 6 | 7 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 8 | wget \ 9 | nginx \ 10 | ca-certificates 11 | 12 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.10.3-Linux-aarch64.sh 13 | RUN chmod a+x Miniconda3-py39_4.10.3-Linux-aarch64.sh 14 | RUN bash Miniconda3-py39_4.10.3-Linux-aarch64.sh -b 15 | ENV PATH /root/miniconda3/bin/:$PATH 16 | 17 | COPY ml-dependencies.yml ./ 18 | RUN conda env create -f ml-dependencies.yml 19 | 20 | ENV PATH /root/miniconda3/envs/ml-dependencies/bin:$PATH 21 | 22 | # Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard 23 | # output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE 24 | # keeps Python from writing the .pyc files which are unnecessary in this case. We also update 25 | # PATH so that the train and serve programs are found when the container is invoked. 
26 | 27 | ENV PYTHONUNBUFFERED=TRUE 28 | ENV PYTHONDONTWRITEBYTECODE=TRUE 29 | ENV PATH="/opt/program:${PATH}" 30 | 31 | # Set up the program in the image 32 | COPY california_housing /opt/program 33 | WORKDIR /opt/program 34 | 35 | -------------------------------------------------------------------------------- /tensorflow_graviton_bring_your_own_california_housing_local_training/container/ml-dependencies.yml: -------------------------------------------------------------------------------- 1 | name: ml-dependencies 2 | dependencies: 3 | - numpy 4 | - pandas 5 | - scikit-learn 6 | - tensorflow==2.8.2 -------------------------------------------------------------------------------- /tensorflow_graviton_bring_your_own_california_housing_local_training_toolkit/code/california_housing_tf2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import os 4 | import tensorflow as tf 5 | 6 | def parse_args(): 7 | 8 | parser = argparse.ArgumentParser() 9 | 10 | # hyperparameters sent by the client are passed as command-line arguments to the script 11 | parser.add_argument('--epochs', type=int, default=1) 12 | parser.add_argument('--batch_size', type=int, default=64) 13 | parser.add_argument('--learning_rate', type=float, default=0.1) 14 | 15 | # data directories 16 | parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN')) 17 | parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST')) 18 | 19 | # model directory 20 | parser.add_argument('--sm-model-dir', type=str, default=os.environ.get('SM_MODEL_DIR')) 21 | 22 | return parser.parse_known_args() 23 | 24 | 25 | def get_train_data(train_dir): 26 | 27 | x_train = np.load(os.path.join(train_dir, 'x_train.npy')) 28 | y_train = np.load(os.path.join(train_dir, 'y_train.npy')) 29 | print('x train', x_train.shape,'y train', y_train.shape) 30 | 31 | return x_train, y_train 32 | 33 | 34 | def get_test_data(test_dir): 35 | 36 | x_test = np.load(os.path.join(test_dir, 'x_test.npy')) 37 | y_test = np.load(os.path.join(test_dir, 'y_test.npy')) 38 | print('x test', x_test.shape,'y test', y_test.shape) 39 | 40 | return x_test, y_test 41 | 42 | 43 | def get_model(): 44 | 45 | inputs = tf.keras.Input(shape=(8,)) 46 | hidden_1 = tf.keras.layers.Dense(8, activation='tanh')(inputs) 47 | hidden_2 = tf.keras.layers.Dense(4, activation='sigmoid')(hidden_1) 48 | outputs = tf.keras.layers.Dense(1)(hidden_2) 49 | return tf.keras.Model(inputs=inputs, outputs=outputs) 50 | 51 | 52 | if __name__ == "__main__": 53 | 54 | args, _ = parse_args() 55 | 56 | print('Training data location: {}'.format(args.train)) 57 | print('Test data location: {}'.format(args.test)) 58 | x_train, y_train = get_train_data(args.train) 59 | x_test, y_test = get_test_data(args.test) 60 | 61 | batch_size = args.batch_size 62 | epochs = args.epochs 63 | learning_rate = args.learning_rate 64 | print('batch_size = {}, epochs = {}, learning rate = {}'.format(batch_size, epochs, learning_rate)) 65 | 66 | 67 | model = get_model() 68 | optimizer = tf.keras.optimizers.SGD(learning_rate) 69 | model.compile(optimizer=optimizer, loss='mse') 70 | model.fit(x_train, 71 | y_train, 72 | batch_size=batch_size, 73 | epochs=epochs, 74 | validation_data=(x_test, y_test)) 75 | 76 | # evaluate on test set 77 | scores = model.evaluate(x_test, y_test, batch_size, verbose=2) 78 | print("\nTest MSE :", scores) 79 | 80 | # save model 81 | model.save(args.sm_model_dir + '/1') 82 | 83 | 
-------------------------------------------------------------------------------- /tensorflow_graviton_bring_your_own_california_housing_local_training_toolkit/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build a Graviton image that can do training and inference in SageMaker 2 | # This is a Python 3 image that uses the nginx, gunicorn, flask stack 3 | # for serving inferences in a stable way. 4 | 5 | FROM arm64v8/ubuntu 6 | 7 | RUN apt-get -y update && apt-get install -y --no-install-recommends \ 8 | wget \ 9 | nginx \ 10 | ca-certificates \ 11 | gcc \ 12 | linux-headers-generic \ 13 | libc-dev 14 | 15 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py38_4.9.2-Linux-aarch64.sh 16 | RUN chmod a+x Miniconda3-py38_4.9.2-Linux-aarch64.sh 17 | RUN bash Miniconda3-py38_4.9.2-Linux-aarch64.sh -b 18 | ENV PATH /root/miniconda3/bin/:$PATH 19 | 20 | COPY ml-dependencies.yml ./ 21 | RUN conda env create -f ml-dependencies.yml 22 | 23 | ENV PATH /root/miniconda3/envs/ml-dependencies/bin:$PATH 24 | 25 | 26 | -------------------------------------------------------------------------------- /tensorflow_graviton_bring_your_own_california_housing_local_training_toolkit/container/ml-dependencies.yml: -------------------------------------------------------------------------------- 1 | name: ml-dependencies 2 | dependencies: 3 | - python=3.8 4 | - numpy 5 | - pandas 6 | - scikit-learn 7 | - tensorflow==2.8.2 8 | - pip 9 | - pip: 10 | - sagemaker-training -------------------------------------------------------------------------------- /tensorflow_graviton_script_mode_local_model_inference/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /tensorflow_graviton_script_mode_local_model_inference/tensorflow_graviton_script_mode_local_model_inference.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program for deploying a trained model to a SageMaker endpoint using Graviton instance. 2 | # This implementation will work on your *ARM based local computer*. 3 | # 4 | # This example is based on: https://github.com/aws/amazon-sagemaker-examples/blob/master/frameworks/tensorflow/get_started_mnist_deploy.ipynb 5 | # 6 | # Prerequisites: 7 | # 1. Install required Python packages: 8 | # `pip install -r requirements.txt` 9 | # 2. Docker Desktop installed and running on your computer: 10 | # `docker ps` 11 | # 3. You should have AWS credentials configured on your local machine 12 | # in order to be able to pull the docker image from ECR. 
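#    (the Graviton inference image used below is pulled from the 763104351884.dkr.ecr.<region>.amazonaws.com registry)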
13 | ############################################################################################### 14 | 15 | from sagemaker.local import LocalSession 16 | from sagemaker.tensorflow import TensorFlowModel 17 | import numpy as np 18 | 19 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 20 | 21 | 22 | def main(): 23 | session = LocalSession() 24 | session.config = {'local': {'local_code': True}} 25 | 26 | role = DUMMY_IAM_ROLE 27 | model_dir = 's3://aws-ml-blog/artifacts/run-ml-inference-on-graviton-based-instances-with-amazon-sagemaker/model.tar.gz' 28 | region = session.boto_region_name 29 | print(f'Region: {region}') 30 | 31 | model = TensorFlowModel( 32 | role=role, 33 | model_data=model_dir, 34 | image_uri=f'763104351884.dkr.ecr.{region}.amazonaws.com/tensorflow-inference-graviton:2.9.1-cpu-py38-ubuntu20.04-sagemaker' 35 | ) 36 | 37 | print('Deploying endpoint in local mode') 38 | print( 39 | 'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 40 | predictor = model.deploy( 41 | initial_instance_count=1, 42 | instance_type='local', 43 | ) 44 | 45 | print('Endpoint deployed in local mode') 46 | payload = np.random.randn(1, 32, 32, 3) 47 | 48 | predictions = predictor.predict(payload) 49 | print("predictions: {}".format(predictions)) 50 | 51 | print('About to delete the endpoint') 52 | predictor.delete_endpoint(predictor.endpoint_name) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /tensorflow_script_mode_california_housing_local_training_and_batch_transform/code/california_housing_tf2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import pandas as pd 5 | import tensorflow as tf 6 | 7 | 8 | def parse_args(): 9 | 10 | parser = argparse.ArgumentParser() 11 | 12 | # hyperparameters sent by the client are passed as command-line arguments to the script 13 | parser.add_argument('--epochs', type=int, default=1) 14 | parser.add_argument('--batch_size', type=int, default=64) 15 | parser.add_argument('--learning_rate', type=float, default=0.1) 16 | 17 | # data directories 18 | parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN')) 19 | parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST')) 20 | 21 | # model directory 22 | parser.add_argument('--sm-model-dir', type=str, default=os.environ.get('SM_MODEL_DIR')) 23 | 24 | return parser.parse_known_args() 25 | 26 | 27 | def get_train_data(train_dir): 28 | 29 | x_train = pd.read_csv(os.path.join(train_dir, 'x_train.csv')) 30 | y_train = pd.read_csv(os.path.join(train_dir, 'y_train.csv')) 31 | print('x train', x_train.shape,'y train', y_train.shape) 32 | 33 | return x_train, y_train 34 | 35 | 36 | def get_test_data(test_dir): 37 | 38 | x_test = pd.read_csv(os.path.join(test_dir, 'x_test.csv')) 39 | y_test = pd.read_csv(os.path.join(test_dir, 'y_test.csv')) 40 | print('x test', x_test.shape,'y test', y_test.shape) 41 | 42 | return x_test, y_test 43 | 44 | 45 | def get_model(): 46 | 47 | inputs = tf.keras.Input(shape=(8,)) 48 | hidden_1 = tf.keras.layers.Dense(8, activation='tanh')(inputs) 49 | hidden_2 = tf.keras.layers.Dense(4, activation='sigmoid')(hidden_1) 50 | outputs = tf.keras.layers.Dense(1)(hidden_2) 51 | return tf.keras.Model(inputs=inputs, outputs=outputs) 52 | 53 | 54 | if __name__ == 
"__main__": 55 | 56 | args, _ = parse_args() 57 | 58 | print('Training data location: {}'.format(args.train)) 59 | print('Test data location: {}'.format(args.test)) 60 | x_train, y_train = get_train_data(args.train) 61 | x_test, y_test = get_test_data(args.test) 62 | 63 | batch_size = args.batch_size 64 | epochs = args.epochs 65 | learning_rate = args.learning_rate 66 | print('batch_size = {}, epochs = {}, learning rate = {}'.format(batch_size, epochs, learning_rate)) 67 | 68 | 69 | model = get_model() 70 | optimizer = tf.keras.optimizers.SGD(learning_rate) 71 | model.compile(optimizer=optimizer, loss='mse') 72 | model.fit(x_train, 73 | y_train, 74 | batch_size=batch_size, 75 | epochs=epochs, 76 | validation_data=(x_test, y_test)) 77 | 78 | # evaluate on test set 79 | scores = model.evaluate(x_test, y_test, batch_size, verbose=2) 80 | print("\nTest MSE :", scores) 81 | 82 | # save model 83 | model.save(args.sm_model_dir + '/1') 84 | 85 | -------------------------------------------------------------------------------- /tensorflow_script_mode_california_housing_local_training_and_batch_transform/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /tensorflow_script_mode_california_housing_local_training_and_serving/code/california_housing_tf2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import os 4 | import tensorflow as tf 5 | 6 | def parse_args(): 7 | 8 | parser = argparse.ArgumentParser() 9 | 10 | # hyperparameters sent by the client are passed as command-line arguments to the script 11 | parser.add_argument('--epochs', type=int, default=1) 12 | parser.add_argument('--batch_size', type=int, default=64) 13 | parser.add_argument('--learning_rate', type=float, default=0.1) 14 | 15 | # data directories 16 | parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN')) 17 | parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST')) 18 | 19 | # model directory 20 | parser.add_argument('--sm-model-dir', type=str, default=os.environ.get('SM_MODEL_DIR')) 21 | 22 | return parser.parse_known_args() 23 | 24 | 25 | def get_train_data(train_dir): 26 | 27 | x_train = np.load(os.path.join(train_dir, 'x_train.npy')) 28 | y_train = np.load(os.path.join(train_dir, 'y_train.npy')) 29 | print('x train', x_train.shape,'y train', y_train.shape) 30 | 31 | return x_train, y_train 32 | 33 | 34 | def get_test_data(test_dir): 35 | 36 | x_test = np.load(os.path.join(test_dir, 'x_test.npy')) 37 | y_test = np.load(os.path.join(test_dir, 'y_test.npy')) 38 | print('x test', x_test.shape,'y test', y_test.shape) 39 | 40 | return x_test, y_test 41 | 42 | 43 | def get_model(): 44 | 45 | inputs = tf.keras.Input(shape=(8,)) 46 | hidden_1 = tf.keras.layers.Dense(8, activation='tanh')(inputs) 47 | hidden_2 = tf.keras.layers.Dense(4, activation='sigmoid')(hidden_1) 48 | outputs = tf.keras.layers.Dense(1)(hidden_2) 49 | return tf.keras.Model(inputs=inputs, outputs=outputs) 50 | 51 | 52 | if __name__ == "__main__": 53 | 54 | args, _ = parse_args() 55 | 56 | print('Training data location: {}'.format(args.train)) 57 | print('Test data location: {}'.format(args.test)) 58 | x_train, y_train = get_train_data(args.train) 59 | x_test, y_test = get_test_data(args.test) 60 | 61 | batch_size = args.batch_size 62 
| epochs = args.epochs 63 | learning_rate = args.learning_rate 64 | print('batch_size = {}, epochs = {}, learning rate = {}'.format(batch_size, epochs, learning_rate)) 65 | 66 | 67 | model = get_model() 68 | optimizer = tf.keras.optimizers.SGD(learning_rate) 69 | model.compile(optimizer=optimizer, loss='mse') 70 | model.fit(x_train, 71 | y_train, 72 | batch_size=batch_size, 73 | epochs=epochs, 74 | validation_data=(x_test, y_test)) 75 | 76 | # evaluate on test set 77 | scores = model.evaluate(x_test, y_test, batch_size, verbose=2) 78 | print("\nTest MSE :", scores) 79 | 80 | # save model 81 | model.save(args.sm_model_dir + '/1') 82 | 83 | -------------------------------------------------------------------------------- /tensorflow_script_mode_california_housing_local_training_and_serving/requirements.txt: -------------------------------------------------------------------------------- 1 | sklearn 2 | numpy 3 | pandas 4 | sagemaker>=2.0.0<3.0.0 5 | sagemaker[local] 6 | scikit-learn 7 | -------------------------------------------------------------------------------- /tensorflow_script_mode_debug_local_training/data/README.md: -------------------------------------------------------------------------------- 1 | # Data Folder -------------------------------------------------------------------------------- /tensorflow_script_mode_debug_local_training/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /tensorflow_script_mode_debug_local_training/source_dir/requirements.txt: -------------------------------------------------------------------------------- 1 | pydevd-pycharm~=221.5591.52 2 | -------------------------------------------------------------------------------- /tensorflow_script_mode_local_gpu_training_resnet50/data/training/README.md: -------------------------------------------------------------------------------- 1 | # Data Folder -------------------------------------------------------------------------------- /tensorflow_script_mode_local_gpu_training_resnet50/data/validation/README.md: -------------------------------------------------------------------------------- 1 | # Data Folder -------------------------------------------------------------------------------- /tensorflow_script_mode_local_gpu_training_resnet50/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /tensorflow_script_mode_local_model_inference/code/inference.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | 5 | 6 | def handler(data, context): 7 | """Handle request. 
8 | Args: 9 | data (obj): the request data 10 | context (Context): an object containing request and configuration details 11 | Returns: 12 | (bytes, string): data to return to client, (optional) response content type 13 | """ 14 | 15 | print("handler start") 16 | 17 | processed_input = _process_input(data, context) 18 | response = requests.post(context.rest_uri, data=processed_input) 19 | return _process_output(response, context) 20 | 21 | 22 | def _process_input(data, context): 23 | if context.request_content_type == 'application/json': 24 | # pass through json (assumes it's correctly formed) 25 | d = data.read().decode('utf-8') 26 | print('input data: {}'.format(d)) 27 | input_json = json.loads(d) 28 | 29 | return input_json 30 | 31 | if context.request_content_type == 'text/csv': 32 | # very simple csv handler 33 | return json.dumps({ 34 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 35 | }) 36 | 37 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 38 | context.request_content_type or "unknown")) 39 | 40 | 41 | def _process_output(data, context): 42 | if data.status_code != 200: 43 | raise ValueError(data.content.decode('utf-8')) 44 | 45 | response_content_type = context.accept_header 46 | prediction = data.content 47 | return prediction, response_content_type 48 | -------------------------------------------------------------------------------- /tensorflow_script_mode_local_model_inference/code/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | -------------------------------------------------------------------------------- /tensorflow_script_mode_local_model_inference/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /tensorflow_script_mode_local_model_inference/tensorflow_script_mode_local_model_inference.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program for deploying a trained model to a SageMaker endpoint. 2 | # Inference is done with a file in S3 instead of http payload for the SageMaker Endpoint. 3 | # This implementation will work on your *local computer*. 4 | # 5 | # This example is based on: https://github.com/aws/amazon-sagemaker-examples/blob/master/frameworks/tensorflow/get_started_mnist_deploy.ipynb 6 | # 7 | # Prerequisites: 8 | # 1. Install required Python packages: 9 | # `pip install -r requirements.txt` 10 | # 2. Docker Desktop installed and running on your computer: 11 | # `docker ps` 12 | # 3. You should have AWS credentials configured on your local machine 13 | # in order to be able to pull the docker image from ECR. 
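#    Note: this script reads the request payload from a local instances.json file and posts it to the endpoint; the *_file variant further below passes S3 object details instead.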
14 | ############################################################################################### 15 | 16 | from sagemaker.local import LocalSession 17 | from sagemaker.tensorflow import TensorFlowModel 18 | 19 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 20 | 21 | 22 | def main(): 23 | session = LocalSession() 24 | session.config = {'local': {'local_code': True}} 25 | 26 | role = DUMMY_IAM_ROLE 27 | model_dir = 's3://aws-ml-blog/artifacts/tensorflow-script-mode-local-model-inference/model.tar.gz' 28 | 29 | model = TensorFlowModel( 30 | entry_point='inference.py', 31 | source_dir='./code', 32 | role=role, 33 | model_data=model_dir, 34 | framework_version='2.8', 35 | ) 36 | 37 | print('Deploying endpoint in local mode') 38 | print( 39 | 'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 40 | predictor = model.deploy( 41 | initial_instance_count=1, 42 | instance_type='local', 43 | ) 44 | 45 | print('Endpoint deployed in local mode') 46 | 47 | with open("instances.json", 'r') as f: 48 | payload = f.read().strip() 49 | 50 | predictions = predictor.predict(payload) 51 | print("predictions: {}".format(predictions)) 52 | 53 | print('About to delete the endpoint') 54 | predictor.delete_endpoint(predictor.endpoint_name) 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /tensorflow_script_mode_local_model_inference_file/code/inference.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | import boto3 4 | 5 | s3 = boto3.resource('s3') 6 | 7 | def handler(data, context): 8 | """Handle request. 
9 | Args: 10 | data (obj): the request data 11 | context (Context): an object containing request and configuration details 12 | Returns: 13 | (bytes, string): data to return to client, (optional) response content type 14 | """ 15 | 16 | print("handler start") 17 | 18 | processed_input = _process_input(data, context) 19 | response = requests.post(context.rest_uri, data=processed_input) 20 | return _process_output(response, context) 21 | 22 | 23 | def _process_input(data, context): 24 | if context.request_content_type == 'application/json': 25 | # pass through json (assumes it's correctly formed) 26 | d = data.read().decode('utf-8') 27 | print('input data: {}'.format(d)) 28 | 29 | input_json = json.loads(d) 30 | 31 | print('reading object from S3') 32 | obj = s3.Object(input_json['bucket_name'], input_json['object_name']) 33 | body = obj.get()['Body'].read() 34 | print('body: {}'.format(body)) 35 | 36 | return body 37 | 38 | if context.request_content_type == 'text/csv': 39 | # very simple csv handler 40 | return json.dumps({ 41 | 'instances': [float(x) for x in data.read().decode('utf-8').split(',')] 42 | }) 43 | 44 | raise ValueError('{{"error": "unsupported content type {}"}}'.format( 45 | context.request_content_type or "unknown")) 46 | 47 | 48 | def _process_output(data, context): 49 | if data.status_code != 200: 50 | raise ValueError(data.content.decode('utf-8')) 51 | 52 | response_content_type = context.accept_header 53 | prediction = data.content 54 | return prediction, response_content_type 55 | -------------------------------------------------------------------------------- /tensorflow_script_mode_local_model_inference_file/code/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | -------------------------------------------------------------------------------- /tensorflow_script_mode_local_model_inference_file/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /tensorflow_script_mode_local_model_inference_file/tensorflow_script_mode_local_model_inference_file.py: -------------------------------------------------------------------------------- 1 | # This is a sample Python program for deploying a trained model to a SageMaker endpoint. 2 | # Inference is done with a file in S3 instead of http payload for the SageMaker Endpoint. 3 | # This implementation will work on your *local computer*. 4 | # 5 | # This example is based on: https://github.com/aws/amazon-sagemaker-examples/blob/master/frameworks/tensorflow/get_started_mnist_deploy.ipynb 6 | # 7 | # Prerequisites: 8 | # 1. Install required Python packages: 9 | # `pip install -r requirements.txt` 10 | # 2. Docker Desktop installed and running on your computer: 11 | # `docker ps` 12 | # 3. You should have AWS credentials configured on your local machine 13 | # in order to be able to pull the docker image from ECR. 
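#    Note: the inference.py handler above uses boto3 to fetch the input object from S3, so the request payload only needs to contain the bucket and object names.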
14 | ############################################################################################### 15 | 16 | from sagemaker.local import LocalSession 17 | from sagemaker.tensorflow import TensorFlowModel 18 | 19 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001' 20 | 21 | 22 | def main(): 23 | session = LocalSession() 24 | session.config = {'local': {'local_code': True}} 25 | 26 | role = DUMMY_IAM_ROLE 27 | model_dir = 's3://aws-ml-blog/artifacts/tensorflow-script-mode-local-model-inference/model.tar.gz' 28 | 29 | model = TensorFlowModel( 30 | entry_point='inference.py', 31 | source_dir = './code', 32 | role=role, 33 | model_data=model_dir, 34 | framework_version='2.3.0', 35 | ) 36 | 37 | print('Deploying endpoint in local mode') 38 | print( 39 | 'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.') 40 | predictor = model.deploy( 41 | initial_instance_count=1, 42 | instance_type='local', 43 | ) 44 | 45 | print('Endpoint deployed in local mode') 46 | 47 | dummy_inputs = { 48 | 'bucket_name': 'aws-ml-blog', 49 | 'object_name': 'artifacts/tensorflow-script-mode-local-model-inference/instances.json' 50 | } 51 | 52 | predictions = predictor.predict(dummy_inputs) 53 | print("predictions: {}".format(predictions)) 54 | 55 | print('About to delete the endpoint') 56 | predictor.delete_endpoint(predictor.endpoint_name) 57 | 58 | 59 | if __name__ == "__main__": 60 | main() 61 | -------------------------------------------------------------------------------- /tensorflow_script_mode_local_training_and_serving/code/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | boto3 3 | nltk -------------------------------------------------------------------------------- /tensorflow_script_mode_local_training_and_serving/data/README.md: -------------------------------------------------------------------------------- 1 | # Data Folder -------------------------------------------------------------------------------- /tensorflow_script_mode_local_training_and_serving/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | sagemaker>=2.0.0<3.0.0 4 | sagemaker[local] 5 | -------------------------------------------------------------------------------- /xgboost_script_mode_local_serving_no_compressed_model/code/inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | import json 14 | import os 15 | import pickle as pkl 16 | 17 | import numpy as np 18 | import sagemaker_xgboost_container.encoder as xgb_encoders 19 | 20 | 21 | def model_fn(model_dir): 22 | """ 23 | Deserialize and return fitted model. 
24 |     """
25 |     model_file = "my-xgboost-model"
26 |     booster = pkl.load(open(os.path.join(model_dir, model_file), "rb"))
27 |     return booster
28 | 
29 | 
30 | def input_fn(request_body, request_content_type):
31 |     """
32 |     The SageMaker XGBoost model server receives the request data body and the content type,
33 |     and invokes the `input_fn`.
34 | 
35 |     Return a DMatrix (an object that can be passed to predict_fn).
36 |     """
37 |     print(f"request_body: {request_body}, request_content_type: {request_content_type}")
38 |     if request_content_type == "text/libsvm":
39 |         return xgb_encoders.libsvm_to_dmatrix(request_body)
40 |     else:
41 |         raise ValueError("Content type {} is not supported.".format(request_content_type))
42 | 
43 | 
44 | def predict_fn(input_data, model):
45 |     """
46 |     The SageMaker XGBoost model server invokes `predict_fn` on the return value of `input_fn`.
47 | 
48 |     Return a two-dimensional NumPy array where the first column contains the predictions
49 |     and the remaining columns are the feature contributions (SHAP values) for that prediction.
50 |     """
51 |     print(f"input_data: {input_data}")
52 |     prediction = model.predict(input_data)
53 |     feature_contribs = model.predict(input_data, pred_contribs=True, validate_features=False)
54 |     output = np.hstack((prediction[:, np.newaxis], feature_contribs))
55 |     return output
56 | 
57 | 
58 | def output_fn(predictions, content_type):
59 |     """
60 |     After invoking predict_fn, the model server invokes `output_fn`.
61 |     """
62 |     if content_type == "text/csv":
63 |         return ",".join(str(x) for x in predictions[0])
64 |     else:
65 |         raise ValueError("Content type {} is not supported.".format(content_type))
66 | 
--------------------------------------------------------------------------------
/xgboost_script_mode_local_serving_no_compressed_model/model/my-xgboost-model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-local-mode/247ae347437fc723b716ccf3e5e1b8dcd658d3e8/xgboost_script_mode_local_serving_no_compressed_model/model/my-xgboost-model
--------------------------------------------------------------------------------
/xgboost_script_mode_local_serving_no_compressed_model/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pandas
3 | sagemaker>=2.0.0,<3.0.0
4 | sagemaker[local]
5 | 
--------------------------------------------------------------------------------
/xgboost_script_mode_local_serving_no_compressed_model/xgboost_script_mode_local_serving_no_compressed_model.py:
--------------------------------------------------------------------------------
1 | # This is a sample Python program that deploys a simple XGBoost model Endpoint, trained on the Abalone dataset.
2 | # This implementation will work on your *local computer* or in the *AWS Cloud*.
3 | #
4 | # Prerequisites:
5 | # 1. Install required Python packages:
6 | #    `pip install -r requirements.txt`
7 | # 2. Docker Desktop installed and running on your computer:
8 | #    `docker ps`
9 | # 3. You should have AWS credentials configured on your local machine
10 | #    in order to be able to pull the docker image from ECR.
11 | ###############################################################################################
12 | 
13 | from sagemaker import TrainingInput
14 | from sagemaker.xgboost import XGBoost, XGBoostModel
15 | from sagemaker.local import LocalSession
16 | 
17 | DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'
18 | LOCAL_SESSION = LocalSession()
19 | LOCAL_SESSION.config = {'local': {'local_code': True}}  # Ensure full code locality, see: https://sagemaker.readthedocs.io/en/stable/overview.html#local-mode
20 | FRAMEWORK_VERSION = "1.7-1"
21 | 
22 | 
23 | def do_inference_on_local_endpoint(predictor, libsvm_str):
24 |     label, *features = libsvm_str.strip().split()
25 |     predictions = predictor.predict(" ".join(["-99"] + features))  # use dummy label -99
26 |     print("Prediction: {}".format(predictions))
27 | 
28 | 
29 | def main():
30 |     xgb_inference_model = XGBoostModel(
31 |         model_data="file://model/my-xgboost-model",
32 |         role=DUMMY_IAM_ROLE,
33 |         entry_point="inference.py",
34 |         source_dir="./code",
35 |         framework_version=FRAMEWORK_VERSION,
36 |         sagemaker_session=LOCAL_SESSION
37 |     )
38 | 
39 |     print('Deploying endpoint in local mode')
40 |     predictor = xgb_inference_model.deploy(
41 |         initial_instance_count=1,
42 |         instance_type="local",
43 |     )
44 | 
45 |     a_young_abalone = "6 1:3 2:0.37 3:0.29 4:0.095 5:0.249 6:0.1045 7:0.058 8:0.067"
46 |     do_inference_on_local_endpoint(predictor, a_young_abalone)
47 | 
48 |     an_old_abalone = "15 1:1 2:0.655 3:0.53 4:0.175 5:1.2635 6:0.486 7:0.2635 8:0.415"
49 |     do_inference_on_local_endpoint(predictor, an_old_abalone)
50 | 
51 |     print('About to delete the endpoint to stop paying (if in cloud mode).')
52 |     predictor.delete_endpoint(predictor.endpoint_name)
53 | 
54 | 
55 | if __name__ == "__main__":
56 |     main()
57 | 
--------------------------------------------------------------------------------
/xgboost_script_mode_local_training_and_serving/code/inference.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License").
4 | # You may not use this file except in compliance with the License.
5 | # A copy of the License is located at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # or in the "license" file accompanying this file. This file is distributed
10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11 | # express or implied. See the License for the specific language governing
12 | # permissions and limitations under the License.
13 | import json
14 | import os
15 | import pickle as pkl
16 | 
17 | import numpy as np
18 | import sagemaker_xgboost_container.encoder as xgb_encoders
19 | 
20 | 
21 | def model_fn(model_dir):
22 |     """
23 |     Deserialize and return fitted model.
24 |     """
25 |     model_file = "xgboost-model"
26 |     booster = pkl.load(open(os.path.join(model_dir, model_file), "rb"))
27 |     return booster
28 | 
29 | 
30 | def input_fn(request_body, request_content_type):
31 |     """
32 |     The SageMaker XGBoost model server receives the request data body and the content type,
33 |     and invokes the `input_fn`.
34 | 
35 |     Return a DMatrix (an object that can be passed to predict_fn).
36 |     """
37 |     if request_content_type == "text/libsvm":
38 |         return xgb_encoders.libsvm_to_dmatrix(request_body)
39 |     else:
40 |         raise ValueError("Content type {} is not supported.".format(request_content_type))
41 | 
42 | 
43 | def predict_fn(input_data, model):
44 |     """
45 |     The SageMaker XGBoost model server invokes `predict_fn` on the return value of `input_fn`.
46 | 
47 |     Return a two-dimensional NumPy array where the first column contains the predictions
48 |     and the remaining columns are the feature contributions (SHAP values) for that prediction.
49 |     """
50 |     prediction = model.predict(input_data)
51 |     feature_contribs = model.predict(input_data, pred_contribs=True, validate_features=False)
52 |     output = np.hstack((prediction[:, np.newaxis], feature_contribs))
53 |     return output
54 | 
55 | 
56 | def output_fn(predictions, content_type):
57 |     """
58 |     After invoking predict_fn, the model server invokes `output_fn`.
59 |     """
60 |     if content_type == "text/csv":
61 |         return ",".join(str(x) for x in predictions[0])
62 |     else:
63 |         raise ValueError("Content type {} is not supported.".format(content_type))
64 | 
--------------------------------------------------------------------------------
/xgboost_script_mode_local_training_and_serving/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pandas
3 | sagemaker>=2.0.0,<3.0.0
4 | sagemaker[local]
5 | 
--------------------------------------------------------------------------------
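
Note: both XGBoost inference scripts above return the prediction and its per-feature SHAP contributions joined into a single text/csv row by output_fn. As a rough illustration of how a caller might unpack that row, here is a minimal client-side sketch; it is not part of the repository, and the helper name parse_csv_response and the sample response string are hypothetical.

def parse_csv_response(csv_row):
    """Split '<prediction>,<contrib_1>,...,<contrib_n>' into the prediction and its SHAP contributions."""
    values = [float(x) for x in csv_row.strip().split(",")]
    # The first value is the model prediction; the remaining values are the
    # feature contributions produced by predict(..., pred_contribs=True).
    return values[0], values[1:]


# Usage with a made-up response string (actual values depend on the deployed model):
prediction, contributions = parse_csv_response("9.71,0.12,-0.03,0.45,0.01,0.30,0.08,0.02,0.11,1.05")
print("prediction:", prediction)
print("SHAP contributions:", contributions)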