├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTAINER_FOR_SM.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── REFS.md ├── cost_optimization ├── ptn_1_model-compile │ ├── README.md │ ├── metadata │ │ ├── coco_labels.txt │ │ ├── imagenet1000_clsidx_to_labels.txt │ │ ├── init.sh │ │ └── mscoco_label_map.pbtxt │ ├── pytorch-compile-and-deploy.ipynb │ ├── samples │ │ ├── bus.jpg │ │ ├── pizza.jpg │ │ └── strawberry.jpg │ └── src │ │ ├── infer_pytorch_neo.py │ │ └── utils.py ├── ptn_2_model-compile-on-device │ ├── README.md │ ├── classes_dict_imagenet.json │ ├── pytorch-compile-on-device.ipynb │ ├── samples │ │ ├── bus.jpg │ │ ├── pizza.jpg │ │ └── strawberry.jpg │ └── src │ │ ├── coco_labels.txt │ │ ├── imagenet1000_clsidx_to_labels.txt │ │ ├── infer_pytorch_neo.py │ │ ├── infer_utils.py │ │ ├── utils.py │ │ └── visualize.py ├── ptn_3_elastic-inference │ ├── README.md │ ├── metadata │ │ ├── coco_labels.txt │ │ ├── imagenet1000_clsidx_to_labels.txt │ │ ├── init.sh │ │ └── mscoco_label_map.pbtxt │ ├── pytorch-deploy-eia.ipynb │ ├── samples │ │ ├── bus.jpg │ │ ├── pizza.jpg │ │ └── strawberry.jpg │ └── src │ │ ├── infer_pytorch_eia.py │ │ ├── infer_pytorch_neo.py │ │ └── utils.py └── ptn_4_ml-inference-chip │ ├── README.md │ ├── inf1-bert-compile-and-deploy.ipynb │ └── src │ ├── inference.py │ ├── inference_inf1.py │ └── requirements.txt ├── images ├── cost_optimization │ ├── ptn_1_01.png │ ├── ptn_1_02.png │ ├── ptn_2_01.png │ ├── ptn_3_01.png │ └── ptn_4_01.png ├── cost_optimization_persona.png ├── key_features │ ├── ptn_1_01.png │ ├── ptn_2_01.png │ ├── ptn_3_01.png │ ├── ptn_4.1_01.png │ ├── ptn_4.2_01.png │ ├── ptn_5_01.png │ └── ptn_6_01.png ├── key_features_persona.png ├── production │ ├── ptn_1_01.png │ ├── ptn_2_01.png │ ├── ptn_2_02.png │ └── ptn_3_01_kor.png └── production_persona.png ├── key_features ├── ptn_1_realtime-inference │ ├── README.md │ ├── single_endpoint_kornlp.ipynb │ └── src │ │ ├── inference_nsmc.py │ │ ├── requirements.txt │ │ └── utils.py ├── ptn_2_batch-transform │ ├── README.md │ └── batch_transform_kornlp.ipynb ├── ptn_3_async-inference │ ├── README.md │ ├── async-inference-cv.ipynb │ ├── src │ │ ├── coco_labels.txt │ │ ├── inference.py │ │ ├── requirements.txt │ │ └── visualize.py │ └── visualization │ │ ├── generate_gif.ipynb │ │ └── output.json ├── ptn_4.1_lambda-serverless-inference │ ├── README.md │ ├── cv │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── app.py │ │ ├── base.Dockerfile │ │ ├── build_docker.sh │ │ ├── coco.names │ │ ├── entry_script.sh │ │ ├── event.json │ │ ├── lambda-serverless-endpoint-cv.ipynb │ │ ├── requirements.txt │ │ ├── sample_images │ │ │ └── remote-control.jpeg │ │ └── test_lambda.sh │ └── kornlp │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── app.py │ │ ├── build_docker.sh │ │ ├── entry_script.sh │ │ ├── lambda-serverless-endpoint-kornlp.ipynb │ │ ├── model-nsmc │ │ ├── config.json │ │ ├── training_args.bin │ │ └── vocab │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ ├── tokenizer_config.json │ │ │ └── vocab.txt │ │ ├── requirements.txt │ │ └── test_lambda.sh ├── ptn_4.2_serverless-inference │ ├── README.md │ ├── model-korsts │ │ ├── config.json │ │ └── training_args.bin │ ├── model-nsmc │ │ ├── config.json │ │ ├── training_args.bin │ │ └── vocab │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ ├── tokenizer_config.json │ │ │ └── vocab.txt │ ├── samples │ │ ├── korsts.txt │ │ ├── ner.txt │ │ └── nsmc.txt │ ├── serverless_endpoint_kornlp_korsts.ipynb │ ├── serverless_endpoint_kornlp_nsmc.ipynb │ └── src │ 
│ ├── inference_kobart.py │ │ ├── inference_korsts.py │ │ ├── inference_nsmc.py │ │ ├── requirements.txt │ │ └── utils.py ├── ptn_5_multi-container-endpoint │ ├── 1_local_endpoint.ipynb │ ├── 2_multi-container-endpoint.ipynb │ ├── README.md │ ├── model-kobart │ │ └── model.pth │ ├── model-korsts │ │ ├── config.json │ │ └── training_args.bin │ ├── model-nsmc │ │ ├── config.json │ │ ├── training_args.bin │ │ └── vocab │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer.json │ │ │ ├── tokenizer_config.json │ │ │ └── vocab.txt │ ├── samples │ │ ├── .ipynb_checkpoints │ │ │ └── kobart-checkpoint.txt │ │ ├── kobart.txt │ │ ├── korsts.txt │ │ ├── ner.txt │ │ └── nsmc.txt │ └── src │ │ ├── inference_kobart.py │ │ ├── inference_korsts.py │ │ ├── inference_nsmc.py │ │ ├── requirements.txt │ │ └── utils.py └── ptn_6_inference-pipeline │ └── README.md └── production ├── ptn_1_ab-test ├── README.md └── ab_test_kornlp.ipynb ├── ptn_2_deployment-guardrail ├── README.md ├── deployment_guardrail_kornlp.ipynb ├── model-nsmc │ ├── config.json │ ├── training_args.bin │ └── vocab │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.txt └── src │ ├── inference_nsmc.py │ ├── inference_nsmc_error.py │ ├── requirements.txt │ └── utils.py └── ptn_3_ml-pipeline ├── README.md ├── pipeline_src ├── evaluate.py ├── processing_hf.py ├── processing_sklearn.py └── train.py ├── pipeline_utils ├── __init__.py ├── deploy_handler.py └── deploy_step.py └── sm_pipeline_kornlp.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | .DS_Store 4 | venv 5 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTAINER_FOR_SM.md: -------------------------------------------------------------------------------- 1 | # Containers for Amazon SageMaker 2 | 3 | ## Overview 4 | 5 | 단일 모델을 소규모 서비스로 배포 시에는 여러 모듈을 구성할 필요 없이 하나의 모듈 안에서 필요한 로직을 구성해도 무방합니다. 여러 종류의 모델들을 프로덕션 환경에서 배포 시,추론 환경을 안정적으로 빌드해야 함은 물론이고 각 모델의 프레임워크 종류, 프레임워크 버전 및 종속성을 고려해야 합니다. 또한, 동일한 시스템에서 실행되는 여러 모델들이 한정된 리소스를 두고 경쟁할 수 있으며, 특정 모델에서 오류 발생 시 여러 호스팅 모델들의 성능을 저하시킬 수 있습니다. 6 | 7 | 마이크로서비스 구조는 각 모듈을 독립된 형태로 구성하기 때문에 각 모듈의 관리가 쉽고 다양한 형태의 모델에 빠르게 대응할 수 있다는 장점이 있습니다. 도커(Docker)로 대표되는 컨테이너화 기술은 가상 머신과 달리 공통 운영 제체를 공유하면서 여러 모듈들에게 독립된 환경을 제공함으로써 유지 보수가 용이합니다. 8 | 9 | Amazon SageMaker는 완전 관리형 머신 러닝 플랫폼으로 피쳐 전처리, 모델 훈련 및 배포의 머신 러닝 일련의 과정에 도커 컨테이너를 활용합니다. 컨테이너 내에 런타임, 라이브러리, 코드 등 필요한 모든 것이 패키징되기에, 로컬 환경에서 프로덕션 환경까지 일관성을 가지고 동일한 환경에서 모델을 훈련하고 배포할 수 있습니다. 10 | AWS에서는 이미 딥러닝 프레임워크별로 각 태스크에 적합한(전처리, 훈련, 추론, 엘라스틱 추론 등) 전용 컨테이너를 AWS의 Docker 레지스트리 서비스인 Amazon Elastic Container Registry (이하 ECR) 에서 관리하고 있기 때문에 여러분은 컨테이너 빌드에 대한 고민을 하실 필요가 없습니다. 물론, 도커 파일들은 모두 오픈 소스로 공개되어 있기 때문에 도커 파일을 기반으로 여러분만의 컨테이너를 빌드해서 ECR로 등록할 수도 있습니다. 11 | 12 | 도커 컨테이너 개념과 ECR을 처음 접하시는 분들은 먼저 아래 링크를 통해 주요 개념을 이해하는 것을 권장 드립니다. 
13 | - https://docs.docker.com/get-started/ 14 | - https://aws.amazon.com/ecr 15 | 16 | ## Built-in algorithm Containers 17 | SageMaker에서 제공하고 있는 17가지의 빌트인 알고리즘은 훈련 및 배포에 필요한 코드가 사전 패키징되어 있기에 별도의 코드를 작성할 필요가 없습니다. 18 | 19 | 빌트인 알고리즘 컨테이너 목록은 https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html 에서 찾을 수 있습니다. 예를 들어 서울 리전(ap-northeast-2)의 Linear Learner 알고리즘에 대한 컨테이너 이름은 `835164637446.dkr.ecr.ap-northeast-2.amazonaws.com/linear-learner:latest` 입니다. 빌트인 컨테이너는 SageMaker 관리형 인스턴스(훈련 인스턴스, 서빙 인스턴스)로만 가져올 수 있으므로 로컬 환경에서 실행할 수 없습니다. 20 | 21 | 다만, 예외적으로 XGBoost와 BlazingText는 오픈소스 라이브러리(BlazingText는 FastText)와 호환되므로 온프렘에서 훈련한 모델을 `model.tar.gz`로 아카이빙하여 S3에 업로드하는 방식이 가능합니다. 22 | 23 | 자세한 내용은 https://docs.aws.amazon.com/sagemaker/latest/dg/docker-containers-prebuilt.html 를 참조해 주세요. 24 | 25 | ## Managed Framework Containers 26 | 27 | SageMaker는 가장 널리 사용되고 있는 주요 머신 러닝 프레임워크와 각 프레임워크에 적합한 의존성 패키지를 제공하고 있습니다. 각 프레임워크에 대한 전처리, 훈련 및 추론 컨테이너는 AWS에서 최신 버전으로 정기적으로 업데이트되며, 딥러닝 프레임워크에는 CPU 및 GPU 인스턴스에 대한 별도의 컨테이너가 있습니다. 이러한 모든 컨테이너를 통칭하여 딥러닝 컨테이너(https://aws.amazon.com/machine-learning/containers)라고 합니다. 28 | 29 | 따라서, 여러분은 커스텀 컨테이너를 빌드하고 유지 관리할 필요 없이 알고리즘을 구현하는 파이썬 스크립트 코드 개발에만 집중할 수 있습니다. 여러분의 스크립트 코드는 프레임워크 SDK의 엔트리포인트로 전달하면 나머지 작업은 SageMaker가 자동으로 수행해 줍니다. 30 | 31 | 각 프레임워크의 컨테이너를 빌드하기 위한 Dockerfile과 소스 코드 또한 GitHub을 통해 제공하고 있습니다. 32 | 33 | - Scikit-learn: https://github.com/aws/sagemaker-scikit-learn-container 34 | - XGBoost: https://github.com/aws/sagemaker-xgboost-container 35 | - PyTorch: https://github.com/aws/sagemaker-pytorch-container 36 | - TensorFlow: https://github.com/aws/sagemaker-tensorflow-container 37 | - MXNet: https://github.com/aws/sagemaker-mxnet-container 38 | - Spark: https://github.com/aws/sagemaker-spark-container 39 | - Hugging Face: https://github.com/aws/sagemaker-huggingface-inference-toolkit 40 | 41 | 자세한 내용은 https://docs.aws.amazon.com/sagemaker/latest/dg/docker-containers-prebuilt.html 를 참조해 주세요. 42 | 43 | ## Bring Your Own Container (BYOC) 44 | 45 | 아래와 같이 커스텀 컨테이너를 직접 빌드하는 것이 보다 효과적인 경우들이 있습니다. 46 | 47 | - 프레임워크의 특정 버전이 지원되지 않는 경우 48 | - 여러 프레임워크를 필요로 하는 경우(예: TensorFlow, PyTorch 동시 사용) 49 | - 환경에 의존하는 라이브러리들이 매우 많을 경우 50 | - 기본 환경에서 제공되지 않는 전처리/훈련/배포 솔루션을 사용하는 경우 51 | 52 | 이 때, 커스텀 컨테이너를 이용하면 SageMaker에서 사전 제공하지 않는 환경일 경우에도 SageMaker 기반으로 동작하도록 할 수 있습니다. 53 | 54 | 자세한 내용은 https://docs.aws.amazon.com/sagemaker/latest/dg/docker-containers-create.html 를 참조해 주세요. -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. 
Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Daekeun Kim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # SageMaker Model Serving Patterns - Learning Model Serving Patterns with AWS Experts
2 | 
3 | ---
4 | 
5 | ## Overview
6 | Machine learning algorithms have advanced rapidly over the past decade, and you can quickly build a wide range of applications by using the many pre-trained models as-is or by fine-tuning them into models suited to a specific use case. Model deployment is a core component of MLOps, and as MLOps scales with the business, building the right infrastructure becomes increasingly important. However, deploying and managing machine learning models in production on premises requires significant effort and cost for infrastructure setup and operation, as well as practitioners who are skilled in machine learning, infrastructure management, and software engineering all at once. Amazon SageMaker is a core AWS service that removes the burden of managing the infrastructure needed for large-scale model training and model serving so you can focus on your core logic. It optimizes compute costs, scales serving infrastructure elastically, and is well suited to microservice deployments, which makes it a good fit for fast experimentation and deployment.
7 | 
8 | This workshop is organized so you can learn the representative model serving patterns of Amazon SageMaker, and each module can be run independently. If your goal is to learn, you can run every module step by step; if you only need an example of a specific serving pattern, run just that module.
9 | 
10 | ## SageMaker For Beginners
11 | This workshop assumes you already understand the basic concepts of SageMaker. If you are new to SageMaker or have not yet grasped the core concepts, please review the materials below first.
12 | - [SageMaker Overview - 50 mins](https://www.youtube.com/watch?v=jF2BN98KBlg)
13 | - [SageMaker Demo - 60 mins](https://www.youtube.com/watch?v=miIVGlq6OUk)
14 | - [Containers for Amazon SageMaker Hosting](CONTAINER_FOR_SM.md)
15 | - [Self Study on SageMaker](https://github.com/gonsoomoon-ml/Self-Study-On-SageMaker)
16 | 
17 | ## Key Features
18 | ![key_features_persona](images/key_features_persona.png)
19 | 
20 | ### [PTN1. Real-time Inference](key_features/ptn_1_realtime-inference)
21 | 
22 | A SageMaker Endpoint is a fully managed service for performing real-time inference over a REST API. The infrastructure for high availability, multi-model loading, and A/B testing (EC2, load balancer, auto scaling, model artifact loading, and so on) is pre-built on distributed containers, so an endpoint is created automatically with just a few lines of code and you can deploy models to production quickly.
23 | 
24 | ### [PTN2. Batch Inference](key_features/ptn_2_batch-transform)
25 | 
26 | For applications that are less latency-sensitive, or for batch inference jobs that run on a schedule (daily, weekly, monthly, etc.), we recommend using SageMaker Batch Transform to save costs. Unlike an always-on endpoint, Batch Transform uses instances only while inference on the batch data is actually running. You can also store the Batch Transform outputs written to Amazon S3 in a database such as Amazon DynamoDB or RDS to conveniently handle large batch jobs.
27 | 
28 | ### [PTN3. Asynchronous Inference](key_features/ptn_3_async-inference)
29 | 
30 | SageMaker asynchronous inference endpoints are suitable for use cases with large input payloads whose processing can take several minutes or more.
31 | When you call the AsyncInvokeEndpoint API, the payload is placed in Amazon S3 and a pointer to the request is sent to a separate managed queue. The hosting instance uses the pointer to download the payload data, computes the model inference result in the inference container, and stores it in S3. Optionally, you can receive a notification through SNS when the inference is complete.
32 | 
33 | ### [PTN4-1. Lambda Serverless Inference](key_features/ptn_4.1_lambda-serverless-inference)
34 | 
35 | With the Lambda container image support introduced at re:Invent 2020, inference for large machine learning models, which used to be hard to run on Lambda, has become much easier. Once you push a Lambda container image to Amazon ECR (Amazon Elastic Container Registry), you can either create a Lambda function and deploy the container image directly, or easily deploy a serverless endpoint through SageMaker API calls.
36 | 
37 | ### [PTN4-2. 
SageMaker Serverless Inference](key_features/ptn_4.2_serverless-inference)
38 | 
39 | Amazon SageMaker Serverless Inference is a new inference option launched at re:Invent 2021 that makes it easy to deploy and scale machine learning models without the burden of managing hosting infrastructure. SageMaker Serverless Inference automatically launches compute resources and scales them in and out with traffic, so there is no need to choose instance types or manage scaling policies. This makes it ideal for workloads that have idle periods between traffic spikes and can tolerate cold starts.
40 | 
41 | ### [PTN5. Multi-container Endpoint](key_features/ptn_5_multi-container-endpoint)
42 | 
43 | With SageMaker multi-container endpoints, you can run multiple inference containers built on different serving stacks (e.g., model server, ML framework, framework version, algorithm) on a single endpoint and invoke each inference container independently.
44 | 
45 | - Serving multiple models (e.g., Object Detection, Named Entity Recognition) when there is not enough traffic to saturate the full capacity of a single instance
46 | - Comparing similar architectures running on different framework versions (e.g., TensorFlow 1.x vs. TensorFlow 2.x) in scenarios such as A/B testing
47 | 
48 | ### [PTN6. Inference Pipeline](key_features/ptn_6_inference-pipeline)
49 | 
50 | An inference pipeline chains a sequence of 2 to 5 containers (built-in or custom) step by step behind a single endpoint. The response of each step is used as the inference request of the next step, which lets you deploy model ensembles across frameworks such as PyTorch/TensorFlow/MXNet/scikit-learn/Spark ML, or split the preprocessing-inference-postprocessing flow into separate, independently managed containers.
51 | 
52 | 
53 | ## Cost Optimization
54 | ![cost_optimization](images/cost_optimization_persona.png)
55 | 
56 | ### [PTN1. Model Compilation using SageMaker Neo](cost_optimization/ptn_1_model-compile)
57 | 
58 | SageMaker Neo supports a variety of machine learning frameworks and automatically optimizes models with minimal loss of accuracy. The SageMaker Neo compiler automatically optimizes a model for the OS and hardware platform of the target device and converts it into a form that the deep learning runtime can execute. The deep learning runtime can run inference with just two lines of code regardless of ML framework or edge device, and the runtime version is continuously updated.
59 | 
60 | ### [PTN2. Model Compilation for multiple on-devices](cost_optimization/ptn_2_model-compile-on-device)
61 | 
62 | What is the best way to deploy a model not to a single target device but to many different kinds of target devices? Do you have to compile it manually for each one?
63 | No. With SageMaker Neo you can compile a model for multiple target devices at no additional charge.
64 | The compiled model can run inference directly on the edge device, or it can be integrated with IoT Greengrass to run inference on streaming IoT data.
65 | 
66 | ### [PTN3. Elastic Inference](cost_optimization/ptn_3_elastic-inference)
67 | 
68 | Keeping an expensive GPU instance running just for deployment inevitably incurs high costs, while switching to a CPU instance to save money may not guarantee acceptable latency. This is exactly where Elastic Inference comes in. Elastic Inference lets you run on a CPU instance most of the time and attach a GPU accelerator only for inference, so you get GPU compute power while reducing inference cost by up to 75% compared to GPU instances. Because the host instance and the inference acceleration hardware are decoupled, you can flexibly optimize the hardware for the CPU, memory, and other resources your application needs.
69 | 
70 | ### [PTN4. ML Inference Chip (AWS Inferentia)](cost_optimization/ptn_4_ml-inference-chip)
71 | 
72 | AWS Inferentia is a machine learning inference chip developed by AWS to deliver high-throughput, low-latency inference at low cost. Inferentia chips are paired with the latest custom 2nd generation Intel® Xeon® processors and 100 Gbps networking to provide high performance and the industry's lowest cost for ML inference applications. Amazon EC2 Inf1 instances based on AWS Inferentia include the AWS Neuron compiler, runtime, and profiling tools for compiling and optimizing machine learning models on the Inferentia chip.
73 | 
74 | 
75 | ## From PoC to Production
76 | ![production_persona](images/production_persona.png)
77 | 
78 | ### [PTN1. A/B Testing](production/ptn_1_ab-test)
79 | 
80 | In production ML workflows, data scientists and ML engineers improve models in many ways, such as retraining in response to data/model/concept drift, hyperparameter tuning, and feature selection. Before replacing the old model, you should validate the new model thoroughly by running an A/B test between the two. Does that mean you have to redeploy the endpoint, or deploy two endpoints, for A/B testing? No. With production variants, you can test multiple models or model versions behind the same endpoint, one per variant.
81 | 
82 | ### [PTN2. 
Blue/Green Deployment Guardrail](production/ptn_2_deployment-guardrail)
83 | 
84 | SageMaker Deployment Guardrails are a fully managed blue/green deployment capability for safely updating from the current model to a new model in production. Traffic shifting modes such as canary and linear give you fine-grained control over how traffic moves from the current model to the new one during the update. They also provide safeguards such as automatic rollback so that problems are caught early without impacting production.
85 | 
86 | ### [PTN3. End-to-end ML pipelines](production/ptn_3_ml-pipeline)
87 | 
88 | SageMaker Pipelines is a managed service that makes it easy and convenient to run ML pipelines and CI/CD pipelines. New features have been added continuously since the service launched at re:Invent 2020; in particular, the Lambda Step released in August 2021 makes it easy to run serverless tasks such as deploying a model to a hosted endpoint. The caching feature also lets you quickly experiment with only the changed parameters without restarting the whole pipeline from scratch.
89 | 
90 | 
91 | 
92 | ## [References](REFS.md)
93 | 
94 | 
95 | ## License Summary
96 | 
97 | This sample code is made available under the MIT-0 license. See the LICENSE file. -------------------------------------------------------------------------------- /REFS.md: --------------------------------------------------------------------------------
1 | # References
2 | 
3 | ## Key Features
4 | ---
5 | 
6 | ### Real-time Inference
7 | - [AWS Innovate 2021 - Easily deploying pre-trained deep learning models on Amazon SageMaker (김대근 AIML SA)](https://www.youtube.com/watch?v=ZdOcrLKow3I)
8 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/realtime-endpoints.html)
9 | 
10 | ### Batch Inference
11 | - [AWS AI/ML Blog](https://aws.amazon.com/blogs/machine-learning/performing-batch-inference-with-tensorflow-serving-in-amazon-sagemaker/)
12 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform.html)
13 | 
14 | ### Asynchronous Inference
15 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/machine-learning/run-computer-vision-inference-on-large-videos-with-amazon-sagemaker-asynchronous-endpoints/)
16 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference.html)
17 | 
18 | ### Lambda Serverless Inference
19 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/korea/new-for-aws-lambda-container-image-support/)
20 | - [SageMaker Python SDK](https://sagemaker.readthedocs.io/en/stable/overview.html?highlight=lambdamodel#serverless-inference)
21 | - [AWS Builders Online - Using the AWS Lambda container image feature (김태수 SA)](https://www.youtube.com/watch?v=tTg9Lp7Sqok)
22 | 
23 | ### SageMaker Serverless Inference
24 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/machine-learning/deploying-ml-models-using-sagemaker-serverless-inference-preview/)
25 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints.html)
26 | 
27 | ### Multi-container Endpoint
28 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/machine-learning/deploy-multiple-serving-containers-on-a-single-instance-using-amazon-sagemaker-multi-container-endpoints/)
29 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/multi-container-endpoints.html)
30 | 
31 | ### Inference Pipeline
32 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/machine-learning/preprocess-input-data-before-making-predictions-using-amazon-sagemaker-inference-pipelines-and-scikit-learn/)
33 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipelines.html)
34 | 
35 | 
36 | 
37 | 38 | ## Cost Optimization 39 | --- 40 | 41 | ### Model Compilation using SageMaker Neo 42 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/machine-learning/unlock-performance-gains-with-xgboost-amazon-sagemaker-neo-and-serverless-artillery/) 43 | - [AWS AI/ML Blog - SageMaker Neo](https://aws.amazon.com/ko/blogs/machine-learning/category/artificial-intelligence/amazon-sagemaker-neo/) 44 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/neo.html) 45 | 46 | ### Model Compilation for multiple on-devices 47 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/machine-learning/build-machine-learning-at-the-edge-applications-using-amazon-sagemaker-edge-manager-and-aws-iot-greengrass-v2/) 48 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/edge.html) 49 | 50 | ### Elastic Inference 51 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/machine-learning/reduce-ml-inference-costs-on-amazon-sagemaker-with-hardware-and-software-acceleration/) 52 | - [AWS AI/ML Blog - Elastic Inference](https://aws.amazon.com/ko/blogs/machine-learning/category/artificial-intelligence/amazon-elastic-inference/) 53 | - [Developer Guide](https://docs.aws.amazon.com/elastic-inference/latest/developerguide/basics.html) 54 | 55 |
56 | 
57 | ## From PoC to Production
58 | ---
59 | 
60 | ### A/B Testing
61 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/machine-learning/a-b-testing-ml-models-in-production-using-amazon-sagemaker/)
62 | - [AWS AI/ML Blog - Advanced](https://aws.amazon.com/ko/blogs/machine-learning/dynamic-a-b-testing-for-machine-learning-models-with-amazon-sagemaker-mlops-projects/)
63 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/model-ab-testing.html)
64 | 
65 | ### Blue/Green Deployment Guardrail
66 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/machine-learning/take-advantage-of-advanced-deployment-strategies-using-amazon-sagemaker-deployment-guardrails/)
67 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/deployment-guardrails.html)
68 | 
69 | ### End-to-end ML pipelines
70 | - [AWS AI/ML Blog](https://aws.amazon.com/ko/blogs/machine-learning/building-automating-managing-and-scaling-ml-workflows-using-amazon-sagemaker-pipelines/)
71 | - [AWS AI/ML Blog - Advanced](https://aws.amazon.com/ko/blogs/machine-learning/building-a-scalable-machine-learning-pipeline-for-ultra-high-resolution-medical-images-using-amazon-sagemaker/)
72 | - [Developer Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/pipelines.html)
73 | 
74 | ### Korean NLP and Hugging Face
75 | - [KoELECTRA](https://github.com/monologg/KoELECTRA)
76 | - [Naver Sentiment Movie Corpus v1.0](https://github.com/e9t/nsmc)
77 | - [Hugging Face on Amazon SageMaker](https://huggingface.co/docs/sagemaker/main)
78 | - [Hugging Face examples](https://github.com/huggingface/notebooks/tree/master/sagemaker)
79 | 
-------------------------------------------------------------------------------- /cost_optimization/ptn_1_model-compile/README.md: --------------------------------------------------------------------------------
1 | # Model Compilation using SageMaker Neo
2 | 
3 | ## Neo-AI
4 | Neo-AI supports a variety of machine learning frameworks and automatically optimizes models with minimal loss of accuracy. The Neo-AI compiler automatically optimizes a model for the OS and hardware platform of the target device and converts it into a form that the deep learning runtime can execute. The deep learning runtime can run inference with just two lines of code regardless of ML framework or edge device, and the runtime version is continuously updated.
5 | 
6 | And if you have an AWS account, you can use Amazon SageMaker Neo, a managed service built on Neo-AI. With a simple API call or the console UI, SageMaker Neo can compile a model for several target devices at the same time, with no extra packages, no infrastructure setup, and no additional charge; a minimal sketch of such an API call is shown below.
7 | ![ptn_1_01](../../images/cost_optimization/ptn_1_01.png)
8 | 
9 | 
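To make "a simple API call" concrete, the sketch below starts a compilation job with boto3. It is a minimal example and not part of the original workshop notebook: the bucket, role ARN, and job name are placeholders, and the parameters mirror the `create_compilation_job` calls used elsewhere in this repository (see `compile_model_for_cloud` in PTN2's `src/infer_utils.py`).

```python
import boto3

sm_client = boto3.client('sagemaker')

# Minimal sketch (placeholder names/paths): compile a PyTorch model archive in S3 for a CPU instance family.
response = sm_client.create_compilation_job(
    CompilationJobName='resnet50-neo-example',                 # hypothetical job name
    RoleArn='arn:aws:iam::123456789012:role/MySageMakerRole',  # hypothetical execution role
    InputConfig={
        'S3Uri': 's3://my-bucket/model/model.tar.gz',          # model archive produced by training
        'DataInputConfig': '{"input0": [1,3,224,224]}',        # input tensor name and shape
        'Framework': 'PYTORCH',
        'FrameworkVersion': '1.8'
    },
    OutputConfig={
        'S3OutputLocation': 's3://my-bucket/neo-output/',
        'TargetDevice': 'ml_c5'                                # cloud target; edge targets such as jetson_nano also work
    },
    StoppingCondition={'MaxRuntimeInSeconds': 900}
)

# The job runs asynchronously; poll its status until it completes.
status = sm_client.describe_compilation_job(
    CompilationJobName='resnet50-neo-example')['CompilationJobStatus']
print(status)  # INPROGRESS -> COMPLETED (or FAILED)
```
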
10 | 
11 | ## SageMaker Neo Stack
12 | Let's take a closer look at the SageMaker Neo stack. SageMaker Neo first optimizes the model with a compiler, and then runs the compiled model through a runtime in the cloud or on the device.
13 | 
14 | ![ptn_1_02](../../images/cost_optimization/ptn_1_02.png)
15 | 
16 | ### Compiler
17 | On the compiler side, the computational graph generator loads a deep learning model trained on any of the supported frameworks and reconstructs it as a graph. It then converts the operators defined in the model into primitive operators to produce a computational graph. After the graph is generated, various graph optimization techniques are applied to produce an optimized graph.
18 | 
19 | However, hardware-dependent optimization that takes the target hardware architecture into account is not possible with the optimized graph alone. The computational graph therefore needs to be converted into an IR (Intermediate Representation). Relay IR performs hardware-dependent optimizations such as memory allocation, parallelization, and execution ordering on top of the generated IR, and then generates code. See the Relay IR paper (https://arxiv.org/pdf/1810.00952.pdf) for details.
20 | 
21 | Apache TVM is not always used. Depending on the deep learning framework and the hardware, TensorRT or Treelite is used instead. For NVIDIA GPUs, Neo uses TensorRT.
22 | 
23 | Finally, the backend code generator uses the IR to generate backend code optimized for the target hardware architecture (CPU, GPU, TPU, etc.) on which the deep learning workload will be deployed.
24 | 
25 | ### DLR(Deep Learning Runtime)
26 | The runtime part is handled by DLR (Deep Learning Runtime). DLR is a runtime for running inference with models compiled by SageMaker Neo. Internally it uses the TVM runtime, the Treelite runtime, or NVIDIA TensorRT depending on the target hardware and the model, and it provides a unified Python/C++ API for loading and running compiled models on a wide range of devices.
27 | 
28 | Once you install DLR (Deep Learning Runtime), a model compiled with Neo-AI can run inference on the target device with just two lines of code. On x86-64 CPUs, DLR can be installed directly with `pip install` without building from source, and for the NVIDIA Jetson Nano, wheel packages are provided for JetPack 4.2 through 4.4.
29 | 
30 | ```python
31 | import dlr
32 | import numpy as np
33 | 
34 | # Load model
35 | model = dlr.DLRModel("[YOUR-MODEL-PATH]", "cpu or gpu")
36 | x = np.random.rand(1, 3, 224, 224)
37 | # Inference
38 | y = model.run(x)
39 | ```
40 | 
41 | #### DLR model directory structure
42 | 1. `compiled.params` & `compiled.so`: the model parameters and runtime library needed to run inference with the model compiled by SageMaker Neo.
43 | 2. `compiled.meta`: contains information about the SageMaker compilation job settings
44 | (target platform, target architecture, cpu/gpu, compilation time, input shape, output shape).
45 | This file is not used directly for DLR inference; you refer to it when you do not remember the input/output shapes or need the SageMaker Neo compilation details.
46 | 3. `compiled_model.json`: parameters of the compiled model's computational graph. You can check num_inputs, num_outputs, and so on for specific ops, and see which compiler was used. Note that SageMaker Neo is not a single compiler; it uses different compilers depending on the target device/platform. For example, target devices with NVIDIA GPUs (e.g., p2/p3 instances, NVIDIA Jetson Nano/Xavier) internally use TensorRT, while the general case uses Apache TVM. Even on an NVIDIA GPU device, the compiler configuration changes depending on whether the GPU is used (GPU on: TensorRT, GPU off: TVM).
47 | 4. `libdlr.so` & `dlr.h`: the bundled DLR (Deep Learning Runtime) library and header; they are not strictly required if DLR is already installed on the local device.
48 | To use the `libdlr.so` and `dlr.h` installed on the system, pass `use_default_dlr=True`
49 | (e.g., `model = dlr.DLRModel("[YOUR-MODEL-PATH]", "gpu", use_default_dlr=True)`). To compile a model optimally for the target device, SageMaker Neo converts the computational graph into an IR (Intermediate Representation), and Relay IR performs hardware-dependent optimizations such as memory allocation, parallelization, and execution ordering on top of that IR. See https://arxiv.org/pdf/1810.00952.pdf for details.
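Putting the pieces above together, here is one way to pull a compiled artifact down to a device and run it with DLR. This is a minimal sketch rather than part of the original workshop code: the bucket/key and local paths are placeholders, and the DLR calls follow the two-line pattern shown earlier.

```python
import tarfile
import boto3
import numpy as np
import dlr

# Placeholders: the S3 location written by the compilation job and a local working directory.
bucket, key = 'my-bucket', 'neo-output/model-ml_c5.tar.gz'
local_archive, model_dir = '/tmp/compiled_model.tar.gz', '/tmp/compiled_model'

# Download and unpack the compiled model (compiled.so, compiled.params, compiled_model.json, ...).
boto3.client('s3').download_file(bucket, key, local_archive)
with tarfile.open(local_archive) as tar:
    tar.extractall(model_dir)

# Load the compiled model with DLR ('cpu' or 'gpu') and run a dummy inference.
model = dlr.DLRModel(model_dir, 'cpu')
x = np.random.rand(1, 3, 224, 224).astype(np.float32)
y = model.run(x)          # returns a list of output arrays
print(np.argmax(y[0]))    # top-1 class index for an ImageNet-style classifier
```
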
-------------------------------------------------------------------------------- /cost_optimization/ptn_1_model-compile/metadata/coco_labels.txt: -------------------------------------------------------------------------------- 1 | 1,1,person 2 | 2,2,bicycle 3 | 3,3,car 4 | 4,4,motorcycle 5 | 5,5,airplane 6 | 6,6,bus 7 | 7,7,train 8 | 8,8,truck 9 | 9,9,boat 10 | 10,10,traffic light 11 | 11,11,fire hydrant 12 | 13,12,stop sign 13 | 14,13,parking meter 14 | 15,14,bench 15 | 16,15,bird 16 | 17,16,cat 17 | 18,17,dog 18 | 19,18,horse 19 | 20,19,sheep 20 | 21,20,cow 21 | 22,21,elephant 22 | 23,22,bear 23 | 24,23,zebra 24 | 25,24,giraffe 25 | 27,25,backpack 26 | 28,26,umbrella 27 | 31,27,handbag 28 | 32,28,tie 29 | 33,29,suitcase 30 | 34,30,frisbee 31 | 35,31,skis 32 | 36,32,snowboard 33 | 37,33,sports ball 34 | 38,34,kite 35 | 39,35,baseball bat 36 | 40,36,baseball glove 37 | 41,37,skateboard 38 | 42,38,surfboard 39 | 43,39,tennis racket 40 | 44,40,bottle 41 | 46,41,wine glass 42 | 47,42,cup 43 | 48,43,fork 44 | 49,44,knife 45 | 50,45,spoon 46 | 51,46,bowl 47 | 52,47,banana 48 | 53,48,apple 49 | 54,49,sandwich 50 | 55,50,orange 51 | 56,51,broccoli 52 | 57,52,carrot 53 | 58,53,hot dog 54 | 59,54,pizza 55 | 60,55,donut 56 | 61,56,cake 57 | 62,57,chair 58 | 63,58,couch 59 | 64,59,potted plant 60 | 65,60,bed 61 | 67,61,dining table 62 | 70,62,toilet 63 | 72,63,tv 64 | 73,64,laptop 65 | 74,65,mouse 66 | 75,66,remote 67 | 76,67,keyboard 68 | 77,68,cell phone 69 | 78,69,microwave 70 | 79,70,oven 71 | 80,71,toaster 72 | 81,72,sink 73 | 82,73,refrigerator 74 | 84,74,book 75 | 85,75,clock 76 | 86,76,vase 77 | 87,77,scissors 78 | 88,78,teddy bear 79 | 89,79,hair drier 80 | 90,80,toothbrush -------------------------------------------------------------------------------- /cost_optimization/ptn_1_model-compile/metadata/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | git clone https://github.com/tensorflow/models.git 6 | 7 | source activate tensorflow_p36 8 | 9 | cd /home/ec2-user/SageMaker/tfs-workshop/files/models/research 10 | protoc object_detection/protos/*.proto --python_out=. 
11 | 12 | python setup.py build 13 | python setup.py install 14 | 15 | export PYTHONPATH=/home/ec2-user/SageMaker/tfs-workshop/files/models/research 16 | 17 | source deactivate tensorflow_p36 -------------------------------------------------------------------------------- /cost_optimization/ptn_1_model-compile/metadata/mscoco_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | name: "/m/01g317" 3 | id: 1 4 | display_name: "person" 5 | } 6 | item { 7 | name: "/m/0199g" 8 | id: 2 9 | display_name: "bicycle" 10 | } 11 | item { 12 | name: "/m/0k4j" 13 | id: 3 14 | display_name: "car" 15 | } 16 | item { 17 | name: "/m/04_sv" 18 | id: 4 19 | display_name: "motorcycle" 20 | } 21 | item { 22 | name: "/m/05czz6l" 23 | id: 5 24 | display_name: "airplane" 25 | } 26 | item { 27 | name: "/m/01bjv" 28 | id: 6 29 | display_name: "bus" 30 | } 31 | item { 32 | name: "/m/07jdr" 33 | id: 7 34 | display_name: "train" 35 | } 36 | item { 37 | name: "/m/07r04" 38 | id: 8 39 | display_name: "truck" 40 | } 41 | item { 42 | name: "/m/019jd" 43 | id: 9 44 | display_name: "boat" 45 | } 46 | item { 47 | name: "/m/015qff" 48 | id: 10 49 | display_name: "traffic light" 50 | } 51 | item { 52 | name: "/m/01pns0" 53 | id: 11 54 | display_name: "fire hydrant" 55 | } 56 | item { 57 | name: "/m/02pv19" 58 | id: 13 59 | display_name: "stop sign" 60 | } 61 | item { 62 | name: "/m/015qbp" 63 | id: 14 64 | display_name: "parking meter" 65 | } 66 | item { 67 | name: "/m/0cvnqh" 68 | id: 15 69 | display_name: "bench" 70 | } 71 | item { 72 | name: "/m/015p6" 73 | id: 16 74 | display_name: "bird" 75 | } 76 | item { 77 | name: "/m/01yrx" 78 | id: 17 79 | display_name: "cat" 80 | } 81 | item { 82 | name: "/m/0bt9lr" 83 | id: 18 84 | display_name: "dog" 85 | } 86 | item { 87 | name: "/m/03k3r" 88 | id: 19 89 | display_name: "horse" 90 | } 91 | item { 92 | name: "/m/07bgp" 93 | id: 20 94 | display_name: "sheep" 95 | } 96 | item { 97 | name: "/m/01xq0k1" 98 | id: 21 99 | display_name: "cow" 100 | } 101 | item { 102 | name: "/m/0bwd_0j" 103 | id: 22 104 | display_name: "elephant" 105 | } 106 | item { 107 | name: "/m/01dws" 108 | id: 23 109 | display_name: "bear" 110 | } 111 | item { 112 | name: "/m/0898b" 113 | id: 24 114 | display_name: "zebra" 115 | } 116 | item { 117 | name: "/m/03bk1" 118 | id: 25 119 | display_name: "giraffe" 120 | } 121 | item { 122 | name: "/m/01940j" 123 | id: 27 124 | display_name: "backpack" 125 | } 126 | item { 127 | name: "/m/0hnnb" 128 | id: 28 129 | display_name: "umbrella" 130 | } 131 | item { 132 | name: "/m/080hkjn" 133 | id: 31 134 | display_name: "handbag" 135 | } 136 | item { 137 | name: "/m/01rkbr" 138 | id: 32 139 | display_name: "tie" 140 | } 141 | item { 142 | name: "/m/01s55n" 143 | id: 33 144 | display_name: "suitcase" 145 | } 146 | item { 147 | name: "/m/02wmf" 148 | id: 34 149 | display_name: "frisbee" 150 | } 151 | item { 152 | name: "/m/071p9" 153 | id: 35 154 | display_name: "skis" 155 | } 156 | item { 157 | name: "/m/06__v" 158 | id: 36 159 | display_name: "snowboard" 160 | } 161 | item { 162 | name: "/m/018xm" 163 | id: 37 164 | display_name: "sports ball" 165 | } 166 | item { 167 | name: "/m/02zt3" 168 | id: 38 169 | display_name: "kite" 170 | } 171 | item { 172 | name: "/m/03g8mr" 173 | id: 39 174 | display_name: "baseball bat" 175 | } 176 | item { 177 | name: "/m/03grzl" 178 | id: 40 179 | display_name: "baseball glove" 180 | } 181 | item { 182 | name: "/m/06_fw" 183 | id: 41 184 | display_name: "skateboard" 185 | } 186 | item 
{ 187 | name: "/m/019w40" 188 | id: 42 189 | display_name: "surfboard" 190 | } 191 | item { 192 | name: "/m/0dv9c" 193 | id: 43 194 | display_name: "tennis racket" 195 | } 196 | item { 197 | name: "/m/04dr76w" 198 | id: 44 199 | display_name: "bottle" 200 | } 201 | item { 202 | name: "/m/09tvcd" 203 | id: 46 204 | display_name: "wine glass" 205 | } 206 | item { 207 | name: "/m/08gqpm" 208 | id: 47 209 | display_name: "cup" 210 | } 211 | item { 212 | name: "/m/0dt3t" 213 | id: 48 214 | display_name: "fork" 215 | } 216 | item { 217 | name: "/m/04ctx" 218 | id: 49 219 | display_name: "knife" 220 | } 221 | item { 222 | name: "/m/0cmx8" 223 | id: 50 224 | display_name: "spoon" 225 | } 226 | item { 227 | name: "/m/04kkgm" 228 | id: 51 229 | display_name: "bowl" 230 | } 231 | item { 232 | name: "/m/09qck" 233 | id: 52 234 | display_name: "banana" 235 | } 236 | item { 237 | name: "/m/014j1m" 238 | id: 53 239 | display_name: "apple" 240 | } 241 | item { 242 | name: "/m/0l515" 243 | id: 54 244 | display_name: "sandwich" 245 | } 246 | item { 247 | name: "/m/0cyhj_" 248 | id: 55 249 | display_name: "orange" 250 | } 251 | item { 252 | name: "/m/0hkxq" 253 | id: 56 254 | display_name: "broccoli" 255 | } 256 | item { 257 | name: "/m/0fj52s" 258 | id: 57 259 | display_name: "carrot" 260 | } 261 | item { 262 | name: "/m/01b9xk" 263 | id: 58 264 | display_name: "hot dog" 265 | } 266 | item { 267 | name: "/m/0663v" 268 | id: 59 269 | display_name: "pizza" 270 | } 271 | item { 272 | name: "/m/0jy4k" 273 | id: 60 274 | display_name: "donut" 275 | } 276 | item { 277 | name: "/m/0fszt" 278 | id: 61 279 | display_name: "cake" 280 | } 281 | item { 282 | name: "/m/01mzpv" 283 | id: 62 284 | display_name: "chair" 285 | } 286 | item { 287 | name: "/m/02crq1" 288 | id: 63 289 | display_name: "couch" 290 | } 291 | item { 292 | name: "/m/03fp41" 293 | id: 64 294 | display_name: "potted plant" 295 | } 296 | item { 297 | name: "/m/03ssj5" 298 | id: 65 299 | display_name: "bed" 300 | } 301 | item { 302 | name: "/m/04bcr3" 303 | id: 67 304 | display_name: "dining table" 305 | } 306 | item { 307 | name: "/m/09g1w" 308 | id: 70 309 | display_name: "toilet" 310 | } 311 | item { 312 | name: "/m/07c52" 313 | id: 72 314 | display_name: "tv" 315 | } 316 | item { 317 | name: "/m/01c648" 318 | id: 73 319 | display_name: "laptop" 320 | } 321 | item { 322 | name: "/m/020lf" 323 | id: 74 324 | display_name: "mouse" 325 | } 326 | item { 327 | name: "/m/0qjjc" 328 | id: 75 329 | display_name: "remote" 330 | } 331 | item { 332 | name: "/m/01m2v" 333 | id: 76 334 | display_name: "keyboard" 335 | } 336 | item { 337 | name: "/m/050k8" 338 | id: 77 339 | display_name: "cell phone" 340 | } 341 | item { 342 | name: "/m/0fx9l" 343 | id: 78 344 | display_name: "microwave" 345 | } 346 | item { 347 | name: "/m/029bxz" 348 | id: 79 349 | display_name: "oven" 350 | } 351 | item { 352 | name: "/m/01k6s3" 353 | id: 80 354 | display_name: "toaster" 355 | } 356 | item { 357 | name: "/m/0130jx" 358 | id: 81 359 | display_name: "sink" 360 | } 361 | item { 362 | name: "/m/040b_t" 363 | id: 82 364 | display_name: "refrigerator" 365 | } 366 | item { 367 | name: "/m/0bt_c3" 368 | id: 84 369 | display_name: "book" 370 | } 371 | item { 372 | name: "/m/01x3z" 373 | id: 85 374 | display_name: "clock" 375 | } 376 | item { 377 | name: "/m/02s195" 378 | id: 86 379 | display_name: "vase" 380 | } 381 | item { 382 | name: "/m/01lsmm" 383 | id: 87 384 | display_name: "scissors" 385 | } 386 | item { 387 | name: "/m/0kmg4" 388 | id: 88 389 | display_name: "teddy bear" 390 
| } 391 | item { 392 | name: "/m/03wvsk" 393 | id: 89 394 | display_name: "hair drier" 395 | } 396 | item { 397 | name: "/m/012xff" 398 | id: 90 399 | display_name: "toothbrush" 400 | } -------------------------------------------------------------------------------- /cost_optimization/ptn_1_model-compile/samples/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/cost_optimization/ptn_1_model-compile/samples/bus.jpg -------------------------------------------------------------------------------- /cost_optimization/ptn_1_model-compile/samples/pizza.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/cost_optimization/ptn_1_model-compile/samples/pizza.jpg -------------------------------------------------------------------------------- /cost_optimization/ptn_1_model-compile/samples/strawberry.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/cost_optimization/ptn_1_model-compile/samples/strawberry.jpg -------------------------------------------------------------------------------- /cost_optimization/ptn_1_model-compile/src/infer_pytorch_neo.py: -------------------------------------------------------------------------------- 1 | 2 | import io 3 | import json 4 | import logging 5 | import os 6 | import pickle 7 | 8 | import numpy as np 9 | import torch 10 | import torchvision.transforms as transforms 11 | from PIL import Image # Training container doesn't have this package 12 | 13 | logger = logging.getLogger(__name__) 14 | logger.setLevel(logging.DEBUG) 15 | 16 | 17 | def transform_fn(model, payload, request_content_type='application/octet-stream', 18 | response_content_type='application/json'): 19 | 20 | logger.info('Invoking user-defined transform function') 21 | 22 | if request_content_type != 'application/octet-stream': 23 | raise RuntimeError( 24 | 'Content type must be application/octet-stream. 
Provided: {0}'.format(request_content_type)) 25 | 26 | # preprocess 27 | decoded = Image.open(io.BytesIO(payload)) 28 | preprocess = transforms.Compose([ 29 | transforms.Resize(256), 30 | transforms.CenterCrop(224), 31 | transforms.ToTensor(), 32 | transforms.Normalize( 33 | mean=[ 34 | 0.485, 0.456, 0.406], std=[ 35 | 0.229, 0.224, 0.225]), 36 | ]) 37 | normalized = preprocess(decoded) 38 | batchified = normalized.unsqueeze(0) 39 | 40 | # predict 41 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 42 | batchified = batchified.to(device) 43 | result = model.forward(batchified) 44 | 45 | # Softmax (assumes batch size 1) 46 | result = np.squeeze(result.detach().cpu().numpy()) 47 | result_exp = np.exp(result - np.max(result)) 48 | result = result_exp / np.sum(result_exp) 49 | 50 | response_body = json.dumps(result.tolist()) 51 | 52 | return response_body, response_content_type 53 | -------------------------------------------------------------------------------- /cost_optimization/ptn_1_model-compile/src/utils.py: -------------------------------------------------------------------------------- 1 | def get_label_map(label_file): 2 | label_map = {} 3 | labels = open(label_file, 'r') 4 | 5 | for line in labels: 6 | line = line.rstrip("\n") 7 | ids = line.split(',') 8 | label_map[int(ids[0])] = ids[2] 9 | 10 | return label_map 11 | 12 | 13 | def get_label_map_imagenet(label_file): 14 | label_map = {} 15 | with open(label_file, 'r') as f: 16 | for line in f: 17 | key, val = line.strip().split(':') 18 | label_map[key] = val.replace(',', '') 19 | return label_map 20 | 21 | 22 | def delete_endpoint(client, endpoint_name): 23 | response = client.describe_endpoint_config(EndpointConfigName=endpoint_name) 24 | model_name = response['ProductionVariants'][0]['ModelName'] 25 | 26 | client.delete_model(ModelName=model_name) 27 | client.delete_endpoint(EndpointName=endpoint_name) 28 | client.delete_endpoint_config(EndpointConfigName=endpoint_name) 29 | 30 | print(f'--- Deleted model: {model_name}') 31 | print(f'--- Deleted endpoint: {endpoint_name}') 32 | print(f'--- Deleted endpoint_config: {endpoint_name}') 33 | 34 | 35 | def plot_bbox(img_resized, bboxes, scores, cids, class_info, framework='pytorch', threshold=0.5): 36 | 37 | import numpy as np 38 | import random 39 | import matplotlib.patches as patches 40 | import matplotlib.pyplot as plt 41 | 42 | if framework=='mxnet': 43 | img_np = img_resized.asnumpy() 44 | scores = scores.asnumpy() 45 | bboxes = bboxes.asnumpy() 46 | cids = cids.asnumpy() 47 | else: 48 | img_np = img_resized 49 | scores = np.array(scores) 50 | bboxes = np.array(bboxes) 51 | cids = np.array(cids) 52 | 53 | # Get only results that are above the threshold. Default threshold is 0.5. 
54 |     scores = scores[scores > threshold]
55 |     num_detections = len(scores)
56 |     bboxes = bboxes[:num_detections, :]
57 |     cids = cids[:num_detections].astype('int').squeeze()
58 | 
59 |     # Get bounding-box colors
60 |     cmap = plt.get_cmap('tab20b')
61 |     colors = [cmap(i) for i in np.linspace(0, 1, 20)]
62 |     random.seed(42)
63 |     random.shuffle(colors)
64 | 
65 |     plt.figure()
66 |     fig, ax = plt.subplots(1, figsize=(10,10))
67 |     ax.imshow(img_np)
68 | 
69 |     if cids is not None:
70 |         # Get unique class labels
71 |         unique_labels = set(list(cids.astype('int').squeeze()))
72 |         unique_labels = np.array(list(unique_labels))
73 |         n_cls_preds = len(unique_labels)
74 |         bbox_colors = colors[:n_cls_preds]
75 | 
76 |         for b, cls_pred, cls_conf in zip(bboxes, cids, scores):
77 |             x1, y1, x2, y2 = b[0], b[1], b[2], b[3]
78 |             predicted_class = class_info[int(cls_pred)]
79 |             label = '{} {:.2f}'.format(predicted_class, cls_conf)
80 | 
81 |             # Get box height and width
82 |             box_h = y2 - y1
83 |             box_w = x2 - x1
84 | 
85 |             # Add a box with the color for this class
86 |             color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
87 |             bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=3, edgecolor=color, facecolor='none')
88 |             ax.add_patch(bbox)
89 | 
90 |             plt.text(x1, y1, s=label, color='white', verticalalignment='top',
91 |                      bbox={'color': color, 'pad': 0})
92 | 
-------------------------------------------------------------------------------- /cost_optimization/ptn_2_model-compile-on-device/README.md: --------------------------------------------------------------------------------
1 | # Model Compilation for multiple on-devices (AIoT)
2 | 
3 | ## Create Compilation Job
4 | 
5 | What is the best way to deploy a model not to a single target device but to many different kinds of target devices? Do you have to compile it manually for each one?
6 | No. With SageMaker Neo you can compile a model for multiple target devices at no additional charge.
7 | The compiled model can run inference directly on the edge device, or it can be integrated with IoT Greengrass to run inference on streaming IoT data.
8 | 
9 | As shown below, you can use the `create_compilation_job` API to compile a model for several target devices at the same time.
10 | 
11 | ```python
12 | response = sm_client.create_compilation_job(
13 |     CompilationJobName=[JOB-NAME],
14 |     RoleArn=[ROLE-ARN],
15 |     InputConfig={
16 |         'S3Uri': [YOUR-S3-INPUT-PATH],
17 |         'DataInputConfig': input_shape,
18 |         'Framework': [YOUR-FRAMEWORK]
19 |     },
20 |     OutputConfig={
21 |         'S3OutputLocation': [YOUR-S3-OUTPUT-PATH],
22 |         'TargetDevice': [YOUR-DEVICE]
23 |     },
24 |     StoppingCondition={
25 |         'MaxRuntimeInSeconds': 500
26 |     })
27 | ```
28 | 
29 | ## Architecture
30 | By integrating with AWS IoT Greengrass, you can deploy a model to millions of devices at once.
31 | 
32 | Train the model with SageMaker, compile it for the target device (e.g., Raspberry Pi, NVIDIA Jetson Nano) with SageMaker Neo, and store the compiled model in Amazon S3.
33 | 
34 | Then install the IoT Greengrass core on a development device, develop your components, and register them in the cloud.
35 | 
36 | For component deployment, it is convenient to group multiple devices into a `thing group` and deploy to all of them at once. You can also use fleet provisioning to avoid having to provision security credentials for each device one by one.
37 | 
38 | Once deployment is complete, you can keep collecting feedback through Greengrass Core, and when model drift occurs you can bring in Amazon A2I to retrain the model.
39 | 
40 | ![ptn_2_01](../../images/cost_optimization/ptn_2_01.png)
41 | 
42 | For details, see the hands-on lab below.
43 | - https://github.com/aws-samples/aiot-e2e-sagemaker-greengrass-v2-nvidia-jetson -------------------------------------------------------------------------------- /cost_optimization/ptn_2_model-compile-on-device/samples/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/cost_optimization/ptn_2_model-compile-on-device/samples/bus.jpg -------------------------------------------------------------------------------- /cost_optimization/ptn_2_model-compile-on-device/samples/pizza.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/cost_optimization/ptn_2_model-compile-on-device/samples/pizza.jpg -------------------------------------------------------------------------------- /cost_optimization/ptn_2_model-compile-on-device/samples/strawberry.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/cost_optimization/ptn_2_model-compile-on-device/samples/strawberry.jpg -------------------------------------------------------------------------------- /cost_optimization/ptn_2_model-compile-on-device/src/coco_labels.txt: -------------------------------------------------------------------------------- 1 | 1,1,person 2 | 2,2,bicycle 3 | 3,3,car 4 | 4,4,motorcycle 5 | 5,5,airplane 6 | 6,6,bus 7 | 7,7,train 8 | 8,8,truck 9 | 9,9,boat 10 | 10,10,traffic light 11 | 11,11,fire hydrant 12 | 13,12,stop sign 13 | 14,13,parking meter 14 | 15,14,bench 15 | 16,15,bird 16 | 17,16,cat 17 | 18,17,dog 18 | 19,18,horse 19 | 20,19,sheep 20 | 21,20,cow 21 | 22,21,elephant 22 | 23,22,bear 23 | 24,23,zebra 24 | 25,24,giraffe 25 | 27,25,backpack 26 | 28,26,umbrella 27 | 31,27,handbag 28 | 32,28,tie 29 | 33,29,suitcase 30 | 34,30,frisbee 31 | 35,31,skis 32 | 36,32,snowboard 33 | 37,33,sports ball 34 | 38,34,kite 35 | 39,35,baseball bat 36 | 40,36,baseball glove 37 | 41,37,skateboard 38 | 42,38,surfboard 39 | 43,39,tennis racket 40 | 44,40,bottle 41 | 46,41,wine glass 42 | 47,42,cup 43 | 48,43,fork 44 | 49,44,knife 45 | 50,45,spoon 46 | 51,46,bowl 47 | 52,47,banana 48 | 53,48,apple 49 | 54,49,sandwich 50 | 55,50,orange 51 | 56,51,broccoli 52 | 57,52,carrot 53 | 58,53,hot dog 54 | 59,54,pizza 55 | 60,55,donut 56 | 61,56,cake 57 | 62,57,chair 58 | 63,58,couch 59 | 64,59,potted plant 60 | 65,60,bed 61 | 67,61,dining table 62 | 70,62,toilet 63 | 72,63,tv 64 | 73,64,laptop 65 | 74,65,mouse 66 | 75,66,remote 67 | 76,67,keyboard 68 | 77,68,cell phone 69 | 78,69,microwave 70 | 79,70,oven 71 | 80,71,toaster 72 | 81,72,sink 73 | 82,73,refrigerator 74 | 84,74,book 75 | 85,75,clock 76 | 86,76,vase 77 | 87,77,scissors 78 | 88,78,teddy bear 79 | 89,79,hair drier 80 | 90,80,toothbrush -------------------------------------------------------------------------------- /cost_optimization/ptn_2_model-compile-on-device/src/infer_pytorch_neo.py: -------------------------------------------------------------------------------- 1 | 2 | import io 3 | import json 4 | import logging 5 | import os 6 | import pickle 7 | import numpy as np 8 | import torch 9 | import torchvision.transforms as transforms 10 | from PIL import Image # Training container doesn't have this package 11 | 12 | logger = logging.getLogger(__name__) 
13 | logger.setLevel(logging.DEBUG) 14 | 15 | 16 | def model_fn(model_dir): 17 | import neopytorch 18 | 19 | logger.info("model_fn") 20 | neopytorch.config(model_dir=model_dir, neo_runtime=True) 21 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | # The compiled model is saved as "compiled.pt" 23 | model = torch.jit.load(os.path.join(model_dir, "compiled.pt"), map_location=device) 24 | 25 | # It is recommended to run warm-up inference during model load 26 | sample_input_path = os.path.join(model_dir, "sample_input.pkl") 27 | with open(sample_input_path, "rb") as input_file: 28 | model_input = pickle.load(input_file) 29 | if torch.is_tensor(model_input): 30 | model_input = model_input.to(device) 31 | model(model_input) 32 | elif isinstance(model_input, tuple): 33 | model_input = (inp.to(device) for inp in model_input if torch.is_tensor(inp)) 34 | model(*model_input) 35 | else: 36 | print("Only supports a torch tensor or a tuple of torch tensors") 37 | 38 | return model 39 | 40 | 41 | def transform_fn(model, payload, request_content_type='application/octet-stream', 42 | response_content_type='application/json'): 43 | 44 | logger.info('Invoking user-defined transform function') 45 | 46 | if request_content_type != 'application/octet-stream': 47 | raise RuntimeError( 48 | 'Content type must be application/octet-stream. Provided: {0}'.format(request_content_type)) 49 | 50 | # preprocess 51 | decoded = Image.open(io.BytesIO(payload)) 52 | preprocess = transforms.Compose([ 53 | transforms.Resize(256), 54 | transforms.CenterCrop(224), 55 | transforms.ToTensor(), 56 | transforms.Normalize( 57 | mean=[ 58 | 0.485, 0.456, 0.406], std=[ 59 | 0.229, 0.224, 0.225]), 60 | ]) 61 | normalized = preprocess(decoded) 62 | batchified = normalized.unsqueeze(0) 63 | 64 | # predict 65 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 66 | batchified = batchified.to(device) 67 | result = model.forward(batchified) 68 | 69 | # Softmax (assumes batch size 1) 70 | result = np.squeeze(result.detach().cpu().numpy()) 71 | result_exp = np.exp(result - np.max(result)) 72 | result = result_exp / np.sum(result_exp) 73 | 74 | response_body = json.dumps(result.tolist()) 75 | 76 | return response_body, response_content_type 77 | -------------------------------------------------------------------------------- /cost_optimization/ptn_2_model-compile-on-device/src/infer_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import json 4 | import boto3 5 | import sagemaker 6 | from sagemaker.utils import name_from_base 7 | 8 | 9 | def get_classes(train_path): 10 | #https://github.com/pytorch/vision/blob/50d9dc5f5af89e607100cee9aa34cfda67e627fb/torchvision/datasets/folder.py#L114 11 | classes = [d.name for d in os.scandir(train_path) if d.is_dir()] 12 | classes.sort() 13 | classes_dict = {i:c for i, c in enumerate(classes)} 14 | return classes, classes_dict 15 | 16 | 17 | def save_classes_dict(classes_dict, filename='classes_dict.json'): 18 | with open(filename, "w") as fp: 19 | json.dump(classes_dict, fp) 20 | 21 | 22 | def load_classes_dict(filename): 23 | with open(filename, 'r') as f: 24 | classes_dict = json.load(f) 25 | 26 | classes_dict = {int(k):v for k, v in classes_dict.items()} 27 | return classes_dict 28 | 29 | 30 | def get_inference(img_path, predictor, classes_dict, show_img=True): 31 | with open(img_path, mode='rb') as file: 32 | payload = bytearray(file.read()) 33 | 34 | response = 
predictor.predict(payload) 35 | result = json.loads(response.decode()) 36 | pred_cls_idx, pred_cls_str, prob = parse_result(result, classes_dict, img_path, show_img) 37 | 38 | return pred_cls_idx, pred_cls_str, prob 39 | 40 | 41 | def parse_result(result, classes_dict, img_path=None, show_img=True): 42 | pred_cls_idx = np.argmax(result) 43 | pred_cls_str = classes_dict[pred_cls_idx] 44 | prob = np.amax(result)*100 45 | 46 | if show_img: 47 | import cv2 48 | import matplotlib.pyplot as plt 49 | im = cv2.imread(img_path, cv2.COLOR_BGR2RGB) 50 | font = cv2.FONT_HERSHEY_COMPLEX 51 | cv2.putText(im, f'{pred_cls_str} {prob:.2f}%', (10,40), font, 0.7, (255, 255, 0), 2, cv2.LINE_AA) 52 | plt.figure(figsize=(10, 10)) 53 | plt.imshow(im[:,:,::-1]) 54 | 55 | return pred_cls_idx, pred_cls_str, prob 56 | 57 | 58 | def compile_model_for_jetson(role, bucket, target_device='jetson-nano', 59 | dataset_dir=None, framework='PYTORCH', 60 | trt_ver='7.1.3', cuda_ver='10.2', gpu_code='sm_53', 61 | base_model_name='model', img_size=224, use_gpu=True): 62 | if dataset_dir is None: 63 | print("[INFO] The dataset prefix of the s3 bucket is automatically assigned as 'modelzoo'.") 64 | dataset_dir = 'modelzoo' 65 | 66 | sm_client = boto3.client('sagemaker') 67 | sess = sagemaker.Session() 68 | region = sess.boto_region_name 69 | target_device_ = target_device.replace('_', '-') 70 | 71 | if use_gpu: 72 | compilation_job_name = name_from_base(f'{target_device_}-{base_model_name}-gpu-pytorch') 73 | else: 74 | compilation_job_name = name_from_base(f'{target_device_}-{base_model_name}-cpu-pytorch') 75 | 76 | s3_compiled_model_path = 's3://{}/{}/{}/neo-output'.format(bucket, dataset_dir, compilation_job_name) 77 | key_prefix = f'{dataset_dir}/{compilation_job_name}/model' 78 | s3_model_path = sess.upload_data(path='model.tar.gz', key_prefix=key_prefix) 79 | 80 | # Configuration 81 | if use_gpu: 82 | input_config = { 83 | 'S3Uri': s3_model_path, 84 | 'DataInputConfig': f'{{"input0": [1,3,{img_size},{img_size}]}}', 85 | 'Framework': framework, 86 | } 87 | output_config = { 88 | 'S3OutputLocation': s3_compiled_model_path, 89 | 'TargetPlatform': { 90 | 'Os': 'LINUX', 91 | 'Arch': 'ARM64', # change this to X86_64 if you need 92 | 'Accelerator': 'NVIDIA' # comment this if you don't have an Nvidia GPU 93 | }, 94 | # Jetson Xavier: sm_72; Jetson Nano: sm_53 95 | 'CompilerOptions': f'{{"trt-ver": "{trt_ver}", "cuda-ver": "{cuda_ver}", "gpu-code": "{gpu_code}"}}' # Jetpack 4.5.1 96 | } 97 | else: 98 | input_config = { 99 | 'S3Uri': s3_model_path, 100 | 'DataInputConfig': f'{{"input0": [1,3,{img_size},{img_size}]}}', 101 | 'Framework': framework, 102 | } 103 | output_config = { 104 | 'S3OutputLocation': s3_compiled_model_path, 105 | 'TargetPlatform': { 106 | 'Os': 'LINUX', 107 | 'Arch': 'ARM64', # change this to X86_64 if you need 108 | }, 109 | } 110 | 111 | # Create Compilation job 112 | compilation_response = sm_client.create_compilation_job( 113 | CompilationJobName=compilation_job_name, 114 | RoleArn=role, 115 | InputConfig=input_config, 116 | OutputConfig=output_config, 117 | StoppingCondition={ 'MaxRuntimeInSeconds': 900 } 118 | ) 119 | 120 | return { 121 | 'response': compilation_response, 122 | 'job_name': compilation_job_name, 123 | 's3_compiled_model_path': s3_compiled_model_path, 124 | 's3_model_path': s3_model_path 125 | } 126 | 127 | 128 | def compile_model_for_cloud(role, bucket, target_device, 129 | dataset_dir=None, framework='PYTORCH', framework_version='1.8', 130 | base_model_name='model', img_size=224): 131 | 
valid_target_device = ['ml_m4', 'ml_m5', 132 | 'ml_c4', 'ml_c5', 133 | 'ml_p2', 'ml_p3', 'ml_g4dn', 134 | 'ml_inf1', 'ml_eia2'] 135 | 136 | if not target_device in valid_target_device: 137 | print('[ERROR] Please use valid target device!') 138 | return 139 | 140 | if dataset_dir is None: 141 | print("[INFO] The dataset prefix of the s3 bucket is automatically assigned as 'modelzoo'.") 142 | dataset_dir = 'modelzoo' 143 | 144 | sm_client = boto3.client('sagemaker') 145 | sess = sagemaker.Session() 146 | region = sess.boto_region_name 147 | target_device_ = target_device.replace('_', '-') 148 | 149 | compilation_job_name = name_from_base(f'{target_device_}-{base_model_name}-pytorch') 150 | 151 | s3_compiled_model_path = 's3://{}/{}/{}/neo-output'.format(bucket, dataset_dir, compilation_job_name) 152 | key_prefix = f'{dataset_dir}/{compilation_job_name}/model' 153 | s3_model_path = sess.upload_data(path='model.tar.gz', key_prefix=key_prefix) 154 | 155 | 156 | input_config = { 157 | 'S3Uri': s3_model_path, 158 | 'DataInputConfig': f'{{"input0": [1,3,{img_size},{img_size}]}}', 159 | 'Framework': framework, 160 | 'FrameworkVersion': framework_version 161 | } 162 | output_config = { 163 | 'TargetDevice': target_device, 164 | 'S3OutputLocation': s3_compiled_model_path, 165 | } 166 | 167 | # Create Compilation job 168 | compilation_response = sm_client.create_compilation_job( 169 | CompilationJobName=compilation_job_name, 170 | RoleArn=role, 171 | InputConfig=input_config, 172 | OutputConfig=output_config, 173 | StoppingCondition={ 'MaxRuntimeInSeconds': 900 } 174 | ) 175 | 176 | return { 177 | 'response': compilation_response, 178 | 'job_name': compilation_job_name, 179 | 's3_compiled_model_path': s3_compiled_model_path, 180 | 's3_model_path': s3_model_path 181 | } -------------------------------------------------------------------------------- /cost_optimization/ptn_2_model-compile-on-device/src/utils.py: -------------------------------------------------------------------------------- 1 | def get_label_map(label_file): 2 | label_map = {} 3 | labels = open(label_file, 'r') 4 | 5 | for line in labels: 6 | line = line.rstrip("\n") 7 | ids = line.split(',') 8 | label_map[int(ids[0])] = ids[2] 9 | 10 | return label_map 11 | 12 | 13 | def get_label_map_imagenet(label_file): 14 | label_map = {} 15 | with open(label_file, 'r') as f: 16 | for line in f: 17 | key, val = line.strip().split(':') 18 | label_map[key] = val.replace(',', '') 19 | return label_map 20 | 21 | 22 | def delete_endpoint(client, endpoint_name): 23 | response = client.describe_endpoint_config(EndpointConfigName=endpoint_name) 24 | model_name = response['ProductionVariants'][0]['ModelName'] 25 | 26 | client.delete_model(ModelName=model_name) 27 | client.delete_endpoint(EndpointName=endpoint_name) 28 | client.delete_endpoint_config(EndpointConfigName=endpoint_name) 29 | 30 | print(f'--- Deleted model: {model_name}') 31 | print(f'--- Deleted endpoint: {endpoint_name}') 32 | print(f'--- Deleted endpoint_config: {endpoint_name}') 33 | 34 | 35 | def plot_bbox(img_resized, bboxes, scores, cids, class_info, framework='pytorch', threshold=0.5): 36 | 37 | import numpy as np 38 | import random 39 | import matplotlib.patches as patches 40 | import matplotlib.pyplot as plt 41 | 42 | if framework=='mxnet': 43 | img_np = img_resized.asnumpy() 44 | scores = scores.asnumpy() 45 | bboxes = bboxes.asnumpy() 46 | cids = cids.asnumpy() 47 | else: 48 | img_np = img_resized 49 | scores = np.array(scores) 50 | bboxes = np.array(bboxes) 51 | cids = 
np.array(cids) 52 | 53 | # Get only results that are above the threshold. Default threshold is 0.5. 54 | scores = scores[scores > threshold] 55 | num_detections = len(scores) 56 | bboxes = bboxes[:num_detections, :] 57 | cids = cids[:num_detections].astype('int').squeeze() 58 | 59 | # Get bounding-box colors 60 | cmap = plt.get_cmap('tab20b') 61 | colors = [cmap(i) for i in np.linspace(0, 1, 20)] 62 | random.seed(42) 63 | random.shuffle(colors) 64 | 65 | plt.figure() 66 | fig, ax = plt.subplots(1, figsize=(10,10)) 67 | ax.imshow(img_np) 68 | 69 | if cids is not None: 70 | # Get unique class labels 71 | unique_labels = set(list(cids.astype('int').squeeze())) 72 | unique_labels = np.array(list(unique_labels)) 73 | n_cls_preds = len(unique_labels) 74 | bbox_colors = colors[:n_cls_preds] 75 | 76 | for b, cls_pred, cls_conf in zip(bboxes, cids, scores): 77 | x1, y1, x2, y2 = b[0], b[1], b[2], b[3] 78 | predicted_class = class_info[int(cls_pred)] 79 | label = '{} {:.2f}'.format(predicted_class, cls_conf) 80 | 81 | # Get box height and width 82 | box_h = y2 - y1 83 | box_w = x2 - x1 84 | 85 | # Add a box with the color for this class 86 | color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])] 87 | bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=3, edgecolor=color, facecolor='none') 88 | ax.add_patch(bbox) 89 | 90 | plt.text(x1, y1, s=label, color='white', verticalalignment='top', 91 | bbox={'color': color, 'pad': 0}) 92 | -------------------------------------------------------------------------------- /cost_optimization/ptn_2_model-compile-on-device/src/visualize.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import colorsys 4 | from skimage.measure import find_contours 5 | import random 6 | import matplotlib.pyplot as plt 7 | from matplotlib import patches, lines 8 | from matplotlib.patches import Polygon 9 | 10 | def get_label_map(label_file): 11 | label_map = {} 12 | labels = open(label_file, 'r') 13 | 14 | for line in labels: 15 | line = line.rstrip("\n") 16 | ids = line.split(',') 17 | label_map[int(ids[0])] = ids[2] 18 | 19 | return label_map 20 | 21 | 22 | def random_colors(N, bright=False): 23 | """ 24 | Generate random colors. 25 | To get visually distinct colors, generate them in HSV space then 26 | convert to RGB. 27 | """ 28 | brightness = 1.0 if bright else 0.7 29 | hsv = [(i / N, 1, brightness) for i in range(N)] 30 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 31 | random.shuffle(colors) 32 | return colors 33 | 34 | 35 | def apply_mask(image, mask, color, alpha=0.3): 36 | """Apply the given mask to the image. 37 | """ 38 | for c in range(3): 39 | image[:, :, c] = np.where(mask == 1, 40 | image[:, :, c] * 41 | (1 - alpha) + alpha * color[c] * 255, 42 | image[:, :, c]) 43 | return image 44 | 45 | 46 | def display_instances(image, boxes, masks, class_ids, class_names, 47 | scores=None, title="", 48 | score_thres=0.5, mask_thres=0.5, 49 | figsize=(10, 10), ax=None, 50 | show_mask=True, show_bbox=True, 51 | colors=None, framework='pytorch'): 52 | """ 53 | boxes: [num_instance, (x1, y1, x2, y2, class_id)] in image coordinates. 54 | masks: [height, width, num_instances] 55 | class_ids: [num_instances] 56 | class_names: list of class names of the dataset 57 | scores: (optional) confidence scores for each box 58 | title: (optional) Figure title 59 | score_thres: To return only objects whose score is greater than to a certain value in the detected result. 
60 | mask_thres: Threshold for binarizing the mask image 61 | figsize: (optional) the size of the image 62 | show_mask, show_bbox: To show masks and bounding boxes or not 63 | colors: (optional) An array or colors to use with each object 64 | framework: pytorch/mxnet 65 | """ 66 | 67 | if framework == 'mxnet': 68 | boxes = boxes.asnumpy() 69 | masks = masks.asnumpy() 70 | scores = scores.asnumpy() 71 | else: 72 | boxes = np.array(boxes) 73 | masks = np.array(masks) 74 | scores = np.array(scores) 75 | 76 | # Get only results that are above the threshold. Default threshold is 0.5. 77 | scores = scores[scores > score_thres] 78 | # Number of instances 79 | N = len(scores) 80 | 81 | if not N: 82 | print("\n*** No instances to display *** \n") 83 | 84 | # If no axis is passed, create one and automatically call show() 85 | auto_show = False 86 | if not ax: 87 | _, ax = plt.subplots(1, figsize=figsize) 88 | auto_show = True 89 | 90 | # Generate random colors 91 | colors = colors or random_colors(N) 92 | 93 | # Show area outside image boundaries. 94 | height, width = image.shape[:2] 95 | ax.set_ylim(height + 10, -10) 96 | ax.set_xlim(-10, width + 10) 97 | ax.axis('off') 98 | ax.set_title(title) 99 | masked_image = image.astype(np.uint32).copy() 100 | 101 | for i in range(N): 102 | color = colors[i] 103 | 104 | # Bounding box 105 | if not np.any(boxes[i]): 106 | # Skip this instance. Has no bbox. Likely lost in image cropping. 107 | continue 108 | x1, y1, x2, y2 = boxes[i] 109 | 110 | if show_bbox: 111 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 112 | alpha=0.7, linestyle="dashed", 113 | edgecolor=color, facecolor='none') 114 | ax.add_patch(p) 115 | 116 | # Label 117 | class_id = class_ids[i] 118 | score = scores[i] if scores is not None else None 119 | #predicted_class = class_info[int(cls_pred)] 120 | label = class_names[int(class_id)] 121 | caption = "{} {:.3f}".format(label, score) if score else label 122 | ax.text(x1, y1, caption, color='w', verticalalignment='top', 123 | size=12, bbox={'color': color, 'pad': 0}) 124 | 125 | # Mask 126 | mask = (masks[:, :, i] > mask_thres) * 1 127 | if show_mask: 128 | masked_image = apply_mask(masked_image, mask, color) 129 | 130 | # Mask Polygon 131 | # Pad to ensure proper polygons for masks that touch image edges. 132 | padded_mask = np.zeros( 133 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 134 | 135 | padded_mask[1:-1, 1:-1] = mask 136 | contours = find_contours(padded_mask, 0.5) 137 | for verts in contours: 138 | # Subtract the padding and flip (y, x) to (x, y) 139 | verts = np.fliplr(verts) - 1 140 | p = Polygon(verts, facecolor="none", edgecolor=color) 141 | ax.add_patch(p) 142 | ax.imshow(masked_image.astype(np.uint8)) 143 | if auto_show: 144 | plt.show() 145 | 146 | #return masked_image 147 | -------------------------------------------------------------------------------- /cost_optimization/ptn_3_elastic-inference/README.md: -------------------------------------------------------------------------------- 1 | # Elastic Inference 2 | 3 | ## Overview 4 | 비싼 GPU 인스턴스를 배포 용도로 계속 띄워 놓게 되면 많은 비용이 발생할 수밖에 없고, 비용 절감을 위해 CPU 인스턴스를 쓰기에는 충분한 latency를 보장할 수 없습니다. 이럴 때 바로 Elastic Inference Acclerator(이하 EIA)를 사용하시면 됩니다. 5 | 6 | EIA는 평소에는 CPU 인스턴스를 사용하다가 추론 시에 GPU 엑셀러레이터를 빌려오는 개념이며, 이를 통해 GPU의 컴퓨팅 파워를 사용하면서 GPU 인스턴스 대비 추론 비용을 최대 75%까지 절감할 수 있습니다. 호스트 인스턴스와 추론 가속 하드웨어를 분리할 수 있는 유연성이 있으므로 애플리케이션에 필요한 CPU, 메모리 및 기타 모든 리소스에 대해 하드웨어를 유연하게 최적화 할 수 있습니다. 
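In practice, this host/accelerator split amounts to deploying on an inexpensive CPU instance and attaching the accelerator through the `accelerator_type` argument. The sketch below is a minimal illustration using the SageMaker Python SDK; the instance size (`ml.c5.large`), accelerator size (`ml.eia2.medium`), framework version, and entry point are placeholder assumptions, not recommendations. PyTorch-specific caveats (TorchScript conversion, version-specific APIs) are covered in the section below.

```python
# Minimal sketch (assumed sizes, versions, and file names): CPU host instance + attached EI accelerator.
from sagemaker.pytorch import PyTorchModel

pytorch_model = PyTorchModel(
    model_data="s3://<bucket>/<prefix>/model.tar.gz",  # TorchScript model archive
    role=role,                                         # assumes an existing SageMaker execution role
    entry_point="infer_pytorch_eia.py",
    source_dir="src",
    framework_version="1.3.1",
    py_version="py3",
)

predictor = pytorch_model.deploy(
    initial_instance_count=1,
    instance_type="ml.c5.large",        # inexpensive CPU host
    accelerator_type="ml.eia2.medium",  # GPU acceleration is attached only for inference
)
```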
7 | 8 | 모든 Amazon SageMaker 인스턴스 유형, EC2 인스턴스 유형 또는 Amazon ECS 작업을 지원하며, 대부분의 딥러닝 프레임워크를 지원하고 있습니다. 지원되는 프레임워크 버전은 AWS CLI로 확인할 수 있습니다. 9 | 10 | ![ptn_2_01](../../images/cost_optimization/ptn_3_01.png) 11 | 12 | ## Elastic Inference 적용 방법 13 | 14 | EIA는 이미 AWS에서 각 프레임워크에 적합한 빌트인 컨테이너를 제공하고 있기 때문에, 인스턴스 타입만 `eia`로 지정해 주시면 됩니다. 단, PyTorch를 사용할 경우에는 인스턴스 타입 지정 전에 추가로 코드를 변경해야 하며, PyTorch 1.3.1과 PyTorch 1.5.1에서 EIA를 적용하기 위한 용법이 다르다는 점도 주의해 주세요. 15 | 16 | PyToch에서 저장하는 `.pth` 파일은 weight 가중치만 저장되며, 네트워크 구조를 정의한 그래프는 저장되지 않습니다. 이를 TorchScript 변환을 통해 weight와 graph를 모두 포함하게 해야 합니다. Tracing 방식과 Scripting 방식이 모두 지원되기 때문에 편리한 방식을 선택해서 모델을 `torch.jit.save()` 함수로 저장하시면 됩니다. 17 | 18 | ### PyTorch 1.3.1 (Tracing 방식 권장) 19 | 20 | #### TorchScript 생성 21 | ```python 22 | input_shape = [1, 3, 224, 224] 23 | input = torch.zeros(input_shape).float() 24 | model = torch.jit.trace(model.eval(), input) 25 | torch.jit.save(model, save_dir) 26 | ``` 27 | 28 | #### model_fn() 29 | ```python 30 | # Required when using Elastic Inference 31 | with torch.jit.optimized_execution(True, {‘target_device’: ‘eia:0’}): 32 | traced_model = torch.jit.trace(model, x) 33 | ``` 34 | 35 | #### Deploy 36 | ```python 37 | pytorch_model.deploy(..., 38 | framework_version=‘1.3.1', 39 | accelerator_type='ml.eia2.medium' 40 | ) 41 | ``` 42 | 43 | ### PyTorch 1.5.1 (Scripting 방식 권장) 44 | 45 | #### TorchScript 생성 46 | ```python 47 | model = torch.jit.script(model.eval()) 48 | torch.jit.save(model, save_dir) 49 | ``` 50 | 51 | #### model_fn() 52 | ```python 53 | model = torch.jit.load(save_dir, map_location=torch.device('cpu’)) 54 | # Disable profiling executor 55 | torch._C._jit_set_profiling_executor(False) 56 | ``` 57 | 58 | #### Deploy 59 | ```python 60 | _ecr_image=“_763104351884.dkr.ecr..amazonaws.com/pytorch-inference-eia:" 61 | model.deploy(..., 62 | image_uri=_ecr_image, 63 | framework_version='1.5.1', 64 | accelerator_type='ml.eia2.medium' 65 | ) 66 | ``` 67 | -------------------------------------------------------------------------------- /cost_optimization/ptn_3_elastic-inference/metadata/coco_labels.txt: -------------------------------------------------------------------------------- 1 | 1,1,person 2 | 2,2,bicycle 3 | 3,3,car 4 | 4,4,motorcycle 5 | 5,5,airplane 6 | 6,6,bus 7 | 7,7,train 8 | 8,8,truck 9 | 9,9,boat 10 | 10,10,traffic light 11 | 11,11,fire hydrant 12 | 13,12,stop sign 13 | 14,13,parking meter 14 | 15,14,bench 15 | 16,15,bird 16 | 17,16,cat 17 | 18,17,dog 18 | 19,18,horse 19 | 20,19,sheep 20 | 21,20,cow 21 | 22,21,elephant 22 | 23,22,bear 23 | 24,23,zebra 24 | 25,24,giraffe 25 | 27,25,backpack 26 | 28,26,umbrella 27 | 31,27,handbag 28 | 32,28,tie 29 | 33,29,suitcase 30 | 34,30,frisbee 31 | 35,31,skis 32 | 36,32,snowboard 33 | 37,33,sports ball 34 | 38,34,kite 35 | 39,35,baseball bat 36 | 40,36,baseball glove 37 | 41,37,skateboard 38 | 42,38,surfboard 39 | 43,39,tennis racket 40 | 44,40,bottle 41 | 46,41,wine glass 42 | 47,42,cup 43 | 48,43,fork 44 | 49,44,knife 45 | 50,45,spoon 46 | 51,46,bowl 47 | 52,47,banana 48 | 53,48,apple 49 | 54,49,sandwich 50 | 55,50,orange 51 | 56,51,broccoli 52 | 57,52,carrot 53 | 58,53,hot dog 54 | 59,54,pizza 55 | 60,55,donut 56 | 61,56,cake 57 | 62,57,chair 58 | 63,58,couch 59 | 64,59,potted plant 60 | 65,60,bed 61 | 67,61,dining table 62 | 70,62,toilet 63 | 72,63,tv 64 | 73,64,laptop 65 | 74,65,mouse 66 | 75,66,remote 67 | 76,67,keyboard 68 | 77,68,cell phone 69 | 78,69,microwave 70 | 79,70,oven 71 | 80,71,toaster 72 | 81,72,sink 73 | 82,73,refrigerator 74 | 84,74,book 75 | 85,75,clock 
76 | 86,76,vase 77 | 87,77,scissors 78 | 88,78,teddy bear 79 | 89,79,hair drier 80 | 90,80,toothbrush -------------------------------------------------------------------------------- /cost_optimization/ptn_3_elastic-inference/metadata/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | git clone https://github.com/tensorflow/models.git 6 | 7 | source activate tensorflow_p36 8 | 9 | cd /home/ec2-user/SageMaker/tfs-workshop/files/models/research 10 | protoc object_detection/protos/*.proto --python_out=. 11 | 12 | python setup.py build 13 | python setup.py install 14 | 15 | export PYTHONPATH=/home/ec2-user/SageMaker/tfs-workshop/files/models/research 16 | 17 | source deactivate tensorflow_p36 -------------------------------------------------------------------------------- /cost_optimization/ptn_3_elastic-inference/metadata/mscoco_label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | name: "/m/01g317" 3 | id: 1 4 | display_name: "person" 5 | } 6 | item { 7 | name: "/m/0199g" 8 | id: 2 9 | display_name: "bicycle" 10 | } 11 | item { 12 | name: "/m/0k4j" 13 | id: 3 14 | display_name: "car" 15 | } 16 | item { 17 | name: "/m/04_sv" 18 | id: 4 19 | display_name: "motorcycle" 20 | } 21 | item { 22 | name: "/m/05czz6l" 23 | id: 5 24 | display_name: "airplane" 25 | } 26 | item { 27 | name: "/m/01bjv" 28 | id: 6 29 | display_name: "bus" 30 | } 31 | item { 32 | name: "/m/07jdr" 33 | id: 7 34 | display_name: "train" 35 | } 36 | item { 37 | name: "/m/07r04" 38 | id: 8 39 | display_name: "truck" 40 | } 41 | item { 42 | name: "/m/019jd" 43 | id: 9 44 | display_name: "boat" 45 | } 46 | item { 47 | name: "/m/015qff" 48 | id: 10 49 | display_name: "traffic light" 50 | } 51 | item { 52 | name: "/m/01pns0" 53 | id: 11 54 | display_name: "fire hydrant" 55 | } 56 | item { 57 | name: "/m/02pv19" 58 | id: 13 59 | display_name: "stop sign" 60 | } 61 | item { 62 | name: "/m/015qbp" 63 | id: 14 64 | display_name: "parking meter" 65 | } 66 | item { 67 | name: "/m/0cvnqh" 68 | id: 15 69 | display_name: "bench" 70 | } 71 | item { 72 | name: "/m/015p6" 73 | id: 16 74 | display_name: "bird" 75 | } 76 | item { 77 | name: "/m/01yrx" 78 | id: 17 79 | display_name: "cat" 80 | } 81 | item { 82 | name: "/m/0bt9lr" 83 | id: 18 84 | display_name: "dog" 85 | } 86 | item { 87 | name: "/m/03k3r" 88 | id: 19 89 | display_name: "horse" 90 | } 91 | item { 92 | name: "/m/07bgp" 93 | id: 20 94 | display_name: "sheep" 95 | } 96 | item { 97 | name: "/m/01xq0k1" 98 | id: 21 99 | display_name: "cow" 100 | } 101 | item { 102 | name: "/m/0bwd_0j" 103 | id: 22 104 | display_name: "elephant" 105 | } 106 | item { 107 | name: "/m/01dws" 108 | id: 23 109 | display_name: "bear" 110 | } 111 | item { 112 | name: "/m/0898b" 113 | id: 24 114 | display_name: "zebra" 115 | } 116 | item { 117 | name: "/m/03bk1" 118 | id: 25 119 | display_name: "giraffe" 120 | } 121 | item { 122 | name: "/m/01940j" 123 | id: 27 124 | display_name: "backpack" 125 | } 126 | item { 127 | name: "/m/0hnnb" 128 | id: 28 129 | display_name: "umbrella" 130 | } 131 | item { 132 | name: "/m/080hkjn" 133 | id: 31 134 | display_name: "handbag" 135 | } 136 | item { 137 | name: "/m/01rkbr" 138 | id: 32 139 | display_name: "tie" 140 | } 141 | item { 142 | name: "/m/01s55n" 143 | id: 33 144 | display_name: "suitcase" 145 | } 146 | item { 147 | name: "/m/02wmf" 148 | id: 34 149 | display_name: "frisbee" 150 | } 151 | item { 152 | name: "/m/071p9" 153 | id: 35 
154 | display_name: "skis" 155 | } 156 | item { 157 | name: "/m/06__v" 158 | id: 36 159 | display_name: "snowboard" 160 | } 161 | item { 162 | name: "/m/018xm" 163 | id: 37 164 | display_name: "sports ball" 165 | } 166 | item { 167 | name: "/m/02zt3" 168 | id: 38 169 | display_name: "kite" 170 | } 171 | item { 172 | name: "/m/03g8mr" 173 | id: 39 174 | display_name: "baseball bat" 175 | } 176 | item { 177 | name: "/m/03grzl" 178 | id: 40 179 | display_name: "baseball glove" 180 | } 181 | item { 182 | name: "/m/06_fw" 183 | id: 41 184 | display_name: "skateboard" 185 | } 186 | item { 187 | name: "/m/019w40" 188 | id: 42 189 | display_name: "surfboard" 190 | } 191 | item { 192 | name: "/m/0dv9c" 193 | id: 43 194 | display_name: "tennis racket" 195 | } 196 | item { 197 | name: "/m/04dr76w" 198 | id: 44 199 | display_name: "bottle" 200 | } 201 | item { 202 | name: "/m/09tvcd" 203 | id: 46 204 | display_name: "wine glass" 205 | } 206 | item { 207 | name: "/m/08gqpm" 208 | id: 47 209 | display_name: "cup" 210 | } 211 | item { 212 | name: "/m/0dt3t" 213 | id: 48 214 | display_name: "fork" 215 | } 216 | item { 217 | name: "/m/04ctx" 218 | id: 49 219 | display_name: "knife" 220 | } 221 | item { 222 | name: "/m/0cmx8" 223 | id: 50 224 | display_name: "spoon" 225 | } 226 | item { 227 | name: "/m/04kkgm" 228 | id: 51 229 | display_name: "bowl" 230 | } 231 | item { 232 | name: "/m/09qck" 233 | id: 52 234 | display_name: "banana" 235 | } 236 | item { 237 | name: "/m/014j1m" 238 | id: 53 239 | display_name: "apple" 240 | } 241 | item { 242 | name: "/m/0l515" 243 | id: 54 244 | display_name: "sandwich" 245 | } 246 | item { 247 | name: "/m/0cyhj_" 248 | id: 55 249 | display_name: "orange" 250 | } 251 | item { 252 | name: "/m/0hkxq" 253 | id: 56 254 | display_name: "broccoli" 255 | } 256 | item { 257 | name: "/m/0fj52s" 258 | id: 57 259 | display_name: "carrot" 260 | } 261 | item { 262 | name: "/m/01b9xk" 263 | id: 58 264 | display_name: "hot dog" 265 | } 266 | item { 267 | name: "/m/0663v" 268 | id: 59 269 | display_name: "pizza" 270 | } 271 | item { 272 | name: "/m/0jy4k" 273 | id: 60 274 | display_name: "donut" 275 | } 276 | item { 277 | name: "/m/0fszt" 278 | id: 61 279 | display_name: "cake" 280 | } 281 | item { 282 | name: "/m/01mzpv" 283 | id: 62 284 | display_name: "chair" 285 | } 286 | item { 287 | name: "/m/02crq1" 288 | id: 63 289 | display_name: "couch" 290 | } 291 | item { 292 | name: "/m/03fp41" 293 | id: 64 294 | display_name: "potted plant" 295 | } 296 | item { 297 | name: "/m/03ssj5" 298 | id: 65 299 | display_name: "bed" 300 | } 301 | item { 302 | name: "/m/04bcr3" 303 | id: 67 304 | display_name: "dining table" 305 | } 306 | item { 307 | name: "/m/09g1w" 308 | id: 70 309 | display_name: "toilet" 310 | } 311 | item { 312 | name: "/m/07c52" 313 | id: 72 314 | display_name: "tv" 315 | } 316 | item { 317 | name: "/m/01c648" 318 | id: 73 319 | display_name: "laptop" 320 | } 321 | item { 322 | name: "/m/020lf" 323 | id: 74 324 | display_name: "mouse" 325 | } 326 | item { 327 | name: "/m/0qjjc" 328 | id: 75 329 | display_name: "remote" 330 | } 331 | item { 332 | name: "/m/01m2v" 333 | id: 76 334 | display_name: "keyboard" 335 | } 336 | item { 337 | name: "/m/050k8" 338 | id: 77 339 | display_name: "cell phone" 340 | } 341 | item { 342 | name: "/m/0fx9l" 343 | id: 78 344 | display_name: "microwave" 345 | } 346 | item { 347 | name: "/m/029bxz" 348 | id: 79 349 | display_name: "oven" 350 | } 351 | item { 352 | name: "/m/01k6s3" 353 | id: 80 354 | display_name: "toaster" 355 | } 356 | item { 357 | 
name: "/m/0130jx" 358 | id: 81 359 | display_name: "sink" 360 | } 361 | item { 362 | name: "/m/040b_t" 363 | id: 82 364 | display_name: "refrigerator" 365 | } 366 | item { 367 | name: "/m/0bt_c3" 368 | id: 84 369 | display_name: "book" 370 | } 371 | item { 372 | name: "/m/01x3z" 373 | id: 85 374 | display_name: "clock" 375 | } 376 | item { 377 | name: "/m/02s195" 378 | id: 86 379 | display_name: "vase" 380 | } 381 | item { 382 | name: "/m/01lsmm" 383 | id: 87 384 | display_name: "scissors" 385 | } 386 | item { 387 | name: "/m/0kmg4" 388 | id: 88 389 | display_name: "teddy bear" 390 | } 391 | item { 392 | name: "/m/03wvsk" 393 | id: 89 394 | display_name: "hair drier" 395 | } 396 | item { 397 | name: "/m/012xff" 398 | id: 90 399 | display_name: "toothbrush" 400 | } -------------------------------------------------------------------------------- /cost_optimization/ptn_3_elastic-inference/samples/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/cost_optimization/ptn_3_elastic-inference/samples/bus.jpg -------------------------------------------------------------------------------- /cost_optimization/ptn_3_elastic-inference/samples/pizza.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/cost_optimization/ptn_3_elastic-inference/samples/pizza.jpg -------------------------------------------------------------------------------- /cost_optimization/ptn_3_elastic-inference/samples/strawberry.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/cost_optimization/ptn_3_elastic-inference/samples/strawberry.jpg -------------------------------------------------------------------------------- /cost_optimization/ptn_3_elastic-inference/src/infer_pytorch_eia.py: -------------------------------------------------------------------------------- 1 | 2 | import io 3 | import json 4 | import logging 5 | import os 6 | import pickle 7 | 8 | import numpy as np 9 | import torch 10 | import torchvision.transforms as transforms 11 | from PIL import Image # Training container doesn't have this package 12 | 13 | logger = logging.getLogger(__name__) 14 | logger.setLevel(logging.DEBUG) 15 | 16 | # To use new EIA inference API, customer should use attach_eia(model, eia_ordinal_number) 17 | VERSIONS_USE_NEW_API = ["1.5.1"] 18 | 19 | def model_fn(model_dir): 20 | try: 21 | loaded_model = torch.jit.load("model.pth", map_location=torch.device("cpu")) 22 | if torch.__version__ in VERSIONS_USE_NEW_API: 23 | import torcheia 24 | 25 | loaded_model = loaded_model.eval() 26 | loaded_model = torcheia.jit.attach_eia(loaded_model, 0) 27 | return loaded_model 28 | except Exception as e: 29 | logger.exception(f"Exception in model fn {e}") 30 | return None 31 | 32 | def transform_fn(model, payload, request_content_type='application/octet-stream', 33 | response_content_type='application/json'): 34 | 35 | logger.info('Invoking user-defined transform function') 36 | 37 | if request_content_type != 'application/octet-stream': 38 | raise RuntimeError( 39 | 'Content type must be application/octet-stream. 
Provided: {0}'.format(request_content_type)) 40 | 41 | # preprocess 42 | decoded = Image.open(io.BytesIO(payload)) 43 | preprocess = transforms.Compose([ 44 | transforms.Resize(256), 45 | transforms.CenterCrop(224), 46 | transforms.ToTensor(), 47 | transforms.Normalize( 48 | mean=[ 49 | 0.485, 0.456, 0.406], std=[ 50 | 0.229, 0.224, 0.225]), 51 | ]) 52 | normalized = preprocess(decoded) 53 | batchified = normalized.unsqueeze(0) 54 | 55 | # predict 56 | # With EI, client instance should be CPU for cost-efficiency. Subgraphs with unsupported arguments run locally. Server runs with CUDA 57 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 58 | batchified = batchified.to(device) 59 | 60 | # Please make sure model is loaded to cpu and has been eval(), in this example, we have done this step in model_fn() 61 | with torch.no_grad(): 62 | if torch.__version__ in VERSIONS_USE_NEW_API: 63 | # Please make sure torcheia has been imported 64 | import torcheia 65 | 66 | # We need to set the profiling executor for EIA 67 | torch._C._jit_set_profiling_executor(False) 68 | with torch.jit.optimized_execution(True): 69 | result = model.forward(batchified) 70 | # Set the target device to the accelerator ordinal 71 | else: 72 | with torch.jit.optimized_execution(True, {"target_device": "eia:0"}): 73 | result = model(batchified) 74 | 75 | # Softmax (assumes batch size 1) 76 | result = np.squeeze(result.detach().cpu().numpy()) 77 | result_exp = np.exp(result - np.max(result)) 78 | result = result_exp / np.sum(result_exp) 79 | 80 | response_body = json.dumps(result.tolist()) 81 | 82 | return response_body, response_content_type 83 | -------------------------------------------------------------------------------- /cost_optimization/ptn_3_elastic-inference/src/infer_pytorch_neo.py: -------------------------------------------------------------------------------- 1 | 2 | import io 3 | import json 4 | import logging 5 | import os 6 | import pickle 7 | 8 | import numpy as np 9 | import torch 10 | import torchvision.transforms as transforms 11 | from PIL import Image # Training container doesn't have this package 12 | 13 | logger = logging.getLogger(__name__) 14 | logger.setLevel(logging.DEBUG) 15 | 16 | 17 | def transform_fn(model, payload, request_content_type='application/octet-stream', 18 | response_content_type='application/json'): 19 | 20 | logger.info('Invoking user-defined transform function') 21 | 22 | if request_content_type != 'application/octet-stream': 23 | raise RuntimeError( 24 | 'Content type must be application/octet-stream. 
Provided: {0}'.format(request_content_type)) 25 | 26 | # preprocess 27 | decoded = Image.open(io.BytesIO(payload)) 28 | preprocess = transforms.Compose([ 29 | transforms.Resize(256), 30 | transforms.CenterCrop(224), 31 | transforms.ToTensor(), 32 | transforms.Normalize( 33 | mean=[ 34 | 0.485, 0.456, 0.406], std=[ 35 | 0.229, 0.224, 0.225]), 36 | ]) 37 | normalized = preprocess(decoded) 38 | batchified = normalized.unsqueeze(0) 39 | 40 | # predict 41 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 42 | batchified = batchified.to(device) 43 | result = model.forward(batchified) 44 | 45 | # Softmax (assumes batch size 1) 46 | result = np.squeeze(result.detach().cpu().numpy()) 47 | result_exp = np.exp(result - np.max(result)) 48 | result = result_exp / np.sum(result_exp) 49 | 50 | response_body = json.dumps(result.tolist()) 51 | 52 | return response_body, response_content_type 53 | -------------------------------------------------------------------------------- /cost_optimization/ptn_3_elastic-inference/src/utils.py: -------------------------------------------------------------------------------- 1 | def get_label_map(label_file): 2 | label_map = {} 3 | labels = open(label_file, 'r') 4 | 5 | for line in labels: 6 | line = line.rstrip("\n") 7 | ids = line.split(',') 8 | label_map[int(ids[0])] = ids[2] 9 | 10 | return label_map 11 | 12 | 13 | def get_label_map_imagenet(label_file): 14 | label_map = {} 15 | with open(label_file, 'r') as f: 16 | for line in f: 17 | key, val = line.strip().split(':') 18 | label_map[key] = val.replace(',', '') 19 | return label_map 20 | 21 | 22 | def delete_endpoint(client, endpoint_name): 23 | response = client.describe_endpoint_config(EndpointConfigName=endpoint_name) 24 | model_name = response['ProductionVariants'][0]['ModelName'] 25 | 26 | client.delete_model(ModelName=model_name) 27 | client.delete_endpoint(EndpointName=endpoint_name) 28 | client.delete_endpoint_config(EndpointConfigName=endpoint_name) 29 | 30 | print(f'--- Deleted model: {model_name}') 31 | print(f'--- Deleted endpoint: {endpoint_name}') 32 | print(f'--- Deleted endpoint_config: {endpoint_name}') 33 | 34 | 35 | def plot_bbox(img_resized, bboxes, scores, cids, class_info, framework='pytorch', threshold=0.5): 36 | 37 | import numpy as np 38 | import random 39 | import matplotlib.patches as patches 40 | import matplotlib.pyplot as plt 41 | 42 | if framework=='mxnet': 43 | img_np = img_resized.asnumpy() 44 | scores = scores.asnumpy() 45 | bboxes = bboxes.asnumpy() 46 | cids = cids.asnumpy() 47 | else: 48 | img_np = img_resized 49 | scores = np.array(scores) 50 | bboxes = np.array(bboxes) 51 | cids = np.array(cids) 52 | 53 | # Get only results that are above the threshold. Default threshold is 0.5. 
54 | scores = scores[scores > threshold] 55 | num_detections = len(scores) 56 | bboxes = bboxes[:num_detections, :] 57 | cids = cids[:num_detections].astype('int').squeeze() 58 | 59 | # Get bounding-box colors 60 | cmap = plt.get_cmap('tab20b') 61 | colors = [cmap(i) for i in np.linspace(0, 1, 20)] 62 | random.seed(42) 63 | random.shuffle(colors) 64 | 65 | plt.figure() 66 | fig, ax = plt.subplots(1, figsize=(10,10)) 67 | ax.imshow(img_np) 68 | 69 | if cids is not None: 70 | # Get unique class labels 71 | unique_labels = set(list(cids.astype('int').squeeze())) 72 | unique_labels = np.array(list(unique_labels)) 73 | n_cls_preds = len(unique_labels) 74 | bbox_colors = colors[:n_cls_preds] 75 | 76 | for b, cls_pred, cls_conf in zip(bboxes, cids, scores): 77 | x1, y1, x2, y2 = b[0], b[1], b[2], b[3] 78 | predicted_class = class_info[int(cls_pred)] 79 | label = '{} {:.2f}'.format(predicted_class, cls_conf) 80 | 81 | # Get box height and width 82 | box_h = y2 - y1 83 | box_w = x2 - x1 84 | 85 | # Add a box with the color for this class 86 | color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])] 87 | bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=3, edgecolor=color, facecolor='none') 88 | ax.add_patch(bbox) 89 | 90 | plt.text(x1, y1, s=label, color='white', verticalalignment='top', 91 | bbox={'color': color, 'pad': 0}) 92 | -------------------------------------------------------------------------------- /cost_optimization/ptn_4_ml-inference-chip/README.md: -------------------------------------------------------------------------------- 1 | # AWS Inferentia 2 | 3 | ## Overview 4 | AWS Inferentia는 저렴한 비용으로 높은 처리량(throughput)과 짧은 레이턴시(low latency)의 추론 성능을 제공하기 위해 AWS에서 개발한 머신 러닝 추론 칩입니다. Inferentia 칩은 최신형 커스텀 2세대 Intel® Xeon® 프로세서 및 100Gbps 네트워킹과 결합되어 머신 러닝 추론 애플리케이션을 위한 고성능 및 업계에서 가장 낮은 비용을 제공합니다. AWS Inferentia 기반 Amazon EC2 Inf1 인스턴스는 Inferentia 칩에서 머신 러닝 모델을 컴파일&최적화할 수 있는 AWS Neuron 컴파일러, 런타임 및 프로파일링 도구가 포함되어 있습니다. 5 | 6 | AWS Neuron은 AWS Inferentia 칩을 사용하여 머신 러닝 추론을 실행하기 위한 SDK입니다. Neuron을 사용하면 딥러닝 프레임워크(PyTorch, TensorFlow, MXNet)에서 훈련된 컴퓨터 비전 및 자연어 처리 모델을 보다 빠르게 추론할 수 있습니다. 또한, [Dynamic Batching](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/appnotes/perf/torch-neuron-dataparallel-app-note.html#dynamic-batching-description)과 [Data Parallel](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/neuron-frameworks/pytorch-neuron/api-torch-neuron-dataparallel-api.html) 같은 기능을 활용하여 대용량 모델에 대한 추론 성능 개선이 가능합니다. 7 | 8 | Inf1 인스턴스는 SageMaker 호스팅 인스턴스로도 배포가 가능하며, 여러분은 아래 두 가지 옵션 중 하나를 선택하여 머신 러닝 모델을 쉽고 빠르게 배포할 수 있습니다. 9 | 10 | - **Option 1.** SageMaker Neo로 컴파일 후 Inf1 호스팅 인스턴스로 배포. 이 경우 SageMaker Neo에서 내부적으로 Neuron SDK를 사용하여 모델을 컴파일합니다. Hugging Face 모델은 컴파일 시에 dtype int64로 컴파일해야 합니다. 
11 | - **Option 2.** 개발 환경에서 Neuron SDK로 직접 컴파일 후 Inf1 호스팅 인스턴스로 배포 12 | 13 | ![ptn_4_01](../../images/cost_optimization/ptn_4_01.png) -------------------------------------------------------------------------------- /cost_optimization/ptn_4_ml-inference-chip/src/inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | from torch import nn 5 | from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig 6 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 7 | 8 | JSON_CONTENT_TYPE = 'application/json' 9 | 10 | max_seq_length = 128 11 | classes = ['not paraphrase', 'paraphrase'] 12 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 13 | model_id = 'bert-base-cased-finetuned-mrpc' 14 | 15 | def model_fn(model_dir): 16 | 17 | tokenizer_init = AutoTokenizer.from_pretrained(model_id) 18 | model = AutoModelForSequenceClassification.from_pretrained(model_id).eval().to(device) 19 | 20 | return (model, tokenizer_init) 21 | 22 | 23 | def input_fn(serialized_input_data, content_type=JSON_CONTENT_TYPE): 24 | if content_type == JSON_CONTENT_TYPE: 25 | input_data = json.loads(serialized_input_data) 26 | return input_data 27 | else: 28 | raise Exception('Requested unsupported ContentType in Accept: ' + content_type) 29 | return 30 | 31 | 32 | def predict_fn(input_data, models): 33 | 34 | model, tokenizer = models 35 | sequence_0 = input_data[0] 36 | sequence_1 = input_data[1] 37 | 38 | paraphrase = tokenizer.encode_plus( 39 | sequence_0, 40 | sequence_1, 41 | max_length=max_seq_length, 42 | padding='max_length', 43 | truncation=True, 44 | return_tensors='pt' 45 | ).to(device) 46 | 47 | 48 | # Convert example inputs to a format that is compatible with TorchScript tracing 49 | example_inputs = paraphrase['input_ids'], paraphrase['attention_mask'], paraphrase['token_type_ids'] 50 | 51 | with torch.no_grad(): 52 | logits = model(*example_inputs)[0] 53 | 54 | softmax_fn = nn.Softmax(dim=1) 55 | softmax_output = softmax_fn(logits)[0] 56 | pred_idx = softmax_output.argmax().item() 57 | pred_class = classes[pred_idx] 58 | score = softmax_output[pred_idx].item() 59 | 60 | out_str = f'pred_idx={pred_idx}, pred_class={pred_class}, prob={score:.5f}' 61 | 62 | return out_str 63 | 64 | 65 | def output_fn(prediction_output, accept=JSON_CONTENT_TYPE): 66 | if accept == JSON_CONTENT_TYPE: 67 | return json.dumps(prediction_output), accept 68 | 69 | raise Exception('Requested unsupported ContentType in Accept: ' + accept) -------------------------------------------------------------------------------- /cost_optimization/ptn_4_ml-inference-chip/src/inference_inf1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import tensorflow # to workaround a protobuf version conflict issue 4 | import torch 5 | import torch.neuron 6 | from torch import nn 7 | from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig 8 | 9 | JSON_CONTENT_TYPE = 'application/json' 10 | 11 | max_seq_length = 128 12 | classes = ['not paraphrase', 'paraphrase'] 13 | model_id = 'bert-base-cased-finetuned-mrpc' 14 | 15 | 16 | def model_fn(model_dir): 17 | tokenizer_init = AutoTokenizer.from_pretrained(model_id) 18 | model_filepath = os.path.join(model_dir, 'neuron_compiled_model.pt') 19 | model_neuron = torch.jit.load(model_filepath) 20 | return (model_neuron, tokenizer_init) 21 | 22 | 23 | def input_fn(serialized_input_data, 
content_type=JSON_CONTENT_TYPE): 24 | if content_type == JSON_CONTENT_TYPE: 25 | input_data = json.loads(serialized_input_data) 26 | return input_data 27 | else: 28 | raise Exception('Requested unsupported ContentType in Accept: ' + content_type) 29 | return 30 | 31 | 32 | def predict_fn(input_data, models): 33 | 34 | model, tokenizer = models 35 | sequence_0 = input_data[0] 36 | sequence_1 = input_data[1] 37 | print(sequence_0, sequence_1) 38 | 39 | paraphrase = tokenizer.encode_plus( 40 | sequence_0, 41 | sequence_1, 42 | max_length=max_seq_length, 43 | padding='max_length', 44 | truncation=True, 45 | return_tensors='pt' 46 | ) 47 | 48 | # Convert example inputs to a format that is compatible with TorchScript tracing 49 | example_inputs = paraphrase['input_ids'], paraphrase['attention_mask'], paraphrase['token_type_ids'] 50 | 51 | with torch.no_grad(): 52 | logits = model(*example_inputs)[0] 53 | 54 | softmax_fn = nn.Softmax(dim=1) 55 | softmax_output = softmax_fn(logits)[0] 56 | pred_idx = softmax_output.argmax().item() 57 | pred_class = classes[pred_idx] 58 | score = softmax_output[pred_idx].item() 59 | 60 | out_str = f'pred_idx={pred_idx}, pred_class={pred_class}, prob={score:.5f}' 61 | 62 | return out_str 63 | 64 | 65 | def output_fn(prediction_output, accept=JSON_CONTENT_TYPE): 66 | if accept == JSON_CONTENT_TYPE: 67 | return json.dumps(prediction_output), accept 68 | 69 | raise Exception('Requested unsupported ContentType in Accept: ' + accept) 70 | -------------------------------------------------------------------------------- /cost_optimization/ptn_4_ml-inference-chip/src/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.15.0 2 | -------------------------------------------------------------------------------- /images/cost_optimization/ptn_1_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/cost_optimization/ptn_1_01.png -------------------------------------------------------------------------------- /images/cost_optimization/ptn_1_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/cost_optimization/ptn_1_02.png -------------------------------------------------------------------------------- /images/cost_optimization/ptn_2_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/cost_optimization/ptn_2_01.png -------------------------------------------------------------------------------- /images/cost_optimization/ptn_3_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/cost_optimization/ptn_3_01.png -------------------------------------------------------------------------------- /images/cost_optimization/ptn_4_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/cost_optimization/ptn_4_01.png 
-------------------------------------------------------------------------------- /images/cost_optimization_persona.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/cost_optimization_persona.png -------------------------------------------------------------------------------- /images/key_features/ptn_1_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/key_features/ptn_1_01.png -------------------------------------------------------------------------------- /images/key_features/ptn_2_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/key_features/ptn_2_01.png -------------------------------------------------------------------------------- /images/key_features/ptn_3_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/key_features/ptn_3_01.png -------------------------------------------------------------------------------- /images/key_features/ptn_4.1_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/key_features/ptn_4.1_01.png -------------------------------------------------------------------------------- /images/key_features/ptn_4.2_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/key_features/ptn_4.2_01.png -------------------------------------------------------------------------------- /images/key_features/ptn_5_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/key_features/ptn_5_01.png -------------------------------------------------------------------------------- /images/key_features/ptn_6_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/key_features/ptn_6_01.png -------------------------------------------------------------------------------- /images/key_features_persona.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/key_features_persona.png -------------------------------------------------------------------------------- /images/production/ptn_1_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/production/ptn_1_01.png -------------------------------------------------------------------------------- /images/production/ptn_2_01.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/production/ptn_2_01.png -------------------------------------------------------------------------------- /images/production/ptn_2_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/production/ptn_2_02.png -------------------------------------------------------------------------------- /images/production/ptn_3_01_kor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/production/ptn_3_01_kor.png -------------------------------------------------------------------------------- /images/production_persona.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/images/production_persona.png -------------------------------------------------------------------------------- /key_features/ptn_1_realtime-inference/README.md: -------------------------------------------------------------------------------- 1 | # Real-time Inference 2 | 3 | ## Overview 4 | SageMaker Endpoint는 REST API를 통해 실시간 추론을 수행할 수 있는 완전 관리형 서비스입니다. 기본적으로 분산 컨테이너로 고가용성, 다중 모델 로딩, A/B 테스트를 위한 인프라 환경(EC2, 로드밸런서, 오토스케일링, 모델 아티팩트 로딩 등)이 사전 구축되어 있기에 몇 줄의 코드만으로 Endpoint가 자동으로 생성되기에, 모델을 프로덕션에 빠르게 배포할 수 있습니다. 5 | 6 | ![ptn_1_01](../../images/key_features/ptn_1_01.png) 7 | 8 | ## Importing models 9 | 10 | * 사전 훈련된 모델 파일들(예: model weights, model definition 등)을 model.tar.gz로 압축합니다. 11 | * S3 버킷에 모델 아티팩트 model.tar.gz을 업로드합니다. 12 | * SageMaker Model에 모델 아티팩트를 등록합니다. 13 | * SageMaker Endpoint configuration을 통해 엔드포인트 설정을 구성합니다. 14 | * SageMaker API나 AWS 콘솔을 통해 SageMaker 호스팅 job을 수행하여 SageMaker Endpoint를 배포합니다. 
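The steps above can also be scripted end to end with the SageMaker Python SDK rather than the raw APIs; under the hood it issues the same three calls (CreateModel, CreateEndpointConfig, CreateEndpoint) shown in the next section. The sketch below is a minimal example for the PyTorch model used in this pattern; the framework version, instance type, and S3 path are illustrative assumptions.

```python
# Minimal sketch (assumed versions/paths): register a model.tar.gz from S3 and deploy it.
import sagemaker
from sagemaker.pytorch import PyTorchModel

role = sagemaker.get_execution_role()

model = PyTorchModel(
    model_data="s3://<bucket>/<prefix>/model.tar.gz",  # artifact uploaded in the steps above
    role=role,
    entry_point="inference_nsmc.py",   # model_fn/input_fn/predict_fn/output_fn handlers
    source_dir="src",
    framework_version="1.8.1",         # assumed framework version
    py_version="py3",
)

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",      # assumed instance type
)
```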
15 | 16 | ## 엔드포인트 생성 3단계 17 | 18 | ### Create Model 19 | ```shell 20 | aws sagemaker create-model 21 | --model-name model1 22 | --primary-container '{"Image": "123.dkr.ecr.amazonaws.com/algo", 23 | "ModelDataUrl": "s3://bkt/model1.tar.gz"}' 24 | --execution-role-arn arn:aws:iam::123:role/me 25 | ``` 26 | 27 | ### Create Endpoint Config 28 | ```shell 29 | aws sagemaker create-endpoint-config 30 | --endpoint-config-name model1-config 31 | --production-variants '{"InitialInstanceCount": 2, 32 | "InstanceType": "ml.m4.xlarge", 33 | "InitialVariantWeight": 1, 34 | "ModelName": "model1", 35 | "VariantName": "AllTraffic"}' 36 | ``` 37 | 38 | ### Create Endpoint 39 | ```shell 40 | aws sagemaker create-endpoint 41 | --endpoint-name my-endpoint 42 | --endpoint-config-name model1-config 43 | ``` -------------------------------------------------------------------------------- /key_features/ptn_1_realtime-inference/src/inference_nsmc.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import logging 4 | import torch 5 | from torch import nn 6 | from transformers import ElectraConfig 7 | from transformers import ElectraModel, AutoTokenizer, ElectraTokenizer, ElectraForSequenceClassification 8 | 9 | logging.basicConfig( 10 | level=logging.INFO, 11 | format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', 12 | handlers=[ 13 | logging.FileHandler(filename='tmp.log'), 14 | logging.StreamHandler(sys.stdout) 15 | ] 16 | ) 17 | logger = logging.getLogger(__name__) 18 | 19 | max_seq_length = 128 20 | classes = ['Neg', 'Pos'] 21 | 22 | tokenizer = AutoTokenizer.from_pretrained("daekeun-ml/koelectra-small-v3-nsmc") 23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | 26 | def model_fn(model_path=None): 27 | #### 28 | # If you have your own trained model 29 | # Huggingface pre-trained model: 'monologg/koelectra-small-v3-discriminator' 30 | #### 31 | #config = ElectraConfig.from_json_file(f'{model_path}/config.json') 32 | #model = ElectraForSequenceClassification.from_pretrained(f'{model_path}/model.pth', config=config) 33 | 34 | # Download model from the Huggingface hub 35 | model = ElectraForSequenceClassification.from_pretrained('daekeun-ml/koelectra-small-v3-nsmc') 36 | model.to(device) 37 | return model 38 | 39 | 40 | def input_fn(input_data, content_type="application/jsonlines"): 41 | data_str = input_data.decode("utf-8") 42 | jsonlines = data_str.split("\n") 43 | transformed_inputs = [] 44 | 45 | for jsonline in jsonlines: 46 | text = json.loads(jsonline)["text"][0] 47 | logger.info("input text: {}".format(text)) 48 | encode_plus_token = tokenizer.encode_plus( 49 | text, 50 | max_length=max_seq_length, 51 | add_special_tokens=True, 52 | return_token_type_ids=False, 53 | padding="max_length", 54 | return_attention_mask=True, 55 | return_tensors="pt", 56 | truncation=True, 57 | ) 58 | transformed_inputs.append(encode_plus_token) 59 | 60 | return transformed_inputs 61 | 62 | 63 | def predict_fn(transformed_inputs, model): 64 | predicted_classes = [] 65 | 66 | for data in transformed_inputs: 67 | data = data.to(device) 68 | output = model(**data) 69 | 70 | softmax_fn = nn.Softmax(dim=1) 71 | softmax_output = softmax_fn(output[0]) 72 | _, prediction = torch.max(softmax_output, dim=1) 73 | 74 | predicted_class_idx = prediction.item() 75 | predicted_class = classes[predicted_class_idx] 76 | score = softmax_output[0][predicted_class_idx] 77 | logger.info("predicted_class: {}".format(predicted_class)) 78 | 79 
| prediction_dict = {} 80 | prediction_dict["predicted_label"] = predicted_class 81 | prediction_dict['score'] = score.cpu().detach().numpy().tolist() 82 | 83 | jsonline = json.dumps(prediction_dict) 84 | logger.info("jsonline: {}".format(jsonline)) 85 | predicted_classes.append(jsonline) 86 | 87 | predicted_classes_jsonlines = "\n".join(predicted_classes) 88 | return predicted_classes_jsonlines 89 | 90 | 91 | def output_fn(outputs, accept="application/jsonlines"): 92 | return outputs, accept -------------------------------------------------------------------------------- /key_features/ptn_1_realtime-inference/src/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.12.3 -------------------------------------------------------------------------------- /key_features/ptn_1_realtime-inference/src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from sagemaker.s3 import S3Uploader 4 | from sagemaker.predictor import Predictor 5 | from sagemaker.serializers import JSONLinesSerializer 6 | from sagemaker.deserializers import JSONLinesDeserializer 7 | 8 | 9 | def print_outputs(outputs): 10 | jsonlines = outputs.split('\n') 11 | 12 | for jsonline in jsonlines: 13 | print(json.loads(jsonline)) 14 | 15 | 16 | def prepare_model_artifact(model_path, 17 | model_artifact_path='model_and_code', 18 | model_artifact_name='model.tar.gz'): 19 | 20 | os.system(f'rm -rf {model_artifact_path}') 21 | os.system(f'mkdir {model_artifact_path} {model_artifact_path}/code') 22 | os.system(f'cp {model_path}/*.* {model_artifact_path}') 23 | os.system(f'cp ./src/* {model_artifact_path}/code') 24 | os.system(f'tar cvzf {model_artifact_name} -C {model_artifact_path}/ .') 25 | os.system(f'rm -rf {model_artifact_path}') 26 | print(f'Archived {model_artifact_name}') 27 | 28 | 29 | def upload_model_artifact_to_s3(model_variant, model_path, bucket, prefix, 30 | model_artifact_path='model_and_code', 31 | model_artifact_name='model.tar.gz'): 32 | prepare_model_artifact(model_path, model_artifact_path, model_artifact_name) 33 | model_s3_uri = S3Uploader.upload(model_artifact_name,'s3://{}/{}/{}'.format(bucket, prefix, model_variant)) 34 | os.system(f'rm -rf {model_artifact_name}') 35 | print(f'Uploaded to {model_s3_uri}') 36 | 37 | return model_s3_uri 38 | 39 | 40 | class NLPPredictor(Predictor): 41 | def __init__(self, endpoint_name, sagemaker_session): 42 | super().__init__( 43 | endpoint_name, 44 | sagemaker_session=sagemaker_session, 45 | serializer=JSONLinesSerializer(), 46 | deserializer=JSONLinesDeserializer(), 47 | ) -------------------------------------------------------------------------------- /key_features/ptn_2_batch-transform/README.md: -------------------------------------------------------------------------------- 1 | # Batch Transform 2 | 3 | ## Overview 4 | Latency에 덜 민감한 애플리케이션이나 일정 주기(일단위, 주단위, 월단위 등)로 수행되는 일괄 추론 작업은 SageMaker Batch Transform을 사용하여 비용을 절감하는 것을 권장합니다. 상시 띄워져 있는 엔드포인트와 달리, Batch Transform은 배치 데이터에 대한 추론이 수행되는 순간에만 인스턴스를 사용하기 때문입니다. 이 때, Amazon S3에 저장되는 Batch Transform 출력값들을 Amazon DynamoDB 또는 RDS와 같은 데이터베이스와 연동하여 저장함으로써 대용량의 배치 job을 편리하게 수행할 수 있습니다. 
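A scheduled batch job of this kind takes only a few lines with the SageMaker Python SDK. The sketch below is a minimal example; the model name, S3 paths, instance type, and content type are placeholder assumptions.

```python
# Minimal sketch (assumed names/paths): run a transform job over JSON Lines input in S3.
from sagemaker.transformer import Transformer

transformer = Transformer(
    model_name="<registered-sagemaker-model>",   # an already-created SageMaker Model
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path="s3://<bucket>/<prefix>/batch-output/",
)

transformer.transform(
    data="s3://<bucket>/<prefix>/batch-input/",  # one record per line
    content_type="application/jsonlines",
    split_type="Line",                           # split the input file line by line
)
transformer.wait()  # instances run (and are billed) only for the duration of the job
```

The output objects land in the `output_path` prefix on S3, from where they can be loaded into DynamoDB or RDS as described above.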
5 | 6 | ![ptn_2_01](../../images/key_features/ptn_2_01.png) -------------------------------------------------------------------------------- /key_features/ptn_3_async-inference/README.md: -------------------------------------------------------------------------------- 1 | # Asynchronous Inference 2 | 3 | ## Overview 4 | SageMaker 비동기(asynchronous) 추론 엔드포인트는 처리 시간이 수 분 이상 걸릴 수 있는 대규모 입력 페이로드를 사용하는 유즈케이스에 적합합니다. 5 | AsyncInvokeEndpoint API를 호출하면 Amazon S3에 payload를 배치하고 별도의 관리형 queue에 입력 데이터에 대한 요청을 포인터로 전송합니다. 호스팅 인스턴스는 포인터를 사용하여 payload 데이터를 다운로드하고 추론 컨테이너를 통해 모델 추론 결괏값을 계산하여 S3에 저장합니다. 이 때, 선택적으로 SNS를 통해 추론이 완료되었다는 알림을 받을 수 있습니다. 6 | 7 | ![ptn_3_01](../../images/key_features/ptn_3_01.png) -------------------------------------------------------------------------------- /key_features/ptn_3_async-inference/src/coco_labels.txt: -------------------------------------------------------------------------------- 1 | 1,1,person 2 | 2,2,bicycle 3 | 3,3,car 4 | 4,4,motorcycle 5 | 5,5,airplane 6 | 6,6,bus 7 | 7,7,train 8 | 8,8,truck 9 | 9,9,boat 10 | 10,10,traffic light 11 | 11,11,fire hydrant 12 | 13,12,stop sign 13 | 14,13,parking meter 14 | 15,14,bench 15 | 16,15,bird 16 | 17,16,cat 17 | 18,17,dog 18 | 19,18,horse 19 | 20,19,sheep 20 | 21,20,cow 21 | 22,21,elephant 22 | 23,22,bear 23 | 24,23,zebra 24 | 25,24,giraffe 25 | 27,25,backpack 26 | 28,26,umbrella 27 | 31,27,handbag 28 | 32,28,tie 29 | 33,29,suitcase 30 | 34,30,frisbee 31 | 35,31,skis 32 | 36,32,snowboard 33 | 37,33,sports ball 34 | 38,34,kite 35 | 39,35,baseball bat 36 | 40,36,baseball glove 37 | 41,37,skateboard 38 | 42,38,surfboard 39 | 43,39,tennis racket 40 | 44,40,bottle 41 | 46,41,wine glass 42 | 47,42,cup 43 | 48,43,fork 44 | 49,44,knife 45 | 50,45,spoon 46 | 51,46,bowl 47 | 52,47,banana 48 | 53,48,apple 49 | 54,49,sandwich 50 | 55,50,orange 51 | 56,51,broccoli 52 | 57,52,carrot 53 | 58,53,hot dog 54 | 59,54,pizza 55 | 60,55,donut 56 | 61,56,cake 57 | 62,57,chair 58 | 63,58,couch 59 | 64,59,potted plant 60 | 65,60,bed 61 | 67,61,dining table 62 | 70,62,toilet 63 | 72,63,tv 64 | 73,64,laptop 65 | 74,65,mouse 66 | 75,66,remote 67 | 76,67,keyboard 68 | 77,68,cell phone 69 | 78,69,microwave 70 | 79,70,oven 71 | 80,71,toaster 72 | 81,72,sink 73 | 82,73,refrigerator 74 | 84,74,book 75 | 85,75,clock 76 | 86,76,vase 77 | 87,77,scissors 78 | 88,78,teddy bear 79 | 89,79,hair drier 80 | 90,80,toothbrush -------------------------------------------------------------------------------- /key_features/ptn_3_async-inference/src/inference.py: -------------------------------------------------------------------------------- 1 | 2 | import cv2 3 | import json 4 | import torch 5 | import torchvision.transforms as transforms 6 | from six import BytesIO 7 | import io 8 | import numpy as np 9 | import tempfile 10 | 11 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 12 | 13 | def video2frame(file_path, frame_width, frame_height, interval): 14 | """ 15 | Extract frame from video by interval 16 | :param video_src_path: video src path 17 | :param video: video file name 18 | :param frame_width: frame width 19 | :param frame_height: frame height 20 | :param interval: interval for frame to extract 21 | :return: list of numpy.ndarray 22 | """ 23 | video_frames = [] 24 | cap = cv2.VideoCapture(file_path) 25 | frame_index = 0 26 | frame_count = 0 27 | if cap.isOpened(): 28 | success = True 29 | else: 30 | success = False 31 | print("Read failed!") 32 | 33 | while success: 34 | success, frame = cap.read() 35 | if frame_index % interval == 0: 36 
| print("---> Reading the %d frame:" % frame_index, success) 37 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 38 | resize_frame = cv2.resize( 39 | frame, (frame_width, frame_height), interpolation=cv2.INTER_AREA 40 | ) 41 | video_frames.append(resize_frame) 42 | frame_count += 1 43 | 44 | frame_index += 1 45 | 46 | cap.release() 47 | print(f'Total frames={frame_index}, Number of extracted frames={frame_count}') 48 | return video_frames 49 | 50 | 51 | def model_fn(model_dir): 52 | ''' 53 | Loads the model into memory from storage and return the model. 54 | ''' 55 | model = torch.load(model_dir + '/model.pth', map_location=torch.device(device)) 56 | model = model.eval() 57 | return model 58 | 59 | 60 | def input_fn(request_body, request_content_type=None): 61 | frame_width = 256 62 | frame_height = 256 63 | interval = 30 64 | print("content_type=") 65 | print(request_content_type) 66 | 67 | f = io.BytesIO(request_body) 68 | with tempfile.NamedTemporaryFile(delete=False) as tfile: 69 | tfile.write(f.read()) 70 | filename = tfile.name 71 | 72 | video_frames = video2frame(filename, frame_width, frame_height, interval) 73 | return video_frames 74 | 75 | 76 | def predict_fn(video_frames, model): 77 | transform = transforms.Compose([ 78 | transforms.Lambda(lambda video_frames: torch.stack([transforms.ToTensor()(frame) for frame in video_frames])) # returns a 4D tensor 79 | ]) 80 | image_tensors = transform(video_frames).to(device) 81 | 82 | with torch.no_grad(): 83 | output = model(image_tensors) 84 | return output 85 | 86 | 87 | def output_fn(output_batch, accept='application/json'): 88 | res = [] 89 | 90 | print(f'output list length={len(output_batch)}') 91 | for output in output_batch: 92 | boxes = output['boxes'].detach().cpu().numpy() 93 | labels = output['labels'].detach().cpu().numpy() 94 | scores = output['scores'].detach().cpu().numpy() 95 | masks = output['masks'].detach().cpu().numpy() 96 | masks = np.squeeze(masks.transpose(2,3,0,1)) # 4D(batch x 1 height x width) to 3D(height x width x batch) 97 | 98 | res.append({ 99 | 'boxes': boxes.tolist(), 100 | 'labels': labels.tolist(), 101 | 'scores': scores.tolist(), 102 | 'masks': masks.tolist() 103 | }) 104 | 105 | return json.dumps(res) 106 | -------------------------------------------------------------------------------- /key_features/ptn_3_async-inference/src/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python -------------------------------------------------------------------------------- /key_features/ptn_3_async-inference/src/visualize.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import colorsys 4 | from skimage.measure import find_contours 5 | import random 6 | import matplotlib.pyplot as plt 7 | from matplotlib import patches, lines 8 | from matplotlib.patches import Polygon 9 | 10 | def get_label_map(label_file): 11 | label_map = {} 12 | labels = open(label_file, 'r') 13 | 14 | for line in labels: 15 | line = line.rstrip("\n") 16 | ids = line.split(',') 17 | label_map[int(ids[0])] = ids[2] 18 | 19 | return label_map 20 | 21 | 22 | def random_colors(N, bright=False): 23 | """ 24 | Generate random colors. 25 | To get visually distinct colors, generate them in HSV space then 26 | convert to RGB. 
27 | """ 28 | brightness = 1.0 if bright else 0.7 29 | hsv = [(i / N, 1, brightness) for i in range(N)] 30 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 31 | random.shuffle(colors) 32 | return colors 33 | 34 | 35 | def apply_mask(image, mask, color, alpha=0.3): 36 | """Apply the given mask to the image. 37 | """ 38 | for c in range(3): 39 | image[:, :, c] = np.where(mask == 1, 40 | image[:, :, c] * 41 | (1 - alpha) + alpha * color[c] * 255, 42 | image[:, :, c]) 43 | return image 44 | 45 | 46 | def display_instances(image, boxes, masks, class_ids, class_names, 47 | scores=None, title="", 48 | score_thres=0.5, mask_thres=0.5, 49 | figsize=(10, 10), ax=None, 50 | show_mask=True, show_bbox=True, 51 | colors=None, framework='pytorch'): 52 | """ 53 | boxes: [num_instance, (x1, y1, x2, y2, class_id)] in image coordinates. 54 | masks: [height, width, num_instances] 55 | class_ids: [num_instances] 56 | class_names: list of class names of the dataset 57 | scores: (optional) confidence scores for each box 58 | title: (optional) Figure title 59 | score_thres: To return only objects whose score is greater than to a certain value in the detected result. 60 | mask_thres: Threshold for binarizing the mask image 61 | figsize: (optional) the size of the image 62 | show_mask, show_bbox: To show masks and bounding boxes or not 63 | colors: (optional) An array or colors to use with each object 64 | framework: pytorch/mxnet 65 | """ 66 | 67 | if framework == 'mxnet': 68 | boxes = boxes.asnumpy() 69 | masks = masks.asnumpy() 70 | scores = scores.asnumpy() 71 | else: 72 | boxes = np.array(boxes) 73 | masks = np.array(masks) 74 | scores = np.array(scores) 75 | 76 | # Get only results that are above the threshold. Default threshold is 0.5. 77 | scores = scores[scores > score_thres] 78 | # Number of instances 79 | N = len(scores) 80 | 81 | if not N: 82 | print("\n*** No instances to display *** \n") 83 | 84 | # If no axis is passed, create one and automatically call show() 85 | auto_show = False 86 | if not ax: 87 | _, ax = plt.subplots(1, figsize=figsize) 88 | auto_show = True 89 | 90 | # Generate random colors 91 | colors = colors or random_colors(N) 92 | 93 | # Show area outside image boundaries. 94 | height, width = image.shape[:2] 95 | ax.set_ylim(height + 10, -10) 96 | ax.set_xlim(-10, width + 10) 97 | ax.axis('off') 98 | ax.set_title(title) 99 | masked_image = image.astype(np.uint32).copy() 100 | 101 | for i in range(N): 102 | color = colors[i] 103 | 104 | # Bounding box 105 | if not np.any(boxes[i]): 106 | # Skip this instance. Has no bbox. Likely lost in image cropping. 107 | continue 108 | x1, y1, x2, y2 = boxes[i] 109 | 110 | if show_bbox: 111 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 112 | alpha=0.7, linestyle="dashed", 113 | edgecolor=color, facecolor='none') 114 | ax.add_patch(p) 115 | 116 | # Label 117 | class_id = class_ids[i] 118 | score = scores[i] if scores is not None else None 119 | #predicted_class = class_info[int(cls_pred)] 120 | label = class_names[int(class_id)] 121 | caption = "{} {:.3f}".format(label, score) if score else label 122 | ax.text(x1, y1, caption, color='w', verticalalignment='top', 123 | size=12, bbox={'color': color, 'pad': 0}) 124 | 125 | # Mask 126 | mask = (masks[:, :, i] > mask_thres) * 1 127 | if show_mask: 128 | masked_image = apply_mask(masked_image, mask, color) 129 | 130 | # Mask Polygon 131 | # Pad to ensure proper polygons for masks that touch image edges. 
132 | padded_mask = np.zeros( 133 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 134 | 135 | padded_mask[1:-1, 1:-1] = mask 136 | contours = find_contours(padded_mask, 0.5) 137 | for verts in contours: 138 | # Subtract the padding and flip (y, x) to (x, y) 139 | verts = np.fliplr(verts) - 1 140 | p = Polygon(verts, facecolor="none", edgecolor=color) 141 | ax.add_patch(p) 142 | ax.imshow(masked_image.astype(np.uint8)) 143 | if auto_show: 144 | plt.show() 145 | 146 | #return masked_image 147 | -------------------------------------------------------------------------------- /key_features/ptn_3_async-inference/visualization/generate_gif.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "0e9f6a44", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "import json\n", 12 | "\n", 13 | "import cv2\n", 14 | "import torch\n", 15 | "import numpy as np\n", 16 | "import matplotlib.pyplot as plt\n", 17 | "import torchvision.transforms.functional as F\n", 18 | "from torchvision.utils import draw_bounding_boxes\n", 19 | "from torchvision.io import read_image" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "id": "e169a705", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "os.makedirs(\"visualization/frames/\", exist_ok=True)\n", 30 | "os.makedirs(\"visualization/annotated_frames/\", exist_ok=True)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "66c1e603", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "def video2frame(\n", 41 | " tfile,frame_width, frame_height, interval):\n", 42 | " \"\"\"\n", 43 | " Extract frame from video by interval\n", 44 | " :param video_src_path: video src path\n", 45 | " :param video: video file name\n", 46 | " :param frame_width: frame width\n", 47 | " :param frame_height: frame height\n", 48 | " :param interval: interval for frame to extract\n", 49 | " :return: list of numpy.ndarray \n", 50 | " \"\"\"\n", 51 | " video_frames = []\n", 52 | " cap = cv2.VideoCapture(tfile)\n", 53 | " frame_index = 0\n", 54 | " frame_count = 0\n", 55 | " if cap.isOpened():\n", 56 | " success = True\n", 57 | " else:\n", 58 | " success = False\n", 59 | " print(\"Read failed!\")\n", 60 | "\n", 61 | " while success:\n", 62 | " success, frame = cap.read()\n", 63 | "\n", 64 | " if frame_index % interval == 0:\n", 65 | " print(\"---> Reading the %d frame:\" % frame_index, success)\n", 66 | " resize_frame = cv2.resize(\n", 67 | " frame, (frame_width, frame_height), interpolation=cv2.INTER_AREA\n", 68 | " )\n", 69 | " video_frames.append(resize_frame)\n", 70 | " frame_count += 1\n", 71 | "\n", 72 | " frame_index += 1\n", 73 | "\n", 74 | " cap.release()\n", 75 | " \n", 76 | " print('Number of frames')\n", 77 | " print(frame_count)\n", 78 | " return video_frames" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "id": "cfb14262", 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "video_frames = video2frame('videos/ducks.mp4', 1024, 1024, 30)\n", 89 | "for i in range(len(video_frames)):\n", 90 | " cv2.imwrite(f\"visualization/frames/image-{i}.jpg\", video_frames[i])" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "id": "692fe844", 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "plt.rcParams[\"savefig.bbox\"] = 'tight'\n", 101 | "\n", 102 | 
"\n", 103 | "def save(imgs, img_num):\n", 104 | " if not isinstance(imgs, list):\n", 105 | " imgs = [imgs]\n", 106 | " fix, axs = plt.subplots(ncols=len(imgs), squeeze=False)\n", 107 | " for i, img in enumerate(imgs):\n", 108 | " img = img.detach()\n", 109 | " img = F.to_pil_image(img)\n", 110 | " \n", 111 | " axs[0, i].imshow(np.asarray(img))\n", 112 | " axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])\n", 113 | " plt.savefig(f\"visualization/annotated_frames/{str(img_num).zfill(3)}.jpg\")" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "id": "1b49141d", 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "with open(\"visualization/output.json\", \"r\") as read_file:\n", 124 | " data = json.load(read_file)\n" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "id": "c1878aa8", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "for i in range(len(data)):\n", 135 | " score_threshold = .9\n", 136 | " scores = torch.from_numpy(np.array(data[i]['scores']))\n", 137 | " boxes = torch.from_numpy(np.array(data[i]['boxes']))\n", 138 | "\n", 139 | " birds_with_boxes = [\n", 140 | " draw_bounding_boxes(read_image(f'visualization/frames/image-{i}.jpg'), boxes=boxes[scores > score_threshold], width=10)\n", 141 | " ]\n", 142 | " save(birds_with_boxes, i)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "id": "ff5a0748", 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "import glob\n", 153 | "from PIL import Image\n", 154 | "\n", 155 | "# filepaths\n", 156 | "fp_in = \"visualization/annotated_frames/*.jpg\"\n", 157 | "fp_out = \"visualization/annotated_frames/birds.gif\"\n", 158 | "\n", 159 | "\n", 160 | "img, *imgs = [Image.open(f) for f in sorted(glob.glob(fp_in))]\n", 161 | "img.save(fp=fp_out, format='GIF', append_images=imgs,\n", 162 | " save_all=True, duration=200, loop=0)\n" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "id": "e4ba837f", 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [] 172 | } 173 | ], 174 | "metadata": { 175 | "kernelspec": { 176 | "display_name": "conda_python3", 177 | "language": "python", 178 | "name": "conda_python3" 179 | }, 180 | "language_info": { 181 | "codemirror_mode": { 182 | "name": "ipython", 183 | "version": 3 184 | }, 185 | "file_extension": ".py", 186 | "mimetype": "text/x-python", 187 | "name": "python", 188 | "nbconvert_exporter": "python", 189 | "pygments_lexer": "ipython3", 190 | "version": "3.6.13" 191 | } 192 | }, 193 | "nbformat": 4, 194 | "nbformat_minor": 5 195 | } 196 | -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/README.md: -------------------------------------------------------------------------------- 1 | # Lambda Serverless Inference 2 | 3 | ## Overview 4 | re:Invent 2020에 소개된 Lambda 컨테이너 기능 지원으로 기존 Lambda에서 수행하기 어려웠던 대용량 머신 모델에 대한 추론을 보다 수월하게 실행할 수 있게 되었습니다. Lambda 컨테이너 이미지를 Amazon ECR(Amazon Elastic Container Registry)에 푸시하였다면 Lambda 함수를 생성하여 직접 컨테이너 이미지를 배포하거나 SageMaker의 API 호출로 Serverless endpoint를 쉽게 배포할 수 있습니다. 
5 | 6 | ![ptn_4.1_01](../../images/key_features/ptn_4.1_01.png) -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/cv/Dockerfile: -------------------------------------------------------------------------------- 1 | # Stage 1 lambda insight extensions 2 | # FROM public.ecr.aws/serverless/extensions/lambda-insights:12 AS lambda-insights 3 | 4 | # Stage 2 5 | FROM python:3.9.1-slim-buster AS build-image 6 | 7 | RUN apt-get update 8 | RUN apt-get upgrade -y 9 | RUN apt-get install ffmpeg libsm6 libxext6 -y 10 | RUN mkdir -p /app 11 | WORKDIR /app 12 | 13 | COPY requirements.txt ./ 14 | RUN python -m pip install -r requirements.txt -t . 15 | RUN python -m pip install awslambdaric --target . 16 | 17 | # COPY --from=lambda-insights /opt /opt 18 | COPY yolov3.cfg yolov3.weights coco.names samples/remote-control.jpeg ./ 19 | COPY app.py ./ 20 | 21 | # RIC 엔트리포인트 세팅 22 | ENTRYPOINT [ "/usr/local/bin/python", "-m", "awslambdaric" ] 23 | CMD ["app.lambda_handler"] -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/cv/README.md: -------------------------------------------------------------------------------- 1 | # Deploy Lambda Serverless endpoint - Object Detection (YOLO-v3) 2 | 3 | ## How to Run 4 | 1. 여러분의 로컬 개발 환경에서 아래 쉘스크립트를 실행하세요. 5 | ``` 6 | ./build_docker.sh yolov3 7 | ``` 8 | 2. (Optional) 로컬 환경에서 Lambda 함수를 테스트하세요. 9 | 3. SageMaker 노트북을 런칭한 다음, `lambda-serverless-endpoint-cv.ipynb`를 실행하세요. -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/cv/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | import cv2 3 | import numpy as np 4 | import base64 5 | 6 | classes = [] 7 | with open("coco.names", "r") as f: 8 | classes = [line.strip() for line in f.readlines()] 9 | 10 | # Load YOLO network 11 | net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg") 12 | layer_names = net.getLayerNames() 13 | output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()] 14 | 15 | # Create random RGB array with number of classes 16 | colors = np.random.uniform(0, 255, size=(len(classes), 3)) 17 | 18 | def load_image(name): 19 | # Open image 20 | img = cv2.imread(name) 21 | height, width = img.shape[:2] 22 | 23 | return img, height, width 24 | 25 | def load_image_from_base64(img_string): 26 | # Decode the base64 string into an image 27 | base_img = base64.b64decode(img_string) 28 | npimg = np.fromstring(base_img, dtype=np.uint8) 29 | img = cv2.imdecode(npimg, 1) 30 | 31 | # fetch image height and width 32 | height, width = img.shape[:2] 33 | 34 | return img, height, width 35 | 36 | 37 | def infer_image(img, output_layers): 38 | # pre-processing the image before feeding into the network 39 | blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), (0, 0, 0), True, crop=False) 40 | 41 | # Feed the pre-processed blob to the network 42 | net.setInput(blob) 43 | 44 | # Fetch the result 45 | outs = net.forward(output_layers) 46 | 47 | return outs 48 | 49 | def generate_bounding_boxes(outs, height, width, target_confidence): 50 | class_ids = [] 51 | confidences = [] 52 | boxes = [] 53 | for out in outs: 54 | for detection in out: 55 | scores = detection[5:] 56 | class_id = np.argmax(scores) 57 | confidence = scores[class_id] 58 | if confidence > target_confidence: 59 | # Object detected 60 | center_x = 
int(detection[0] * width) 61 | center_y = int(detection[1] * height) 62 | w = int(detection[2] * width) 63 | h = int(detection[3] * height) 64 | # Rectangle coordinates 65 | x = int(center_x - w / 2) 66 | y = int(center_y - h / 2) 67 | boxes.append([x, y, w, h]) 68 | confidences.append(float(confidence)) 69 | class_ids.append(class_id) 70 | 71 | return boxes, confidences, class_ids 72 | 73 | def draw_boxes(img, boxes, confidences, class_ids, indexes, colors, classes): 74 | for i in range(len(boxes)): 75 | if i in indexes: 76 | x, y, w, h = boxes[i] 77 | label = f"{classes[class_ids[i]]} {confidences[i]:.2f}" 78 | color = colors[i] 79 | cv2.rectangle(img, (x, y), (x + w, y + h), color, 2) 80 | cv2.putText(img, label, (x, y + 30), cv2.FONT_HERSHEY_PLAIN, 3, color, 3) 81 | 82 | return img 83 | 84 | def lambda_handler(event, context): 85 | image_string = json.loads(event.get('body')) 86 | img, height, width = load_image_from_base64(image_string['image']) 87 | 88 | outs = infer_image(img, output_layers) 89 | boxes, confidences, class_ids = generate_bounding_boxes(outs, height, width, 0.5) 90 | indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3) 91 | 92 | img = draw_boxes(img, boxes, confidences, class_ids, indexes, colors, classes) 93 | retval, buffer_img = cv2.imencode('.jpg', img) 94 | image_string = base64.b64encode(buffer_img).decode('utf8') 95 | payload = {'body': image_string} 96 | 97 | return { 98 | 'statusCode': 200, 99 | 'body': json.dumps(payload), 100 | 'headers': { 101 | 'Content-Type': 'application/json', 102 | 'Access-Control-Allow-Origin': '*' 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/cv/base.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.8 2 | COPY requirements.txt ./ 3 | RUN python -m pip install -r requirements.txt -t . 4 | COPY yolov3.cfg yolov3.weights coco.names sample_images/remote-control.jpeg ./ 5 | COPY app.py ./ 6 | CMD ["app.lambda_handler"] -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/cv/build_docker.sh: -------------------------------------------------------------------------------- 1 | algorithm_name=$1 2 | 3 | echo ==-------- Settings ---------== 4 | echo $algorithm_name 5 | 6 | if [ "$algorithm_name" == "" ] 7 | then 8 | echo "Usage: $0 " 9 | exit 1 10 | fi 11 | 12 | # Download the pre-trained yolo3 model 13 | wget https://pjreddie.com/media/files/yolov3.weights 14 | 15 | account=$(aws sts get-caller-identity --query Account --output text) 16 | 17 | # Get the region defined in the current configuration 18 | region=$(aws configure get region) 19 | #region=${region:-us-east-1} 20 | 21 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest" 22 | 23 | echo ==-------- Create ECR ---------== 24 | # If the repository doesn't exist in ECR, create it. 25 | aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1 26 | 27 | if [ $? 
-ne 0 ] 28 | then 29 | aws ecr create-repository --repository-name "${algorithm_name}" \ 30 | --image-scanning-configuration scanOnPush=true \ 31 | --region "${region}" > /dev/null 32 | fi 33 | 34 | # Get the login command from ECR and execute it directly 35 | aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname} 36 | 37 | echo ==-------- Build Docker Image ---------== 38 | # Build the docker image locally with the image name and then push it to ECR 39 | # with the full name. 40 | docker build -f Dockerfile -t ${algorithm_name} . 41 | docker tag ${algorithm_name} ${fullname} 42 | 43 | echo Local Docker Image : ${algorithm_name} 44 | echo ECR Docker Image : ${fullname} 45 | 46 | echo ==-------- Push Docker Image to ECR ---------== 47 | docker push ${fullname} 48 | 49 | echo == -------- Testing Docker Image. Open a new terminal and test the lambda function with test_lambda.sh or postman. --------== 50 | #docker run --rm -p 9000:8080 ${algorithm_name}:latest -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/cv/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/cv/entry_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ -z "${AWS_LAMBDA_RUNTIME_API}" ]; then 3 | exec /usr/bin/aws-lambda-rie /usr/local/bin/python -m awslambdaric $1 4 | else 5 | exec /usr/local/bin/python -m awslambdaric $1 6 | fi -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/cv/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python-headless==4.5.3.56 2 | numpy==1.21.2 3 | aws-xray-sdk==2.8.0 -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/cv/sample_images/remote-control.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/key_features/ptn_4.1_lambda-serverless-inference/cv/sample_images/remote-control.jpeg -------------------------------------------------------------------------------- 
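The object-detection handler in `cv/app.py` expects the Lambda event `body` to be a JSON string carrying a base64-encoded image, and it returns the annotated image in the same shape. Below is a hypothetical local test client for that contract; it assumes the container has been built and started behind the Lambda Runtime Interface Emulator (e.g. `docker run --rm -p 9000:8080 <image>`, the command commented out at the end of `build_docker.sh`) and that `requests` is installed, and it posts to the same invocation URL used by `test_lambda.sh` right below.

```python
import base64
import json

import requests  # assumed to be installed in the local test environment

# Local Lambda Runtime Interface Emulator endpoint (same URL as in test_lambda.sh).
URL = "http://localhost:9000/2015-03-31/functions/function/invocations"

# Build the event the handler expects: event["body"] is a JSON string of {"image": "<base64>"}.
with open("sample_images/remote-control.jpeg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode("utf8")
event = {"body": json.dumps({"image": img_b64})}

resp = requests.post(URL, json=event)
resp.raise_for_status()

# The handler responds with {"statusCode": 200, "body": "{\"body\": \"<base64 annotated image>\"}"}.
annotated_b64 = json.loads(resp.json()["body"])["body"]
with open("annotated.jpg", "wb") as f:
    f.write(base64.b64decode(annotated_b64))
print("Saved annotated.jpg")
```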
/key_features/ptn_4.1_lambda-serverless-inference/cv/test_lambda.sh: -------------------------------------------------------------------------------- 1 | 2 | curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" \ 3 | -d @event.json -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.1-slim-buster AS custom-build-image 2 | 3 | RUN apt-get update && apt-get upgrade -y && mkdir -p /app 4 | WORKDIR /app 5 | 6 | COPY requirements.txt ./ 7 | COPY model-nsmc ./model-nsmc 8 | COPY app.py ./ 9 | 10 | RUN python -m pip install -r requirements.txt -t . && python -m pip install awslambdaric --target . 11 | 12 | 13 | # Stage 2: final image 14 | FROM python:3.9.1-slim-buster 15 | WORKDIR /app 16 | 17 | # Copy the dependency artifacts built in Stage 1 into the final image 18 | COPY --from=custom-build-image /app /app 19 | # Add the AWS Lambda Runtime Interface Emulator for local testing 20 | ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/bin/aws-lambda-rie 21 | RUN chmod 755 /usr/bin/aws-lambda-rie 22 | COPY entry_script.sh / 23 | RUN chmod +x /entry_script.sh 24 | ENTRYPOINT [ "/entry_script.sh" ] 25 | CMD [ "app.lambda_handler" ] -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/README.md: -------------------------------------------------------------------------------- 1 | # Deploy Lambda Serverless endpoint - Korean NLP (Sentiment Classification for Naver Movie corpus) 2 | 3 | ## How to Run 4 | 1. Run the shell script below in your local development environment. 5 | ``` 6 | ./build_docker.sh kornlp-nsmc 7 | ``` 8 | 2. (Optional) Test the Lambda function in your local environment. 9 | 3. Launch a SageMaker notebook, then run `lambda-serverless-endpoint-kornlp.ipynb`.
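For step 2, note that the handler in `app.py` reads `event["body"]` as a JSON *string*, so the request is doubly encoded; building it with `json.dumps` avoids escaping quotes by hand. A minimal local-test sketch, assuming the container is running behind the Lambda Runtime Interface Emulator on port 9000 (the same invocation `test_lambda.sh` performs with curl) and that `requests` is installed:

```python
import json
import requests  # assumed to be available in the local test environment

url = "http://localhost:9000/2015-03-31/functions/function/invocations"
# event["body"] must itself be a JSON string such as '{"text": "..."}'.
event = {"body": json.dumps({"text": "반전에 반전을 거듭하는 멋진 스토리! 꼭 보세요"})}
resp = requests.post(url, json=event).json()
print(json.loads(resp["body"]))  # e.g. {'predicted_label': 'Pos', 'score': 0.98}
```

For step 3, the notebook deploys the pushed image as a serverless endpoint with the SageMaker Python SDK's `LambdaModel`. A condensed sketch of that flow is shown here; the image URI and role ARN are placeholders, and the full step-by-step version is in `lambda-serverless-endpoint-kornlp.ipynb`:

```python
from sagemaker.serverless import LambdaModel

image_uri = "<account-id>.dkr.ecr.<region>.amazonaws.com/kornlp-nsmc:latest"  # placeholder
lambda_role_arn = "arn:aws:iam::<account-id>:role/lambda-role-kornlp-hol"     # placeholder

# deploy() creates the Lambda function (the serverless endpoint) and returns a LambdaPredictor.
model = LambdaModel(image_uri=image_uri, role=lambda_role_arn)
predictor = model.deploy("my-lambda-function-nlp", timeout=50, memory_size=2048)

result = predictor.predict(event)  # same event format as the local test above
print(json.loads(result["body"]))
```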
-------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import logging 4 | import torch 5 | from torch import nn 6 | from transformers import ElectraConfig 7 | from transformers import ElectraModel, AutoTokenizer, ElectraTokenizer, ElectraForSequenceClassification 8 | logger = logging.getLogger(__name__) 9 | 10 | model_path = 'model-nsmc' 11 | max_seq_length = 128 12 | classes = ['Neg', 'Pos'] 13 | tokenizer = AutoTokenizer.from_pretrained(f"{model_path}/vocab") 14 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 15 | 16 | # Huggingface pre-trained model: 'monologg/koelectra-small-v3-discriminator' 17 | def model_fn(model_path): 18 | config = ElectraConfig.from_json_file(f'{model_path}/config.json') 19 | model = ElectraForSequenceClassification.from_pretrained(f'{model_path}/model.pth', config=config) 20 | model.to(device) 21 | return model 22 | 23 | model = model_fn(model_path) 24 | 25 | 26 | def input_fn(input_data, content_type="application/json"): 27 | 28 | text = input_data["text"] 29 | logger.info("input text: {}".format(text)) 30 | encode_plus_token = tokenizer.encode_plus( 31 | text, 32 | max_length=max_seq_length, 33 | add_special_tokens=True, 34 | return_token_type_ids=False, 35 | padding="max_length", 36 | return_attention_mask=True, 37 | return_tensors="pt", 38 | truncation=True, 39 | ) 40 | 41 | return encode_plus_token 42 | 43 | 44 | def predict_fn(data, model): 45 | 46 | data = data.to(device) 47 | output = model(**data) 48 | 49 | softmax_fn = nn.Softmax(dim=1) 50 | softmax_output = softmax_fn(output[0]) 51 | _, prediction = torch.max(softmax_output, dim=1) 52 | 53 | predicted_class_idx = prediction.item() 54 | predicted_class = classes[predicted_class_idx] 55 | score = softmax_output[0][predicted_class_idx] 56 | logger.info("predicted_class: {}".format(predicted_class)) 57 | 58 | prediction_dict = {} 59 | prediction_dict["predicted_label"] = predicted_class 60 | prediction_dict['score'] = score.cpu().detach().numpy().tolist() 61 | 62 | return prediction_dict 63 | 64 | def output_fn(outputs, accept="application/json"): 65 | return { 66 | 'statusCode': 200, 67 | 'body': json.dumps(outputs), 68 | 'headers': { 69 | 'Content-Type': accept, 70 | 'Access-Control-Allow-Origin': '*' 71 | } 72 | } 73 | 74 | 75 | def lambda_handler(event, context): 76 | print('lambda') 77 | print(event.get('body')) 78 | body = json.loads(event.get('body')) 79 | 80 | features = input_fn(body) 81 | preds = predict_fn(features, model) 82 | outputs = output_fn(preds) 83 | return outputs -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/build_docker.sh: -------------------------------------------------------------------------------- 1 | algorithm_name=$1 2 | 3 | echo ==-------- Settings ---------== 4 | echo $algorithm_name 5 | 6 | if [ "$algorithm_name" == "" ] 7 | then 8 | echo "Usage: $0 " 9 | exit 1 10 | fi 11 | 12 | # Copy models to current directory 13 | cp -r ../../multi-container-endpoint/model-nsmc ./model-nsmc 14 | 15 | account=$(aws sts get-caller-identity --query Account --output text) 16 | 17 | # Get the region defined in the current configuration 18 | region=$(aws configure get region) 19 | #region=${region:-us-east-1} 20 | 21 | 
fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest" 22 | 23 | echo ==-------- Create ECR ---------== 24 | # If the repository doesn't exist in ECR, create it. 25 | aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1 26 | 27 | if [ $? -ne 0 ] 28 | then 29 | aws ecr create-repository --repository-name "${algorithm_name}" \ 30 | --image-scanning-configuration scanOnPush=true \ 31 | --region "${region}" > /dev/null 32 | fi 33 | 34 | # Get the login command from ECR and execute it directly 35 | aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname} 36 | 37 | echo ==-------- Build Docker Image ---------== 38 | # Build the docker image locally with the image name and then push it to ECR 39 | # with the full name. 40 | docker build -f Dockerfile -t ${algorithm_name} . 41 | docker tag ${algorithm_name} ${fullname} 42 | 43 | echo Local Docker Image : ${algorithm_name} 44 | echo ECR Docker Image : ${fullname} 45 | 46 | echo ==-------- Push Docker Image to ECR ---------== 47 | docker push ${fullname} 48 | 49 | echo == -------- Testing Docker Image. Open a new terminal and test the lambda function with test_lambda.sh or postman. --------== 50 | docker run --rm -p 9000:8080 ${algorithm_name}:latest -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/entry_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ -z "${AWS_LAMBDA_RUNTIME_API}" ]; then 3 | exec /usr/bin/aws-lambda-rie /usr/local/bin/python -m awslambdaric $1 4 | else 5 | exec /usr/local/bin/python -m awslambdaric $1 6 | fi -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/lambda-serverless-endpoint-kornlp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "54c7d6db", 6 | "metadata": {}, 7 | "source": [ 8 | "# Deploy Serverless endpoint - Korean NLP (Sentiment Classification for Naver Movie corpus)\n", 9 | "---\n", 10 | "\n", 11 | "\n", 12 | "## Overview\n", 13 | "\n", 14 | "re:Invent 2020에 소개된 Lambda 컨테이너 기능 지원으로 기존 Lambda에서 수행하기 어려웠던 대용량 머신 모델에 대한 추론을 보다 수월하게 실행할 수 있게 되었습니다. Lambda 컨테이너 이미지를 Amazon ECR(Amazon Elastic Container Registry)에 푸시하였다면 Lambda 함수를 생성하여 직접 컨테이너 이미지를 배포하거나 SageMaker의 API 호출로 Serverless endpoint를 쉽게 배포할 수 있습니다.\n", 15 | "\n", 16 | "자세한 내용은 아래 링크를 참조해 주세요.\n", 17 | "- AWS Lambda의 새로운 기능 — 컨테이너 이미지 지원: https://aws.amazon.com/ko/blogs/korea/new-for-aws-lambda-container-image-support/\n", 18 | "- SageMaker Serverless Inference: https://sagemaker.readthedocs.io/en/stable/overview.html?highlight=lambdamodel#serverless-inference\n", 19 | "- AWS Builders Online - AWS Lambda 컨테이너 이미지 서비스 활용하기 (김태수 SA): https://www.youtube.com/watch?v=tTg9Lp7Sqok" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "id": "a3fac3f4", 25 | "metadata": {}, 26 | "source": [ 27 | "
\n", 28 | "\n", 29 | "## 1. Preparation\n", 30 | "---\n", 31 | "\n", 32 | "필요한 함수들을 정의하고 Serverless 추론에 필요한 권한을 아래와 같이 설정합니다. 참고로, 직접 Lambda Container 함수를 배포 시에는 ECR 리포지토리에 대한 억세스를 자동으로 생성해 줍니다.\n", 33 | "\n", 34 | "- SageMaker과 연결된 role 대해 ECR 억세스를 허용하는 policy 생성 및 연결\n", 35 | "- SageMaker 노트북에서 lambda를 실행할 수 있는 role 생성\n", 36 | "- Lambda 함수가 ECR private 리포지토리에 연결하는 억세스를 허용하는 policy 생성 및 연결 " 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "id": "be7bddc5", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import json\n", 47 | "import time\n", 48 | "import boto3\n", 49 | "import sagemaker\n", 50 | "import base64\n", 51 | "from sagemaker import get_execution_role\n", 52 | "iam = boto3.client('iam')\n", 53 | "ecr = boto3.client('ecr')\n", 54 | "\n", 55 | "sm_role_arn = get_execution_role()\n", 56 | "sm_role_name = sm_role_arn.split('/')[-1]\n", 57 | "boto_session = boto3.session.Session()\n", 58 | "region = boto_session.region_name\n", 59 | "account = boto3.client('sts').get_caller_identity()['Account']" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "f7db9908", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "def attach_sm_ecr_policy(sm_role_name):\n", 70 | " iam = boto3.client('iam')\n", 71 | " try:\n", 72 | " policy_response = iam.attach_role_policy(\n", 73 | " RoleName=sm_role_name,\n", 74 | " PolicyArn='arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess'\n", 75 | " )\n", 76 | " return policy_response\n", 77 | " except iam.exceptions.from_code('iam:AttachRolePolicy'):\n", 78 | " print(f'[ERROR] SageMaker is not authorized to perform: iam:AttachRolePolicy on {sm_role_name}. Please add iam policy to this role') \n", 79 | "\n", 80 | "def attach_private_ecr_policy(repository_name, region, account):\n", 81 | " ecr = boto3.client('ecr') \n", 82 | " ecr_policy_json = {\n", 83 | " \"Version\": \"2008-10-17\",\n", 84 | " \"Statement\": [\n", 85 | " {\n", 86 | " \"Sid\": \"LambdaECRImageRetrievalPolicy\",\n", 87 | " \"Effect\": \"Allow\",\n", 88 | " \"Principal\": {\n", 89 | " \"Service\": \"lambda.amazonaws.com\"\n", 90 | " },\n", 91 | " \"Action\": [\n", 92 | " \"ecr:BatchGetImage\",\n", 93 | " \"ecr:DeleteRepositoryPolicy\",\n", 94 | " \"ecr:GetDownloadUrlForLayer\",\n", 95 | " \"ecr:GetRepositoryPolicy\",\n", 96 | " \"ecr:SetRepositoryPolicy\"\n", 97 | " ],\n", 98 | " \"Condition\": {\n", 99 | " \"StringLike\": {\n", 100 | " \"aws:sourceArn\": f\"arn:aws:lambda:{region}:{account}:function:*\"\n", 101 | " }\n", 102 | " }\n", 103 | " }\n", 104 | " ]\n", 105 | " }\n", 106 | " \n", 107 | " try:\n", 108 | " response = ecr.set_repository_policy(repositoryName=repository_name, policyText=json.dumps(ecr_policy_json))\n", 109 | " return response\n", 110 | " except ecr.exceptions.from_code('AccessDeniedException'):\n", 111 | " print(f'Please add ECR policy on {sm_role_name}') \n", 112 | " \n", 113 | "\n", 114 | "def create_lambda_role(role_name):\n", 115 | " iam = boto3.client('iam')\n", 116 | " lambda_policy = {\n", 117 | " \"Version\": \"2012-10-17\",\n", 118 | " \"Statement\": [\n", 119 | " {\n", 120 | " \"Effect\": \"Allow\",\n", 121 | " \"Principal\": {\n", 122 | " \"Service\": \"lambda.amazonaws.com\"\n", 123 | " },\n", 124 | " \"Action\": [ \n", 125 | " \"sts:AssumeRole\"\n", 126 | " ]\n", 127 | " }\n", 128 | " ]\n", 129 | " } \n", 130 | " \n", 131 | " response = iam.create_role(\n", 132 | " RoleName=role_name,\n", 133 | " AssumeRolePolicyDocument=json.dumps(lambda_policy)\n", 134 
| " ) \n", 135 | " print(response)\n", 136 | "\n", 137 | " policy_response = iam.attach_role_policy(\n", 138 | " RoleName=role_name,\n", 139 | " PolicyArn='arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'\n", 140 | " )\n", 141 | " return response['Role']['Arn']\n", 142 | " \n", 143 | " \n", 144 | "def delete_lambda_role(role_name):\n", 145 | " iam = boto3.client('iam')\n", 146 | " response = iam.detach_role_policy(\n", 147 | " RoleName=role_name,\n", 148 | " PolicyArn='arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'\n", 149 | " )\n", 150 | " response = iam.delete_role(RoleName=role_name)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "id": "9d87aa7a", 156 | "metadata": {}, 157 | "source": [ 158 | "### Attach SageMaker policy" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "id": "36a68a5e", 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "attach_sm_ecr_policy(sm_role_name)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "id": "d7b74626", 174 | "metadata": {}, 175 | "source": [ 176 | "### Create Lambda Role for Serverless Inference" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "id": "27945459", 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "role_name = 'lambda-role-kornlp-hol'\n", 187 | "repository_name = 'kornlp-nsmc'\n", 188 | "lambda_role_arn = create_lambda_role(role_name)\n", 189 | "attach_private_ecr_policy(repository_name, region, account)\n", 190 | "time.sleep(10)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "id": "3a8fd1d4", 196 | "metadata": {}, 197 | "source": [ 198 | "
\n", 199 | "\n", 200 | "## 2. Deploy & Test\n", 201 | "---\n", 202 | "\n", 203 | "도커 이미지가 ECR에 푸시되고 적절한 Lambda Role이 생성되었다면, 단 두 줄의 코드로 `LambdaModel` 및 `LambdaPredictor` 리소스를 순차적으로 생성하여 Serverless Endpoint를 쉽게 생성할 수 있습니다. Serverless Endpoint는 내부적으로 Lambda Container 함수와 동일하므로 Endpoint에 대한 내역을 AWS Console 페이지의 AWS Lambda에서 확인할 수 있으며, 배포 전 Lambda 콘솔 창에서 직접 테스트를 수행할 수도 있습니다. " 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "id": "16c3db05", 209 | "metadata": {}, 210 | "source": [ 211 | "### Deploy\n" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "id": "31e9bb36", 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "from sagemaker.serverless import LambdaModel\n", 222 | "image_uri = f'{account}.dkr.ecr.{region}.amazonaws.com/{repository_name}:latest'\n", 223 | "model = LambdaModel(image_uri=image_uri, role=lambda_role_arn)\n", 224 | "predictor = model.deploy(\"my-lambda-function-nlp\", timeout=50, memory_size=2048)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "id": "340d4467", 230 | "metadata": {}, 231 | "source": [ 232 | "### Test\n", 233 | "\n", 234 | "Lambda 최초 호출 시 Cold start로 지연 시간이 발생하지만, 최초 호출 이후에는 warm 상태를 유지하기 때문에 빠르게 응답합니다. 물론 수 분 동안 호출이 되지 않거나 요청이 많아지면 cold 상태로 바뀐다는 점을 유의해 주세요." 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "id": "9fd33ce3", 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "def get_kornlp_prediction(input_str):\n", 245 | " input_json = {\n", 246 | " \"body\": \"{\\\"text\\\": \\\"\" + input_str + \"\\\"}\"\n", 247 | " }\n", 248 | "\n", 249 | " results = predictor.predict(input_json) \n", 250 | " return json.loads(results['body']) " 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "id": "79889792", 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "input_str = \"개인적으로 액션을 좋아하지 않지만, 이 영화는 예외입니다. 반전을 거듭하는 멋진 스토리와 박력 있는 연출이 일품!\"\n", 261 | "get_kornlp_prediction(input_str)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "id": "bee6e32d", 267 | "metadata": {}, 268 | "source": [ 269 | "최초 호출 이후에는 빠르게 추론 결과를 얻을 수 있습니다." 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "id": "8e47c47f", 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "input_str = \"어휴, 이렇게 재미없을 수가 있을까요? 시간이 아깝습니다.\"\n", 280 | "get_kornlp_prediction(input_str)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "id": "86479eaf", 286 | "metadata": {}, 287 | "source": [ 288 | "여러분이 작성한 문장으로 자유롭게 테스트해 보세요. 아래 코드 셀을 반복해서 실행하셔도 됩니다." 
289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "id": "5eb578ce", 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "your_input_str = input()\n", 299 | "print(get_kornlp_prediction(your_input_str))" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "id": "8082a915", 305 | "metadata": {}, 306 | "source": [ 307 | "### Check Model Latency" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "id": "82a0b446", 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "import time\n", 318 | "start = time.time()\n", 319 | "for _ in range(100):\n", 320 | " result = get_kornlp_prediction(input_str)\n", 321 | "inference_time = (time.time()-start)\n", 322 | "print(f'Inference time is {inference_time:.4f} ms.')" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "id": "c41594c5", 328 | "metadata": {}, 329 | "source": [ 330 | "
\n", 331 | "\n", 332 | "## Clean up\n", 333 | "---\n", 334 | "테스트를 완료했으면 `delete_model()` 및 `delete_predictor()` 메소드를 사용하여 LambdaModel 및 LambdaPredictor 리소스를 해제합니다." 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "id": "00e4ba97", 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "model.delete_model()\n", 345 | "predictor.delete_predictor()\n", 346 | "delete_lambda_role(role_name)" 347 | ] 348 | } 349 | ], 350 | "metadata": { 351 | "kernelspec": { 352 | "display_name": "conda_pytorch_latest_p37", 353 | "language": "python", 354 | "name": "conda_pytorch_latest_p37" 355 | }, 356 | "language_info": { 357 | "codemirror_mode": { 358 | "name": "ipython", 359 | "version": 3 360 | }, 361 | "file_extension": ".py", 362 | "mimetype": "text/x-python", 363 | "name": "python", 364 | "nbconvert_exporter": "python", 365 | "pygments_lexer": "ipython3", 366 | "version": "3.7.12" 367 | } 368 | }, 369 | "nbformat": 4, 370 | "nbformat_minor": 5 371 | } 372 | -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/model-nsmc/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "monologg/koelectra-small-v3-discriminator", 3 | "architectures": [ 4 | "ElectraForSequenceClassification" 5 | ], 6 | "attention_probs_dropout_prob": 0.1, 7 | "classifier_dropout": null, 8 | "embedding_size": 128, 9 | "hidden_act": "gelu", 10 | "hidden_dropout_prob": 0.1, 11 | "hidden_size": 256, 12 | "id2label": { 13 | "0": "0", 14 | "1": "1" 15 | }, 16 | "initializer_range": 0.02, 17 | "intermediate_size": 1024, 18 | "label2id": { 19 | "0": 0, 20 | "1": 1 21 | }, 22 | "layer_norm_eps": 1e-12, 23 | "max_position_embeddings": 512, 24 | "model_type": "electra", 25 | "num_attention_heads": 4, 26 | "num_hidden_layers": 12, 27 | "pad_token_id": 0, 28 | "position_embedding_type": "absolute", 29 | "problem_type": "single_label_classification", 30 | "summary_activation": "gelu", 31 | "summary_last_dropout": 0.1, 32 | "summary_type": "first", 33 | "summary_use_proj": true, 34 | "torch_dtype": "float32", 35 | "transformers_version": "4.11.3", 36 | "type_vocab_size": 2, 37 | "vocab_size": 35000 38 | } 39 | -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/model-nsmc/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/key_features/ptn_4.1_lambda-serverless-inference/kornlp/model-nsmc/training_args.bin -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/model-nsmc/vocab/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/model-nsmc/vocab/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, 
"strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "monologg/koelectra-base-v3-discriminator", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "ElectraTokenizer"} -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers[torch]==4.11.3 -------------------------------------------------------------------------------- /key_features/ptn_4.1_lambda-serverless-inference/kornlp/test_lambda.sh: -------------------------------------------------------------------------------- 1 | curl --header "Content-Type: application/json" \ 2 | --request POST \ 3 | --data '{"body": "{\"text\": \"반전에 반전을 거듭하는 멋진 스토리! 100점을 줘도 모자란 불후의 명작입니다. 꼭 보세요\"}"}' \ 4 | http://localhost:9000/2015-03-31/functions/function/invocations -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/README.md: -------------------------------------------------------------------------------- 1 | # SageMaker Serverless Inference 2 | 3 | ## Overview 4 | Amazon SageMaker Serverless Inference는 re:Invent 2021에 런칭된 신규 추론 옵션으로 호스팅 인프라 관리에 대한 부담 없이 머신 러닝을 모델을 쉽게 배포하고 확장할 수 있도록 제작된 신규 추론 옵션입니다. SageMaker Serverless Inference는 컴퓨팅 리소스를 자동으로 시작하고 트래픽에 따라 자동으로 스케일 인/아웃을 수행하므로 인스턴스 유형을 선택하거나 스케일링 정책을 관리할 필요가 없습니다. 따라서, 트래픽 급증 사이에 유휴 기간이 있고 콜드 스타트를 허용할 수 있는 워크로드에 이상적입니다. 5 | 6 | ![ptn_4.2_01](../../images/key_features/ptn_4.2_01.png) -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/model-korsts/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "monologg/koelectra-small-v3-discriminator", 3 | "architectures": [ 4 | "ElectraForSequenceClassification" 5 | ], 6 | "attention_probs_dropout_prob": 0.1, 7 | "classifier_dropout": null, 8 | "embedding_size": 128, 9 | "hidden_act": "gelu", 10 | "hidden_dropout_prob": 0.1, 11 | "hidden_size": 256, 12 | "id2label": { 13 | "0": "LABEL_0" 14 | }, 15 | "initializer_range": 0.02, 16 | "intermediate_size": 1024, 17 | "label2id": { 18 | "LABEL_0": 0 19 | }, 20 | "layer_norm_eps": 1e-12, 21 | "max_position_embeddings": 512, 22 | "model_type": "electra", 23 | "num_attention_heads": 4, 24 | "num_hidden_layers": 12, 25 | "pad_token_id": 0, 26 | "position_embedding_type": "absolute", 27 | "problem_type": "regression", 28 | "summary_activation": "gelu", 29 | "summary_last_dropout": 0.1, 30 | "summary_type": "first", 31 | "summary_use_proj": true, 32 | "torch_dtype": "float32", 33 | "transformers_version": "4.11.3", 34 | "type_vocab_size": 2, 35 | "vocab_size": 35000 36 | } 37 | -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/model-korsts/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/key_features/ptn_4.2_serverless-inference/model-korsts/training_args.bin -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/model-nsmc/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": 
"monologg/koelectra-small-v3-discriminator", 3 | "architectures": [ 4 | "ElectraForSequenceClassification" 5 | ], 6 | "attention_probs_dropout_prob": 0.1, 7 | "classifier_dropout": null, 8 | "embedding_size": 128, 9 | "hidden_act": "gelu", 10 | "hidden_dropout_prob": 0.1, 11 | "hidden_size": 256, 12 | "id2label": { 13 | "0": "0", 14 | "1": "1" 15 | }, 16 | "initializer_range": 0.02, 17 | "intermediate_size": 1024, 18 | "label2id": { 19 | "0": 0, 20 | "1": 1 21 | }, 22 | "layer_norm_eps": 1e-12, 23 | "max_position_embeddings": 512, 24 | "model_type": "electra", 25 | "num_attention_heads": 4, 26 | "num_hidden_layers": 12, 27 | "pad_token_id": 0, 28 | "position_embedding_type": "absolute", 29 | "problem_type": "single_label_classification", 30 | "summary_activation": "gelu", 31 | "summary_last_dropout": 0.1, 32 | "summary_type": "first", 33 | "summary_use_proj": true, 34 | "torch_dtype": "float32", 35 | "transformers_version": "4.11.3", 36 | "type_vocab_size": 2, 37 | "vocab_size": 35000 38 | } 39 | -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/model-nsmc/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/key_features/ptn_4.2_serverless-inference/model-nsmc/training_args.bin -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/model-nsmc/vocab/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/model-nsmc/vocab/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "monologg/koelectra-base-v3-discriminator", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "ElectraTokenizer"} -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/samples/korsts.txt: -------------------------------------------------------------------------------- 1 | {"text": ["맛있는 라면을 먹고 싶어요", "후루룩 쩝쩝 후루룩 쩝쩝 맛좋은 라면"]} 2 | {"text": ["뽀로로는 내친구", "머신러닝은 러닝머신이 아닙니다."]} -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/samples/ner.txt: -------------------------------------------------------------------------------- 1 | {"text": ["이 영화를 보고 나서 감동이 몰려왔습니다. 1950대 영화의 향수를 불러일으키면서도 주연 배우의 연기력, 반전 스토리가 정말 일품이네요"]} 2 | {"text": ["이 영화를 보고 나서 감동이 몰려왔습니다. 1950대 영화의 향수를 불러일으키면서도 주연 배우의 연기력, 반전 스토리가 정말 일품이네요"]} -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/samples/nsmc.txt: -------------------------------------------------------------------------------- 1 | {"text": ["이 영화는 최고의 영화입니다"]} 2 | {"text": ["최악이에요. 
배우의 연기력도 좋지 않고 내용도 너무 허접합니다"]} -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/src/inference_kobart.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import logging 4 | import torch 5 | from torch import nn 6 | from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration 7 | 8 | logging.basicConfig( 9 | level=logging.INFO, 10 | format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', 11 | handlers=[ 12 | logging.FileHandler(filename='tmp.log'), 13 | logging.StreamHandler(sys.stdout) 14 | ] 15 | ) 16 | logger = logging.getLogger(__name__) 17 | 18 | tokenizer = PreTrainedTokenizerFast.from_pretrained("ainize/kobart-news") 19 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 20 | 21 | 22 | def model_fn(model_path=None): 23 | model = BartForConditionalGeneration.from_pretrained("ainize/kobart-news") 24 | model.to(device) 25 | return model 26 | 27 | 28 | def transform_fn(model, input_data, content_type="application/jsonlines", accept="application/jsonlines"): 29 | data_str = input_data.decode("utf-8") 30 | jsonlines = data_str.split("\n") 31 | 32 | predicted = [] 33 | 34 | for jsonline in jsonlines: 35 | text = json.loads(jsonline)["text"][0] 36 | logger.info("input text: {}".format(text)) 37 | 38 | input_ids = tokenizer.encode(text, return_tensors="pt") 39 | input_ids = input_ids.to(device) 40 | # Generate Summary Text Ids 41 | summary_text_ids = model.generate( 42 | input_ids=input_ids, 43 | bos_token_id=model.config.bos_token_id, 44 | eos_token_id=model.config.eos_token_id, 45 | length_penalty=2.0, 46 | max_length=512, 47 | min_length=32, 48 | num_beams=4, 49 | ) 50 | 51 | # Decoding Text 52 | summary_outputs = tokenizer.decode(summary_text_ids[0], skip_special_tokens=True) 53 | logger.info("summary_outputs: {}".format(summary_outputs)) 54 | 55 | prediction_dict = {} 56 | prediction_dict["summary"] = summary_outputs 57 | 58 | jsonline = json.dumps(prediction_dict) 59 | predicted.append(jsonline) 60 | 61 | predicted_jsonlines = "\n".join(predicted) 62 | return predicted_jsonlines -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/src/inference_korsts.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import logging 4 | import torch 5 | from torch import nn 6 | from transformers import ElectraConfig 7 | from transformers import ElectraModel, AutoTokenizer, ElectraTokenizer, ElectraForSequenceClassification 8 | 9 | logging.basicConfig( 10 | level=logging.INFO, 11 | format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', 12 | handlers=[ 13 | logging.FileHandler(filename='tmp.log'), 14 | logging.StreamHandler(sys.stdout) 15 | ] 16 | ) 17 | logger = logging.getLogger(__name__) 18 | 19 | max_seq_length = 128 20 | tokenizer = AutoTokenizer.from_pretrained("daekeun-ml/koelectra-small-v3-korsts") 21 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | 23 | 24 | # Huggingface pre-trained model: 'monologg/koelectra-small-v3-discriminator' 25 | def model_fn(model_path=None): 26 | #### 27 | # If you have your own trained model 28 | # Huggingface pre-trained model: 'monologg/koelectra-small-v3-discriminator' 29 | #### 30 | #config = ElectraConfig.from_json_file(f'{model_path}/config.json') 31 | #model = 
ElectraForSequenceClassification.from_pretrained(f'{model_path}/model.pth', config=config) 32 | model = ElectraForSequenceClassification.from_pretrained('daekeun-ml/koelectra-small-v3-korsts') 33 | model.to(device) 34 | return model 35 | 36 | 37 | def input_fn(input_data, content_type="application/jsonlines"): 38 | data_str = input_data.decode("utf-8") 39 | jsonlines = data_str.split("\n") 40 | transformed_inputs = [] 41 | 42 | for jsonline in jsonlines: 43 | text = json.loads(jsonline)["text"] 44 | logger.info("input text: {}".format(text)) 45 | encode_plus_token = tokenizer.encode_plus( 46 | text, 47 | max_length=max_seq_length, 48 | add_special_tokens=True, 49 | return_token_type_ids=False, 50 | padding="max_length", 51 | return_attention_mask=True, 52 | return_tensors="pt", 53 | truncation=True, 54 | ) 55 | transformed_inputs.append(encode_plus_token) 56 | 57 | return transformed_inputs 58 | 59 | 60 | def predict_fn(transformed_inputs, model): 61 | predicted_classes = [] 62 | 63 | for data in transformed_inputs: 64 | data = data.to(device) 65 | output = model(**data) 66 | 67 | prediction_dict = {} 68 | prediction_dict['score'] = output[0].squeeze().cpu().detach().numpy().tolist() 69 | 70 | jsonline = json.dumps(prediction_dict) 71 | logger.info("jsonline: {}".format(jsonline)) 72 | predicted_classes.append(jsonline) 73 | 74 | predicted_classes_jsonlines = "\n".join(predicted_classes) 75 | return predicted_classes_jsonlines 76 | 77 | 78 | def output_fn(outputs, accept="application/jsonlines"): 79 | return outputs, accept -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/src/inference_nsmc.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import logging 4 | import torch 5 | from torch import nn 6 | from transformers import ElectraConfig 7 | from transformers import ElectraModel, AutoTokenizer, ElectraTokenizer, ElectraForSequenceClassification 8 | 9 | logging.basicConfig( 10 | level=logging.INFO, 11 | format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', 12 | handlers=[ 13 | logging.FileHandler(filename='tmp.log'), 14 | logging.StreamHandler(sys.stdout) 15 | ] 16 | ) 17 | logger = logging.getLogger(__name__) 18 | 19 | max_seq_length = 128 20 | classes = ['Neg', 'Pos'] 21 | 22 | tokenizer = AutoTokenizer.from_pretrained("daekeun-ml/koelectra-small-v3-nsmc") 23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | 26 | def model_fn(model_path=None): 27 | #### 28 | # If you have your own trained model 29 | # Huggingface pre-trained model: 'monologg/koelectra-small-v3-discriminator' 30 | #### 31 | #config = ElectraConfig.from_json_file(f'{model_path}/config.json') 32 | #model = ElectraForSequenceClassification.from_pretrained(f'{model_path}/model.pth', config=config) 33 | 34 | # Download model from the Huggingface hub 35 | model = ElectraForSequenceClassification.from_pretrained('daekeun-ml/koelectra-small-v3-nsmc') 36 | model.to(device) 37 | return model 38 | 39 | 40 | def input_fn(input_data, content_type="application/jsonlines"): 41 | data_str = input_data.decode("utf-8") 42 | jsonlines = data_str.split("\n") 43 | transformed_inputs = [] 44 | 45 | for jsonline in jsonlines: 46 | text = json.loads(jsonline)["text"][0] 47 | logger.info("input text: {}".format(text)) 48 | encode_plus_token = tokenizer.encode_plus( 49 | text, 50 | max_length=max_seq_length, 51 | add_special_tokens=True, 52 | 
return_token_type_ids=False, 53 | padding="max_length", 54 | return_attention_mask=True, 55 | return_tensors="pt", 56 | truncation=True, 57 | ) 58 | transformed_inputs.append(encode_plus_token) 59 | 60 | return transformed_inputs 61 | 62 | 63 | def predict_fn(transformed_inputs, model): 64 | predicted_classes = [] 65 | 66 | for data in transformed_inputs: 67 | data = data.to(device) 68 | output = model(**data) 69 | 70 | softmax_fn = nn.Softmax(dim=1) 71 | softmax_output = softmax_fn(output[0]) 72 | _, prediction = torch.max(softmax_output, dim=1) 73 | 74 | predicted_class_idx = prediction.item() 75 | predicted_class = classes[predicted_class_idx] 76 | score = softmax_output[0][predicted_class_idx] 77 | logger.info("predicted_class: {}".format(predicted_class)) 78 | 79 | prediction_dict = {} 80 | prediction_dict["predicted_label"] = predicted_class 81 | prediction_dict['score'] = score.cpu().detach().numpy().tolist() 82 | 83 | jsonline = json.dumps(prediction_dict) 84 | logger.info("jsonline: {}".format(jsonline)) 85 | predicted_classes.append(jsonline) 86 | 87 | predicted_classes_jsonlines = "\n".join(predicted_classes) 88 | return predicted_classes_jsonlines 89 | 90 | 91 | def output_fn(outputs, accept="application/jsonlines"): 92 | return outputs, accept -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/src/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.11.3 -------------------------------------------------------------------------------- /key_features/ptn_4.2_serverless-inference/src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from sagemaker.s3 import S3Uploader 4 | from sagemaker.predictor import Predictor 5 | from sagemaker.serializers import JSONLinesSerializer 6 | from sagemaker.deserializers import JSONLinesDeserializer 7 | 8 | 9 | def print_outputs(outputs): 10 | jsonlines = outputs.split('\n') 11 | 12 | for jsonline in jsonlines: 13 | print(json.loads(jsonline)) 14 | 15 | 16 | def prepare_model_artifact(model_path, 17 | model_artifact_path='model_and_code', 18 | model_artifact_name='model.tar.gz'): 19 | 20 | os.system(f'rm -rf {model_artifact_path}') 21 | os.system(f'mkdir {model_artifact_path} {model_artifact_path}/code') 22 | os.system(f'cp {model_path}/*.* {model_artifact_path}') 23 | os.system(f'cp ./src/* {model_artifact_path}/code') 24 | os.system(f'tar cvzf {model_artifact_name} -C {model_artifact_path}/ .') 25 | os.system(f'rm -rf {model_artifact_path}') 26 | print(f'Archived {model_artifact_name}') 27 | 28 | 29 | def upload_model_artifact_to_s3(model_variant, model_path, bucket, prefix, 30 | model_artifact_path='model_and_code', 31 | model_artifact_name='model.tar.gz'): 32 | prepare_model_artifact(model_path, model_artifact_path, model_artifact_name) 33 | model_s3_uri = S3Uploader.upload(model_artifact_name,'s3://{}/{}/{}'.format(bucket, prefix, model_variant)) 34 | os.system(f'rm -rf {model_artifact_name}') 35 | print(f'Uploaded to {model_s3_uri}') 36 | 37 | return model_s3_uri 38 | 39 | 40 | class NLPPredictor(Predictor): 41 | def __init__(self, endpoint_name, sagemaker_session): 42 | super().__init__( 43 | endpoint_name, 44 | sagemaker_session=sagemaker_session, 45 | serializer=JSONLinesSerializer(), 46 | deserializer=JSONLinesDeserializer(), 47 | ) -------------------------------------------------------------------------------- 
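Taken together, the ptn_4.2 pieces above split the work as follows: `src/utils.py` packages a model directory plus `src/` into `model.tar.gz` and uploads it to S3, the `inference_*.py` scripts implement the JSON Lines request/response handling, and `NLPPredictor` attaches the JSON Lines serializer/deserializer to a deployed endpoint. The sketch below shows a hypothetical end-to-end usage, assuming it is run from the `ptn_4.2_serverless-inference/` directory, that a serverless endpoint has already been created from the uploaded artifact (as done in the notebooks), and that the prefix and endpoint name are placeholders.

```python
import sagemaker
from src.utils import NLPPredictor, upload_model_artifact_to_s3

sess = sagemaker.Session()

# Package ./model-nsmc and ./src into model.tar.gz and upload it to S3 (placeholder prefix).
model_s3_uri = upload_model_artifact_to_s3(
    model_variant="nsmc",
    model_path="./model-nsmc",
    bucket=sess.default_bucket(),
    prefix="serverless-inference/kornlp",
)

# Once a serverless endpoint exists for model_s3_uri, NLPPredictor exchanges JSON Lines
# in the same format as samples/nsmc.txt: one {"text": [...]} object per line.
predictor = NLPPredictor("kornlp-nsmc-serverless", sagemaker_session=sess)  # placeholder endpoint name
payload = [
    {"text": ["이 영화는 최고의 영화입니다"]},
    {"text": ["최악이에요. 배우의 연기력도 좋지 않고 내용도 너무 허접합니다"]},
]
for prediction in predictor.predict(payload):
    print(prediction)  # e.g. {'predicted_label': 'Neg', 'score': 0.99}
```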
/key_features/ptn_5_multi-container-endpoint/README.md: -------------------------------------------------------------------------------- 1 | # Multi-container Endpoint 2 | 3 | ## Overview 4 | SageMaker multi-container endpoints let you run multiple inference containers built on different serving stacks (e.g., model servers, ML frameworks, framework versions, algorithms) on a single endpoint and invoke each inference container independently. 5 | 6 | - Serve multiple models (e.g., Object Detection, Named Entity Recognition) when there is not enough traffic to saturate the full capacity of a single instance 7 | - Compare similar architectures running on different framework versions (e.g., TensorFlow 1.x vs. TensorFlow 2.x) in scenarios such as A/B testing 8 | 9 | ![ptn_5_01](../../images/key_features/ptn_5_01.png) -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/model-kobart/model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/key_features/ptn_5_multi-container-endpoint/model-kobart/model.pth -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/model-korsts/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "monologg/koelectra-small-v3-discriminator", 3 | "architectures": [ 4 | "ElectraForSequenceClassification" 5 | ], 6 | "attention_probs_dropout_prob": 0.1, 7 | "classifier_dropout": null, 8 | "embedding_size": 128, 9 | "hidden_act": "gelu", 10 | "hidden_dropout_prob": 0.1, 11 | "hidden_size": 256, 12 | "id2label": { 13 | "0": "LABEL_0" 14 | }, 15 | "initializer_range": 0.02, 16 | "intermediate_size": 1024, 17 | "label2id": { 18 | "LABEL_0": 0 19 | }, 20 | "layer_norm_eps": 1e-12, 21 | "max_position_embeddings": 512, 22 | "model_type": "electra", 23 | "num_attention_heads": 4, 24 | "num_hidden_layers": 12, 25 | "pad_token_id": 0, 26 | "position_embedding_type": "absolute", 27 | "problem_type": "regression", 28 | "summary_activation": "gelu", 29 | "summary_last_dropout": 0.1, 30 | "summary_type": "first", 31 | "summary_use_proj": true, 32 | "torch_dtype": "float32", 33 | "transformers_version": "4.11.3", 34 | "type_vocab_size": 2, 35 | "vocab_size": 35000 36 | } 37 | -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/model-korsts/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/key_features/ptn_5_multi-container-endpoint/model-korsts/training_args.bin -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/model-nsmc/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "monologg/koelectra-small-v3-discriminator", 3 | "architectures": [ 4 | "ElectraForSequenceClassification" 5 | ], 6 | "attention_probs_dropout_prob": 0.1, 7 | "classifier_dropout": null, 8 | "embedding_size": 128, 9 | "hidden_act": "gelu", 10 | "hidden_dropout_prob": 0.1, 11 | "hidden_size": 256, 12 | "id2label": { 13 | "0": "0", 14 | "1": "1" 15 | }, 16 | "initializer_range": 0.02, 17 | "intermediate_size": 1024, 18 | "label2id": { 19 | "0": 0, 20 | "1": 1 21 | }, 22 | "layer_norm_eps": 1e-12, 23 | "max_position_embeddings": 512, 24 | "model_type": "electra", 25 | "num_attention_heads": 4, 26 | 
"num_hidden_layers": 12, 27 | "pad_token_id": 0, 28 | "position_embedding_type": "absolute", 29 | "problem_type": "single_label_classification", 30 | "summary_activation": "gelu", 31 | "summary_last_dropout": 0.1, 32 | "summary_type": "first", 33 | "summary_use_proj": true, 34 | "torch_dtype": "float32", 35 | "transformers_version": "4.11.3", 36 | "type_vocab_size": 2, 37 | "vocab_size": 35000 38 | } 39 | -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/model-nsmc/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/key_features/ptn_5_multi-container-endpoint/model-nsmc/training_args.bin -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/model-nsmc/vocab/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/model-nsmc/vocab/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "monologg/koelectra-base-v3-discriminator", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "ElectraTokenizer"} -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/samples/.ipynb_checkpoints/kobart-checkpoint.txt: -------------------------------------------------------------------------------- 1 | {"text": ["AWS가 10일 AWS 리인벤트를 통해 머신러닝 서비스인 아마존 세이지메이커(Amazon SageMaker)의 9가지 새로운 기능을 발표했다. 아마존 세이지메이커 데이터 랭글러(Amazon SageMaker Data Wrangler)를 비롯해 아마존 세이지메이커 피처 스토어 등 다양한 기술들이 속속 베일을 벗었다. 스와미 시바수브라마니안(Swami Sivasubramanian), AWS 아마존 머신러닝 부사장은 '수십만 명의 일반 개발자와 데이터 과학자가 업계 최고의 머신러닝 서비스인 아마존 세이지메이커를 활용해 맞춤형 머신러닝 모델 제작, 훈련 및 배치에 대한 장벽을 제거했다'면서 '개발자가 더 나은 가시성, 설명 가능성 및 자동화를 대규모로 구현하는 맞춤형 머신러닝 모델을 준비, 제작, 훈련, 설명, 검사, 모니터링, 디버그 및 실행하기 위한 엔드투엔드 머신러닝 파이프라인을 더 쉽게 구축할 수 있도록 지원한다'고 말했다."]} 2 | {"text": ["대한항공은 이와 같은 필요성에 따라 AWS와 AWS의 국내 파트너사인 LG CNS와 함께 기존 사내 데이터 센터에서 운영했던 데이터와 네트워크, 보안 시스템을 비롯한 각종 IT시스템을 단계적으로 AWS의 클라우드로 이전해 효율성을 높이고 IT 관리를 단순화했다. 대한항공은 이번 전사 IT시스템의 클라우드 이전 완료에 따라 데이터 분석 능력, 머신러닝등 아마존웹서비스가 갖고 있는 클라우드 기능을 바탕으로 경영 프로세스 혁신, 여객서비스 강화, 예약·발권 시스템 편의성 증대, 기상예측 정확도 제고 등을 추진해 나간다. 대한항공은 먼저 ‘클라우드 머신러닝 관리 서비스’를 도입한다. 이는 머신러닝 모델의 구축, 학습, 적용을 모두 하나의 환경에서 관리할 수 있도록 해주는 서비스로 정확한 수요 및 통계 예측을 지원함으로써 보다 나은 고객 서비스를 제공할 수 있게 한다. 특히 악천후로 인한 항공기 지연 예상시간, 항공기 정비 소요시간 예측 등을 토대로 고객들에게 적절한 시점에 필요한 조치를 할 수 있을 것으로 기대된다. 또 AWS 클라우드로 구축된 고객 데이터 플랫폼에서 고객별 특성에 따른 고유 디지털 식별 정보가 부여돼, 맞춤형 고객 서비스 제공도 가능해질 것으로 보고있다. 
다시 말해, 그 동안 고객이 대한항공으로부터 제공 받은 서비스를 포함한 각종 정보들을 종합적으로 분석해 고객 니즈에 맞는 맞춤 서비스를 추천하는 기능도 제공된다는 것이다."]} -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/samples/kobart.txt: -------------------------------------------------------------------------------- 1 | {"text": ["AWS가 10일 AWS 리인벤트를 통해 머신러닝 서비스인 아마존 세이지메이커(Amazon SageMaker)의 9가지 새로운 기능을 발표했다. 아마존 세이지메이커 데이터 랭글러(Amazon SageMaker Data Wrangler)를 비롯해 아마존 세이지메이커 피처 스토어 등 다양한 기술들이 속속 베일을 벗었다. 스와미 시바수브라마니안(Swami Sivasubramanian), AWS 아마존 머신러닝 부사장은 '수십만 명의 일반 개발자와 데이터 과학자가 업계 최고의 머신러닝 서비스인 아마존 세이지메이커를 활용해 맞춤형 머신러닝 모델 제작, 훈련 및 배치에 대한 장벽을 제거했다'면서 '개발자가 더 나은 가시성, 설명 가능성 및 자동화를 대규모로 구현하는 맞춤형 머신러닝 모델을 준비, 제작, 훈련, 설명, 검사, 모니터링, 디버그 및 실행하기 위한 엔드투엔드 머신러닝 파이프라인을 더 쉽게 구축할 수 있도록 지원한다'고 말했다."]} 2 | {"text": ["대한항공은 이와 같은 필요성에 따라 AWS와 AWS의 국내 파트너사인 LG CNS와 함께 기존 사내 데이터 센터에서 운영했던 데이터와 네트워크, 보안 시스템을 비롯한 각종 IT시스템을 단계적으로 AWS의 클라우드로 이전해 효율성을 높이고 IT 관리를 단순화했다. 대한항공은 이번 전사 IT시스템의 클라우드 이전 완료에 따라 데이터 분석 능력, 머신러닝등 아마존웹서비스가 갖고 있는 클라우드 기능을 바탕으로 경영 프로세스 혁신, 여객서비스 강화, 예약·발권 시스템 편의성 증대, 기상예측 정확도 제고 등을 추진해 나간다. 대한항공은 먼저 ‘클라우드 머신러닝 관리 서비스’를 도입한다. 이는 머신러닝 모델의 구축, 학습, 적용을 모두 하나의 환경에서 관리할 수 있도록 해주는 서비스로 정확한 수요 및 통계 예측을 지원함으로써 보다 나은 고객 서비스를 제공할 수 있게 한다. 특히 악천후로 인한 항공기 지연 예상시간, 항공기 정비 소요시간 예측 등을 토대로 고객들에게 적절한 시점에 필요한 조치를 할 수 있을 것으로 기대된다. 또 AWS 클라우드로 구축된 고객 데이터 플랫폼에서 고객별 특성에 따른 고유 디지털 식별 정보가 부여돼, 맞춤형 고객 서비스 제공도 가능해질 것으로 보고있다. 다시 말해, 그 동안 고객이 대한항공으로부터 제공 받은 서비스를 포함한 각종 정보들을 종합적으로 분석해 고객 니즈에 맞는 맞춤 서비스를 추천하는 기능도 제공된다는 것이다."]} -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/samples/korsts.txt: -------------------------------------------------------------------------------- 1 | {"text": ["맛있는 라면을 먹고 싶어요", "후루룩 쩝쩝 후루룩 쩝쩝 맛좋은 라면"]} 2 | {"text": ["뽀로로는 내친구", "머신러닝은 러닝머신이 아닙니다."]} -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/samples/ner.txt: -------------------------------------------------------------------------------- 1 | {"text": ["이 영화를 보고 나서 감동이 몰려왔습니다. 1950대 영화의 향수를 불러일으키면서도 주연 배우의 연기력, 반전 스토리가 정말 일품이네요"]} 2 | {"text": ["이 영화를 보고 나서 감동이 몰려왔습니다. 1950대 영화의 향수를 불러일으키면서도 주연 배우의 연기력, 반전 스토리가 정말 일품이네요"]} -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/samples/nsmc.txt: -------------------------------------------------------------------------------- 1 | {"text": ["이 영화는 최고의 영화입니다"]} 2 | {"text": ["최악이에요. 
배우의 연기력도 좋지 않고 내용도 너무 허접합니다"]} -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/src/inference_kobart.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import logging 4 | import torch 5 | from torch import nn 6 | from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration 7 | 8 | logging.basicConfig( 9 | level=logging.INFO, 10 | format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', 11 | handlers=[ 12 | logging.FileHandler(filename='tmp.log'), 13 | logging.StreamHandler(sys.stdout) 14 | ] 15 | ) 16 | logger = logging.getLogger(__name__) 17 | 18 | tokenizer = PreTrainedTokenizerFast.from_pretrained("ainize/kobart-news") 19 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 20 | 21 | 22 | def model_fn(model_path=None): 23 | model = BartForConditionalGeneration.from_pretrained("ainize/kobart-news") 24 | model.to(device) 25 | return model 26 | 27 | 28 | def transform_fn(model, input_data, content_type="application/jsonlines", accept="application/jsonlines"): 29 | data_str = input_data.decode("utf-8") 30 | jsonlines = data_str.split("\n") 31 | 32 | predicted = [] 33 | 34 | for jsonline in jsonlines: 35 | text = json.loads(jsonline)["text"][0] 36 | logger.info("input text: {}".format(text)) 37 | 38 | input_ids = tokenizer.encode(text, return_tensors="pt") 39 | input_ids = input_ids.to(device) 40 | # Generate Summary Text Ids 41 | summary_text_ids = model.generate( 42 | input_ids=input_ids, 43 | bos_token_id=model.config.bos_token_id, 44 | eos_token_id=model.config.eos_token_id, 45 | length_penalty=2.0, 46 | max_length=512, 47 | min_length=32, 48 | num_beams=4, 49 | ) 50 | 51 | # Decoding Text 52 | summary_outputs = tokenizer.decode(summary_text_ids[0], skip_special_tokens=True) 53 | logger.info("summary_outputs: {}".format(summary_outputs)) 54 | 55 | prediction_dict = {} 56 | prediction_dict["summary"] = summary_outputs 57 | 58 | jsonline = json.dumps(prediction_dict) 59 | predicted.append(jsonline) 60 | 61 | predicted_jsonlines = "\n".join(predicted) 62 | return predicted_jsonlines -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/src/inference_korsts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | import logging 5 | import torch 6 | from torch import nn 7 | from transformers import ElectraConfig 8 | from transformers import ElectraModel, AutoTokenizer, ElectraTokenizer, ElectraForSequenceClassification 9 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 10 | 11 | logging.basicConfig( 12 | level=logging.INFO, 13 | format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', 14 | handlers=[ 15 | logging.FileHandler(filename='tmp.log'), 16 | logging.StreamHandler(sys.stdout) 17 | ] 18 | ) 19 | logger = logging.getLogger(__name__) 20 | 21 | max_seq_length = 128 22 | tokenizer = AutoTokenizer.from_pretrained("daekeun-ml/koelectra-small-v3-korsts") 23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | 26 | # Huggingface pre-trained model: 'monologg/koelectra-small-v3-discriminator' 27 | def model_fn(model_path=None): 28 | #### 29 | # If you have your own trained model 30 | # Huggingface pre-trained model: 'monologg/koelectra-small-v3-discriminator' 31 | #### 32 | #config = 
ElectraConfig.from_json_file(f'{model_path}/config.json') 33 | #model = ElectraForSequenceClassification.from_pretrained(f'{model_path}/model.pth', config=config) 34 | model = ElectraForSequenceClassification.from_pretrained('daekeun-ml/koelectra-small-v3-korsts') 35 | model.to(device) 36 | return model 37 | 38 | 39 | def input_fn(input_data, content_type="application/jsonlines"): 40 | data_str = input_data.decode("utf-8") 41 | jsonlines = data_str.split("\n") 42 | transformed_inputs = [] 43 | 44 | for jsonline in jsonlines: 45 | text = json.loads(jsonline)["text"] 46 | logger.info("input text: {}".format(text)) 47 | encode_plus_token = tokenizer.encode_plus( 48 | text, 49 | max_length=max_seq_length, 50 | add_special_tokens=True, 51 | return_token_type_ids=False, 52 | padding="max_length", 53 | return_attention_mask=True, 54 | return_tensors="pt", 55 | truncation=True, 56 | ) 57 | transformed_inputs.append(encode_plus_token) 58 | 59 | return transformed_inputs 60 | 61 | 62 | def predict_fn(transformed_inputs, model): 63 | predicted_classes = [] 64 | 65 | for data in transformed_inputs: 66 | data = data.to(device) 67 | output = model(**data) 68 | 69 | prediction_dict = {} 70 | prediction_dict['score'] = output[0].squeeze().cpu().detach().numpy().tolist() 71 | 72 | jsonline = json.dumps(prediction_dict) 73 | logger.info("jsonline: {}".format(jsonline)) 74 | predicted_classes.append(jsonline) 75 | 76 | predicted_classes_jsonlines = "\n".join(predicted_classes) 77 | return predicted_classes_jsonlines 78 | 79 | 80 | def output_fn(outputs, accept="application/jsonlines"): 81 | return outputs, accept -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/src/inference_nsmc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | import logging 5 | import torch 6 | from torch import nn 7 | from transformers import ElectraConfig 8 | from transformers import ElectraModel, AutoTokenizer, ElectraTokenizer, ElectraForSequenceClassification 9 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 10 | 11 | logging.basicConfig( 12 | level=logging.INFO, 13 | format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', 14 | handlers=[ 15 | logging.FileHandler(filename='tmp.log'), 16 | logging.StreamHandler(sys.stdout) 17 | ] 18 | ) 19 | logger = logging.getLogger(__name__) 20 | 21 | max_seq_length = 128 22 | classes = ['Neg', 'Pos'] 23 | 24 | tokenizer = AutoTokenizer.from_pretrained("daekeun-ml/koelectra-small-v3-nsmc") 25 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 26 | 27 | 28 | def model_fn(model_path=None): 29 | #### 30 | # If you have your own trained model 31 | # Huggingface pre-trained model: 'monologg/koelectra-small-v3-discriminator' 32 | #### 33 | #config = ElectraConfig.from_json_file(f'{model_path}/config.json') 34 | #model = ElectraForSequenceClassification.from_pretrained(f'{model_path}/model.pth', config=config) 35 | 36 | # Download model from the Huggingface hub 37 | model = ElectraForSequenceClassification.from_pretrained('daekeun-ml/koelectra-small-v3-nsmc') 38 | model.to(device) 39 | return model 40 | 41 | 42 | def input_fn(input_data, content_type="application/jsonlines"): 43 | data_str = input_data.decode("utf-8") 44 | jsonlines = data_str.split("\n") 45 | transformed_inputs = [] 46 | 47 | for jsonline in jsonlines: 48 | text = json.loads(jsonline)["text"][0] 49 | logger.info("input text: {}".format(text)) 50 | 
encode_plus_token = tokenizer.encode_plus( 51 | text, 52 | max_length=max_seq_length, 53 | add_special_tokens=True, 54 | return_token_type_ids=False, 55 | padding="max_length", 56 | return_attention_mask=True, 57 | return_tensors="pt", 58 | truncation=True, 59 | ) 60 | transformed_inputs.append(encode_plus_token) 61 | 62 | return transformed_inputs 63 | 64 | 65 | def predict_fn(transformed_inputs, model): 66 | predicted_classes = [] 67 | 68 | for data in transformed_inputs: 69 | data = data.to(device) 70 | output = model(**data) 71 | 72 | softmax_fn = nn.Softmax(dim=1) 73 | softmax_output = softmax_fn(output[0]) 74 | _, prediction = torch.max(softmax_output, dim=1) 75 | 76 | predicted_class_idx = prediction.item() 77 | predicted_class = classes[predicted_class_idx] 78 | score = softmax_output[0][predicted_class_idx] 79 | logger.info("predicted_class: {}".format(predicted_class)) 80 | 81 | prediction_dict = {} 82 | prediction_dict["predicted_label"] = predicted_class 83 | prediction_dict['score'] = score.cpu().detach().numpy().tolist() 84 | 85 | jsonline = json.dumps(prediction_dict) 86 | logger.info("jsonline: {}".format(jsonline)) 87 | predicted_classes.append(jsonline) 88 | 89 | predicted_classes_jsonlines = "\n".join(predicted_classes) 90 | return predicted_classes_jsonlines 91 | 92 | 93 | def output_fn(outputs, accept="application/jsonlines"): 94 | return outputs, accept -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/src/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.12.5 -------------------------------------------------------------------------------- /key_features/ptn_5_multi-container-endpoint/src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from sagemaker.s3 import S3Uploader 4 | from sagemaker.predictor import Predictor 5 | from sagemaker.serializers import JSONLinesSerializer 6 | from sagemaker.deserializers import JSONLinesDeserializer 7 | 8 | 9 | def print_outputs(outputs): 10 | jsonlines = outputs.split('\n') 11 | 12 | for jsonline in jsonlines: 13 | print(json.loads(jsonline)) 14 | 15 | 16 | def prepare_model_artifact(model_path, 17 | model_artifact_path='model_and_code', 18 | model_artifact_name='model.tar.gz'): 19 | 20 | os.system(f'rm -rf {model_artifact_path}') 21 | os.system(f'mkdir {model_artifact_path} {model_artifact_path}/code') 22 | os.system(f'cp {model_path}/*.* {model_artifact_path}') 23 | os.system(f'cp ./src/* {model_artifact_path}/code') 24 | os.system(f'tar cvzf {model_artifact_name} -C {model_artifact_path}/ .') 25 | os.system(f'rm -rf {model_artifact_path}') 26 | print(f'Archived {model_artifact_name}') 27 | 28 | 29 | def upload_model_artifact_to_s3(model_variant, model_path, bucket, prefix, 30 | model_artifact_path='model_and_code', 31 | model_artifact_name='model.tar.gz'): 32 | prepare_model_artifact(model_path, model_artifact_path, model_artifact_name) 33 | model_s3_uri = S3Uploader.upload(model_artifact_name,'s3://{}/{}/{}'.format(bucket, prefix, model_variant)) 34 | os.system(f'rm -rf {model_artifact_name}') 35 | print(f'Uploaded to {model_s3_uri}') 36 | 37 | return model_s3_uri 38 | 39 | 40 | class NLPPredictor(Predictor): 41 | def __init__(self, endpoint_name, sagemaker_session): 42 | super().__init__( 43 | endpoint_name, 44 | sagemaker_session=sagemaker_session, 45 | serializer=JSONLinesSerializer(), 46 | 
deserializer=JSONLinesDeserializer(), 47 | ) -------------------------------------------------------------------------------- /key_features/ptn_6_inference-pipeline/README.md: -------------------------------------------------------------------------------- 1 | # Inference pipeline (a sequence of up to 5 models) 2 | 3 | ## Overview 4 | An inference pipeline connects a sequence of 2-5 containers (built-in or custom) behind a single endpoint, step by step. The response of each step becomes the inference request of the next step, which lets you deploy model ensembles spanning frameworks such as PyTorch/TensorFlow/MXNet/scikit-learn/Spark ML, or split pre-processing, inference, and post-processing into separately managed containers. A representative example of deploying a BERT model looks like this (see the PipelineModel sketch further below): 5 | 6 | * (1): Pre-process the user request and context in a custom scikit-learn container 7 | * (2): Take the output of (1) as input and run BERT inference in the built-in PyTorch container 8 | * (3): Take the output of (2) as input and post-process it in the built-in scikit-learn container to build the response 9 | 10 | ![ptn_6_01](../../images/key_features/ptn_6_01.png) -------------------------------------------------------------------------------- /production/ptn_1_ab-test/README.md: -------------------------------------------------------------------------------- 1 | # A/B Testing 2 | 3 | ## Overview 4 | In a production ML workflow, data scientists and ML engineers improve models in many ways, such as retraining for data/model/concept drift, hyperparameter tuning, and feature selection. Before promoting a new model, you should validate it thoroughly by A/B testing it against the previous model. Does that mean you have to redeploy the endpoint, or run two endpoints, just for the A/B test? No. With production variants, you can test multiple models or model versions behind the same endpoint, one per variant. 5 | 6 | ![ptn_1_01](../../images/production/ptn_1_01.png) 7 | 8 | ## Production Variant 9 | The load balancer attached to a SageMaker endpoint can split inference traffic and route it to multiple models, enabling A/B testing. This capability is called a production variant, and it lets you test and deploy multiple models on a single SageMaker endpoint. For example, you can shift traffic between the models on an endpoint for canary rollouts and blue/green deployments, and you can configure an auto-scaling policy so the endpoint scales in or out automatically based on metrics such as requests per second. 10 | 11 | This hands-on lab covers the following (see the variant-weight sketch further below): 12 | - Deploy two production variants (Variant1: CPU, Variant2: GPU) 13 | - Change the traffic distribution (50:50 -> 80:20 -> 100:0) 14 | - Delete Variant2 15 | -------------------------------------------------------------------------------- /production/ptn_2_deployment-guardrail/README.md: -------------------------------------------------------------------------------- 1 | # Blue/Green Deployment Guardrail 2 | 3 | ## Overview 4 | SageMaker Deployment Guardrails are a fully managed blue/green deployment guardrail service for safely updating a production endpoint from the current model to a new model. Traffic shifting modes such as canary and linear give you fine-grained control over how traffic moves from the current model to the new model during the update, and protections such as automatic rollback catch problems early, before they impact production.
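To make the Inference Pipeline pattern above concrete, here is a minimal, hypothetical `PipelineModel` sketch that chains a preprocessing container and a BERT container behind one endpoint. The S3 URIs, entry-point scripts (`preprocess.py`, `inference.py`), and framework versions are placeholders, not files in this repository.

```python
# Hypothetical sketch: a two-step inference pipeline (preprocess -> BERT) on one endpoint.
import sagemaker
from sagemaker.pipeline import PipelineModel
from sagemaker.sklearn import SKLearnModel
from sagemaker.pytorch import PyTorchModel

role = sagemaker.get_execution_role()

preprocess_model = SKLearnModel(
    model_data="s3://<bucket>/preprocess/model.tar.gz",   # placeholder
    role=role,
    entry_point="preprocess.py",                          # hypothetical script
    framework_version="0.23-1",
)
bert_model = PyTorchModel(
    model_data="s3://<bucket>/bert/model.tar.gz",         # placeholder
    role=role,
    entry_point="inference.py",                           # hypothetical script
    framework_version="1.8.1",                            # assumed versions
    py_version="py3",
)

pipeline_model = PipelineModel(
    name="bert-inference-pipeline",
    role=role,
    models=[preprocess_model, bert_model],                # containers invoked in order per request
)
pipeline_model.deploy(initial_instance_count=1, instance_type="ml.m5.xlarge")
```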
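For the A/B Testing pattern above, here is a minimal sketch of deploying two production variants and then shifting the traffic split without redeploying, using the low-level boto3 API; the model, config, and endpoint names are placeholders and the notebook's actual resources and instance types may differ.

```python
# Hypothetical sketch: two variants behind one endpoint, then shift traffic 50:50 -> 80:20.
import boto3

sm_client = boto3.client("sagemaker")

sm_client.create_endpoint_config(
    EndpointConfigName="nsmc-ab-test-config",              # placeholder names throughout
    ProductionVariants=[
        {"VariantName": "Variant1", "ModelName": "nsmc-model-cpu",
         "InstanceType": "ml.c5.xlarge", "InitialInstanceCount": 1, "InitialVariantWeight": 5},
        {"VariantName": "Variant2", "ModelName": "nsmc-model-gpu",
         "InstanceType": "ml.g4dn.xlarge", "InitialInstanceCount": 1, "InitialVariantWeight": 5},
    ],
)
sm_client.create_endpoint(EndpointName="nsmc-ab-test", EndpointConfigName="nsmc-ab-test-config")

# Weights are relative, so 8:2 routes roughly 80% of traffic to Variant1.
sm_client.update_endpoint_weights_and_capacities(
    EndpointName="nsmc-ab-test",
    DesiredWeightsAndCapacities=[
        {"VariantName": "Variant1", "DesiredWeight": 8},
        {"VariantName": "Variant2", "DesiredWeight": 2},
    ],
)
```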
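And for the Deployment Guardrail overview above, a minimal sketch of a canary endpoint update with automatic rollback; the endpoint, endpoint config, and CloudWatch alarm names are placeholders.

```python
# Hypothetical sketch: update an endpoint with canary traffic shifting and auto-rollback.
import boto3

sm_client = boto3.client("sagemaker")

sm_client.update_endpoint(
    EndpointName="nsmc-endpoint",                          # placeholder names
    EndpointConfigName="nsmc-endpoint-config-v2",          # config pointing at the new model
    DeploymentConfig={
        "BlueGreenUpdatePolicy": {
            "TrafficRoutingConfiguration": {
                "Type": "CANARY",
                "CanarySize": {"Type": "CAPACITY_PERCENT", "Value": 30},
                "WaitIntervalInSeconds": 300,              # bake time before shifting the rest
            },
            "TerminationWaitInSeconds": 120,
            "MaximumExecutionTimeoutInSeconds": 1800,
        },
        "AutoRollbackConfiguration": {
            "Alarms": [{"AlarmName": "nsmc-endpoint-5xx-alarm"}]   # hypothetical alarm
        },
    },
)
```

If the alarm fires during the canary bake period, SageMaker rolls traffic back to the previous endpoint configuration automatically; the `inference_nsmc_error.py` handler further below appears designed to trigger exactly this kind of failure during the lab.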
5 | 6 | ![ptn_2_01](../../images/production/ptn_2_01.png) 7 | ![ptn_2_02](../../images/production/ptn_2_02.png) -------------------------------------------------------------------------------- /production/ptn_2_deployment-guardrail/model-nsmc/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "monologg/koelectra-small-v3-discriminator", 3 | "architectures": [ 4 | "ElectraForSequenceClassification" 5 | ], 6 | "attention_probs_dropout_prob": 0.1, 7 | "classifier_dropout": null, 8 | "embedding_size": 128, 9 | "hidden_act": "gelu", 10 | "hidden_dropout_prob": 0.1, 11 | "hidden_size": 256, 12 | "id2label": { 13 | "0": "0", 14 | "1": "1" 15 | }, 16 | "initializer_range": 0.02, 17 | "intermediate_size": 1024, 18 | "label2id": { 19 | "0": 0, 20 | "1": 1 21 | }, 22 | "layer_norm_eps": 1e-12, 23 | "max_position_embeddings": 512, 24 | "model_type": "electra", 25 | "num_attention_heads": 4, 26 | "num_hidden_layers": 12, 27 | "pad_token_id": 0, 28 | "position_embedding_type": "absolute", 29 | "problem_type": "single_label_classification", 30 | "summary_activation": "gelu", 31 | "summary_last_dropout": 0.1, 32 | "summary_type": "first", 33 | "summary_use_proj": true, 34 | "torch_dtype": "float32", 35 | "transformers_version": "4.11.3", 36 | "type_vocab_size": 2, 37 | "vocab_size": 35000 38 | } 39 | -------------------------------------------------------------------------------- /production/ptn_2_deployment-guardrail/model-nsmc/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/production/ptn_2_deployment-guardrail/model-nsmc/training_args.bin -------------------------------------------------------------------------------- /production/ptn_2_deployment-guardrail/model-nsmc/vocab/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"} -------------------------------------------------------------------------------- /production/ptn_2_deployment-guardrail/model-nsmc/vocab/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "monologg/koelectra-base-v3-discriminator", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "ElectraTokenizer"} -------------------------------------------------------------------------------- /production/ptn_2_deployment-guardrail/src/inference_nsmc.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import logging 4 | import torch 5 | from torch import nn 6 | from transformers import ElectraConfig 7 | from transformers import ElectraModel, AutoTokenizer, ElectraTokenizer, ElectraForSequenceClassification 8 | 9 | logging.basicConfig( 10 | level=logging.INFO, 11 | format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', 12 | handlers=[ 13 | logging.FileHandler(filename='tmp.log'), 14 | logging.StreamHandler(sys.stdout) 15 | ] 16 | ) 17 | logger = logging.getLogger(__name__) 18 | 19 | 
max_seq_length = 128 20 | classes = ['Neg', 'Pos'] 21 | 22 | tokenizer = AutoTokenizer.from_pretrained("daekeun-ml/koelectra-small-v3-nsmc") 23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | 26 | def model_fn(model_path=None): 27 | #### 28 | # If you have your own trained model 29 | # Huggingface pre-trained model: 'monologg/koelectra-small-v3-discriminator' 30 | #### 31 | #config = ElectraConfig.from_json_file(f'{model_path}/config.json') 32 | #model = ElectraForSequenceClassification.from_pretrained(f'{model_path}/model.pth', config=config) 33 | 34 | # Download model from the Huggingface hub 35 | model = ElectraForSequenceClassification.from_pretrained('daekeun-ml/koelectra-small-v3-nsmc') 36 | model.to(device) 37 | return model 38 | 39 | 40 | def input_fn(input_data, content_type="application/jsonlines"): 41 | data_str = input_data.decode("utf-8") 42 | jsonlines = data_str.split("\n") 43 | transformed_inputs = [] 44 | 45 | for jsonline in jsonlines: 46 | text = json.loads(jsonline)["text"][0] 47 | logger.info("input text: {}".format(text)) 48 | encode_plus_token = tokenizer.encode_plus( 49 | text, 50 | max_length=max_seq_length, 51 | add_special_tokens=True, 52 | return_token_type_ids=False, 53 | padding="max_length", 54 | return_attention_mask=True, 55 | return_tensors="pt", 56 | truncation=True, 57 | ) 58 | transformed_inputs.append(encode_plus_token) 59 | 60 | return transformed_inputs 61 | 62 | 63 | def predict_fn(transformed_inputs, model): 64 | predicted_classes = [] 65 | 66 | for data in transformed_inputs: 67 | data = data.to(device) 68 | output = model(**data) 69 | 70 | softmax_fn = nn.Softmax(dim=1) 71 | softmax_output = softmax_fn(output[0]) 72 | _, prediction = torch.max(softmax_output, dim=1) 73 | 74 | predicted_class_idx = prediction.item() 75 | predicted_class = classes[predicted_class_idx] 76 | score = softmax_output[0][predicted_class_idx] 77 | logger.info("predicted_class: {}".format(predicted_class)) 78 | 79 | prediction_dict = {} 80 | prediction_dict["predicted_label"] = predicted_class 81 | prediction_dict['score'] = score.cpu().detach().numpy().tolist() 82 | 83 | jsonline = json.dumps(prediction_dict) 84 | logger.info("jsonline: {}".format(jsonline)) 85 | predicted_classes.append(jsonline) 86 | 87 | predicted_classes_jsonlines = "\n".join(predicted_classes) 88 | return predicted_classes_jsonlines 89 | 90 | 91 | def output_fn(outputs, accept="application/jsonlines"): 92 | return outputs, accept -------------------------------------------------------------------------------- /production/ptn_2_deployment-guardrail/src/inference_nsmc_error.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import logging 4 | import torch 5 | from torch import nn 6 | from transformers import ElectraConfig 7 | from transformers import ElectraModel, AutoTokenizer, ElectraTokenizer, ElectraForSequenceClassification 8 | 9 | logging.basicConfig( 10 | level=logging.INFO, 11 | format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', 12 | handlers=[ 13 | logging.FileHandler(filename='tmp.log'), 14 | logging.StreamHandler(sys.stdout) 15 | ] 16 | ) 17 | logger = logging.getLogger(__name__) 18 | 19 | max_seq_length = 128 20 | classes = ['Neg', 'Pos'] 21 | 22 | tokenizer = AutoTokenizer.from_pretrained("daekeun-ml/koelectra-small-v3-nsmc") 23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | 26 | def model_fn(model_path=None): 27 | #### 28 | # If you have 
your own trained model 29 | # Huggingface pre-trained model: 'monologg/koelectra-small-v3-discriminator' 30 | #### 31 | #config = ElectraConfig.from_json_file(f'{model_path}/config.json') 32 | #model = ElectraForSequenceClassification.from_pretrained(f'{model_path}/model.pth', config=config) 33 | 34 | # Download model from the Huggingface hub 35 | model = ElectraForSequenceClassification.from_pretrained('daekeun-ml/koelectra-small-v3-nsmc') 36 | model.to(device) 37 | return model 38 | 39 | 40 | def input_fn(input_data, content_type="application/jsonlines"): 41 | data_str = input_data.decode("utf-8") 42 | jsonlines = data_str.split("\n") 43 | transformed_inputs = [] 44 | 45 | for jsonline in jsonlines: 46 | text = json.loads(jsonline)["text"][0] 47 | logger.info("input text: {}".format(text)) 48 | encode_plus_token = tokenizer.encode_plus( 49 | text, 50 | max_length=max_seq_length, 51 | add_special_tokens=True, 52 | return_token_type_ids=False, 53 | padding="max_length", 54 | return_attention_mask=True, 55 | return_tensors="pt", 56 | truncation=True, 57 | ) 58 | transformed_inputs.append(encode_plus_token) 59 | 60 | return transformed_inputs 61 | 62 | 63 | def predict_fn(transformed_inputs, model): 64 | predicted_classes = [] 65 | 66 | a = '12' 67 | b = '34' 68 | a/b 69 | 70 | for data in transformed_inputs: 71 | data = data.to(device) 72 | output = model(**data) 73 | 74 | softmax_fn = nn.Softmax(dim=1) 75 | softmax_output = softmax_fn(output[0]) 76 | _, prediction = torch.max(softmax_output, dim=1) 77 | 78 | predicted_class_idx = prediction.item() 79 | predicted_class = classes[predicted_class_idx] 80 | score = softmax_output[0][predicted_class_idx] 81 | logger.info("predicted_class: {}".format(predicted_class)) 82 | 83 | prediction_dict = {} 84 | prediction_dict["predicted_label"] = predicted_class 85 | prediction_dict['score'] = score.cpu().detach().numpy().tolist() 86 | 87 | jsonline = json.dumps(prediction_dict) 88 | logger.info("jsonline: {}".format(jsonline)) 89 | predicted_classes.append(jsonline) 90 | 91 | predicted_classes_jsonlines = "\n".join(predicted_classes) 92 | return predicted_classes_jsonlines 93 | 94 | 95 | def output_fn(outputs, accept="application/jsonlines"): 96 | return outputs, accept -------------------------------------------------------------------------------- /production/ptn_2_deployment-guardrail/src/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.12.3 -------------------------------------------------------------------------------- /production/ptn_2_deployment-guardrail/src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from sagemaker.s3 import S3Uploader 4 | from sagemaker.predictor import Predictor 5 | from sagemaker.serializers import JSONLinesSerializer 6 | from sagemaker.deserializers import JSONLinesDeserializer 7 | 8 | 9 | def print_outputs(outputs): 10 | jsonlines = outputs.split('\n') 11 | 12 | for jsonline in jsonlines: 13 | print(json.loads(jsonline)) 14 | 15 | 16 | def prepare_model_artifact(model_path, 17 | model_artifact_path='model_and_code', 18 | model_artifact_name='model.tar.gz'): 19 | 20 | os.system(f'rm -rf {model_artifact_path}') 21 | os.system(f'mkdir {model_artifact_path} {model_artifact_path}/code') 22 | os.system(f'cp {model_path}/*.* {model_artifact_path}') 23 | os.system(f'cp ./src/* {model_artifact_path}/code') 24 | os.system(f'tar cvzf {model_artifact_name} -C {model_artifact_path}/ 
.') 25 | os.system(f'rm -rf {model_artifact_path}') 26 | print(f'Archived {model_artifact_name}') 27 | 28 | 29 | def upload_model_artifact_to_s3(model_variant, model_path, bucket, prefix, 30 | model_artifact_path='model_and_code', 31 | model_artifact_name='model.tar.gz'): 32 | prepare_model_artifact(model_path, model_artifact_path, model_artifact_name) 33 | model_s3_uri = S3Uploader.upload(model_artifact_name,'s3://{}/{}/{}'.format(bucket, prefix, model_variant)) 34 | os.system(f'rm -rf {model_artifact_name}') 35 | print(f'Uploaded to {model_s3_uri}') 36 | 37 | return model_s3_uri 38 | 39 | 40 | class NLPPredictor(Predictor): 41 | def __init__(self, endpoint_name, sagemaker_session): 42 | super().__init__( 43 | endpoint_name, 44 | sagemaker_session=sagemaker_session, 45 | serializer=JSONLinesSerializer(), 46 | deserializer=JSONLinesDeserializer(), 47 | ) -------------------------------------------------------------------------------- /production/ptn_3_ml-pipeline/README.md: -------------------------------------------------------------------------------- 1 | # End-to-end ML pipelines 2 | 3 | ## Overview 4 | SageMaker Pipelines is a managed service that makes it easy and convenient to run ML pipelines and CI/CD pipelines. New features have been added continuously since the service launched at re:Invent 2020; in particular, the Lambda Step released in August 2021 makes serverless tasks such as deploying a model to a hosted endpoint straightforward (a pipeline assembly sketch appears after deploy_step.py below). Caching also lets you quickly experiment with only the changed parameters instead of restarting the entire pipeline from scratch. 5 | 6 | ![ptn_3_01](../../images/production/ptn_3_01_kor.png) -------------------------------------------------------------------------------- /production/ptn_3_ml-pipeline/pipeline_src/evaluate.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import json 4 | import logging 5 | import pathlib 6 | import tarfile 7 | import os 8 | 9 | import numpy as np 10 | import pandas as pd 11 | 12 | 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.INFO) 15 | logger.addHandler(logging.StreamHandler()) 16 | 17 | if __name__ == "__main__": 18 | logger.debug("Starting evaluation.") 19 | model_path = "/opt/ml/processing/model/model.tar.gz" 20 | with tarfile.open(model_path) as tar: 21 | tar.extractall(path="./hf_model") 22 | 23 | logger.debug(os.listdir("./hf_model")) 24 | 25 | with open("./hf_model/evaluation.json") as f: 26 | eval_result = json.load(f) 27 | 28 | logger.debug(eval_result) 29 | output_dir = "/opt/ml/processing/evaluation" 30 | pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) 31 | 32 | evaluation_path = f"{output_dir}/evaluation.json" 33 | with open(evaluation_path, "w") as f: 34 | f.write(json.dumps(eval_result)) 35 | -------------------------------------------------------------------------------- /production/ptn_3_ml-pipeline/pipeline_src/processing_hf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | import argparse 5 | import logging 6 | import numpy as np 7 | from datasets import load_dataset 8 | from transformers import ElectraTokenizer 9 | 10 | def install(package): 11 | subprocess.check_call([sys.executable, "-m", "pip", "install", package]) 12 | 13 | 14 | def parser_args(train_notebook=False): 15 | parser = argparse.ArgumentParser() 16 | 17 | parser.add_argument("--model_id", default='daekeun-ml/koelectra-small-v3-nsmc') 18 | parser.add_argument("--tokenizer_id", default='daekeun-ml/koelectra-small-v3-nsmc') 19 | parser.add_argument("--dataset_name", type=str, default='nsmc') 20 | 
parser.add_argument("--small_subset_for_debug", type=bool, default=True) 21 | parser.add_argument("--train_dir", type=str, default='/opt/ml/processing/train') 22 | parser.add_argument("--validation_dir", type=str, default='/opt/ml/processing/validation') 23 | parser.add_argument("--test_dir", type=str, default='/opt/ml/processing/test') 24 | 25 | if train_notebook: 26 | args = parser.parse_args([]) 27 | else: 28 | args = parser.parse_args() 29 | return args 30 | 31 | 32 | if __name__ == "__main__": 33 | args = parser_args() 34 | 35 | # download tokenizer 36 | tokenizer = ElectraTokenizer.from_pretrained(args.model_id) 37 | 38 | # tokenizer helper function 39 | def tokenize(batch): 40 | return tokenizer(batch['document'], padding='max_length', max_length=128, truncation=True) 41 | 42 | # load dataset 43 | train_dataset, test_dataset = load_dataset(args.dataset_name, split=["train", "test"]) 44 | 45 | if args.small_subset_for_debug: 46 | train_dataset = train_dataset.shuffle().select(range(1000)) 47 | test_dataset = test_dataset.shuffle().select(range(1000)) 48 | 49 | # tokenize dataset 50 | train_dataset = train_dataset.map(tokenize, batched=True) 51 | test_dataset = test_dataset.map(tokenize, batched=True) 52 | 53 | # set format for pytorch 54 | train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label']) 55 | test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label']) 56 | 57 | args.train_dir = './train' 58 | args.test_dir = './test' 59 | 60 | train_dataset.save_to_disk(args.train_dir) 61 | test_dataset.save_to_disk(args.test_dir) -------------------------------------------------------------------------------- /production/ptn_3_ml-pipeline/pipeline_src/processing_sklearn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | import argparse 5 | import logging 6 | import numpy as np 7 | 8 | def install(package): 9 | subprocess.check_call([sys.executable, "-m", "pip", "install", package]) 10 | 11 | 12 | def parser_args(train_notebook=False): 13 | parser = argparse.ArgumentParser() 14 | 15 | parser.add_argument("--model_id", default='daekeun-ml/koelectra-small-v3-nsmc') 16 | parser.add_argument("--tokenizer_id", default='daekeun-ml/koelectra-small-v3-nsmc') 17 | parser.add_argument("--dataset_name", type=str, default='nsmc') 18 | parser.add_argument("--small_subset_for_debug", type=bool, default=True) 19 | parser.add_argument("--train_dir", type=str, default='/opt/ml/processing/train') 20 | parser.add_argument("--validation_dir", type=str, default='/opt/ml/processing/validation') 21 | parser.add_argument("--test_dir", type=str, default='/opt/ml/processing/test') 22 | parser.add_argument("--transformers_version", type=str, default='4.11.0') 23 | parser.add_argument("--pytorch_version", type=str, default='1.9.0') 24 | 25 | if train_notebook: 26 | args = parser.parse_args([]) 27 | else: 28 | args = parser.parse_args() 29 | return args 30 | 31 | 32 | if __name__ == "__main__": 33 | args = parser_args() 34 | 35 | install(f"torch=={args.pytorch_version}") 36 | install(f"transformers=={args.transformers_version}") 37 | install("datasets[s3]") 38 | 39 | from datasets import load_dataset 40 | from transformers import ElectraTokenizer 41 | 42 | # download tokenizer 43 | tokenizer = ElectraTokenizer.from_pretrained(args.model_id) 44 | 45 | # tokenizer helper function 46 | def tokenize(batch): 47 | return tokenizer(batch['document'], padding='max_length', max_length=128, 
truncation=True) 48 | 49 | # load dataset 50 | train_dataset, test_dataset = load_dataset(args.dataset_name, split=["train", "test"]) 51 | 52 | if args.small_subset_for_debug: 53 | train_dataset = train_dataset.shuffle().select(range(1000)) 54 | test_dataset = test_dataset.shuffle().select(range(1000)) 55 | 56 | # tokenize dataset 57 | train_dataset = train_dataset.map(tokenize, batched=True) 58 | test_dataset = test_dataset.map(tokenize, batched=True) 59 | 60 | # set format for pytorch 61 | train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label']) 62 | test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label']) 63 | 64 | train_dataset.save_to_disk(args.train_dir) 65 | test_dataset.save_to_disk(args.test_dir) -------------------------------------------------------------------------------- /production/ptn_3_ml-pipeline/pipeline_src/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | import logging 5 | import sys 6 | import numpy as np 7 | import torch 8 | from datasets import load_from_disk, load_metric 9 | from transformers import ( 10 | ElectraModel, ElectraTokenizer, ElectraForSequenceClassification, Trainer, TrainingArguments, set_seed 11 | ) 12 | from transformers.trainer_utils import get_last_checkpoint 13 | from sklearn.metrics import accuracy_score, precision_recall_fscore_support 14 | 15 | 16 | def parser_args(train_notebook=False): 17 | parser = argparse.ArgumentParser() 18 | 19 | # Default Setting 20 | parser.add_argument("--epochs", type=int, default=1) 21 | parser.add_argument("--seed", type=int, default=42) 22 | parser.add_argument("--train_batch_size", type=int, default=32) 23 | parser.add_argument("--eval_batch_size", type=int, default=128) 24 | parser.add_argument("--warmup_steps", type=int, default=0) 25 | parser.add_argument("--learning_rate", type=str, default=5e-5) 26 | parser.add_argument("--disable_tqdm", type=bool, default=True) 27 | parser.add_argument("--fp16", type=bool, default=True) 28 | parser.add_argument("--tokenizer_id", type=str, default='monologg/koelectra-small-v3-discriminator') 29 | parser.add_argument("--model_id", type=str, default='monologg/koelectra-small-v3-discriminator') 30 | 31 | # SageMaker Container environment 32 | parser.add_argument("--output_data_dir", type=str, default=os.environ["SM_OUTPUT_DATA_DIR"]) 33 | parser.add_argument("--model_dir", type=str, default=os.environ["SM_MODEL_DIR"]) 34 | parser.add_argument("--n_gpus", type=str, default=os.environ["SM_NUM_GPUS"]) 35 | parser.add_argument("--train_dir", type=str, default=os.environ["SM_CHANNEL_TRAIN"]) 36 | parser.add_argument("--test_dir", type=str, default=os.environ["SM_CHANNEL_TEST"]) 37 | parser.add_argument('--chkpt_dir', type=str, default='/opt/ml/checkpoints') 38 | 39 | if train_notebook: 40 | args = parser.parse_args([]) 41 | else: 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | # compute metrics function for binary classification 47 | def compute_metrics(pred): 48 | labels = pred.label_ids 49 | preds = pred.predictions.argmax(-1) 50 | precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="binary") 51 | acc = accuracy_score(labels, preds) 52 | return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall} 53 | 54 | 55 | def main(): 56 | 57 | is_sm_container = True 58 | if os.environ.get('SM_CURRENT_HOST') is None: 59 | is_sm_container = False 60 | train_dir = 'datasets/train' 61 | 
test_dir = 'datasets/test' 62 | model_dir = 'model' 63 | output_data_dir = 'data' 64 | src_dir = '/'.join(os.getcwd().split('/')[:-1]) 65 | #src_dir = os.getcwd() 66 | os.environ['SM_MODEL_DIR'] = f'{src_dir}/{model_dir}' 67 | os.environ['SM_OUTPUT_DATA_DIR'] = f'{src_dir}/{output_data_dir}' 68 | os.environ['SM_NUM_GPUS'] = str(1) 69 | os.environ['SM_CHANNEL_TRAIN'] = f'{src_dir}/{train_dir}' 70 | os.environ['SM_CHANNEL_TEST'] = f'{src_dir}/{test_dir}' 71 | 72 | # Set up logging 73 | logging.basicConfig( 74 | level=logging.INFO, 75 | format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', 76 | handlers=[logging.StreamHandler(sys.stdout)] 77 | ) 78 | logger = logging.getLogger(__name__) 79 | args = parser_args() 80 | n_gpus = torch.cuda.device_count() 81 | 82 | if os.getenv("SM_NUM_GPUS")==None: 83 | print("Explicitly specifying the number of GPUs.") 84 | os.environ["GPU_NUM_DEVICES"] = n_gpus 85 | else: 86 | os.environ["GPU_NUM_DEVICES"] = os.environ["SM_NUM_GPUS"] 87 | 88 | logger.info("***** Arguments *****") 89 | logger.info(''.join(f'{k}={v}\n' for k, v in vars(args).items())) 90 | 91 | os.makedirs(args.model_dir, exist_ok=True) 92 | os.makedirs(args.output_data_dir, exist_ok=True) 93 | 94 | # load datasets 95 | train_dataset = load_from_disk(args.train_dir) 96 | test_dataset = load_from_disk(args.test_dir) 97 | 98 | logger.info(f" loaded train_dataset length is: {len(train_dataset)}") 99 | logger.info(f" loaded test_dataset length is: {len(test_dataset)}") 100 | logger.info(train_dataset[0]) 101 | 102 | # download tokenizer 103 | tokenizer = ElectraTokenizer.from_pretrained(args.tokenizer_id) 104 | 105 | # tokenizer helper function 106 | def tokenize(batch): 107 | return tokenizer(batch['document'], padding='max_length', truncation=True) 108 | 109 | # tokenize dataset 110 | train_dataset = train_dataset.map(tokenize, batched=True) 111 | test_dataset = test_dataset.map(tokenize, batched=True) 112 | 113 | # set format for pytorch 114 | train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label']) 115 | test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label']) 116 | 117 | # Prepare model labels - useful in inference API 118 | labels = train_dataset.features["label"].names 119 | num_labels = len(labels) 120 | label2id, id2label = dict(), dict() 121 | for i, label in enumerate(labels): 122 | label2id[label] = str(i) 123 | id2label[str(i)] = label 124 | 125 | # Set seed before initializing model 126 | set_seed(args.seed) 127 | 128 | # Download pytorch model 129 | model = ElectraForSequenceClassification.from_pretrained( 130 | args.model_id, num_labels=num_labels, label2id=label2id, id2label=id2label 131 | ) 132 | 133 | # define training args 134 | training_args = TrainingArguments( 135 | output_dir=args.chkpt_dir, 136 | overwrite_output_dir=True if get_last_checkpoint(args.chkpt_dir) is not None else False, 137 | num_train_epochs=args.epochs, 138 | per_device_train_batch_size=args.train_batch_size, 139 | per_device_eval_batch_size=args.eval_batch_size, 140 | warmup_steps=args.warmup_steps, 141 | fp16=args.fp16, 142 | evaluation_strategy="epoch", 143 | save_strategy="epoch", 144 | save_total_limit=1, 145 | disable_tqdm=args.disable_tqdm, 146 | logging_dir=f"{args.output_data_dir}/logs", 147 | learning_rate=float(args.learning_rate), 148 | load_best_model_at_end=True, 149 | metric_for_best_model="accuracy", 150 | ) 151 | 152 | # create Trainer instance 153 | trainer = Trainer( 154 | model=model, 155 | args=training_args, 156 | 
train_dataset=train_dataset, 157 | eval_dataset=test_dataset, 158 | tokenizer=tokenizer, 159 | compute_metrics=compute_metrics 160 | ) 161 | 162 | # train model 163 | if get_last_checkpoint(args.chkpt_dir) is not None: 164 | logger.info("***** Continue Training *****") 165 | last_checkpoint = get_last_checkpoint(args.chkpt_dir) 166 | trainer.train(resume_from_checkpoint=last_checkpoint) 167 | else: 168 | trainer.train() 169 | 170 | # evaluate model 171 | eval_result = trainer.evaluate(eval_dataset=test_dataset) 172 | 173 | # writes eval result to file which can be accessed later in s3 ouput 174 | with open(os.path.join(args.model_dir, "evaluation.json"), "w") as writer: 175 | logger.info(f"***** Evaluation results *****") 176 | logger.info(eval_result) 177 | writer.write(json.dumps(eval_result)) 178 | 179 | # with open(os.path.join(args.output_data_dir, "eval_results.txt"), "w") as writer: 180 | # print("***** Evaluation results *****") 181 | # for key, value in sorted(eval_result.items()): 182 | # writer.write(f"{key} = {value}\n") 183 | # logger.info(f"{key} = {value}\n") 184 | 185 | # Saves the model to s3 uses os.environ["SM_MODEL_DIR"] to make sure checkpointing works 186 | trainer.save_model(args.model_dir) 187 | 188 | 189 | def _mp_fn(index): 190 | # For xla_spawn (TPUs) 191 | main() 192 | 193 | if __name__ == "__main__": 194 | main() -------------------------------------------------------------------------------- /production/ptn_3_ml-pipeline/pipeline_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sm-model-serving-patterns/b2c086a1ad512c05ed4195e9a9cde3e7a595bd39/production/ptn_3_ml-pipeline/pipeline_utils/__init__.py -------------------------------------------------------------------------------- /production/ptn_3_ml-pipeline/pipeline_utils/deploy_handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | import boto3 3 | 4 | 5 | def lambda_handler(event, context): 6 | sm_client = boto3.client("sagemaker") 7 | 8 | # The name of the model created in the Pipeline CreateModelStep 9 | model_name = event["model_name"] 10 | model_package_arn = event["model_package_arn"] 11 | endpoint_config_name = event["endpoint_config_name"] 12 | endpoint_name = event["endpoint_name"] 13 | endpoint_instance_type = event["endpoint_instance_type"] 14 | role = event["role"] 15 | container = {"ModelPackageName": model_package_arn} 16 | 17 | create_model_respose = sm_client.create_model(ModelName=model_name, ExecutionRoleArn=role, Containers=[container]) 18 | 19 | create_endpoint_config_response = sm_client.create_endpoint_config( 20 | EndpointConfigName=endpoint_config_name, 21 | ProductionVariants=[ 22 | { 23 | "InstanceType": endpoint_instance_type, 24 | "InitialVariantWeight": 1, 25 | "InitialInstanceCount": 1, 26 | "ModelName": model_name, 27 | "VariantName": "AllTraffic", 28 | } 29 | ], 30 | ) 31 | 32 | create_endpoint_response = sm_client.create_endpoint( 33 | EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name 34 | ) 35 | 36 | return { 37 | "statusCode": 200, 38 | "body": json.dumps("Created Endpoint!"), 39 | "other_key": "example_value", 40 | } -------------------------------------------------------------------------------- /production/ptn_3_ml-pipeline/pipeline_utils/deploy_step.py: -------------------------------------------------------------------------------- 1 | import time 2 | import json 3 | import boto3 4 | import os 5 | 
from sagemaker.workflow.step_collections import StepCollection 6 | from sagemaker.workflow._utils import _RegisterModelStep 7 | from sagemaker.lambda_helper import Lambda 8 | from sagemaker.workflow.lambda_step import ( 9 | LambdaStep, 10 | LambdaOutput, 11 | LambdaOutputTypeEnum, 12 | ) 13 | 14 | 15 | class ModelDeployment(StepCollection): 16 | """custom step to deploy model as SageMaker Endpoint""" 17 | 18 | def __init__( 19 | self, 20 | model_name: str, 21 | registered_model: _RegisterModelStep, 22 | endpoint_instance_type, 23 | sagemaker_endpoint_role: str, 24 | autoscaling_policy: dict = None, 25 | ): 26 | self.name = "sagemaker-pipelines-model-deployment" 27 | self.model_package_arn = registered_model.properties.ModelPackageArn 28 | self.lambda_role = self.create_lambda_role(self.name) 29 | # Use the current time to define unique names for the resources created 30 | current_time = time.strftime("%m-%d-%H-%M-%S", time.localtime()) 31 | 32 | steps = [] 33 | lambda_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "deploy_handler.py") 34 | # Lambda helper class can be used to create the Lambda function 35 | self.func = Lambda( 36 | function_name=f"{self.name}-{current_time}", 37 | execution_role_arn=self.lambda_role, 38 | script=lambda_file, 39 | handler="deploy_handler.lambda_handler", 40 | timeout=600, 41 | memory_size=256, 42 | ) 43 | 44 | # The dictionary retured by the Lambda function is captured by LambdaOutput, each key in the dictionary corresponds to a 45 | # LambdaOutput 46 | 47 | output_param_1 = LambdaOutput(output_name="statusCode", output_type=LambdaOutputTypeEnum.String) 48 | output_param_2 = LambdaOutput(output_name="body", output_type=LambdaOutputTypeEnum.String) 49 | output_param_3 = LambdaOutput(output_name="other_key", output_type=LambdaOutputTypeEnum.String) 50 | 51 | # The inputs provided to the Lambda function can be retrieved via the `event` object within the `lambda_handler` function 52 | # in the Lambda 53 | lambda_step = LambdaStep( 54 | name="HuggingFaceModelDeployment", 55 | lambda_func=self.func, 56 | inputs={ 57 | "model_name": model_name + current_time, 58 | "endpoint_config_name": model_name + current_time, 59 | "endpoint_name": model_name, 60 | "endpoint_instance_type": endpoint_instance_type, 61 | "model_package_arn": self.model_package_arn, 62 | "role": sagemaker_endpoint_role, 63 | }, 64 | outputs=[output_param_1, output_param_2, output_param_3], 65 | ) 66 | steps.append(lambda_step) 67 | self.steps = steps 68 | 69 | def create_lambda_role(self, name): 70 | """ 71 | Create a role for the Lambda function 72 | """ 73 | role_name = f"{name}-role" 74 | iam = boto3.client("iam") 75 | try: 76 | response = iam.create_role( 77 | RoleName=role_name, 78 | AssumeRolePolicyDocument=json.dumps( 79 | { 80 | "Version": "2012-10-17", 81 | "Statement": [ 82 | { 83 | "Effect": "Allow", 84 | "Principal": {"Service": "lambda.amazonaws.com"}, 85 | "Action": "sts:AssumeRole", 86 | } 87 | ], 88 | } 89 | ), 90 | Description="Role for Lambda to call ECS Fargate task", 91 | ) 92 | 93 | role_arn = response["Role"]["Arn"] 94 | 95 | response = iam.attach_role_policy( 96 | RoleName=role_name, PolicyArn="arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" 97 | ) 98 | 99 | response = iam.attach_role_policy( 100 | PolicyArn="arn:aws:iam::aws:policy/AmazonSageMakerFullAccess", RoleName=role_name 101 | ) 102 | 103 | return role_arn 104 | 105 | except iam.exceptions.EntityAlreadyExistsException: 106 | print(f"Using ARN from existing role: {role_name}") 107 | 
response = iam.get_role(RoleName=role_name) 108 | return response["Role"]["Arn"] 109 | --------------------------------------------------------------------------------
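To show how the `ModelDeployment` step collection above is typically consumed, here is a hypothetical pipeline assembly sketch (referenced from the ptn_3 README earlier). `step_process`, `step_train`, and `step_register` are assumed to be defined earlier in the pipeline notebook, with `step_register.steps[0]` exposing the internal `_RegisterModelStep`; the model name and instance type are placeholders.

```python
# Hypothetical sketch: wire ModelDeployment into a SageMaker pipeline definition.
# Assumes step_process, step_train and step_register (RegisterModel) already exist.
import sagemaker
from sagemaker.workflow.pipeline import Pipeline
from pipeline_utils.deploy_step import ModelDeployment

role = sagemaker.get_execution_role()

deployment = ModelDeployment(
    model_name="kornlp-nsmc",                        # placeholder model/endpoint name
    registered_model=step_register.steps[0],         # assumed: the _RegisterModelStep inside RegisterModel
    endpoint_instance_type="ml.m5.xlarge",
    sagemaker_endpoint_role=role,                    # assumed: reuse the pipeline role for hosting
)

pipeline = Pipeline(
    name="kornlp-nsmc-pipeline",
    steps=[step_process, step_train, step_register, *deployment.steps],
)
pipeline.upsert(role_arn=role)    # create or update the pipeline definition
execution = pipeline.start()      # the LambdaStep inside ModelDeployment creates the endpoint
```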