├── workshop_3_mlops ├── utils │ ├── __init__.py │ ├── deploy_handler.py │ └── deploy_step.py ├── imgs │ ├── overview.png │ └── pipeline.png ├── README.md ├── scripts │ ├── evaluate.py │ ├── preprocessing.py │ └── train.py └── lab_1_sagemaker_pipeline.ipynb ├── imgs ├── cover.png ├── setup1.png ├── setup10.png ├── setup11.png ├── setup2.png ├── setup3.png ├── setup4.png ├── setup5.png ├── setup6.png ├── setup7.png ├── setup8.png ├── setup9.png ├── sm-endpoint.png ├── emotion-widget.png ├── batch-transform-v2.png ├── hf-inference-toolkit.png └── sagemaker-platform.png ├── workshop_2_going_production ├── imgs │ ├── sm-endpoint.png │ ├── scaling-options.jpeg │ ├── batch-transform-v2.png │ ├── autoscaling-endpoint.png │ ├── hf-inference-toolkit.png │ └── model-monitoring-dashboard.png ├── README.md ├── lab1_real_time_endpoint.ipynb ├── lab2_batch_transform.ipynb └── lab3_autoscaling.ipynb ├── workshop_1_getting_started_with_amazon_sagemaker ├── README.md ├── scripts │ └── train.py └── lab_3_spot_instances.ipynb ├── LICENSE ├── .gitignore └── README.md /workshop_3_mlops/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /imgs/cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/cover.png -------------------------------------------------------------------------------- /imgs/setup1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup1.png -------------------------------------------------------------------------------- /imgs/setup10.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup10.png -------------------------------------------------------------------------------- /imgs/setup11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup11.png -------------------------------------------------------------------------------- /imgs/setup2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup2.png -------------------------------------------------------------------------------- /imgs/setup3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup3.png -------------------------------------------------------------------------------- /imgs/setup4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup4.png -------------------------------------------------------------------------------- /imgs/setup5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup5.png -------------------------------------------------------------------------------- /imgs/setup6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup6.png 
-------------------------------------------------------------------------------- /imgs/setup7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup7.png -------------------------------------------------------------------------------- /imgs/setup8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup8.png -------------------------------------------------------------------------------- /imgs/setup9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/setup9.png -------------------------------------------------------------------------------- /imgs/sm-endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/sm-endpoint.png -------------------------------------------------------------------------------- /imgs/emotion-widget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/emotion-widget.png -------------------------------------------------------------------------------- /imgs/batch-transform-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/batch-transform-v2.png -------------------------------------------------------------------------------- /imgs/hf-inference-toolkit.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/hf-inference-toolkit.png -------------------------------------------------------------------------------- /imgs/sagemaker-platform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/imgs/sagemaker-platform.png -------------------------------------------------------------------------------- /workshop_3_mlops/imgs/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/workshop_3_mlops/imgs/overview.png -------------------------------------------------------------------------------- /workshop_3_mlops/imgs/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/workshop_3_mlops/imgs/pipeline.png -------------------------------------------------------------------------------- /workshop_2_going_production/imgs/sm-endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/workshop_2_going_production/imgs/sm-endpoint.png -------------------------------------------------------------------------------- /workshop_2_going_production/imgs/scaling-options.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/workshop_2_going_production/imgs/scaling-options.jpeg 
-------------------------------------------------------------------------------- /workshop_2_going_production/imgs/batch-transform-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/workshop_2_going_production/imgs/batch-transform-v2.png -------------------------------------------------------------------------------- /workshop_2_going_production/imgs/autoscaling-endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/workshop_2_going_production/imgs/autoscaling-endpoint.png -------------------------------------------------------------------------------- /workshop_2_going_production/imgs/hf-inference-toolkit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/workshop_2_going_production/imgs/hf-inference-toolkit.png -------------------------------------------------------------------------------- /workshop_2_going_production/imgs/model-monitoring-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/huggingface-sagemaker-workshop-series/main/workshop_2_going_production/imgs/model-monitoring-dashboard.png -------------------------------------------------------------------------------- /workshop_3_mlops/README.md: -------------------------------------------------------------------------------- 1 | ## Workshop 3: **MLOps: End-to-End Hugging Face Transformers with the Hub & SageMaker Pipelines** 2 | 3 | In Workshop 3 learn how to build an End-to-End MLOps Pipeline for Hugging Face Transformers from training to production using Amazon 
SageMaker. 4 | 5 | We are going to create an automated SageMaker Pipeline which: 6 | 7 | - processes a dataset and uploads it to s3 8 | - fine-tunes a Hugging Face Transformer model with the processed dataset 9 | - evaluates the model against an evaluation set 10 | - deploys the model if it performed better than a certain threshold 11 | 12 | --- 13 | 14 | 🧑🏻‍💻 Code Assets: [https://github.com/philschmid/huggingface-sagemaker-workshop-series/tree/main/workshop_3_mlops](https://github.com/philschmid/huggingface-sagemaker-workshop-series/tree/main/workshop_3_mlops) 15 | 16 | 📺 Youtube: [https://www.youtube.com/watch?v=XGyt8gGwbY0&list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ&index=7](https://www.youtube.com/watch?v=XGyt8gGwbY0&list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ&index=7) 17 | -------------------------------------------------------------------------------- /workshop_2_going_production/README.md: -------------------------------------------------------------------------------- 1 | ## Workshop 2: **Going Production: Deploying, Scaling & Monitoring Hugging Face Transformer models with Amazon SageMaker** 2 | 3 | In Workshop 2 Learn how to use Amazon SageMaker to deploy, scale & monitor your Hugging Face Transformer models for production workloads. 4 | 5 | - Run Batch Prediction on JSON files using a Batch Transform 6 | - Deploy a model from [hf.co/models](https://hf.co/models) to Amazon SageMaker and run predictions 7 | - Configure autoscaling for the deployed model 8 | - Monitor the model to see avg. 
"""Evaluation step: publish the metrics stored inside the trained model archive.

This script does not compute any metric itself. It unpacks ``model.tar.gz``,
reads the ``evaluation.json`` found at the archive root and writes the same
JSON document to the processing job's evaluation output directory.
"""

import subprocess
import sys
import json
import logging
import pathlib
import tarfile
import os

import numpy as np
import pandas as pd


logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())


def main(
    model_path="/opt/ml/processing/model/model.tar.gz",
    extract_dir="./hf_model",
    output_dir="/opt/ml/processing/evaluation",
):
    """Copy ``evaluation.json`` from the model archive to the output directory.

    Args:
        model_path: Path of the gzipped tar archive that contains the model.
        extract_dir: Directory the archive is unpacked into.
        output_dir: Directory that receives ``evaluation.json``; created if missing.

    Returns:
        The parsed content of ``evaluation.json``.

    Raises:
        FileNotFoundError: If the archive or the ``evaluation.json`` inside it
            does not exist.
    """
    # The logger level is INFO, so these messages must be INFO to show up at all
    # (they were previously logged at DEBUG and therefore silently dropped).
    logger.info("Starting evaluation.")

    with tarfile.open(model_path) as tar:
        if hasattr(tarfile, "data_filter"):
            # Newer Python versions: refuse members that would escape extract_dir.
            tar.extractall(path=extract_dir, filter="data")
        else:
            tar.extractall(path=extract_dir)

    logger.info("Extracted files: %s", os.listdir(extract_dir))

    with open(os.path.join(extract_dir, "evaluation.json")) as f:
        eval_result = json.load(f)
    logger.info("Evaluation result: %s", eval_result)

    target_dir = pathlib.Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    with open(target_dir / "evaluation.json", "w") as f:
        json.dump(eval_result, f)

    return eval_result


if __name__ == "__main__":
    main()
with Amazon SageMaker: Training your first NLP Transformer model with Hugging Face and deploying it** 2 | 3 | In Workshop 1 you will learn how to use Amazon SageMaker to train a Hugging Face Transformer model and deploy it afterwards. 4 | 5 | - Prepare and upload a test dataset to S3 6 | - Prepare a fine-tuning script to be used with Amazon SageMaker Training jobs 7 | - Launch a training job and store the trained model into S3 8 | - Deploy the model after successful training 9 | 10 | --- 11 | 12 | 🧑🏻‍💻 Code Assets: [https://github.com/philschmid/huggingface-sagemaker-workshop-series/tree/main/workshop_1_getting_started_with_amazon_sagemaker](https://github.com/philschmid/huggingface-sagemaker-workshop-series/tree/main/workshop_1_getting_started_with_amazon_sagemaker) 13 | 14 | 📺 Youtube: [https://www.youtube.com/watch?v=pYqjCzoyWyo&list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ&index=6&t=5s&ab_channel=HuggingFace](https://www.youtube.com/watch?v=pYqjCzoyWyo&list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ&index=6&t=5s&ab_channel=HuggingFace) 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Philipp Schmid 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
def lambda_handler(event, context):
    """Deploy a registered model package as a SageMaker endpoint.

    Issues three SageMaker API calls in order: ``create_model`` (from the model
    package), ``create_endpoint_config`` (a single ``AllTraffic`` variant with
    one instance) and ``create_endpoint``. Errors raised by the boto3 client
    are not caught here.

    Args:
        event: Dict with the keys ``model_name``, ``model_package_arn``,
            ``endpoint_config_name``, ``endpoint_name``,
            ``endpoint_instance_type`` and ``role``.
        context: Lambda context object; not used.

    Returns:
        Dict with the keys ``statusCode``, ``body`` and ``other_key``.

    Raises:
        KeyError: If one of the required keys is missing from ``event``.
    """
    sm_client = boto3.client("sagemaker")

    # Read every input up front so a missing key fails before any resource is created.
    model_name = event["model_name"]
    model_package_arn = event["model_package_arn"]
    endpoint_config_name = event["endpoint_config_name"]
    endpoint_name = event["endpoint_name"]
    endpoint_instance_type = event["endpoint_instance_type"]
    role = event["role"]

    # The model is defined by the registered model package rather than by an image/artifact pair.
    sm_client.create_model(
        ModelName=model_name,
        ExecutionRoleArn=role,
        Containers=[{"ModelPackageName": model_package_arn}],
    )

    sm_client.create_endpoint_config(
        EndpointConfigName=endpoint_config_name,
        ProductionVariants=[
            {
                "InstanceType": endpoint_instance_type,
                "InitialVariantWeight": 1,
                "InitialInstanceCount": 1,
                "ModelName": model_name,
                "VariantName": "AllTraffic",
            }
        ],
    )

    sm_client.create_endpoint(EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name)

    return {
        "statusCode": 200,
        "body": json.dumps("Created Endpoint!"),
        "other_key": "example_value",
    }
train_dataset.rename_column("label", "labels") 57 | train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"]) 58 | 59 | test_dataset = test_dataset.rename_column("label", "labels") 60 | test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"]) 61 | 62 | train_dataset.save_to_disk("/opt/ml/processing/train") 63 | test_dataset.save_to_disk("/opt/ml/processing/test") 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
class ModelDeployment(StepCollection):
    """Custom step collection that deploys a registered model as a SageMaker endpoint.

    The collection holds a single ``LambdaStep``. Its Lambda function is built
    from ``deploy_handler.py`` (located next to this file) and receives the
    model package ARN together with the resource names to create.
    """

    def __init__(
        self,
        model_name: str,
        registered_model: _RegisterModelStep,
        endpoint_instance_type,
        sagemaker_endpoint_role: str,
        autoscaling_policy: dict = None,
    ):
        """Create the Lambda execution role and define the deployment step.

        Args:
            model_name: Used unchanged as the endpoint name; with a timestamp
                appended it names the model and the endpoint configuration.
            registered_model: Register-model step whose ``ModelPackageArn``
                property identifies the model package to deploy.
            endpoint_instance_type: Instance type passed to the Lambda for the endpoint.
            sagemaker_endpoint_role: Role ARN passed to the Lambda as ``role``.
            autoscaling_policy: Accepted but currently not used by this class.
        """
        self.name = "sagemaker-pipelines-model-deployment"
        self.model_package_arn = registered_model.properties.ModelPackageArn
        self.lambda_role = self.create_lambda_role(self.name)
        # Use the current time to define unique names for the resources created
        current_time = time.strftime("%m-%d-%H-%M-%S", time.localtime())

        lambda_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "deploy_handler.py")
        # Lambda helper class can be used to create the Lambda function
        self.func = Lambda(
            function_name=f"{self.name}-{current_time}",
            execution_role_arn=self.lambda_role,
            script=lambda_file,
            handler="deploy_handler.lambda_handler",
            timeout=600,
            memory_size=256,
        )

        # The dictionary returned by the Lambda function is captured by LambdaOutput;
        # each key in the dictionary corresponds to one LambdaOutput.
        lambda_outputs = [
            LambdaOutput(output_name=key, output_type=LambdaOutputTypeEnum.String)
            for key in ("statusCode", "body", "other_key")
        ]

        # The inputs provided to the Lambda function can be retrieved via the `event`
        # object within the `lambda_handler` function in the Lambda.
        lambda_step = LambdaStep(
            name="HuggingFaceModelDeployment",
            lambda_func=self.func,
            inputs={
                "model_name": model_name + current_time,
                "endpoint_config_name": model_name + current_time,
                "endpoint_name": model_name,
                "endpoint_instance_type": endpoint_instance_type,
                "model_package_arn": self.model_package_arn,
                "role": sagemaker_endpoint_role,
            },
            outputs=lambda_outputs,
        )
        self.steps = [lambda_step]

    def create_lambda_role(self, name):
        """Create the IAM role for the Lambda function, or reuse an existing one.

        The role is named ``f"{name}-role"`` and can be assumed by the Lambda
        service. A newly created role gets the ``AWSLambdaBasicExecutionRole``
        and ``AmazonSageMakerFullAccess`` managed policies attached; an
        existing role is returned as it is.

        Args:
            name: Prefix of the role name.

        Returns:
            The ARN of the role.
        """
        role_name = f"{name}-role"
        iam = boto3.client("iam")
        assume_role_policy = {
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Effect": "Allow",
                    "Principal": {"Service": "lambda.amazonaws.com"},
                    "Action": "sts:AssumeRole",
                }
            ],
        }

        # Only create_role can raise EntityAlreadyExistsException, so keep the try narrow.
        try:
            response = iam.create_role(
                RoleName=role_name,
                AssumeRolePolicyDocument=json.dumps(assume_role_policy),
                Description="Role for Lambda to deploy SageMaker endpoints",
            )
        except iam.exceptions.EntityAlreadyExistsException:
            print(f"Using ARN from existing role: {role_name}")
            return iam.get_role(RoleName=role_name)["Role"]["Arn"]

        for policy_arn in (
            "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole",
            "arn:aws:iam::aws:policy/AmazonSageMakerFullAccess",
        ):
            iam.attach_role_policy(RoleName=role_name, PolicyArn=policy_arn)

        return response["Role"]["Arn"]
19 | parser.add_argument("--epochs", type=int, default=3) 20 | parser.add_argument("--train_batch_size", type=int, default=32) 21 | parser.add_argument("--eval_batch_size", type=int, default=64) 22 | parser.add_argument("--warmup_steps", type=int, default=500) 23 | parser.add_argument("--model_id", type=str) 24 | parser.add_argument("--learning_rate", type=str, default=5e-5) 25 | parser.add_argument("--fp16", type=bool, default=True) 26 | 27 | # Data, model, and output directories 28 | parser.add_argument("--output_data_dir", type=str, default=os.environ["SM_OUTPUT_DATA_DIR"]) 29 | parser.add_argument("--output_dir", type=str, default=os.environ["SM_MODEL_DIR"]) 30 | parser.add_argument("--n_gpus", type=str, default=os.environ["SM_NUM_GPUS"]) 31 | parser.add_argument("--training_dir", type=str, default=os.environ["SM_CHANNEL_TRAIN"]) 32 | parser.add_argument("--test_dir", type=str, default=os.environ["SM_CHANNEL_TEST"]) 33 | 34 | args, _ = parser.parse_known_args() 35 | 36 | # Set up logging 37 | logger = logging.getLogger(__name__) 38 | 39 | logging.basicConfig( 40 | level=logging.getLevelName("INFO"), 41 | handlers=[logging.StreamHandler(sys.stdout)], 42 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 43 | ) 44 | 45 | # load datasets 46 | train_dataset = load_from_disk(args.training_dir) 47 | test_dataset = load_from_disk(args.test_dir) 48 | 49 | logger.info(f" loaded train_dataset length is: {len(train_dataset)}") 50 | logger.info(f" loaded test_dataset length is: {len(test_dataset)}") 51 | 52 | metric = load_metric("accuracy") 53 | 54 | def compute_metrics(eval_pred): 55 | predictions, labels = eval_pred 56 | predictions = np.argmax(predictions, axis=1) 57 | return metric.compute(predictions=predictions, references=labels) 58 | 59 | # Prepare model labels - useful in inference API 60 | labels = train_dataset.features["labels"].names 61 | num_labels = len(labels) 62 | label2id, id2label = dict(), dict() 63 | for i, label in enumerate(labels): 64 
| label2id[label] = str(i) 65 | id2label[str(i)] = label 66 | 67 | # download model from model hub 68 | model = AutoModelForSequenceClassification.from_pretrained( 69 | args.model_id, num_labels=num_labels, label2id=label2id, id2label=id2label 70 | ) 71 | tokenizer = AutoTokenizer.from_pretrained(args.model_id) 72 | 73 | # define training args 74 | training_args = TrainingArguments( 75 | output_dir=args.output_dir, 76 | overwrite_output_dir=True if get_last_checkpoint(args.output_dir) is not None else False, 77 | num_train_epochs=int(args.epochs), 78 | per_device_train_batch_size=int(args.train_batch_size), 79 | per_device_eval_batch_size=int(args.eval_batch_size), 80 | warmup_steps=args.warmup_steps, 81 | fp16=True, 82 | evaluation_strategy="epoch", 83 | save_strategy="epoch", 84 | save_total_limit=2, 85 | logging_dir=f"{args.output_data_dir}/logs", 86 | learning_rate=float(args.learning_rate), 87 | load_best_model_at_end=True, 88 | metric_for_best_model="accuracy", 89 | ) 90 | 91 | # create Trainer instance 92 | trainer = Trainer( 93 | model=model, 94 | args=training_args, 95 | compute_metrics=compute_metrics, 96 | train_dataset=train_dataset, 97 | eval_dataset=test_dataset, 98 | tokenizer=tokenizer, 99 | ) 100 | 101 | # train model 102 | if get_last_checkpoint(args.output_dir) is not None: 103 | logger.info("***** continue training *****") 104 | last_checkpoint = get_last_checkpoint(args.output_dir) 105 | trainer.train(resume_from_checkpoint=last_checkpoint) 106 | else: 107 | trainer.train() 108 | 109 | # evaluate model 110 | eval_result = trainer.evaluate(eval_dataset=test_dataset) 111 | 112 | # writes eval result to file which can be accessed later in s3 ouput 113 | with open(os.path.join(os.environ["SM_MODEL_DIR"], "evaluation.json"), "w") as writer: 114 | print(f"***** Eval results *****") 115 | print(eval_result) 116 | writer.write(json.dumps(eval_result)) 117 | 118 | # Saves the model to s3 uses os.environ["SM_MODEL_DIR"] to make sure checkpointing works 
"""Fine-tune a sequence-classification model inside a SageMaker training job.

Hyperparameters arrive as command-line arguments; data, model and output
locations default to the ``SM_*`` environment variables.
"""

import argparse
import logging
import os
import random
import sys

import numpy as np
import torch
from datasets import load_from_disk, load_metric
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from transformers.trainer_utils import get_last_checkpoint


def _str2bool(value):
    """Parse a command-line boolean such as ``True``/``False``.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    ``--fp16 False`` would evaluate to ``True``. The empty string maps to
    ``False``, which is what ``bool("")`` returned before.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    # hyperparameters sent by the client are passed as command-line arguments to the script.
    parser.add_argument("--epochs", type=int, default=3)
    parser.add_argument("--train_batch_size", type=int, default=32)
    parser.add_argument("--eval_batch_size", type=int, default=64)
    parser.add_argument("--warmup_steps", type=int, default=500)
    parser.add_argument("--model_id", type=str)
    parser.add_argument("--learning_rate", type=float, default=5e-5)
    parser.add_argument("--fp16", type=_str2bool, default=True)

    # Data, model, and output directories
    parser.add_argument("--output_data_dir", type=str, default=os.environ["SM_OUTPUT_DATA_DIR"])
    parser.add_argument("--output_dir", type=str, default=os.environ["SM_MODEL_DIR"])
    parser.add_argument("--n_gpus", type=str, default=os.environ["SM_NUM_GPUS"])
    parser.add_argument("--training_dir", type=str, default=os.environ["SM_CHANNEL_TRAIN"])
    parser.add_argument("--test_dir", type=str, default=os.environ["SM_CHANNEL_TEST"])

    args, _ = parser.parse_known_args()

    # Set up logging
    logger = logging.getLogger(__name__)

    logging.basicConfig(
        level=logging.getLevelName("INFO"),
        handlers=[logging.StreamHandler(sys.stdout)],
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    # load datasets
    train_dataset = load_from_disk(args.training_dir)
    test_dataset = load_from_disk(args.test_dir)

    logger.info(f" loaded train_dataset length is: {len(train_dataset)}")
    logger.info(f" loaded test_dataset length is: {len(test_dataset)}")

    metric = load_metric("accuracy")

    def compute_metrics(eval_pred):
        """Return the accuracy of the arg-max predictions against the labels."""
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=1)
        return metric.compute(predictions=predictions, references=labels)

    # Prepare model labels - useful in inference API
    labels = train_dataset.features["labels"].names
    num_labels = len(labels)
    label2id, id2label = dict(), dict()
    for i, label in enumerate(labels):
        label2id[label] = str(i)
        id2label[str(i)] = label

    # download model from model hub
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_id, num_labels=num_labels, label2id=label2id, id2label=id2label
    )
    tokenizer = AutoTokenizer.from_pretrained(args.model_id)

    # Look for an existing checkpoint once and reuse the answer below.
    last_checkpoint = get_last_checkpoint(args.output_dir)

    # define training args
    training_args = TrainingArguments(
        output_dir=args.output_dir,
        overwrite_output_dir=last_checkpoint is not None,
        num_train_epochs=args.epochs,
        per_device_train_batch_size=args.train_batch_size,
        per_device_eval_batch_size=args.eval_batch_size,
        warmup_steps=args.warmup_steps,
        fp16=args.fp16,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=2,
        logging_dir=f"{args.output_data_dir}/logs",
        learning_rate=args.learning_rate,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
    )

    # create Trainer instance
    trainer = Trainer(
        model=model,
        args=training_args,
        compute_metrics=compute_metrics,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
    )

    # train model
    if last_checkpoint is not None:
        logger.info("***** continue training *****")
        trainer.train(resume_from_checkpoint=last_checkpoint)
    else:
        trainer.train()

    # evaluate model
    eval_result = trainer.evaluate(eval_dataset=test_dataset)

    # writes eval result to file which can be accessed later in s3 output
    with open(os.path.join(args.output_data_dir, "eval_results.txt"), "w") as writer:
        print("***** Eval results *****")
        for key, value in sorted(eval_result.items()):
            writer.write(f"{key} = {value}\n")
            print(f"{key} = {value}\n")

    # Saves the model to s3 uses os.environ["SM_MODEL_DIR"] to make sure checkpointing works
    trainer.save_model(os.environ["SM_MODEL_DIR"])
8 | 9 | In October and November, we held a workshop series on “**Enterprise-Scale NLP with Hugging Face & Amazon SageMaker**”. This workshop series consisted of 3 parts and covered: 10 | 11 | - Getting Started with Amazon SageMaker: Training your first NLP Transformer model with Hugging Face and deploying it 12 | - Going Production: Deploying, Scaling & Monitoring Hugging Face Transformer models with Amazon SageMaker 13 | - MLOps: End-to-End Hugging Face Transformers with the Hub & SageMaker Pipelines 14 | 15 | We recorded all of them so you are now able to do the whole workshop series on your own to enhance your Hugging Face Transformers skills with Amazon SageMaker or vice-versa. 16 | 17 | Below you can find all the details of each workshop and how to get started. 18 | 19 | 🧑🏻‍💻 Github Repository: https://github.com/philschmid/huggingface-sagemaker-workshop-series 20 | 21 | 📺  Youtube Playlist: [https://www.youtube.com/playlist?list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ](https://www.youtube.com/playlist?list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ) 22 | 23 | *Note: The Repository contains instructions on how to access a temporary AWS account, which was available during the workshops. To be able to do the workshop now you need to use your own or your company's AWS account.* 24 | 25 | In addition to the workshop, we created a fully dedicated [Documentation](https://huggingface.co/docs/sagemaker/main) for Hugging Face and Amazon SageMaker, which includes all the necessary information. 26 | If the workshop is not enough for you, we also have 15 additional getting-started samples in our [Notebook GitHub repository](https://github.com/huggingface/notebooks/tree/master/sagemaker), which cover topics like distributed training or leveraging [Spot Instances](https://aws.amazon.com/ec2/spot/?nc1=h_ls&cards.sort-by=item.additionalFields.startDateTime&cards.sort-order=asc). 
27 | 28 | 29 | ## Workshop 1: **Getting Started with Amazon SageMaker: Training your first NLP Transformer model with Hugging Face and deploying it** 30 | 31 | In Workshop 1 you will learn how to use Amazon SageMaker to train a Hugging Face Transformer model and deploy it afterwards. 32 | 33 | - Prepare and upload a test dataset to S3 34 | - Prepare a fine-tuning script to be used with Amazon SageMaker Training jobs 35 | - Launch a training job and store the trained model into S3 36 | - Deploy the model after successful training 37 | 38 | --- 39 | 40 | 🧑🏻‍💻 Code Assets: [https://github.com/philschmid/huggingface-sagemaker-workshop-series/tree/main/workshop_1_getting_started_with_amazon_sagemaker](https://github.com/philschmid/huggingface-sagemaker-workshop-series/tree/main/workshop_1_getting_started_with_amazon_sagemaker) 41 | 42 | 📺 Youtube: [https://www.youtube.com/watch?v=pYqjCzoyWyo&list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ&index=6&t=5s&ab_channel=HuggingFace](https://www.youtube.com/watch?v=pYqjCzoyWyo&list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ&index=6&t=5s&ab_channel=HuggingFace) 43 | 44 | ## Workshop 2: **Going Production: Deploying, Scaling & Monitoring Hugging Face Transformer models with Amazon SageMaker** 45 | 46 | In Workshop 2 learn how to use Amazon SageMaker to deploy, scale & monitor your Hugging Face Transformer models for production workloads. 47 | 48 | - Run Batch Prediction on JSON files using a Batch Transform 49 | - Deploy a model from [hf.co/models](https://hf.co/models) to Amazon SageMaker and run predictions 50 | - Configure autoscaling for the deployed model 51 | - Monitor the model to see avg. 
request time and set up alarms 52 | 53 | --- 54 | 55 | 🧑🏻‍💻 Code Assets: [https://github.com/philschmid/huggingface-sagemaker-workshop-series/tree/main/workshop_2_going_production](https://github.com/philschmid/huggingface-sagemaker-workshop-series/tree/main/workshop_2_going_production) 56 | 57 | 📺 Youtube: [https://www.youtube.com/watch?v=whwlIEITXoY&list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ&index=6&t=61s](https://www.youtube.com/watch?v=whwlIEITXoY&list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ&index=6&t=61s) 58 | 59 | ## Workshop 3: **MLOps: End-to-End Hugging Face Transformers with the Hub & SageMaker Pipelines** 60 | 61 | In Workshop 3 learn how to build an End-to-End MLOps Pipeline for Hugging Face Transformers from training to production using Amazon SageMaker. 62 | 63 | We are going to create an automated SageMaker Pipeline which: 64 | 65 | - processes a dataset and uploads it to s3 66 | - fine-tunes a Hugging Face Transformer model with the processed dataset 67 | - evaluates the model against an evaluation set 68 | - deploys the model if it performed better than a certain threshold 69 | 70 | --- 71 | 72 | 🧑🏻‍💻 Code Assets: [https://github.com/philschmid/huggingface-sagemaker-workshop-series/tree/main/workshop_3_mlops](https://github.com/philschmid/huggingface-sagemaker-workshop-series/tree/main/workshop_3_mlops) 73 | 74 | 📺 Youtube: [https://www.youtube.com/watch?v=XGyt8gGwbY0&list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ&index=7](https://www.youtube.com/watch?v=XGyt8gGwbY0&list=PLo2EIpI_JMQtPhGR5Eo2Ab0_Vb89XfhDJ&index=7) 75 | 76 | # Access Workshop AWS Account 77 | 78 | For this workshop you’ll get access to a temporary AWS Account already pre-configured with Amazon SageMaker Notebook Instances. Follow the steps in this section to login to your AWS Account and download the workshop material. 79 | 80 | 81 | ### 1. 
To get started navigate to - https://dashboard.eventengine.run/login 82 | 83 | ![setup1](./imgs/setup1.png) 84 | 85 | Click on Accept Terms & Login 86 | 87 | ### 2. Click on Email One-Time Password (OTP) (Allow for up to 2 mins to receive the passcode) 88 | 89 | ![setup2](./imgs/setup2.png) 90 | 91 | ### 3. Provide your email address 92 | 93 | ![setup3](./imgs/setup3.png) 94 | 95 | ### 4. Enter your OTP code 96 | 97 | ![setup4](./imgs/setup4.png) 98 | 99 | ### 5. Click on AWS Console 100 | 101 | ![setup5](./imgs/setup5.png) 102 | 103 | ### 6. Click on Open AWS Console 104 | 105 | ![setup6](./imgs/setup6.png) 106 | 107 | ### 7. In the AWS Console click on Amazon SageMaker 108 | 109 | ![setup7](./imgs/setup7.png) 110 | 111 | ### 8. Click on Notebook and then on Notebook instances 112 | 113 | ![setup8](./imgs/setup8.png) 114 | 115 | ### 9. Create a new Notebook instance 116 | 117 | ![setup9](./imgs/setup9.png) 118 | 119 | ### 10. Configure Notebook instances 120 | 121 | * Make sure to increase the Volume Size of the Notebook if you want to work with big models and datasets 122 | * Add your IAM_Role with permissions to run your SageMaker Training And Inference Jobs 123 | * Add the Workshop Github Repository to the Notebook to preload the notebooks: `https://github.com/philschmid/huggingface-sagemaker-workshop-series.git` 124 | 125 | ![setup10](./imgs/setup10.png) 126 | 127 | 128 | ### 11. Open the lab you want to do, select the right kernel, and have fun! 
129 | 130 | Open the workshop you want to do (`workshop_1_getting_started_with_amazon_sagemaker/`) and select the pytorch kernel 131 | 132 | ![setup11](./imgs/setup11.png) 133 | 134 | -------------------------------------------------------------------------------- /workshop_2_going_production/lab1_real_time_endpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "00d2464c", 6 | "metadata": {}, 7 | "source": [ 8 | "# Huggingface Sagemaker-sdk - Deploy 🤗 Transformers for inference\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "a8ce3cec", 14 | "metadata": {}, 15 | "source": [ 16 | "Welcome to this getting started guide, we will use the new Hugging Face Inference DLCs and Amazon SageMaker Python SDK to deploy a transformer model for inference. \n", 17 | "In this example we directly deploy one of the 10 000+ Hugging Face Transformers from the [Hub](https://huggingface.co/models) to Amazon SageMaker for Inference." 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "b7272df2", 23 | "metadata": {}, 24 | "source": [ 25 | "## API - [SageMaker Hugging Face Inference Toolkit](https://github.com/aws/sagemaker-huggingface-inference-toolkit)\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "id": "6daeacf3", 31 | "metadata": {}, 32 | "source": [ 33 | "Using the `transformers pipelines`, we designed an API, which makes it easy for you to benefit from all `pipelines` features. The API is oriented at the API of the [🤗 Accelerated Inference API](https://api-inference.huggingface.co/docs/python/html/detailed_parameters.html), meaning your inputs need to be defined in the `inputs` key and if you want additional supported `pipelines` parameters you can add them in the `parameters` key. Below you can find examples for requests. 
\n", 34 | "\n", 35 | "**text-classification request body**\n", 36 | "```python\n", 37 | "{\n", 38 | "\t\"inputs\": \"Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days.\"\n", 39 | "}\n", 40 | "```\n", 41 | "**question-answering request body**\n", 42 | "```python\n", 43 | "{\n", 44 | "\t\"inputs\": {\n", 45 | "\t\t\"question\": \"What is used for inference?\",\n", 46 | "\t\t\"context\": \"My Name is Philipp and I live in Nuremberg. This model is used with sagemaker for inference.\"\n", 47 | "\t}\n", 48 | "}\n", 49 | "```\n", 50 | "**zero-shot classification request body**\n", 51 | "```python\n", 52 | "{\n", 53 | "\t\"inputs\": \"Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!\",\n", 54 | "\t\"parameters\": {\n", 55 | "\t\t\"candidate_labels\": [\n", 56 | "\t\t\t\"refund\",\n", 57 | "\t\t\t\"legal\",\n", 58 | "\t\t\t\"faq\"\n", 59 | "\t\t]\n", 60 | "\t}\n", 61 | "}\n", 62 | "```" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "id": "03d984c3", 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "!pip install \"sagemaker>=2.48.0\" --upgrade" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "53db7eca", 78 | "metadata": {}, 79 | "source": [ 80 | "## Deploy one of the 10 000+ Hugging Face Transformers to Amazon SageMaker for Inference\n", 81 | "\n", 82 | "_This is an experimental feature, where the model will be loaded after the endpoint is created. This could lead to errors, e.g. models > 10GB_\n", 83 | "\n", 84 | "To deploy a model directly from the Hub to SageMaker we need to define 2 environment variables when creating the `HuggingFaceModel` . We need to define:\n", 85 | "\n", 86 | "- `HF_MODEL_ID`: defines the model id, which will be automatically loaded from [huggingface.co/models](http://huggingface.co/models) when creating or SageMaker Endpoint. 
The 🤗 Hub provides +10 000 models all available through this environment variable.\n", 87 | "- `HF_TASK`: defines the task for the used 🤗 Transformers pipeline. A full list of tasks can be find [here](https://huggingface.co/transformers/main_classes/pipelines.html)." 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "8c03085f", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "from sagemaker.huggingface import HuggingFaceModel\n", 98 | "import sagemaker \n", 99 | "\n", 100 | "role = sagemaker.get_execution_role()\n", 101 | "\n", 102 | "# Hub Model configuration. https://huggingface.co/models\n", 103 | "hub = {\n", 104 | " 'HF_MODEL_ID':'distilbert-base-uncased-distilled-squad', # model_id from hf.co/models\n", 105 | " 'HF_TASK':'question-answering' # NLP task you want to use for predictions\n", 106 | "}\n", 107 | "\n", 108 | "# create Hugging Face Model Class\n", 109 | "huggingface_model = HuggingFaceModel(\n", 110 | " env=hub,\n", 111 | " role=role, # iam role with permissions to create an Endpoint\n", 112 | " transformers_version=\"4.6\", # transformers version used\n", 113 | " pytorch_version=\"1.7\", # pytorch version used\n", 114 | " py_version=\"py36\", # python version of the DLC\n", 115 | ")" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "id": "1704b52b", 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# deploy model to SageMaker Inference\n", 126 | "predictor = huggingface_model.deploy(\n", 127 | " initial_instance_count=1,\n", 128 | " instance_type=\"ml.m5.xlarge\"\n", 129 | ")" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "id": "9a84c3f8", 135 | "metadata": {}, 136 | "source": [ 137 | "**Architecture**\n", 138 | "\n", 139 | "The [Hugging Face Inference Toolkit for SageMaker](https://github.com/aws/sagemaker-huggingface-inference-toolkit) is an open-source library for serving Hugging Face transformer models on SageMaker. 
It utilizes the SageMaker Inference Toolkit for starting up the model server, which is responsible for handling inference requests. The SageMaker Inference Toolkit uses [Multi Model Server (MMS)](https://github.com/awslabs/multi-model-server) for serving ML models. It bootstraps MMS with a configuration and settings that make it compatible with SageMaker and allow you to adjust important performance parameters, such as the number of workers per model, depending on the needs of your scenario.\n", 140 | "\n", 141 | "![](./imgs/hf-inference-toolkit.png)\n", 142 | "\n", 143 | "**Deploying a model using SageMaker hosting services is a three-step process:**\n", 144 | "\n", 145 | "1. **Create a model in SageMaker** —By creating a model, you tell SageMaker where it can find the model components. \n", 146 | "2. **Create an endpoint configuration for an HTTPS endpoint** —You specify the name of one or more models in production variants and the ML compute instances that you want SageMaker to launch to host each production variant.\n", 147 | "3. **Create an HTTPS endpoint** —Provide the endpoint configuration to SageMaker. The service launches the ML compute instances and deploys the model or models as specified in the configuration\n", 148 | "\n", 149 | "![](./imgs/sm-endpoint.png)\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "id": "11a1a1cb", 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# example request, you always need to define \"inputs\"\n", 160 | "data = {\n", 161 | "\"inputs\": {\n", 162 | " \"question\": \"What is used for inference?\",\n", 163 | " \"context\": \"My Name is Philipp and I live in Nuremberg. 
This model is used with sagemaker for inference.\"\n", 164 | " }\n", 165 | "}\n", 166 | "\n", 167 | "# request\n", 168 | "predictor.predict(data)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "id": "901166ce", 174 | "metadata": {}, 175 | "source": [ 176 | "## clean up" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 8, 182 | "id": "6b1bf7e0", 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "# delete endpoint\n", 187 | "predictor.delete_endpoint()" 188 | ] 189 | } 190 | ], 191 | "metadata": { 192 | "interpreter": { 193 | "hash": "c281c456f1b8161c8906f4af2c08ed2c40c50136979eaae69688b01f70e9f4a9" 194 | }, 195 | "kernelspec": { 196 | "display_name": "conda_pytorch_latest_p36", 197 | "language": "python", 198 | "name": "conda_pytorch_latest_p36" 199 | }, 200 | "language_info": { 201 | "name": "python", 202 | "version": "" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 5 207 | } 208 | -------------------------------------------------------------------------------- /workshop_2_going_production/lab2_batch_transform.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "661cc03e", 6 | "metadata": {}, 7 | "source": [ 8 | "# Huggingface Sagemaker-sdk - Run a batch transform inference job with 🤗 Transformers\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "d5bbff57", 14 | "metadata": {}, 15 | "source": [ 16 | "1. [Introduction](#Introduction) \n", 17 | "2. [Run Batch Transform after training a model](#Run-Batch-Transform-after-training-a-model) \n", 18 | "3. 
[Run Batch Transform Inference Job with a fine-tuned model using `jsonl`](#Run-Batch-Transform-Inference-Job-with-a-fine-tuned-model-using-jsonl) \n", 19 | "\n", 20 | "Welcome to this getting started guide, we will use the new Hugging Face Inference DLCs and Amazon SageMaker Python SDK to deploy two transformer models for inference. \n", 21 | "In the first example we deploy a trained Hugging Face Transformer model on to SageMaker for inference.\n", 22 | "In the second example we directly deploy one of the 10 000+ Hugging Face Transformers from the [Hub](https://huggingface.co/models) to Amazon SageMaker for Inference." 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "1b1db259", 28 | "metadata": {}, 29 | "source": [ 30 | "## Run Batch Transform after training a model \n", 31 | "_not included in the notebook_\n", 32 | "\n", 33 | "After you train a model, you can use [Amazon SageMaker Batch Transform](https://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-batch.html) to perform inferences with the model. In Batch Transform you provide your inference data as an S3 URI and SageMaker will take care of downloading it, running the prediction and uploading the results afterwards to S3 again. 
You can find more documentation for Batch Transform [here](https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform.html)\n", 34 | "\n", 35 | "If you trained the model using the **HuggingFace estimator**, you can invoke `transformer()` method to create a transform job for a model based on the training job.\n", 36 | "\n", 37 | "```python\n", 38 | "# create the Estimator\n", 39 | "huggingface_estimator = HuggingFace(\n", 40 | "....\n", 41 | ")\n", 42 | "\n", 43 | "# run training\n", 44 | "huggingface_estimator.fit(data)\n", 45 | "\n", 46 | "# create Transformers based on training\n", 47 | "batch_job = huggingface_estimator.transformer(\n", 48 | " instance_count=1,\n", 49 | " instance_type='ml.c5.2xlarge',\n", 50 | " strategy='SingleRecord')\n", 51 | "\n", 52 | "# run transform job\n", 53 | "batch_job.transform(\n", 54 | " data='s3://s3-uri-to-batch-data',\n", 55 | " content_type='application/json', \n", 56 | " split_type='Line')\n", 57 | "```\n", 58 | "For more details about what can be specified here, see [API docs](https://sagemaker.readthedocs.io/en/stable/overview.html#sagemaker-batch-transform).\n", 59 | "\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "69ac88e8", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "!pip install \"sagemaker>=2.48.0\" \"datasets==1.11\" --upgrade" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "id": "cb8a47e4", 75 | "metadata": {}, 76 | "source": [ 77 | "# Run Batch Transform Inference Job with a fine-tuned model using `jsonl`" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "id": "7d004b76", 83 | "metadata": {}, 84 | "source": [ 85 | "## Data Pre-Processing\n", 86 | "\n", 87 | "In this example we are using the provided `tweet_data.csv` as dataset. The `csv` contains ~1800 tweets about different airlines. The `csv` contains 1 column `\"inputs\"` with the tweets. To use this `csv` we need to convert it into a `jsonl` file and upload it to s3. 
Due to the complex structure of text, only `jsonl` files are supported for batch transform. As pre-processing we are removing the `@` in the beginning of the tweet to get the names/identities correct.\n", 88 | "\n", 89 | "_**NOTE**: While preprocessing you need to make sure that your `inputs` fit the `max_length`._" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "id": "38ee4aaa", 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "import csv\n", 100 | "import json\n", 101 | "import sagemaker\n", 102 | "from sagemaker.s3 import S3Uploader,s3_path_join\n", 103 | "\n", 104 | "# get the s3 bucket\n", 105 | "sess = sagemaker.Session()\n", 106 | "role = sagemaker.get_execution_role()\n", 107 | "sagemaker_session_bucket = sess.default_bucket()\n", 108 | "\n", 109 | "# datset files\n", 110 | "dataset_csv_file=\"./data/tweet_data.csv\"\n", 111 | "dataset_jsonl_file=\"./tweet_data.jsonl\"\n", 112 | "\n", 113 | "with open(dataset_csv_file, \"r+\") as infile, open(dataset_jsonl_file, \"w+\") as outfile:\n", 114 | " reader = csv.DictReader(infile)\n", 115 | " for row in reader:\n", 116 | " # remove @\n", 117 | " row[\"inputs\"] = row[\"inputs\"].replace(\"@\",\"\")\n", 118 | " json.dump(row, outfile)\n", 119 | " outfile.write('\\n')\n", 120 | "\n", 121 | " \n", 122 | "# uploads a given file to S3.\n", 123 | "input_s3_path = s3_path_join(\"s3://\",sagemaker_session_bucket,\"batch_transform/input\")\n", 124 | "output_s3_path = s3_path_join(\"s3://\",sagemaker_session_bucket,\"batch_transform/output\")\n", 125 | "s3_file_uri = S3Uploader.upload(dataset_jsonl_file,input_s3_path)\n", 126 | "\n", 127 | "print(f\"{dataset_jsonl_file} uploaded to {s3_file_uri}\")" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "id": "85f5b4f2", 133 | "metadata": {}, 134 | "source": [ 135 | "The created file looks like this\n", 136 | "\n", 137 | "```json\n", 138 | "{\"inputs\": \"VirginAmerica What dhepburn said.\"}\n", 139 | "{\"inputs\": 
\"VirginAmerica plus you've added commercials to the experience... tacky.\"}\n", 140 | "{\"inputs\": \"VirginAmerica I didn't today... Must mean I need to take another trip!\"}\n", 141 | "{\"inputs\": \"VirginAmerica it's really aggressive to blast obnoxious \\\"entertainment\\\"....\"}\n", 142 | "{\"inputs\": \"VirginAmerica and it's a really big bad thing about it\"}\n", 143 | "{\"inputs\": \"VirginAmerica seriously would pay $30 a flight for seats that didn't h....\"}\n", 144 | "{\"inputs\": \"VirginAmerica yes, nearly every time I fly VX this \\u201cear worm\\u201d won\\u2019t go away :)\"}\n", 145 | "{\"inputs\": \"VirginAmerica Really missed a prime opportunity for Men Without ...\"}\n", 146 | "{\"inputs\": \"virginamerica Well, I didn't\\u2026but NOW I DO! :-D\"}\n", 147 | "{\"inputs\": \"VirginAmerica it was amazing, and arrived an hour early. You're too good to me.\"}\n", 148 | "{\"inputs\": \"VirginAmerica did you know that suicide is the second leading cause of death among teens 10-24\"}\n", 149 | "{\"inputs\": \"VirginAmerica I <3 pretty graphics. so much better than minimal iconography. :D\"}\n", 150 | "{\"inputs\": \"VirginAmerica This is such a great deal! Already thinking about my 2nd trip ...\"}\n", 151 | "....\n", 152 | "```" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "id": "a49400b8", 158 | "metadata": {}, 159 | "source": [ 160 | "## Create Inference Transformer to run the batch job\n", 161 | "\n", 162 | "We use the [twitter-roberta-base-sentiment](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment) model running our batch transform job. 
This is a RoBERTa-base model trained on ~58M tweets and finetuned for sentiment analysis with the TweetEval benchmark.\n" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "id": "37897523", 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "from sagemaker.huggingface.model import HuggingFaceModel\n", 173 | "\n", 174 | "# Hub Model configuration. \n", 175 | "hub = {\n", 176 | " 'HF_MODEL_ID':'cardiffnlp/twitter-roberta-base-sentiment',\n", 177 | " 'HF_TASK':'text-classification'\n", 178 | "}\n", 179 | "\n", 180 | "# create Hugging Face Model Class\n", 181 | "huggingface_model = HuggingFaceModel(\n", 182 | " env=hub, # configuration for loading model from Hub\n", 183 | " role=role, # iam role with permissions to create an Endpoint\n", 184 | " transformers_version=\"4.6\", # transformers version used\n", 185 | " pytorch_version=\"1.7\", # pytorch version used\n", 186 | " py_version='py36', # python version used\n", 187 | ")\n", 188 | "\n", 189 | "# create Transformer to run our batch job\n", 190 | "batch_job = huggingface_model.transformer(\n", 191 | " instance_count=1, # number of instances used for running the batch job\n", 192 | " instance_type='ml.g4dn.xlarge',# instance type for the batch job\n", 193 | " output_path=output_s3_path, # we are using the same s3 path to save the output with the input\n", 194 | " strategy='SingleRecord') # How we are sending the \"requests\" to the endpoint\n", 195 | "\n", 196 | "# starts batch transform job and uses s3 data as input\n", 197 | "batch_job.transform(\n", 198 | " data=s3_file_uri, # preprocessed file location on s3 \n", 199 | " content_type='application/json',# mime-type of the file \n", 200 | " split_type='Line') # how the datapoints are split, here lines since it is `.jsonl`" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "id": "60ccea94", 206 | "metadata": {}, 207 | "source": [ 208 | "Use batch transform when you:\n", 209 | "\n", 210 | "* Want to 
get inferences for an entire dataset and index them to serve inferences in real time\n", 211 | "* Don't need a persistent endpoint that applications (for example, web or mobile apps) can call to get inferences\n", 212 | "* Don't need the subsecond latency that SageMaker hosted endpoints provide\n", 213 | "\n", 214 | "You can also use batch transform to preprocess your data before using it to train a new model or generate inferences.\n", 215 | "The following diagram shows the workflow of a batch transform job:\n", 216 | "\n", 217 | "![batch-transform](./imgs/batch-transform-v2.png)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "id": "4079348a", 223 | "metadata": {}, 224 | "source": [ 225 | "## Access Prediction file\n", 226 | "\n", 227 | "After the batch transform job has successfully run, it creates an output file with the same name and the `.out` file extension. For multiple input files, such as `input1.jsonl` and `input2.jsonl`, the output files are named `input1.jsonl.out` and `input2.jsonl.out`. The batch transform job stores the output files in the specified location in Amazon S3, such as `s3://awsexamplebucket/output/`.\n", 228 | "\n", 229 | "It is only possible to merge the input file with the output file. Therefore you need to use the `join_source` parameter in your `Transformer`. 
You can read more [here](https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform-data-processing.html)\n", 230 | "\n" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 4, 236 | "id": "c192b7fd", 237 | "metadata": {}, 238 | "outputs": [ 239 | { 240 | "name": "stdout", 241 | "output_type": "stream", 242 | "text": [ 243 | "[{'label': 'LABEL_1', 'score': 0.766870379447937}, {'label': 'LABEL_0', 'score': 0.8912612199783325}, {'label': 'LABEL_1', 'score': 0.5760677456855774}]\n" 244 | ] 245 | } 246 | ], 247 | "source": [ 248 | "import json\n", 249 | "from sagemaker.s3 import S3Downloader\n", 250 | "from ast import literal_eval\n", 251 | "# creating s3 uri for result file -> input file + .out\n", 252 | "output_file = f\"{dataset_jsonl_file}.out\"\n", 253 | "output_path = s3_path_join(output_s3_path,output_file)\n", 254 | "\n", 255 | "# download file\n", 256 | "S3Downloader.download(output_path,'.')\n", 257 | "\n", 258 | "batch_transform_result = []\n", 259 | "with open(output_file) as f:\n", 260 | " for line in f:\n", 261 | " # converts jsonline array to normal array\n", 262 | " line = \"[\" + line.replace(\"[\",\"\").replace(\"]\",\",\") + \"]\"\n", 263 | " batch_transform_result = literal_eval(line) \n", 264 | " \n", 265 | "# print results \n", 266 | "print(batch_transform_result[:3])" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "id": "64f180ed", 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [] 276 | } 277 | ], 278 | "metadata": { 279 | "interpreter": { 280 | "hash": "c281c456f1b8161c8906f4af2c08ed2c40c50136979eaae69688b01f70e9f4a9" 281 | }, 282 | "kernelspec": { 283 | "display_name": "conda_pytorch_latest_p36", 284 | "language": "python", 285 | "name": "conda_pytorch_latest_p36" 286 | }, 287 | "language_info": { 288 | "codemirror_mode": { 289 | "name": "ipython", 290 | "version": 3 291 | }, 292 | "file_extension": ".py", 293 | "mimetype": "text/x-python", 294 | "name": 
"python", 295 | "nbconvert_exporter": "python", 296 | "pygments_lexer": "ipython3", 297 | "version": "3.6.13" 298 | } 299 | }, 300 | "nbformat": 4, 301 | "nbformat_minor": 5 302 | } 303 | -------------------------------------------------------------------------------- /workshop_2_going_production/lab3_autoscaling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8fde41d2", 6 | "metadata": {}, 7 | "source": [ 8 | "# Going Production: Auto-scale Hugging Face Transformer Endpoints with Amazon SageMaker\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "552a996c", 14 | "metadata": {}, 15 | "source": [ 16 | "Welcome to this getting started guide, we will use the new Hugging Face Inference DLCs and Amazon SageMaker Python SDK to deploy a transformer model for real-time inference. \n", 17 | "In this example we are going to deploy a trained Hugging Face Transformer model on to SageMaker for inference." 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "id": "89f94d7a", 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "!pip install \"sagemaker>=2.66.2\" --upgrade" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "id": "6e4c1bbf", 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/plain": [ 39 | "'2.67.0'" 40 | ] 41 | }, 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "output_type": "execute_result" 45 | } 46 | ], 47 | "source": [ 48 | "import sagemaker\n", 49 | "sagemaker.__version__" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "id": "166feef3", 55 | "metadata": {}, 56 | "source": [ 57 | "## Deploy one of the 15 000+ Hugging Face Transformers to Amazon SageMaker for Inference\n", 58 | "\n", 59 | "To deploy a model directly from the Hub to SageMaker we need to define 2 environment variables when creating the `HuggingFaceModel` . 
We need to define:\n", 60 | "\n", 61 | "- `HF_MODEL_ID`: defines the model id, which will be automatically loaded from [huggingface.co/models](http://huggingface.co/models) when creating our SageMaker Endpoint. The 🤗 Hub provides +15 000 models all available through this environment variable.\n", 62 | "- `HF_TASK`: defines the task for the used 🤗 Transformers pipeline. A full list of tasks can be found [here](https://huggingface.co/transformers/main_classes/pipelines.html)." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 3, 68 | "id": "0119b2eb", 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "from sagemaker.huggingface import HuggingFaceModel\n", 73 | "from uuid import uuid4\n", 74 | "import sagemaker \n", 75 | "\n", 76 | "role = sagemaker.get_execution_role()\n", 77 | "\n", 78 | "# Hub Model configuration. https://huggingface.co/models\n", 79 | "hub = {\n", 80 | " 'HF_MODEL_ID':'yiyanghkust/finbert-tone', # model_id from hf.co/models\n", 81 | " 'HF_TASK':'text-classification' # NLP task you want to use for predictions\n", 82 | "}\n", 83 | "\n", 84 | "# endpoint name\n", 85 | "endpoint_name=f'{hub[\"HF_MODEL_ID\"].split(\"/\")[1]}-{str(uuid4())}' # model and endpoint name\n", 86 | "\n", 87 | "# create Hugging Face Model Class\n", 88 | "huggingface_model = HuggingFaceModel(\n", 89 | " env=hub,\n", 90 | " role=role, # iam role with permissions to create an Endpoint\n", 91 | " name=endpoint_name, # model and endpoint name\n", 92 | " transformers_version=\"4.11\", # transformers version used\n", 93 | " pytorch_version=\"1.9\", # pytorch version used\n", 94 | " py_version=\"py38\", # python version of the DLC\n", 95 | ")\n" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "id": "653ca1e5", 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "------!" 
109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "# deploy model to SageMaker Inference\n", 114 | "predictor = huggingface_model.deploy(\n", 115 | " initial_instance_count=1,\n", 116 | " instance_type=\"ml.c5.large\"\n", 117 | ")\n", 118 | "# get aws region for dashboards\n", 119 | "aws_region = predictor.sagemaker_session.boto_region_name" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "id": "3f35b735", 125 | "metadata": {}, 126 | "source": [ 127 | "**Architecture**\n", 128 | "\n", 129 | "The [Hugging Face Inference Toolkit for SageMaker](https://github.com/aws/sagemaker-huggingface-inference-toolkit) is an open-source library for serving Hugging Face transformer models on SageMaker. It utilizes the SageMaker Inference Toolkit for starting up the model server, which is responsible for handling inference requests. The SageMaker Inference Toolkit uses [Multi Model Server (MMS)](https://github.com/awslabs/multi-model-server) for serving ML models. It bootstraps MMS with a configuration and settings that make it compatible with SageMaker and allow you to adjust important performance parameters, such as the number of workers per model, depending on the needs of your scenario.\n", 130 | "\n", 131 | "![](./imgs/hf-inference-toolkit.png)\n", 132 | "\n", 133 | "**Deploying a model using SageMaker hosting services is a three-step process:**\n", 134 | "\n", 135 | "1. **Create a model in SageMaker** —By creating a model, you tell SageMaker where it can find the model components. \n", 136 | "2. **Create an endpoint configuration for an HTTPS endpoint** —You specify the name of one or more models in production variants and the ML compute instances that you want SageMaker to launch to host each production variant.\n", 137 | "3. **Create an HTTPS endpoint** —Provide the endpoint configuration to SageMaker. 
The service launches the ML compute instances and deploys the model or models as specified in the configuration\n", 138 | "\n", 139 | "![](./imgs/sm-endpoint.png)\n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 5, 145 | "id": "ad5d7aa3", 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "[{'label': 'negative', 'score': 0.9870443940162659}]" 152 | ] 153 | }, 154 | "execution_count": 5, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "# example request, you always need to define \"inputs\"\n", 161 | "data = {\n", 162 | " \"inputs\": \"There is a shortage of capital for project SageMaker. We need extra financing\"\n", 163 | "}\n", 164 | "\n", 165 | "# request\n", 166 | "predictor.predict(data)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 6, 172 | "id": "a3fd586e", 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "for i in range(500):\n", 177 | " predictor.predict(data)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "id": "55f5bb84", 183 | "metadata": {}, 184 | "source": [ 185 | "## Model Monitoring\n", 186 | "\n" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 34, 192 | "id": "8977a0b3", 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "https://console.aws.amazon.com/cloudwatch/home?region=us-east-1#metricsV2:graph=~(metrics~(~(~'AWS*2fSageMaker~'ModelLatency~'EndpointName~'finbert-tone-2d863b7d-3aa4-47c8-a8ca-dd-2021-11-02-07-20-52-760~'VariantName~'AllTraffic))~view~'timeSeries~stacked~false~start~'-PT15M~end~'P0D~region~'us-east-1~stat~'SampleCount~period~30);query=~'*7bAWS*2fSageMaker*2cEndpointName*2cVariantName*7d*20finbert-tone-2d863b7d-3aa4-47c8-a8ca-dd-2021-11-02-07-20-52-760\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | 
"print(f\"https://console.aws.amazon.com/cloudwatch/home?region={aws_region}#metricsV2:graph=~(metrics~(~(~'AWS*2fSageMaker~'ModelLatency~'EndpointName~'{predictor.endpoint_name}~'VariantName~'AllTraffic))~view~'timeSeries~stacked~false~start~'-PT15M~end~'P0D~region~'{aws_region}~stat~'SampleCount~period~30);query=~'*7bAWS*2fSageMaker*2cEndpointName*2cVariantName*7d*20{predictor.endpoint_name}\")\n" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "id": "f8caddfd", 210 | "metadata": {}, 211 | "source": [ 212 | "![model-monitoring-dashboard](./imgs/model-monitoring-dashboard.png)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "id": "a1f65f0e", 218 | "metadata": {}, 219 | "source": [ 220 | "# Auto Scaling your Model\n", 221 | "\n", 222 | "[Amazon SageMaker](https://aws.amazon.com/sagemaker/) is a fully managed service that provides every developer and data scientist with the ability to quickly build, train, and deploy machine learning (ML) models at scale.\n", 223 | "\n", 224 | "Autoscaling is an out-of-the-box feature that monitors your workloads and dynamically adjusts the capacity to maintain steady and predictable performance at the possible lowest cost.\n", 225 | "\n", 226 | "The following diagram is a sample architecture that showcases how a model is served as a endpoint with autoscaling enabled.\n", 227 | "\n", 228 | "\n", 229 | "\n", 230 | "![autoscaling-endpoint](./imgs/autoscaling-endpoint.png)\n", 231 | "\n", 232 | "\n", 233 | "### Reference Blog post [Configuring autoscaling inference endpoints in Amazon SageMaker](https://aws.amazon.com/de/blogs/machine-learning/configuring-autoscaling-inference-endpoints-in-amazon-sagemaker/)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "id": "bc744d93", 239 | "metadata": {}, 240 | "source": [ 241 | "## Configure Autoscaling for our Endpoint\n", 242 | "\n", 243 | "You can define minimum, desired, and maximum number of instances per endpoint and, based on the autoscaling 
configurations, instances are managed dynamically. The following diagram illustrates this architecture. \n", 244 | "\n", 245 | "![scaling-options](./imgs/scaling-options.jpeg)\n", 246 | "\n", 247 | "AWS offers many different [ways to auto-scale your endpoints](https://docs.aws.amazon.com/autoscaling/application/userguide/application-auto-scaling-target-tracking.html). One of them is target-tracking scaling, where you scale the instance capacity based on a target value for a metric such as `CPUUtilization` of the instances or `SageMakerVariantInvocationsPerInstance`. \n", 248 | "\n", 249 | "In this example we are going to use `SageMakerVariantInvocationsPerInstance` to auto-scale our Endpoint.\n", 250 | "\n" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 9, 256 | "id": "7b733d07", 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "import boto3\n", 261 | "\n", 262 | "# Let us define a client to play with autoscaling options\n", 263 | "asg_client = boto3.client('application-autoscaling') # Common class representing Application Auto Scaling for SageMaker amongst other services\n", 264 | "\n", 265 | "# here resource type is variant and the unique identifier is the resource ID.\n", 266 | "# Example: endpoint/my-bert-fine-tuned/variant/AllTraffic .\n", 267 | "resource_id=f\"endpoint/{predictor.endpoint_name}/variant/AllTraffic\"\n", 268 | "\n", 269 | "# scaling configuration\n", 270 | "response = asg_client.register_scalable_target(\n", 271 | " ServiceNamespace='sagemaker', #\n", 272 | " ResourceId=resource_id,\n", 273 | " ScalableDimension='sagemaker:variant:DesiredInstanceCount', \n", 274 | " MinCapacity=1,\n", 275 | " MaxCapacity=4\n", 276 | ")\n" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "id": "38ce53a0", 282 | "metadata": {}, 283 | "source": [ 284 | "Create a scaling policy with configuration details, e.g. the `TargetValue` at which the endpoint should be scaled." 
285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 10, 290 | "id": "fcc92f18", 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "response = asg_client.put_scaling_policy(\n", 295 | " PolicyName=f'Request-ScalingPolicy-{predictor.endpoint_name}',\n", 296 | " ServiceNamespace='sagemaker',\n", 297 | " ResourceId=resource_id,\n", 298 | " ScalableDimension='sagemaker:variant:DesiredInstanceCount',\n", 299 | " PolicyType='TargetTrackingScaling',\n", 300 | " TargetTrackingScalingPolicyConfiguration={\n", 301 | " 'TargetValue': 10.0, # Threshold\n", 302 | " 'PredefinedMetricSpecification': {\n", 303 | " 'PredefinedMetricType': 'SageMakerVariantInvocationsPerInstance',\n", 304 | " },\n", 305 | " 'ScaleInCooldown': 300, # duration until scale in\n", 306 | " 'ScaleOutCooldown': 60 # duration between scale out\n", 307 | " }\n", 308 | ")" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "id": "3a13397e", 314 | "metadata": {}, 315 | "source": [ 316 | "stress test the endpoint with threaded requests" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "id": "de4c3f4f", 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "import time\n", 327 | "\n", 328 | "request_duration_in_seconds = 4*65\n", 329 | "end_time = time.time() + request_duration_in_seconds\n", 330 | "\n", 331 | "print(f\"test will run {request_duration_in_seconds} seconds\")\n", 332 | "\n", 333 | "while time.time() < end_time:\n", 334 | " predictor.predict(data)" 335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "id": "63a324ee", 340 | "metadata": {}, 341 | "source": [ 342 | "Monitor the `InvocationsPerInstance` in cloudwatch " 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "id": "a75a6b3e", 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | 
"print(f\"https://console.aws.amazon.com/cloudwatch/home?region={aws_region}#metricsV2:graph=~(metrics~(~(~'AWS*2fSageMaker~'InvocationsPerInstance~'EndpointName~'{predictor.endpoint_name}~'VariantName~'AllTraffic))~view~'timeSeries~stacked~false~region~'{aws_region}~start~'-PT15M~end~'P0D~stat~'SampleCount~period~60);query=~'*7bAWS*2fSageMaker*2cEndpointName*2cVariantName*7d*20{predictor.endpoint_name}\")" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "id": "d17a0eff", 358 | "metadata": {}, 359 | "source": [ 360 | "check the endpoint instance_count number" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "id": "34adaf5e", 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "bt_sm = boto3.client('sagemaker')\n", 371 | "response = bt_sm.describe_endpoint(EndpointName=predictor.endpoint_name)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 36, 377 | "id": "8b32b7e6", 378 | "metadata": {}, 379 | "outputs": [ 380 | { 381 | "name": "stdout", 382 | "output_type": "stream", 383 | "text": [ 384 | "Endpoint finbert-tone-2d863b7d-3aa4-47c8-a8ca-dd-2021-11-02-07-20-52-760 has \n", 385 | "Current Instance Count: 4\n", 386 | "With a desired instance count of 4\n" 387 | ] 388 | } 389 | ], 390 | "source": [ 391 | "print(f\"Endpoint {response['EndpointName']} has \\nCurrent Instance Count: {response['ProductionVariants'][0]['CurrentInstanceCount']}\\nWith a desired instance count of {response['ProductionVariants'][0]['DesiredInstanceCount']}\")" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": null, 397 | "id": "346260a9", 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "print(f\"https://console.aws.amazon.com/sagemaker/home?region={aws_region}#/endpoints/{predictor.endpoint_name}\")" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "id": "cecb31f5", 407 | "metadata": {}, 408 | "source": [ 409 | "## Clean up" 410 | ] 411 | 
}, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 33, 415 | "id": "27187fc3", 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "# delete endpoint\n", 420 | "predictor.delete_endpoint()" 421 | ] 422 | } 423 | ], 424 | "metadata": { 425 | "instance_type": "ml.t3.medium", 426 | "interpreter": { 427 | "hash": "ec1370a512a4612a2908be3c3c8b0de1730d00dc30104daff827065aeaf438b7" 428 | }, 429 | "kernelspec": { 430 | "display_name": "conda_pytorch_latest_p36", 431 | "language": "python", 432 | "name": "conda_pytorch_latest_p36" 433 | }, 434 | "language_info": { 435 | "codemirror_mode": { 436 | "name": "ipython", 437 | "version": 3 438 | }, 439 | "file_extension": ".py", 440 | "mimetype": "text/x-python", 441 | "name": "python", 442 | "nbconvert_exporter": "python", 443 | "pygments_lexer": "ipython3", 444 | "version": "3.6.13" 445 | } 446 | }, 447 | "nbformat": 4, 448 | "nbformat_minor": 5 449 | } 450 | -------------------------------------------------------------------------------- /workshop_3_mlops/lab_1_sagemaker_pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MLOps: End-to-End Hugging Face Transformers with the Hub & SageMaker Pipelines\n", 8 | "\n", 9 | "This notebook demonstrates how to use [SageMaker Pipelines](https://docs.aws.amazon.com/sagemaker/latest/dg/pipelines-sdk.html) to train a [Hugging Face](https://docs.aws.amazon.com/sagemaker/latest/dg/hugging-face.html) Transformer model and deploy it. The SageMaker integration with Hugging Face makes it easy to train and deploy advanced NLP models. 
A Lambda step in SageMaker Pipelines enables you to easily do lightweight model deployments and other serverless operations.\n", 10 | "\n", 11 | "In this example we are going to fine-tune and deploy a DistilBERT model on the imdb dataset.\n", 12 | "\n", 13 | "**Prerequisites**: \n", 14 | "- Make sure your notebook environment has IAM managed policy `AmazonSageMakerPipelinesIntegrations` as well as `AmazonSageMakerFullAccess`\n", 15 | "\n", 16 | "**Blog Post**\n", 17 | "* [Use a SageMaker Pipeline Lambda step for lightweight model deployments](https://aws.amazon.com/de/blogs/machine-learning/use-a-sagemaker-pipeline-lambda-step-for-lightweight-model-deployments/)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Development Environment and Permissions " 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Installation & Imports\n", 32 | "\n", 33 | "We'll start by updating the SageMaker SDK, and importing some necessary packages." 
34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "!pip install \"sagemaker>=2.48.0\" --upgrade" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 1, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "import boto3\n", 52 | "import os\n", 53 | "import numpy as np\n", 54 | "import pandas as pd\n", 55 | "import sagemaker\n", 56 | "import sys\n", 57 | "import time\n", 58 | "\n", 59 | "from sagemaker.workflow.parameters import ParameterInteger, ParameterFloat, ParameterString\n", 60 | "\n", 61 | "from sagemaker.lambda_helper import Lambda\n", 62 | "\n", 63 | "from sagemaker.sklearn.processing import SKLearnProcessor\n", 64 | "\n", 65 | "from sagemaker.processing import ProcessingInput, ProcessingOutput\n", 66 | "from sagemaker.workflow.steps import CacheConfig, ProcessingStep\n", 67 | "\n", 68 | "from sagemaker.huggingface import HuggingFace, HuggingFaceModel\n", 69 | "import sagemaker.huggingface\n", 70 | "\n", 71 | "from sagemaker.inputs import TrainingInput\n", 72 | "from sagemaker.workflow.steps import TrainingStep\n", 73 | "\n", 74 | "from sagemaker.processing import ScriptProcessor\n", 75 | "from sagemaker.workflow.properties import PropertyFile\n", 76 | "from sagemaker.workflow.step_collections import CreateModelStep, RegisterModel\n", 77 | "\n", 78 | "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo,ConditionGreaterThanOrEqualTo\n", 79 | "from sagemaker.workflow.condition_step import ConditionStep, JsonGet\n", 80 | "\n", 81 | "from sagemaker.workflow.pipeline import Pipeline, PipelineExperimentConfig\n", 82 | "from sagemaker.workflow.execution_variables import ExecutionVariables" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "## Permissions" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "_If you are going to use Sagemaker 
in a local environment, you need access to an IAM Role with the required permissions for Sagemaker. You can find more about it [here](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html)._" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "import sagemaker\n", 106 | "\n", 107 | "sess = sagemaker.Session()\n", 108 | "region = sess.boto_region_name\n", 109 | "\n", 110 | "# sagemaker session bucket -> used for uploading data, models and logs\n", 111 | "# sagemaker will automatically create this bucket if it does not exist\n", 112 | "sagemaker_session_bucket=None\n", 113 | "if sagemaker_session_bucket is None and sess is not None:\n", 114 | " # set to default bucket if a bucket name is not given\n", 115 | " sagemaker_session_bucket = sess.default_bucket()\n", 116 | "\n", 117 | "role = sagemaker.get_execution_role()\n", 118 | "sagemaker_session = sagemaker.Session(default_bucket=sagemaker_session_bucket)\n", 119 | "\n", 120 | "print(f\"sagemaker role arn: {role}\")\n", 121 | "print(f\"sagemaker bucket: {sagemaker_session.default_bucket()}\")\n", 122 | "print(f\"sagemaker session region: {sagemaker_session.boto_region_name}\")" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "# Pipeline Overview" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "![pipeline](./imgs/overview.png)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "# Defining the Pipeline\n", 144 | "\n", 145 | "## 0. Pipeline parameters\n", 146 | "\n", 147 | "Before defining the pipeline, it is important to parameterize it. 
SageMaker Pipeline can directly be parameterized, including instance types and counts.\n", 148 | "\n", 149 | "Read more about Parameters in the [documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/build-and-manage-parameters.html)\n", 150 | "\n" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 3, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# S3 prefix where every assets will be stored\n", 160 | "s3_prefix = \"hugging-face-pipeline-demo\"\n", 161 | "\n", 162 | "# s3 bucket used for storing assets and artifacts\n", 163 | "bucket = sagemaker_session.default_bucket()\n", 164 | "\n", 165 | "# aws region used\n", 166 | "region = sagemaker_session.boto_region_name\n", 167 | "\n", 168 | "# base name prefix for sagemaker jobs (training, processing, inference)\n", 169 | "base_job_prefix = s3_prefix\n", 170 | "\n", 171 | "# Cache configuration for workflow\n", 172 | "cache_config = CacheConfig(enable_caching=False, expire_after=\"30d\")\n", 173 | "\n", 174 | "\n", 175 | "# package versions\n", 176 | "transformers_version = \"4.11.0\"\n", 177 | "pytorch_version = \"1.9.0\"\n", 178 | "py_version = \"py38\"\n", 179 | "\n", 180 | "model_id_=\"distilbert-base-uncased\"\n", 181 | "dataset_name_=\"imdb\"\n", 182 | "\n", 183 | "model_id = ParameterString(name=\"ModelId\", default_value=\"distilbert-base-uncased\")\n", 184 | "dataset_name = ParameterString(name=\"DatasetName\", default_value=\"imdb\")" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "## 1. Processing Step\n", 192 | "\n", 193 | "A SKLearn Processing step is used to invoke a SageMaker Processing job with a custom python script - `preprocessing.py`. 
" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "### Processing Parameter" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 5, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "processing_instance_type = ParameterString(name=\"ProcessingInstanceType\", default_value=\"ml.c5.2xlarge\")\n", 210 | "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", 211 | "processing_script = ParameterString(name=\"ProcessingScript\", default_value=\"./scripts/preprocessing.py\")" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "### Processor" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 6, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "processing_output_destination = f\"s3://{bucket}/{s3_prefix}/data\"\n", 228 | "\n", 229 | "\n", 230 | "sklearn_processor = SKLearnProcessor(\n", 231 | " framework_version=\"0.23-1\",\n", 232 | " instance_type=processing_instance_type,\n", 233 | " instance_count=processing_instance_count,\n", 234 | " base_job_name=base_job_prefix + \"/preprocessing\",\n", 235 | " sagemaker_session=sagemaker_session,\n", 236 | " role=role,\n", 237 | ")\n", 238 | "\n", 239 | "step_process = ProcessingStep(\n", 240 | " name=\"ProcessDataForTraining\",\n", 241 | " cache_config=cache_config,\n", 242 | " processor=sklearn_processor,\n", 243 | " job_arguments=[\"--transformers_version\",transformers_version,\n", 244 | " \"--pytorch_version\",pytorch_version,\n", 245 | " \"--model_id\",model_id_,\n", 246 | " \"--dataset_name\",dataset_name_],\n", 247 | " outputs=[\n", 248 | " ProcessingOutput(\n", 249 | " output_name=\"train\",\n", 250 | " destination=f\"{processing_output_destination}/train\",\n", 251 | " source=\"/opt/ml/processing/train\",\n", 252 | " ),\n", 253 | " ProcessingOutput(\n", 254 | " output_name=\"test\",\n", 
255 | " destination=f\"{processing_output_destination}/test\",\n", 256 | " source=\"/opt/ml/processing/test\",\n", 257 | " ),\n", 258 | " ProcessingOutput(\n", 259 | " output_name=\"validation\",\n", 260 | " destination=f\"{processing_output_destination}/test\",\n", 261 | " source=\"/opt/ml/processing/validation\",\n", 262 | " ),\n", 263 | " ],\n", 264 | " code=processing_script,\n", 265 | ")" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "## 2. Model Training Step\n", 273 | "\n", 274 | "We use SageMaker's [Hugging Face](https://sagemaker.readthedocs.io/en/stable/frameworks/huggingface/sagemaker.huggingface.html) Estimator class to create a model training step for the Hugging Face [DistilBERT](https://huggingface.co/distilbert-base-uncased) model. Transformer-based models such as the original BERT can be very large and slow to train. DistilBERT, however, is a small, fast, cheap and light Transformer model trained by distilling BERT base. It reduces the size of a BERT model by 40%, while retaining 97% of its language understanding capabilities and being 60% faster. " 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "The Hugging Face estimator also takes hyperparameters as a dictionary. The training instance type and size are pipeline parameters that can be easily varied in future pipeline runs without changing any code. 
" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "### Training Parameter" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 7, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "# training step parameters\n", 298 | "training_entry_point = ParameterString(name=\"TrainingEntryPoint\", default_value=\"train.py\")\n", 299 | "training_source_dir = ParameterString(name=\"TrainingSourceDir\", default_value=\"./scripts\")\n", 300 | "training_instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.p3.2xlarge\")\n", 301 | "training_instance_count = ParameterInteger(name=\"TrainingInstanceCount\", default_value=1)\n", 302 | "\n", 303 | "# hyperparameters, which are passed into the training job\n", 304 | "epochs=ParameterString(name=\"Epochs\", default_value=\"1\")\n", 305 | "eval_batch_size=ParameterString(name=\"EvalBatchSize\", default_value=\"32\") \n", 306 | "train_batch_size=ParameterString(name=\"TrainBatchSize\", default_value=\"16\") \n", 307 | "learning_rate=ParameterString(name=\"LearningRate\", default_value=\"3e-5\") \n", 308 | "fp16=ParameterString(name=\"Fp16\", default_value=\"True\")" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "### Hugging Face Estimator" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 8, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "huggingface_estimator = HuggingFace(\n", 325 | " entry_point=training_entry_point,\n", 326 | " source_dir=training_source_dir,\n", 327 | " base_job_name=base_job_prefix + \"/training\",\n", 328 | " instance_type=training_instance_type,\n", 329 | " instance_count=training_instance_count,\n", 330 | " role=role,\n", 331 | " transformers_version=transformers_version,\n", 332 | " pytorch_version=pytorch_version,\n", 333 | " py_version=py_version,\n", 334 | " hyperparameters={\n", 335 | " 
'epochs':epochs, \n", 336 | " 'eval_batch_size': eval_batch_size, \n", 337 | " 'train_batch_size': train_batch_size, \n", 338 | " 'learning_rate': learning_rate, \n", 339 | " 'model_id': model_id,\n", 340 | " 'fp16': fp16\n", 341 | " },\n", 342 | " sagemaker_session=sagemaker_session,\n", 343 | ")\n", 344 | "\n", 345 | "step_train = TrainingStep(\n", 346 | " name=\"TrainHuggingFaceModel\",\n", 347 | " estimator=huggingface_estimator,\n", 348 | " inputs={\n", 349 | " \"train\": TrainingInput(\n", 350 | " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", 351 | " \"train\"\n", 352 | " ].S3Output.S3Uri\n", 353 | " ),\n", 354 | " \"test\": TrainingInput(\n", 355 | " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", 356 | " \"test\"\n", 357 | " ].S3Output.S3Uri\n", 358 | " ),\n", 359 | " },\n", 360 | " cache_config=cache_config,\n", 361 | ")" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "## 3. Model evaluation Step\n", 369 | "\n", 370 | "A ProcessingStep is used to evaluate the performance of the trained model. Based on the results of the evaluation, either the model is created, registered, and deployed, or the pipeline stops.\n", 371 | "\n", 372 | "In the training job, the model was evaluated against the test dataset, and the result of the evaluation was stored in the `model.tar.gz` file saved by the training job. The results of that evaluation are copied into a `PropertyFile` in this ProcessingStep so that it can be used in the ConditionStep. 
" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "### Evaluation Parameter" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 10, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "evaluation_script = ParameterString(name=\"EvaluationScript\", default_value=\"./scripts/evaluate.py\")" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "### Evaluator" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 11, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "script_eval = SKLearnProcessor(\n", 405 | " framework_version=\"0.23-1\",\n", 406 | " instance_type=processing_instance_type,\n", 407 | " instance_count=processing_instance_count,\n", 408 | " base_job_name=base_job_prefix + \"/evaluation\",\n", 409 | " sagemaker_session=sagemaker_session,\n", 410 | " role=role,\n", 411 | ")\n", 412 | "\n", 413 | "evaluation_report = PropertyFile(\n", 414 | " name=\"HuggingFaceEvaluationReport\",\n", 415 | " output_name=\"evaluation\",\n", 416 | " path=\"evaluation.json\",\n", 417 | ")\n", 418 | "\n", 419 | "step_eval = ProcessingStep(\n", 420 | " name=\"HuggingfaceEvalLoss\",\n", 421 | " processor=script_eval,\n", 422 | " inputs=[\n", 423 | " ProcessingInput(\n", 424 | " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", 425 | " destination=\"/opt/ml/processing/model\",\n", 426 | " )\n", 427 | " ],\n", 428 | " outputs=[\n", 429 | " ProcessingOutput(\n", 430 | " output_name=\"evaluation\",\n", 431 | " source=\"/opt/ml/processing/evaluation\",\n", 432 | " destination=f\"s3://{bucket}/{s3_prefix}/evaluation_report\",\n", 433 | " ),\n", 434 | " ],\n", 435 | " code=evaluation_script,\n", 436 | " property_files=[evaluation_report],\n", 437 | " cache_config=cache_config,\n", 438 | ")" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | 
"## 4. Register the model\n", 446 | "\n", 447 | "The trained model is registered in the Model Registry under a Model Package Group. Each time a new model is registered, it is given a new version number by default. The model is registered in the \"Approved\" state so that it can be deployed. Registration will only happen if the output of the [6. Condition for deployment](#6.-Condition-for-deployment) is true, i.e., the metrics being checked are within the threshold defined." 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 12, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "model = HuggingFaceModel(\n", 457 | " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", 458 | " role=role,\n", 459 | " transformers_version=transformers_version,\n", 460 | " pytorch_version=pytorch_version,\n", 461 | " py_version=py_version,\n", 462 | " sagemaker_session=sagemaker_session,\n", 463 | ")\n", 464 | "model_package_group_name = \"HuggingFaceModelPackageGroup\"\n", 465 | "step_register = RegisterModel(\n", 466 | " name=\"HuggingFaceRegisterModel\",\n", 467 | " model=model,\n", 468 | " content_types=[\"application/json\"],\n", 469 | " response_types=[\"application/json\"],\n", 470 | " inference_instances=[\"ml.g4dn.xlarge\", \"ml.m5.xlarge\"],\n", 471 | " transform_instances=[\"ml.g4dn.xlarge\", \"ml.m5.xlarge\"],\n", 472 | " model_package_group_name=model_package_group_name,\n", 473 | " approval_status=\"Approved\",\n", 474 | ")" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": {}, 480 | "source": [ 481 | "## 5. Model Deployment\n", 482 | "\n", 483 | "We create a custom step `ModelDeployment` derived from the provided `LambdaStep`. This Step will create a Lambda function and invoke it to deploy our model as a SageMaker Endpoint."
484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 13, 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "name": "stdout", 493 | "output_type": "stream", 494 | "text": [ 495 | "Using ARN from existing role: sagemaker-pipelines-model-deployment-role\n" 496 | ] 497 | } 498 | ], 499 | "source": [ 500 | "# custom Helper Step for ModelDeployment\n", 501 | "from utils.deploy_step import ModelDeployment\n", 502 | "\n", 503 | "# we will use the iam role from the notebook session for the created endpoint\n", 504 | "# this role will be attached to our endpoint and need permissions, e.g. to download assets from s3\n", 505 | "sagemaker_endpoint_role=sagemaker.get_execution_role()\n", 506 | "\n", 507 | "\n", 508 | "step_deployment = ModelDeployment(\n", 509 | " model_name=f\"{model_id_}-{dataset_name_}\",\n", 510 | " registered_model=step_register.steps[0],\n", 511 | " endpoint_instance_type=\"ml.g4dn.xlarge\",\n", 512 | " sagemaker_endpoint_role=sagemaker_endpoint_role,\n", 513 | " autoscaling_policy=None,\n", 514 | ")" 515 | ] 516 | }, 517 | { 518 | "cell_type": "markdown", 519 | "metadata": {}, 520 | "source": [ 521 | "## 6. Condition for deployment\n", 522 | "\n", 523 | "For the condition to be `True` and the steps after evaluation to run, the evaluated accuracy of the Hugging Face model must be greater than or equal to our `ThresholdAccuracy` parameter."
524 | ] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "metadata": {}, 529 | "source": [ 530 | "### Condition Parameter" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": 14, 536 | "metadata": {}, 537 | "outputs": [], 538 | "source": [ 539 | "threshold_accuracy = ParameterFloat(name=\"ThresholdAccuracy\", default_value=0.8)" 540 | ] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": {}, 545 | "source": [ 546 | "### Condition" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 15, 552 | "metadata": {}, 553 | "outputs": [ 554 | { 555 | "name": "stderr", 556 | "output_type": "stream", 557 | "text": [ 558 | "The class JsonGet has been renamed in sagemaker>=2.\n", 559 | "See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.\n" 560 | ] 561 | } 562 | ], 563 | "source": [ 564 | "cond_gte = ConditionGreaterThanOrEqualTo(\n", 565 | " left=JsonGet(\n", 566 | " step=step_eval,\n", 567 | " property_file=evaluation_report,\n", 568 | " json_path=\"eval_accuracy\",\n", 569 | " ),\n", 570 | " right=threshold_accuracy,\n", 571 | ")\n", 572 | "\n", 573 | "step_cond = ConditionStep(\n", 574 | " name=\"CheckHuggingfaceEvalAccuracy\",\n", 575 | " conditions=[cond_gte],\n", 576 | " if_steps=[step_register, step_deployment],\n", 577 | " else_steps=[],\n", 578 | ")" 579 | ] 580 | }, 581 | { 582 | "cell_type": "markdown", 583 | "metadata": {}, 584 | "source": [ 585 | "# Pipeline definition and execution\n", 586 | "\n", 587 | "SageMaker Pipelines constructs the pipeline graph from the implicit definition created by the way pipeline steps inputs and outputs are specified. There's no need to specify that a step is a \"parallel\" or \"serial\" step. Steps such as model registration after the condition step are not listed in the pipeline definition because they do not run unless the condition is true. 
If so, they are run in order based on their specified inputs and outputs.\n", 588 | "\n", 589 | "Each Parameter we defined holds a default value, which can be overwritten before starting the pipeline. [Parameter Documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/build-and-manage-parameters.html)\n", 590 | "\n", 591 | "\n" 592 | ] 593 | }, 594 | { 595 | "cell_type": "markdown", 596 | "metadata": {}, 597 | "source": [ 598 | "### Overwriting Parameters" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 16, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "# define parameter which should be overwritten\n", 608 | "pipeline_parameters=dict(\n", 609 | " ModelId=\"distilbert-base-uncased\",\n", 610 | " ThresholdAccuracy=0.7,\n", 611 | " Epochs=\"3\",\n", 612 | " TrainBatchSize=\"32\",\n", 613 | " EvalBatchSize=\"64\",\n", 614 | " )" 615 | ] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": {}, 620 | "source": [ 621 | "### Create Pipeline" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 17, 627 | "metadata": {}, 628 | "outputs": [], 629 | "source": [ 630 | "pipeline = Pipeline(\n", 631 | " name=f\"HuggingFaceDemoPipeline\",\n", 632 | " parameters=[\n", 633 | " model_id,\n", 634 | " dataset_name,\n", 635 | " processing_instance_type,\n", 636 | " processing_instance_count,\n", 637 | " processing_script,\n", 638 | " training_entry_point,\n", 639 | " training_source_dir,\n", 640 | " training_instance_type,\n", 641 | " training_instance_count,\n", 642 | " evaluation_script,\n", 643 | " threshold_accuracy,\n", 644 | " epochs,\n", 645 | " eval_batch_size,\n", 646 | " train_batch_size,\n", 647 | " learning_rate,\n", 648 | " fp16\n", 649 | " ],\n", 650 | " steps=[step_process, step_train, step_eval, step_cond],\n", 651 | " sagemaker_session=sagemaker_session,\n", 652 | ")" 653 | ] 654 | }, 655 | { 656 | "cell_type": "markdown", 657 | "metadata": {}, 658 | "source": [ 659 | "We can 
examine the pipeline definition in JSON format. You can also inspect the pipeline graph in SageMaker Studio by going to the page for your pipeline. " 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": null, 665 | "metadata": {}, 666 | "outputs": [], 667 | "source": [ 668 | "import json\n", 669 | "\n", 670 | "json.loads(pipeline.definition())" 671 | ] 672 | }, 673 | { 674 | "cell_type": "markdown", 675 | "metadata": {}, 676 | "source": [ 677 | "![pipeline](./imgs/pipeline.png)" 678 | ] 679 | }, 680 | { 681 | "cell_type": "markdown", 682 | "metadata": {}, 683 | "source": [ 684 | "`upsert` creates or updates the pipeline." 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": {}, 691 | "outputs": [], 692 | "source": [ 693 | "pipeline.upsert(role_arn=role)" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": [ 700 | "### Run the pipeline" 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": 20, 706 | "metadata": {}, 707 | "outputs": [], 708 | "source": [ 709 | "execution = pipeline.start(parameters=pipeline_parameters)" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": null, 715 | "metadata": {}, 716 | "outputs": [], 717 | "source": [ 718 | "execution.wait()" 719 | ] 720 | }, 721 | { 722 | "cell_type": "markdown", 723 | "metadata": {}, 724 | "source": [ 725 | "## Getting predictions from the endpoint\n", 726 | "\n", 727 | "After the previous cell completes, you can check whether the endpoint has finished deploying.\n", 728 | "\n", 729 | "We can use the `endpoint_name` to create a `HuggingFacePredictor` object that will be used to get predictions."
730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": 24, 735 | "metadata": {}, 736 | "outputs": [], 737 | "source": [ 738 | "from sagemaker.huggingface import HuggingFacePredictor\n", 739 | "\n", 740 | "endpoint_name = f\"{model_id}-{dataset_name}\"\n", 741 | "\n", 742 | "# check if endpoint is up and running\n", 743 | "print(f\"https://console.aws.amazon.com/sagemaker/home?region={region}#/endpoints/{endpoint_name}\")\n" 744 | ] 745 | }, 746 | { 747 | "cell_type": "code", 748 | "execution_count": null, 749 | "metadata": {}, 750 | "outputs": [], 751 | "source": [ 752 | "hf_predictor = HuggingFacePredictor(endpoint_name,sagemaker_session=sagemaker_session)" 753 | ] 754 | }, 755 | { 756 | "cell_type": "markdown", 757 | "metadata": {}, 758 | "source": [ 759 | "### Test data\n", 760 | "\n", 761 | "Here are a couple of sample reviews we would like to classify as positive (`pos`) or negative (`neg`). Demonstrating the power of advanced Transformer-based models such as this Hugging Face model, the model should do quite well even though the reviews are mixed. " 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": 25, 767 | "metadata": {}, 768 | "outputs": [ 769 | { 770 | "data": { 771 | "text/plain": [ 772 | "[{'label': 'pos', 'score': 0.9690886735916138}]" 773 | ] 774 | }, 775 | "execution_count": 25, 776 | "metadata": {}, 777 | "output_type": "execute_result" 778 | } 779 | ], 780 | "source": [ 781 | "sentiment_input1 = {\"inputs\":\"Although the movie had some plot weaknesses, it was engaging. Special effects were mind boggling. 
Can't wait to see what this creative team does next.\"}\n", 782 | "\n", 783 | "hf_predictor.predict(sentiment_input1)" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": 26, 789 | "metadata": {}, 790 | "outputs": [ 791 | { 792 | "data": { 793 | "text/plain": [ 794 | "[{'label': 'neg', 'score': 0.9938264489173889}]" 795 | ] 796 | }, 797 | "execution_count": 26, 798 | "metadata": {}, 799 | "output_type": "execute_result" 800 | } 801 | ], 802 | "source": [ 803 | "sentiment_input2 = {\"inputs\":\"There was some good acting, but the story was ridiculous. The other sequels in this franchise were better. It's time to take a break from this IP, but if they switch it up for the next one, I'll check it out.\"}\n", 804 | "\n", 805 | "hf_predictor.predict(sentiment_input2)" 806 | ] 807 | }, 808 | { 809 | "cell_type": "markdown", 810 | "metadata": {}, 811 | "source": [ 812 | "## Cleanup Resources\n", 813 | "\n", 814 | "The following cell will delete the resources created by the Lambda function and the Lambda itself. \n", 815 | "Deleting other resources such as the S3 bucket and the IAM role for the Lambda function are the responsibility of the notebook user. 
" 816 | ] 817 | }, 818 | { 819 | "cell_type": "code", 820 | "execution_count": null, 821 | "metadata": {}, 822 | "outputs": [], 823 | "source": [ 824 | "sm_client = boto3.client(\"sagemaker\")\n", 825 | "\n", 826 | "# Delete the Lambda function\n", 827 | "step_deployment.func.delete()\n", 828 | "\n", 829 | "# Delete the endpoint\n", 830 | "hf_predictor.delete_endpoint()" 831 | ] 832 | } 833 | ], 834 | "metadata": { 835 | "instance_type": "ml.t3.medium", 836 | "kernelspec": { 837 | "display_name": "Python 3", 838 | "language": "python", 839 | "name": "python3" 840 | }, 841 | "language_info": { 842 | "codemirror_mode": { 843 | "name": "ipython", 844 | "version": 3 845 | }, 846 | "file_extension": ".py", 847 | "mimetype": "text/x-python", 848 | "name": "python", 849 | "nbconvert_exporter": "python", 850 | "pygments_lexer": "ipython3", 851 | "version": "3.8.5" 852 | } 853 | }, 854 | "nbformat": 4, 855 | "nbformat_minor": 4 856 | } 857 | -------------------------------------------------------------------------------- /workshop_1_getting_started_with_amazon_sagemaker/lab_3_spot_instances.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Huggingface Sagemaker-sdk - Spot instances example\n", 8 | "### Binary Classification with `Trainer` and `imdb` dataset" 9 | ] 10 | }, 11 | { 12 | "attachments": { 13 | "image.png": { 14 | "image/png": "" 15 | } 16 | }, 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# Introduction\n", 21 | "\n", 22 | "Welcome to our end-to-end binary Text-Classification example. In this demo, we will use the Hugging Faces `transformers` and `datasets` library together with a custom Amazon sagemaker-sdk extension to fine-tune a pre-trained transformer on binary text classification. In particular, the pre-trained model will be fine-tuned using the `imdb` dataset. 
To get started, we need to set up the environment with a few prerequisite steps, for permissions, configurations, and so on. This demo will also show how you can use spot instances and continue training.\n", 23 | "\n", 24 | "![image.png](attachment:image.png)\n", 25 | "\n", 26 | "_**NOTE: You can run this demo in Sagemaker Studio, your local machine or Sagemaker Notebook Instances**_" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Development Environment and Permissions " 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Installation\n", 41 | "\n", 42 | "_*Note:* we only install the required libraries from Hugging Face and AWS. You also need PyTorch or Tensorflow, if you haven't installed it yet_" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "!pip install \"sagemaker>=2.48.0\" \"transformers==4.6.1\" \"datasets[s3]==1.6.2\" --upgrade" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 1, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "import sagemaker.huggingface" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Permissions" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "_If you are going to use Sagemaker in a local environment, you need access to an IAM Role with the required permissions for Sagemaker. 
You can find [here](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) more about it._" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "import sagemaker\n", 84 | "\n", 85 | "sess = sagemaker.Session()\n", 86 | "# sagemaker session bucket -> used for uploading data, models and logs\n", 87 | "# sagemaker will automatically create this bucket if it not exists\n", 88 | "sagemaker_session_bucket=None\n", 89 | "if sagemaker_session_bucket is None and sess is not None:\n", 90 | " # set to default bucket if a bucket name is not given\n", 91 | " sagemaker_session_bucket = sess.default_bucket()\n", 92 | "\n", 93 | "role = sagemaker.get_execution_role()\n", 94 | "sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)\n", 95 | "\n", 96 | "print(f\"sagemaker role arn: {role}\")\n", 97 | "print(f\"sagemaker bucket: {sess.default_bucket()}\")\n", 98 | "print(f\"sagemaker session region: {sess.boto_region_name}\")" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# Preprocessing\n", 106 | "\n", 107 | "We are using the `datasets` library to download and preprocess the `imdb` dataset. After preprocessing, the dataset will be uploaded to our `sagemaker_session_bucket` to be used within our training job. The [imdb](http://ai.stanford.edu/~amaas/data/sentiment/) dataset consists of 25000 training and 25000 testing highly polar movie reviews." 
108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Tokenization " 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 5, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "from datasets import load_dataset\n", 124 | "from transformers import AutoTokenizer\n", 125 | "\n", 126 | "# tokenizer used in preprocessing\n", 127 | "tokenizer_name = 'distilbert-base-uncased'\n", 128 | "\n", 129 | "# dataset used\n", 130 | "dataset_name = 'imdb'\n", 131 | "\n", 132 | "# s3 key prefix for the data\n", 133 | "s3_prefix = 'samples/datasets/imdb'" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "# load dataset\n", 143 | "dataset = load_dataset(dataset_name)\n", 144 | "\n", 145 | "# download tokenizer\n", 146 | "tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)\n", 147 | "\n", 148 | "# tokenizer helper function\n", 149 | "def tokenize(batch):\n", 150 | " return tokenizer(batch['text'], padding='max_length', truncation=True)\n", 151 | "\n", 152 | "# load dataset\n", 153 | "train_dataset, test_dataset = load_dataset('imdb', split=['train', 'test'])\n", 154 | "test_dataset = test_dataset.shuffle().select(range(10000)) # smaller the size for test dataset to 10k \n", 155 | "\n", 156 | "\n", 157 | "# tokenize dataset\n", 158 | "train_dataset = train_dataset.map(tokenize, batched=True)\n", 159 | "test_dataset = test_dataset.map(tokenize, batched=True)\n", 160 | "\n", 161 | "# set format for pytorch\n", 162 | "train_dataset = train_dataset.rename_column(\"label\", \"labels\")\n", 163 | "train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])\n", 164 | "test_dataset = test_dataset.rename_column(\"label\", \"labels\")\n", 165 | "test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])" 166 | ] 167 | }, 168 | { 169 | "cell_type": 
"markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "## Uploading data to `sagemaker_session_bucket`\n", 173 | "\n", 174 | "After we processed the `datasets` we are going to use the new `FileSystem` [integration](https://huggingface.co/docs/datasets/filesystems.html) to upload our dataset to S3." 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 8, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "import botocore\n", 184 | "from datasets.filesystems import S3FileSystem\n", 185 | "\n", 186 | "s3 = S3FileSystem() \n", 187 | "\n", 188 | "# save train_dataset to s3\n", 189 | "training_input_path = f's3://{sess.default_bucket()}/{s3_prefix}/train'\n", 190 | "train_dataset.save_to_disk(training_input_path,fs=s3)\n", 191 | "\n", 192 | "# save test_dataset to s3\n", 193 | "test_input_path = f's3://{sess.default_bucket()}/{s3_prefix}/test'\n", 194 | "test_dataset.save_to_disk(test_input_path,fs=s3)\n" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 6, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "training_input_path = f's3://{sess.default_bucket()}/{s3_prefix}/train'\n", 204 | "test_input_path = f's3://{sess.default_bucket()}/{s3_prefix}/test'\n" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "# Fine-tuning & starting Sagemaker Training Job\n", 212 | "\n", 213 | "In order to create a sagemaker training job we need an `HuggingFace` Estimator. The Estimator handles end-to-end Amazon SageMaker training and deployment tasks. 
In an Estimator we define which fine-tuning script should be used as `entry_point`, which `instance_type` should be used, which `hyperparameters` are passed in, and so on.\n", 214 | "\n", 215 | "\n", 216 | "\n", 217 | "```python\n", 218 | "huggingface_estimator = HuggingFace(entry_point='train.py',\n", 219 | " source_dir='./scripts',\n", 220 | " base_job_name='huggingface-sdk-extension',\n", 221 | " instance_type='ml.p3.2xlarge',\n", 222 | " instance_count=1,\n", 223 | " transformers_version='4.4',\n", 224 | " pytorch_version='1.6',\n", 225 | " py_version='py36',\n", 226 | " role=role,\n", 227 | " hyperparameters = {'epochs': 1,\n", 228 | " 'train_batch_size': 32,\n", 229 | " 'model_name':'distilbert-base-uncased'\n", 230 | " })\n", 231 | "```\n", 232 | "\n", 233 | "When we create a SageMaker training job, SageMaker takes care of starting and managing all the required ec2 instances for us with the `huggingface` container, uploads the provided fine-tuning script `train.py` and downloads the data from our `sagemaker_session_bucket` into the container at `/opt/ml/input/data`. Then, it starts the training job by running:\n", 234 | "\n", 235 | "```python\n", 236 | "/opt/conda/bin/python train.py --epochs 1 --model_name distilbert-base-uncased --train_batch_size 32\n", 237 | "```\n", 238 | "\n", 239 | "The `hyperparameters` you define in the `HuggingFace` estimator are passed in as named arguments. \n", 240 | "\n", 241 | "Sagemaker is providing useful properties about the training environment through various environment variables, including the following:\n", 242 | "\n", 243 | "* `SM_MODEL_DIR`: A string that represents the path where the training job writes the model artifacts to. 
After training, artifacts in this directory are uploaded to S3 for model hosting.\n", 244 | "\n", 245 | "* `SM_NUM_GPUS`: An integer representing the number of GPUs available to the host.\n", 246 | "\n", 247 | "* `SM_CHANNEL_XXXX:` A string that represents the path to the directory that contains the input data for the specified channel. For example, if you specify two input channels in the HuggingFace estimator’s fit call, named `train` and `test`, the environment variables `SM_CHANNEL_TRAIN` and `SM_CHANNEL_TEST` are set.\n", 248 | "\n", 249 | "\n", 250 | "To run your training job locally you can define `instance_type='local'` or `instance_type='local_gpu'` for gpu usage. _Note: this does not working within SageMaker Studio_\n" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 7, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "name": "stdout", 260 | "output_type": "stream", 261 | "text": [ 262 | "\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36mtransformers\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m AutoModelForSequenceClassification, Trainer, TrainingArguments\r\n", 263 | "\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36mtransformers\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mtrainer_utils\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m get_last_checkpoint\r\n", 264 | "\r\n", 265 | "\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36msklearn\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mmetrics\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m accuracy_score, precision_recall_fscore_support\r\n", 266 | "\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36mdatasets\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m load_from_disk\r\n", 267 | "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mlogging\u001b[39;49;00m\r\n", 268 | "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36msys\u001b[39;49;00m\r\n", 269 | "\u001b[34mimport\u001b[39;49;00m 
\u001b[04m\u001b[36margparse\u001b[39;49;00m\r\n", 270 | "\u001b[34mimport\u001b[39;49;00m \u001b[04m\u001b[36mos\u001b[39;49;00m\r\n", 271 | "\r\n", 272 | "\u001b[37m# Set up logging\u001b[39;49;00m\r\n", 273 | "logger = logging.getLogger(\u001b[31m__name__\u001b[39;49;00m)\r\n", 274 | "\r\n", 275 | "logging.basicConfig(\r\n", 276 | " level=logging.getLevelName(\u001b[33m\"\u001b[39;49;00m\u001b[33mINFO\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m),\r\n", 277 | " handlers=[logging.StreamHandler(sys.stdout)],\r\n", 278 | " \u001b[36mformat\u001b[39;49;00m=\u001b[33m\"\u001b[39;49;00m\u001b[33m%(asctime)s\u001b[39;49;00m\u001b[33m - \u001b[39;49;00m\u001b[33m%(name)s\u001b[39;49;00m\u001b[33m - \u001b[39;49;00m\u001b[33m%(levelname)s\u001b[39;49;00m\u001b[33m - \u001b[39;49;00m\u001b[33m%(message)s\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m,\r\n", 279 | ")\r\n", 280 | "\r\n", 281 | "\u001b[34mif\u001b[39;49;00m \u001b[31m__name__\u001b[39;49;00m == \u001b[33m\"\u001b[39;49;00m\u001b[33m__main__\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m:\r\n", 282 | "\r\n", 283 | " logger.info(sys.argv)\r\n", 284 | "\r\n", 285 | " parser = argparse.ArgumentParser()\r\n", 286 | "\r\n", 287 | " \u001b[37m# hyperparameters sent by the client are passed as command-line arguments to the script.\u001b[39;49;00m\r\n", 288 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--epochs\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mint\u001b[39;49;00m, default=\u001b[34m3\u001b[39;49;00m)\r\n", 289 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--train-batch-size\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mint\u001b[39;49;00m, default=\u001b[34m32\u001b[39;49;00m)\r\n", 290 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--eval-batch-size\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mint\u001b[39;49;00m, 
default=\u001b[34m64\u001b[39;49;00m)\r\n", 291 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--warmup_steps\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mint\u001b[39;49;00m, default=\u001b[34m500\u001b[39;49;00m)\r\n", 292 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--model_name\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mstr\u001b[39;49;00m)\r\n", 293 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--learning_rate\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mstr\u001b[39;49;00m, default=\u001b[34m5e-5\u001b[39;49;00m)\r\n", 294 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--output_dir\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mstr\u001b[39;49;00m)\r\n", 295 | "\r\n", 296 | " \u001b[37m# Data, model, and output directories\u001b[39;49;00m\r\n", 297 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--output-data-dir\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mstr\u001b[39;49;00m, default=os.environ[\u001b[33m\"\u001b[39;49;00m\u001b[33mSM_OUTPUT_DATA_DIR\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m])\r\n", 298 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--model-dir\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mstr\u001b[39;49;00m, default=os.environ[\u001b[33m\"\u001b[39;49;00m\u001b[33mSM_MODEL_DIR\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m])\r\n", 299 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--n_gpus\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mstr\u001b[39;49;00m, default=os.environ[\u001b[33m\"\u001b[39;49;00m\u001b[33mSM_NUM_GPUS\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m])\r\n", 300 | " 
parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--training_dir\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mstr\u001b[39;49;00m, default=os.environ[\u001b[33m\"\u001b[39;49;00m\u001b[33mSM_CHANNEL_TRAIN\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m])\r\n", 301 | " parser.add_argument(\u001b[33m\"\u001b[39;49;00m\u001b[33m--test_dir\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m, \u001b[36mtype\u001b[39;49;00m=\u001b[36mstr\u001b[39;49;00m, default=os.environ[\u001b[33m\"\u001b[39;49;00m\u001b[33mSM_CHANNEL_TEST\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m])\r\n", 302 | "\r\n", 303 | " args, _ = parser.parse_known_args()\r\n", 304 | "\r\n", 305 | " \u001b[37m# load datasets\u001b[39;49;00m\r\n", 306 | " train_dataset = load_from_disk(args.training_dir)\r\n", 307 | " test_dataset = load_from_disk(args.test_dir)\r\n", 308 | "\r\n", 309 | " logger.info(\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33m loaded train_dataset length is: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00m\u001b[36mlen\u001b[39;49;00m(train_dataset)\u001b[33m}\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\r\n", 310 | " logger.info(\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33m loaded test_dataset length is: \u001b[39;49;00m\u001b[33m{\u001b[39;49;00m\u001b[36mlen\u001b[39;49;00m(test_dataset)\u001b[33m}\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\r\n", 311 | "\r\n", 312 | " \u001b[37m# compute metrics function for binary classification\u001b[39;49;00m\r\n", 313 | " \u001b[34mdef\u001b[39;49;00m \u001b[32mcompute_metrics\u001b[39;49;00m(pred):\r\n", 314 | " labels = pred.label_ids\r\n", 315 | " preds = pred.predictions.argmax(-\u001b[34m1\u001b[39;49;00m)\r\n", 316 | " precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average=\u001b[33m\"\u001b[39;49;00m\u001b[33mbinary\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\r\n", 317 | " acc = accuracy_score(labels, preds)\r\n", 318 | " 
\u001b[34mreturn\u001b[39;49;00m {\u001b[33m\"\u001b[39;49;00m\u001b[33maccuracy\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: acc, \u001b[33m\"\u001b[39;49;00m\u001b[33mf1\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: f1, \u001b[33m\"\u001b[39;49;00m\u001b[33mprecision\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: precision, \u001b[33m\"\u001b[39;49;00m\u001b[33mrecall\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m: recall}\r\n", 319 | "\r\n", 320 | " \u001b[37m# download model from model hub\u001b[39;49;00m\r\n", 321 | " model = AutoModelForSequenceClassification.from_pretrained(args.model_name)\r\n", 322 | "\r\n", 323 | " \u001b[37m# define training args\u001b[39;49;00m\r\n", 324 | " training_args = TrainingArguments(\r\n", 325 | " output_dir=args.output_dir,\r\n", 326 | " num_train_epochs=args.epochs,\r\n", 327 | " per_device_train_batch_size=args.train_batch_size,\r\n", 328 | " per_device_eval_batch_size=args.eval_batch_size,\r\n", 329 | " warmup_steps=args.warmup_steps,\r\n", 330 | " evaluation_strategy=\u001b[33m\"\u001b[39;49;00m\u001b[33mepoch\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m,\r\n", 331 | " logging_dir=\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33m{\u001b[39;49;00margs.output_data_dir\u001b[33m}\u001b[39;49;00m\u001b[33m/logs\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m,\r\n", 332 | " learning_rate=\u001b[36mfloat\u001b[39;49;00m(args.learning_rate),\r\n", 333 | " )\r\n", 334 | "\r\n", 335 | " \u001b[37m# create Trainer instance\u001b[39;49;00m\r\n", 336 | " trainer = Trainer(\r\n", 337 | " model=model,\r\n", 338 | " args=training_args,\r\n", 339 | " compute_metrics=compute_metrics,\r\n", 340 | " train_dataset=train_dataset,\r\n", 341 | " eval_dataset=test_dataset,\r\n", 342 | " )\r\n", 343 | "\r\n", 344 | " \u001b[37m# train model\u001b[39;49;00m\r\n", 345 | " \u001b[34mif\u001b[39;49;00m get_last_checkpoint(args.output_dir) \u001b[35mis\u001b[39;49;00m \u001b[35mnot\u001b[39;49;00m \u001b[34mNone\u001b[39;49;00m:\r\n", 346 | " 
logger.info(\u001b[33m\"\u001b[39;49;00m\u001b[33m***** continue training *****\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\r\n", 347 | " trainer.train(resume_from_checkpoint=args.output_dir)\r\n", 348 | " \u001b[34melse\u001b[39;49;00m:\r\n", 349 | " trainer.train()\r\n", 350 | " \u001b[37m# evaluate model\u001b[39;49;00m\r\n", 351 | " eval_result = trainer.evaluate(eval_dataset=test_dataset)\r\n", 352 | "\r\n", 353 | " \u001b[37m# writes eval result to file which can be accessed later in s3 ouput\u001b[39;49;00m\r\n", 354 | " \u001b[34mwith\u001b[39;49;00m \u001b[36mopen\u001b[39;49;00m(os.path.join(args.output_data_dir, \u001b[33m\"\u001b[39;49;00m\u001b[33meval_results.txt\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m), \u001b[33m\"\u001b[39;49;00m\u001b[33mw\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m) \u001b[34mas\u001b[39;49;00m writer:\r\n", 355 | " \u001b[36mprint\u001b[39;49;00m(\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33m***** Eval results *****\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\r\n", 356 | " \u001b[34mfor\u001b[39;49;00m key, value \u001b[35min\u001b[39;49;00m \u001b[36msorted\u001b[39;49;00m(eval_result.items()):\r\n", 357 | " writer.write(\u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33m{\u001b[39;49;00mkey\u001b[33m}\u001b[39;49;00m\u001b[33m = \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mvalue\u001b[33m}\u001b[39;49;00m\u001b[33m\\n\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m)\r\n", 358 | "\r\n", 359 | " \u001b[37m# Saves the model to s3\u001b[39;49;00m\r\n", 360 | " trainer.save_model(args.model_dir)\r\n" 361 | ] 362 | } 363 | ], 364 | "source": [ 365 | "!pygmentize ./scripts/train.py" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "## Creating an Estimator and start a training job" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 8, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "from 
sagemaker.huggingface import HuggingFace\n", 382 | "import time\n", 383 | "\n", 384 | "# hyperparameters, which are passed into the training job\n", 385 | "hyperparameters={'epochs': 1, # number of training epochs\n", 386 | " 'train_batch_size': 32, # batch size for training\n", 387 | " 'eval_batch_size': 64, # batch size for evaluation\n", 388 | " 'learning_rate': 3e-5, # learning rate used during training\n", 389 | " 'model_id':'distilbert-base-uncased', # pre-trained model\n", 390 | " 'fp16': True, # Whether to use 16-bit (mixed) precision training\n", 391 | " 'output_dir':'/opt/ml/checkpoints', # output_dir where our checkpoints will be saved\n", 392 | " }" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 9, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "# unique, timestamped job name; reused below as the s3 prefix for our checkpoints\n", 402 | "job_name = f'huggingface-workshop-using-spot-{time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.localtime())}'\n", 403 | "\n", 404 | "# s3 directory for our uploaded checkpoints\n", 405 | "checkpoint_s3_uri = f's3://{sess.default_bucket()}/{job_name}/checkpoints'\n", 406 | "\n", 407 | "# create the Estimator\n", 408 | "huggingface_estimator = HuggingFace(\n", 409 | " entry_point = 'train.py', # fine-tuning script used in training job\n", 410 | " source_dir = './scripts', # directory where fine-tuning script is stored\n", 411 | " instance_type = 'ml.p3.2xlarge', # instance type used for the training job\n", 412 | " instance_count = 1, # the number of instances used for training\n", 413 | " base_job_name = job_name, # the name of the training job\n", 414 | " role = role, # IAM role used in training job to access AWS resources, e.g. 
S3\n", 415 | " transformers_version = '4.6.1', # the transformers version used in the training job\n", 416 | " pytorch_version = '1.7.1', # the pytorch version used in the training job\n", 417 | " py_version = 'py36', # the python version used in the training job\n", 418 | " hyperparameters = hyperparameters, # the hyperparameters used for running the training job\n", 419 | " checkpoint_s3_uri = checkpoint_s3_uri, # s3 directory for our uploaded checkpoints\n", 420 | " use_spot_instances = True, # whether to use spot instances or not\n", 421 | " max_wait = 3600, # max wait time in seconds; should be equal to or greater than max_run\n", 422 | " max_run = 1000, # expected max run in seconds\n", 423 | ")" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "# define a data input dictionary with our uploaded s3 uris\n", 433 | "data = {\n", 434 | " 'train': training_input_path,\n", 435 | " 'test': test_input_path\n", 436 | "}\n", 437 | "\n", 438 | "\n", 439 | "# starting the train job with our uploaded datasets as input\n", 440 | "huggingface_estimator.fit(data)\n", 441 | "\n", 442 | "# Training seconds: 874\n", 443 | "# Billable seconds: 262\n", 444 | "# Managed Spot Training savings: 70.0%" 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": {}, 450 | "source": [ 451 | "## Deploying the endpoint\n", 452 | "\n", 453 | "To deploy our endpoint, we call `deploy()` on our HuggingFace estimator object, passing in our desired number of instances and instance type." 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "predictor = huggingface_estimator.deploy(1,\"ml.g4dn.xlarge\")" 463 | ] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": {}, 468 | "source": [ 469 | "Then, we use the returned predictor object to call the endpoint." 
470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [ 478 | "sentiment_input= {\"inputs\":\"I love using the new Inference DLC.\"}\n", 479 | "\n", 480 | "predictor.predict(sentiment_input)" 481 | ] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | "Finally, we delete the endpoint again." 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 12, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [ 496 | "predictor.delete_endpoint()" 497 | ] 498 | } 499 | ], 500 | "metadata": { 501 | "instance_type": "ml.t3.medium", 502 | "interpreter": { 503 | "hash": "c281c456f1b8161c8906f4af2c08ed2c40c50136979eaae69688b01f70e9f4a9" 504 | }, 505 | "kernelspec": { 506 | "display_name": "Python 3", 507 | "language": "python", 508 | "name": "python3" 509 | }, 510 | "language_info": { 511 | "codemirror_mode": { 512 | "name": "ipython", 513 | "version": 3 514 | }, 515 | "file_extension": ".py", 516 | "mimetype": "text/x-python", 517 | "name": "python", 518 | "nbconvert_exporter": "python", 519 | "pygments_lexer": "ipython3", 520 | "version": "3.8.5" 521 | } 522 | }, 523 | "nbformat": 4, 524 | "nbformat_minor": 4 525 | } 526 | --------------------------------------------------------------------------------