├── .dockerignore
├── .github
    ├── solutionid_validator.sh
    └── workflows
    │   └── maintainer_workflows.yml
├── .gitignore
├── 0-provision
    ├── .env
    ├── .gitignore
    ├── ManagementInstance.json
    ├── prepull-daemonset.yaml-template
    ├── prepull-workshop.sh
    ├── prepull.sh
    ├── stack-create.sh
    └── stack-delete.sh
├── 1-build
    ├── Dockerfile-base-cpu
    ├── Dockerfile-base-gpu
    ├── Dockerfile-base-graviton
    ├── Dockerfile-base-inf1
    ├── Dockerfile-base-inf2
    ├── etc
    │   └── yum.repos.d
    │   │   └── neuron.repo
    ├── pull.sh
    ├── push.sh
    └── sanity-inf.py
├── 2-trace
    └── model-tracer.py
├── 3-pack
    ├── Dockerfile
    ├── fastapi-server.py
    ├── pull.sh
    ├── push.sh
    ├── requirements.txt
    └── run.sh
├── 4-deploy
    ├── cpu-yaml.template
    ├── exec.sh
    ├── generate-yaml.sh
    ├── gpu-yaml.template
    ├── graviton-yaml.template
    ├── inf1-yaml.template
    ├── inf2-yaml.template
    ├── logs.sh
    ├── run.sh
    ├── status.sh
    └── stop.sh
├── 5-test
    ├── Dockerfile
    ├── aggregate.sh
    ├── build.sh
    ├── deployment-yaml.template
    ├── exec.sh
    ├── generate-yaml.sh
    ├── job-yaml.template
    ├── logs.sh
    ├── pull.sh
    ├── push.sh
    ├── run.sh
    ├── status.sh
    ├── stop.sh
    └── tests
    │   ├── benchmark.sh
    │   ├── benchmark_client.py
    │   ├── clock.sh
    │   ├── curl-rnd-ip.sh
    │   ├── curl-seq-ip.sh
    │   └── loop.sh
├── 6-remove
    └── stack-delete.sh
├── CODEOWNERS
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── aws-do-inference-video.png
├── aws-do-inference.png
├── build.sh
├── config.properties
├── config.properties_gpu_tests
├── config.properties_graviton_tests
├── config.properties_inferentia_tests
├── config.sh
├── deploy.sh
├── login.sh
├── low-latency-high-bandwidth-updated-architecture.jpg
├── low-latency-high-throughput-inference-on-amazon-eks.png
├── pack.sh
├── provision.sh
├── remove.sh
├── test.sh
└── trace.sh


/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | 


--------------------------------------------------------------------------------
/.github/solutionid_validator.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh 
 2 | #set -e 
 3 | 
 4 | echo "checking solution id $1"
 5 | echo "grep -nr --exclude-dir='.github' "$1" ./.."
 6 | result=$(grep -nr --exclude-dir='.github' "$1" ./..)
 7 | if [ $? -eq 0 ]
 8 | then
 9 |   echo "Solution ID $1 found\n"
10 |   echo "$result"
11 |   exit 0
12 | else
13 |   echo "Solution ID $1 not found"
14 |   exit 1
15 | fi
16 | 
17 | export result
18 | 


--------------------------------------------------------------------------------
/.github/workflows/maintainer_workflows.yml:
--------------------------------------------------------------------------------
 1 | # Workflows managed by aws-solutions-library-samples maintainers
 2 | name: Maintainer Workflows
 3 | on:
 4 |   # Triggers the workflow on push or pull request events but only for the "main" branch
 5 |   push:
 6 |     branches: [ "main" ]
 7 |   pull_request:
 8 |     branches: [ "main" ]
 9 |     types: [opened, reopened, edited]
10 | 
11 | jobs:
12 |   CheckSolutionId:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v4
16 |       - name: Run solutionid validator
17 |         run: |
18 |           chmod u+x ./.github/solutionid_validator.sh
19 |           ./.github/solutionid_validator.sh ${{ vars.SOLUTIONID }}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | 2-trace/traced-*
2 | 2-trace/traced-bert-base-multilingual-cased/
3 | **/compile_wd_inf_*
4 | 4-deploy/app-*
5 | 5-test/app-*
6 | 


--------------------------------------------------------------------------------
/0-provision/.env:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | export AWS_REGION=us-west-2
 4 | export AWS_REGION_AZ1=$(aws ec2 describe-availability-zones | jq '.AvailabilityZones[] | select (.ZoneId == "usw2-az1") | .ZoneName')
 5 | export AWS_REGION_AZ2=$(aws ec2 describe-availability-zones | jq '.AvailabilityZones[] | select (.ZoneId == "usw2-az2") | .ZoneName')
 6 | export INSTANCE_TYPE_CPU=c5.4xlarge
 7 | export INSTANCE_TYPE_GRAVITON=c7g.4xlarge
 8 | export INSTANCE_TYPE_INF=inf2.xlarge
 9 | 
10 | export REGISTRY=public.ecr.aws/a2u7h5w3/
11 | export MODEL_IMAGE_NAME=bert-base-workshop
12 | export TEST_IMAGE_NAME=bert-base-workshop
13 | export MODEL_IMAGE_TAG_INF=:v15-inf2
14 | export MODEL_IMAGE_TAG_GRAVITON=:v15-graviton
15 | export TEST_IMAGE_TAG_CPU=:test-v15-cpu
16 | 
17 | 


--------------------------------------------------------------------------------
/0-provision/.gitignore:
--------------------------------------------------------------------------------
1 | *.yaml
2 | 


--------------------------------------------------------------------------------
/0-provision/ManagementInstance.json:
--------------------------------------------------------------------------------
  1 | {
  2 | 	"AWSTemplateFormatVersion": "2010-09-09",
  3 | 
  4 | 	"Description": "(SO9259) - ML inference on EKS with Graviton and Inferentia2 architecures",
  5 | 	"Mappings": {
  6 | 		"AWSRegion2AMI": {
  7 | 			"us-east-1": {
  8 | 				"ami": "ami-03f6c2c562b3df715"
  9 | 			},
 10 | 			"us-east-2": {
 11 | 				"ami": "ami-0485ca4b2b7cb275d"
 12 | 			},
 13 | 			"us-west-1": {
 14 | 				"ami": "ami-06800a9f539283f43"
 15 | 			},
 16 | 			"us-west-2": {
 17 | 				"ami": "ami-093e5f43b11508e3e"
 18 | 			}
 19 | 		}
 20 | 	},
 21 | 
 22 | 	"Resources": {
 23 | 
 24 | 		"ManagementInstance": {
 25 | 			"Type": "AWS::EC2::Instance",
 26 | 			"Properties": {
 27 | 				"ImageId": {
 28 | 					"Fn::FindInMap": ["AWSRegion2AMI", {
 29 | 						"Ref": "AWS::Region"
 30 | 					}, "ami"]
 31 | 				},
 32 | 				"InstanceType": "c7g.4xlarge",
 33 | 				"IamInstanceProfile": {
 34 | 					"Ref": "RootInstanceProfile"
 35 | 				},
 36 | 				"SecurityGroups": [{
 37 | 					"Ref": "ManagementInstanceSecurityGroup"
 38 | 				}],
 39 | 				"UserData": {
 40 | 					"Fn::Base64": {
 41 | 						"Fn::Join": ["", [
 42 | 							"#!/bin/bash \n",
 43 | 							"echo 'Executing userdata script to install EKS cluster with add-ons...' | sudo tee -a /tmp/bootstrap.log \n",
 44 | 							"#sudo chmod 666 /tmp/bootstrap.log; sudo chown ec2-user:ec2-user /tmp/bootstrap.log \n",
 45 | 							"date | sudo tee -a /tmp/bootstrap.log \n",
 46 | 							"sudo yum update | sudo tee -a /tmp/bootstrap.log \n",
 47 | 						        "sudo yum install -y aws-cfn-bootstrap git | sudo tee -a /tmp/bootstrap.log \n",
 48 | 							"sudo su ec2-user bash -c 'cd /home/ec2-user; git clone https://github.com/aws-samples/aws-do-eks.git | sudo tee -a /tmp/bootstrap.log' \n",
 49 | 							"sudo su ec2-user bash -c 'cd /home/ec2-user; git clone https://github.com/aws-solutions-library-samples/guidance-for-machine-learning-inference-on-aws.git | sudo tee -a /tmp/bootstrap.log' \n",
 50 | 							"sudo bash -c '/home/ec2-user/aws-do-eks/Container-Root/eks/ops/setup/install-aws-cli.sh | sudo tee -a /tmp/bootstrap.log' \n",
 51 | 							"sudo bash -c '/home/ec2-user/aws-do-eks/Container-Root/eks/ops/setup/install-eksctl.sh | sudo tee -a /tmp/bootstrap.log' \n",
 52 | 							"sudo bash -c '/home/ec2-user/aws-do-eks/Container-Root/eks/ops/setup/install-kubectl.sh | sudo tee -a /tmp/bootstrap.log' \n",
 53 | 							"sudo bash -c '/home/ec2-user/aws-do-eks/Container-Root/eks/ops/setup/install-kubectx.sh | sudo tee -a /tmp/bootstrap.log' \n",
 54 | 							"sudo bash -c '/home/ec2-user/aws-do-eks/Container-Root/eks/ops/setup/install-kubeps1.sh | sudo tee -a /tmp/bootstrap.log' \n",
 55 | 							"sudo bash -c '/home/ec2-user/aws-do-eks/Container-Root/eks/ops/setup/install-docker-yum.sh | sudo tee -a /tmp/bootstrap.log' \n",
 56 | 							"sudo bash -c '/home/ec2-user/aws-do-eks/Container-Root/eks/ops/setup/install-helm.sh | sudo tee -a /tmp/bootstrap.log' \n",
 57 | 							"sudo bash -c 'ln -s /usr/bin/python3 /usr/bin/python | sudo tee -a /tmp/bootstrap.log' \n",
 58 | 							"sudo su ec2-user bash -c '/home/ec2-user/aws-do-eks/Container-Root/eks/ops/setup/install-envsubst.sh | sudo tee -a /tmp/bootstrap.log' \n",
 59 | 							"sudo su ec2-user bash -c 'source /home/ec2-user/aws-do-eks/wd/cfn/inference-workshop/.env; cat /home/ec2-user/aws-do-eks/wd/conf/eksctl/yaml/eks-inference-workshop.yaml-template | envsubst | sudo tee /home/ec2-user/aws-do-eks/wd/conf/eksctl/yaml/eks-inference-workshop.yaml' \n", 
 60 | 							"sudo su ec2-user bash -c 'cat /home/ec2-user/aws-do-eks/wd/conf/eksctl/yaml/eks-inference-workshop.yaml | sudo tee -a /tmp/bootstrap.log' \n",
 61 | 						        "sudo su ec2-user bash -c 'echo Creating EKS cluster ... | sudo tee -a /tmp/bootstrap.log' \n",	
 62 | 							"sudo su ec2-user bash -c '/usr/local/bin/eksctl create cluster -f /home/ec2-user/aws-do-eks/wd/conf/eksctl/yaml/eks-inference-workshop.yaml | sudo tee -a /tmp/bootstrap.log' \n",
 63 | 							"sudo su ec2-user bash -c 'source /home/ec2-user/.bashrc; sleep 2; aws eks update-kubeconfig --name eks-inference-workshop | sudo tee -a /tmp/bootstrap.log' \n",
 64 | 							"sudo su ec2-user bash -c '/home/ec2-user/aws-do-eks/Container-Root/eks/deployment/neuron-device-driver/labels-add.sh | sudo tee -a /tmp/bootstrap.log' \n",
 65 | 							"sudo su ec2-user bash -c 'kubectl apply -f /home/ec2-user/aws-do-eks/Container-Root/eks/deployment/neuron-device-driver/neuron2-device-driver-daemonset.yaml | sudo tee -a /tmp/bootstrap.log' \n",
 66 | 							"sudo su ec2-user bash -c '/home/ec2-user/aws-do-eks/Container-Root/eks/deployment/neuron-device-plugin/deploy.sh | sudo tee -a /tmp/bootstrap.log' \n",
 67 | 							"sudo su ec2-user bash -c 'chmod +x /home/ec2-user/aws-do-eks/Container-Root/eks/ops/setup/install-cloudwatch-addon.sh; /home/ec2-user/aws-do-eks/Container-Root/eks/ops/setup/install-cloudwatch-addon.sh | sudo tee -a /tmp/bootstrap.log' \n",
 68 | 							"sudo su ec2-user bash -c 'cd /home/ec2-user/aws-do-eks/Container-Root/eks/deployment/karpenter/; ./deploy.sh; source ./karpenter.conf; ./provisioner-deploy-v1beta1.sh | sudo tee -a /tmp/bootstrap.log' \n", 
 69 | 							"sudo su ec2-user bash -c 'cd /home/ec2-user/guidance-for-machine-learning-inference-on-aws/0-provision; ./prepull-workshop.sh | sudo tee -a /tmp/bootstrap.log' \n",
 70 | 							"date | sudo tee -a /tmp/bootstrap.log \n",
 71 | 							"echo 'Done executing userdata script, please see output in the /tmp/bootstrap.log file.' | sudo tee -a /tmp/bootstrap.log \n"
 72 | 						]]
 73 | 					}
 74 | 				}
 75 | 			},
 76 | 			"CreationPolicy": {}
 77 | 		},
 78 | 
 79 | 		"RootRole": {
 80 | 			"Type": "AWS::IAM::Role",
 81 | 			"Properties": {
 82 | 				"AssumeRolePolicyDocument": {
 83 | 					"Version": "2012-10-17",
 84 | 					"Statement": [{
 85 | 						"Effect": "Allow",
 86 | 						"Principal": {
 87 | 							"Service": ["ec2.amazonaws.com"]
 88 | 						},
 89 | 						"Action": ["sts:AssumeRole"]
 90 | 					}]
 91 | 				},
 92 | 				"Path": "/"
 93 | 			}
 94 | 		},
 95 | 
 96 | 		"RolePolicies": {
 97 | 			"Type": "AWS::IAM::Policy",
 98 | 			"Properties": {
 99 | 				"PolicyName": "root",
100 | 				"PolicyDocument": {
101 | 					"Version": "2012-10-17",
102 | 					"Statement": [{
103 | 						"Effect": "Allow",
104 | 						"Action": "*",
105 | 						"Resource": "*"
106 | 					}]
107 | 				},
108 | 				"Roles": [{
109 | 					"Ref": "RootRole"
110 | 				}]
111 | 			}
112 | 		},
113 | 
114 | 		"RootInstanceProfile": {
115 | 			"Type": "AWS::IAM::InstanceProfile",
116 | 			"Properties": {
117 | 				"Path": "/",
118 | 				"Roles": [{
119 | 					"Ref": "RootRole"
120 | 				}]
121 | 			}
122 | 		},
123 | 
124 | 		"ManagementInstanceSecurityGroup": {
125 | 			"Type": "AWS::EC2::SecurityGroup",
126 | 			"Properties": {
127 | 				"GroupDescription": "Enable access to Management Instance",
128 | 				"SecurityGroupIngress": [{
129 | 						"IpProtocol": "tcp",
130 | 						"FromPort": "443",
131 | 						"ToPort": "443",
132 | 						"CidrIp": "0.0.0.0/0"
133 | 					},
134 | 					{
135 | 						"IpProtocol": "tcp",
136 | 						"FromPort": "22",
137 | 						"ToPort": "22",
138 | 						"CidrIp": "0.0.0.0/0"
139 | 					}
140 | 				]
141 | 			}
142 | 		}
143 | 
144 | 	},
145 | 
146 | 	"Outputs": {
147 | 		"ManagementInstanceDNSName": {
148 | 			"Description": "Management Instance",
149 | 			"Value": {
150 | 				"Fn::Join": ["", ["", {
151 | 					"Fn::GetAtt": ["ManagementInstance", "PublicDnsName"]
152 | 				}]]
153 | 			}
154 | 		},
155 | 		"ManagementInstanceSMLogin": {
156 | 			"Description": "Session Manager Login",
157 | 			"Value": {
158 | 				"Fn::Join": ["",  [
159 | 						"https://",
160 | 						{ "Ref": "AWS::Region" },
161 | 						".console.aws.amazon.com/systems-manager/session-manager/",
162 | 						{ "Ref": "ManagementInstance" },
163 | 						"?region=",
164 | 						{"Ref": "AWS::Region"}
165 | 					]
166 | 				]
167 | 			}
168 | 		}
169 | 	}
170 | }
171 | 


--------------------------------------------------------------------------------
/0-provision/prepull-daemonset.yaml-template:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: DaemonSet
 3 | metadata:
 4 |   name: prepull-${ENTITY}-${PROCESSOR}
 5 |   namespace: kube-system
 6 |   labels:
 7 |     k8s-app: prepull-${ENTITY}-${PROCESSOR}
 8 | spec:
 9 |   selector:
10 |     matchLabels:
11 |       name: prepull-${ENTITY}-${PROCESSOR}
12 |   template:
13 |     metadata:
14 |       labels:
15 |         name: prepull-${ENTITY}-${PROCESSOR}
16 |     spec:
17 |       nodeSelector:
18 |         node.kubernetes.io/instance-type: ${INSTANCE_TYPE}
19 |       initContainers:
20 |       - name: prepull-${ENTITY}-${PROCESSOR}
21 |         # This is the image to pre-pull
22 |         image: ${REGISTRY}${IMAGE}${TAG}
23 |         command: ["/bin/sh"]
24 |         args: ["-c", "echo Image ${REGISTRY}${IMAGE}${TAG} pre-pulled"]
25 |       containers:
26 |       - name: pause
27 |         image: public.ecr.aws/eks-distro/kubernetes/pause:v1.29.5-eks-1-29-latest
28 |       terminationGracePeriodSeconds: 10
29 | 


--------------------------------------------------------------------------------
/0-provision/prepull-workshop.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | help () {
 4 | 	echo ""
 5 | 	echo "Usage: $0 [ACTION]"
 6 | 	echo "       ACTION - start(default), status, stop, help"
 7 | 	echo ""
 8 | }
 9 | 
10 | export ACTION=$1
11 | if [ "$ACTION" == "" ]; then
12 | 	export ACTION=start
13 | fi
14 | 
15 | if [ "$ACTION" == "help" ]; then
16 | 	help
17 | elif [ "$ACTION" == "start" ]; then
18 | 	./prepull.sh start model inf
19 | 	./prepull.sh start model graviton
20 | 	./prepull.sh start test cpu
21 | elif [ "$ACTION" == "status" ]; then
22 | 	CMD="kubectl get ds -A | grep -E 'READY|prepull'"
23 | 	echo ""
24 | 	echo "$CMD"
25 | 	eval "$CMD"
26 | elif [ "$ACTION" == "stop" ]; then
27 | 	./prepull.sh stop model inf
28 | 	./prepull.sh stop model graviton
29 | 	./prepull.sh stop test cpu
30 | else
31 | 	echo ""
32 | 	echo "Invalid action: $ACTION"
33 | 	help
34 | fi
35 | 	
36 | 


--------------------------------------------------------------------------------
/0-provision/prepull.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | help () {
 4 | 	echo ""
 5 | 	echo "Usage: "
 6 | 	echo "   $0 <ACTION> <ENTITY> <PROCESSOR>"
 7 | 	echo ""
 8 | 	echo "   ACTION    - start, describe, status, stop"
 9 | 	echo "   ENTITY    - model or test"
10 | 	echo "   PROCESSOR - cpu, gpu, graviton, inf"
11 | 	echo ""
12 | }
13 | 
14 | if [ "$3" == "" ]; then
15 | 	help
16 | else
17 | 	source .env
18 | 	export ACTION=$1
19 | 	export ENTITY=$2
20 | 	export ENTITY_UPPER=$(echo $ENTITY | tr '[:lower:]' '[:upper:]')
21 | 	export PROCESSOR=$3
22 | 	export PROCESSOR_UPPER=$(echo $PROCESSOR | tr '[:lower:]' '[:upper:]')
23 | 	export VAR_NAME_INSTANCE_TYPE=INSTANCE_TYPE_${PROCESSOR_UPPER}
24 | 	export INSTANCE_TYPE=$(printenv $VAR_NAME_INSTANCE_TYPE)
25 | 	export VAR_IMAGE_NAME=${ENTITY_UPPER}_IMAGE_NAME
26 | 	export IMAGE=$(printenv $VAR_IMAGE_NAME)
27 | 	export VAR_TAG=${ENTITY_UPPER}_IMAGE_TAG_${PROCESSOR_UPPER}
28 | 	export TAG=$(printenv $VAR_TAG)
29 | 	export MANIFEST=prepull-daemonset-${ENTITY}-${PROCESSOR}.yaml
30 | 	cat prepull-daemonset.yaml-template | envsubst > $MANIFEST
31 | 
32 | 	cat $MANIFEST
33 | 
34 | 	if [ "$ACTION" == "start" ]; then
35 | 		CMD="kubectl apply -f ./${MANIFEST}"
36 | 	elif [ "$ACTION" == "describe" ]; then
37 | 		CMD="kubectl describe -f ./${MANIFEST}"
38 | 	elif [ "$ACTION" == "status" ]; then
39 | 		CMD="kubectl get -f ./${MANIFEST}"
40 | 	elif [ "$ACTION" == "stop" ]; then
41 | 		CMD="kubectl delete -f ./${MANIFEST}"
42 | 	else
43 | 		echo ""
44 | 		echo "Invalid action: $ACTION"
45 | 		CMD=""
46 | 	fi
47 | 
48 | 	echo "$CMD"
49 | 
50 | 	eval "$CMD"
51 | fi
52 | 


--------------------------------------------------------------------------------
/0-provision/stack-create.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | source ../config.properties
 4 | 
 5 | CMD="aws cloudformation create-stack --stack-name ManagementInstance --template-body file://ManagementInstance.json --capabilities CAPABILITY_IAM"
 6 | 
 7 | if [ ! "$verbose" == "false" ]; then
 8 |         echo -e "\n${CMD}\n"
 9 | fi
10 | eval "${CMD}"
11 | 
12 | 


--------------------------------------------------------------------------------
/0-provision/stack-delete.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | aws cloudformation delete-stack --stack-name ManagementInstance
4 | 
5 | 


--------------------------------------------------------------------------------
/1-build/Dockerfile-base-cpu:
--------------------------------------------------------------------------------
1 | FROM python:3.9
2 | 
3 | LABEL description="Base container for CPU models"
4 | 
5 | RUN apt-get update && apt-get install -y htop dnsutils bc vim
6 | 
7 | RUN pip install torch configparser transformers
8 | 
9 | RUN echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc


--------------------------------------------------------------------------------
/1-build/Dockerfile-base-gpu:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:11.1.1-runtime-ubuntu20.04
 2 | 
 3 | LABEL description="Base container for GPU models"
 4 | 
 5 | RUN apt-get update && apt-get install -y htop vim wget curl software-properties-common debconf-utils python3-distutils dnsutils bc
 6 | 
 7 | # Install python3.9
 8 | RUN DEBIAN_FRONTEND=noninteractive; add-apt-repository -y ppa:deadsnakes/ppa; apt install -y python3.9; update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1
 9 | 
10 | # Install pip
11 | RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py; python get-pip.py; rm -f get-pip.py
12 | 
13 | # Install pytorch with GPU support
14 | # RUN pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
15 | # Need to install newer version of torch to work with Python 3.9
16 | RUN pip install torch==2.0.1+cu117 torchvision==0.15.2+cu117 -f https://download.pytorch.org/whl/torch_stable.html
17 | 
18 | RUN echo "PATH=/usr/local/cuda/bin\${PATH:+:\${PATH}}" >> /etc/environment
19 | RUN echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}" >> /etc/environment
20 | 
21 | # Install other python libraries
22 | RUN pip install transformers configparser
23 | 


--------------------------------------------------------------------------------
/1-build/Dockerfile-base-graviton:
--------------------------------------------------------------------------------
 1 | FROM python:3.9
 2 | 
 3 | LABEL description="Base container for CPU models running on Graviton architecture processors"
 4 | 
 5 | RUN apt-get update && apt-get install -y htop dnsutils bc vim
 6 | 
 7 | RUN pip install torch configparser transformers
 8 | 
 9 | RUN echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc
10 | 


--------------------------------------------------------------------------------
/1-build/Dockerfile-base-inf1:
--------------------------------------------------------------------------------
 1 | FROM amazonlinux:2
 2 |   
 3 | LABEL description="Base container for Inferentia1 models"
 4 | ENV PYTHONUNBUFFERED=TRUE
 5 | ENV PYTHONDONTWRITEBYTECODE=TRUE
 6 | ADD ./1-build/etc /etc
 7 | RUN echo -e '[neuron]\nname=Neuron YUM Repository\nbaseurl=https://yum.repos.neuron.amazonaws.com\nenabled=1\nmetadata_expire=0\n' >> /etc/yum.repos.d/neuron.repo
 8 | RUN rpm --import https://yum.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB
 9 | RUN yum update -y && \
10 |     yum install -y python3 python3-devel gcc-c++ && \
11 |     yum install -y tar gzip ca-certificates procps net-tools which vim wget libgomp htop jq bind-utils bc pciutils && \
12 |     yum install -y aws-neuronx-tools-2.*
13 | RUN pip3 install --upgrade --force-reinstall --no-cache-dir neuron-cc[tensorflow] torch-neuron transformers==4.2.0 --extra-index-url=https://pip.repos.neuron.amazonaws.com
14 | RUN pip3 install --no-cache-dir torchserve==0.3.0 torch-model-archiver==0.3.0 configparser
15 | RUN alternatives --install /usr/bin/python python /usr/bin/python3 1; alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
16 | RUN echo "export PATH=/opt/aws/neuron/bin:$PATH" >> /root/.bashrc
17 | RUN echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc 
18 | ADD ./1-build/*.py /app/
19 | 
20 | 


--------------------------------------------------------------------------------
/1-build/Dockerfile-base-inf2:
--------------------------------------------------------------------------------
 1 | FROM amazonlinux:2
 2 | 
 3 | LABEL description="Base container for Inferentia2 models"
 4 | ENV PYTHONUNBUFFERED=TRUE
 5 | ENV PYTHONDONTWRITEBYTECODE=TRUE
 6 | ADD ./1-build/etc /etc
 7 | # Neuron SDK components version numbers
 8 | ARG NEURONX_RUNTIME_LIB_VERSION=2.16.*
 9 | ARG NEURONX_COLLECTIVES_LIB_VERSION=2.16.*
10 | ARG NEURONX_TOOLS_VERSION=2.13.*
11 | ARG NEURONX_FRAMEWORK_VERSION=1.13.1.1.10.*
12 | ARG NEURONX_TRANSFORMERS_VERSION=0.6.*
13 | ARG NEURONX_CC_VERSION=2.9.*
14 | ARG TORCHSERVE_VERSION=0.8.2
15 | 
16 | RUN echo -e '[neuron]\nname=Neuron YUM Repository\nbaseurl=https://yum.repos.neuron.amazonaws.com\nenabled=1\nmetadata_expire=0\n' >> /etc/yum.repos.d/neuron.repo
17 | RUN rpm --import https://yum.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB
18 | RUN amazon-linux-extras install -y python3.8
19 | RUN yum update -y && \
20 |     yum install -y git tar gzip ca-certificates procps net-tools which vim wget libgomp htop jq bind-utils bc pciutils && \
21 |     yum install -y gcc-c++ && \
22 |     yum install -y jq java-11-amazon-corretto-headless  # for torchserve
23 | RUN yum install -y aws-neuronx-collectives-${NEURONX_COLLECTIVES_LIB_VERSION} && \
24 |     yum install -y aws-neuronx-runtime-lib-${NEURONX_RUNTIME_LIB_VERSION} && \
25 |     yum install -y aws-neuronx-tools-${NEURONX_TOOLS_VERSION}
26 | ENV PATH="/opt/aws/neuron/bin:${PATH}"
27 | 
28 | #fix for incorrect Python version configured by default in the base image
29 | RUN rm -f /usr/bin/python
30 | RUN ln -s /usr/bin/python3.8 /usr/bin/python3
31 | RUN ln -s /usr/bin/python3 /usr/bin/python
32 | 
33 | RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3.8 1
34 | 
35 | RUN pip3.8 install --extra-index-url https://pip.repos.neuron.amazonaws.com \
36 |     neuronx-cc==$NEURONX_CC_VERSION \
37 |     torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
38 |     transformers-neuronx==$NEURONX_TRANSFORMERS_VERSION 
39 | RUN pip3.8 install "protobuf<4" \
40 |     && pip3.8 install torchserve==${TORCHSERVE_VERSION} \
41 |     && pip3.8 install torch-model-archiver==${TORCHSERVE_VERSION} \
42 |     && pip3.8 install --no-deps --no-cache-dir -U torchvision==0.14.* captum==0.6.0 configparser
43 | 
44 | RUN echo "alias ll='ls -alh --color=auto'" >> /root/.bashrc 
45 | ADD ./1-build/*.py /app/
46 | 
47 | 


--------------------------------------------------------------------------------
/1-build/etc/yum.repos.d/neuron.repo:
--------------------------------------------------------------------------------
1 | [neuron]
2 | name=Neuron YUM Repository
3 | baseurl=https://yum.repos.neuron.amazonaws.com
4 | enabled=1


--------------------------------------------------------------------------------
/1-build/pull.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | # Pull base image from container registry
 9 | 
10 | if [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | elif [ -f ../config.properties ]; then
13 |     source ../config.properties
14 | else
15 |     echo "config.properties not found!"
16 | fi
17 | 
18 | CMD="docker pull ${registry}${base_image_name}${base_image_tag}"
19 | if [ ! "$verbose" == "false" ]; then
20 |         echo -e "\n${CMD}\n"
21 | fi
22 | eval "${CMD}"
23 | 
24 | 


--------------------------------------------------------------------------------
/1-build/push.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | # Push packed image to container registry
 9 | 
10 | if [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | elif [ -f ../config.properties ]; then
13 |     source ../config.properties
14 | else
15 |     echo "config.properties not found!"
16 | fi
17 | 
18 | ./login.sh
19 | # Create registry if needed
20 | IMAGE=${base_image_name}
21 | REGISTRY_COUNT=$(aws ecr describe-repositories | grep ${IMAGE} | wc -l)
22 | if [ "$REGISTRY_COUNT" == "0" ]; then
23 |     CMD="aws ecr create-repository --repository-name ${IMAGE} --region ${region}"
24 |     if [ ! "$verbose" == "false" ]; then
25 |         echo -e "\n${CMD}\n"
26 |     fi
27 |     eval "${CMD}"
28 | fi
29 | 
30 | CMD="docker push ${registry}${base_image_name}${base_image_tag}"
31 | if [ ! "$verbose" == "false" ]; then
32 |     echo -e "\n${CMD}\n"
33 | fi
34 | eval "${CMD}"
35 | 
36 | 


--------------------------------------------------------------------------------
/1-build/sanity-inf.py:
--------------------------------------------------------------------------------
 1 | import torch, torch_neuron
 2 | import torch.nn as nn
 3 | from time import time
 4 | 
 5 | # Define minimalistic PyTorch model class
 6 | class mymodel(nn.Module):
 7 |     def __init__(self):
 8 |         super(mymodel, self).__init__()
 9 | 
10 |     def forward(self, inputs):
11 |         a = inputs[0]
12 |         b = inputs[1]
13 |         return torch.matmul(a,b)
14 | 
15 | # Create example model inputs required for tracing
16 | inputs = (torch.randn((5000,100), dtype=torch.float32), torch.randn((100,5000), dtype=torch.float32))
17 | 
18 | # Instantiate instance of model
19 | mymodel = mymodel()
20 | 
21 | # Trace the model, returning a Neuron-compiled version
22 | nmod = torch.neuron.trace(mymodel, example_inputs=[inputs], minimum_segment_size=1)
23 | 
24 | # Save Neuron-compiled model -> can be loaded at a later time using torch.jit.load()
25 | nmod.save("neuron_model.pt")
26 | 
27 | print("\nAttempting inference using Neuron-compiled model")
28 | for _ in range(10):
29 |     start = time()
30 |     _ = nmod(inputs)
31 |     print(f"latency: {time()-start:.3f}s")
32 | 
33 | 


--------------------------------------------------------------------------------
/2-trace/model-tracer.py:
--------------------------------------------------------------------------------
  1 | ######################################################################
  2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
  3 | # SPDX-License-Identifier: MIT-0                                     #
  4 | ######################################################################
  5 | 
  6 | import platform
  7 | import torch
  8 | import importlib
  9 | from configparser import ConfigParser
 10 | 
 11 | machine=platform.uname().machine
 12 | device_type='cpu'
 13 | if machine == 'aarch64':
 14 |     device_type='arm'
 15 | 
 16 | try:
 17 |     import torch_neuron
 18 |     device_type='inf1'
 19 | except ImportError:
 20 |     print('[WARN] Torch Neuron not Found')
 21 |     pass
 22 | try:
 23 |     import torch_neuronx
 24 |     device_type='inf2'
 25 | except ImportError:
 26 |     print('[WARN] Torch Neuronx not Found')
 27 |     pass
 28 | 
 29 | import os
 30 | 
 31 | # 1. READ config.properties
 32 | print("\nParsing configuration ...")
 33 | path_prefix = os.getcwd()
 34 | with open(path_prefix + '/../config.properties') as f:
 35 |     config_lines = '[global]\n' + f.read()
 36 |     f.close()
 37 | config = ConfigParser()
 38 | config.read_string(config_lines)
 39 | 
 40 | model_name = config['global']['huggingface_model_name']
 41 | tokenizer_class_name = config['global']['huggingface_tokenizer_class']
 42 | model_class_name = config['global']['huggingface_model_class']
 43 | sequence_length=int(config['global']['sequence_length'])
 44 | processor=config['global']['processor']
 45 | pipeline_cores=config['global']['pipeline_cores']
 46 | batch_size=int(config['global']['batch_size'])
 47 | test=config['global']['test']
 48 | 
 49 | question = "What does the little engine say?"
 50 | 
 51 | context = """In the childrens story about the little engine a small locomotive is pulling a large load up a mountain.
 52 |     Since the load is heavy and the engine is small it is not sure whether it will be able to do the job. This is a story 
 53 |     about how an optimistic attitude empowers everyone to achieve more. In the story the little engine says: 'I think I can' as it is 
 54 |     pulling the heavy load all the way to the top of the mountain. On the way down it says: I thought I could."""
 55 | 
 56 | 
 57 | # 2. LOAD PRE-TRAINED MODEL
 58 | print(f'\nLoading pre-trained model: {model_name}')
 59 | transformers = importlib.import_module("transformers")
 60 | tokenizer_class = getattr(transformers, tokenizer_class_name)
 61 | model_class = getattr(transformers, model_class_name)
 62 | tokenizer = tokenizer_class.from_pretrained(model_name)
 63 | model = model_class.from_pretrained(model_name, return_dict=False)
 64 | 
 65 | # 3. TOKENIZE THE INPUT
 66 | print('\nTokenizing input sample ...')
 67 | inputs = tokenizer.encode_plus(question,
 68 |                                context,
 69 |                                return_tensors="pt",
 70 |                                max_length=sequence_length,
 71 |                                padding='max_length',
 72 |                                truncation=True)
 73 | if device_type not in ['inf1', 'inf2']:
 74 |     if torch.cuda.is_available():
 75 |         device = torch.device("cuda")
 76 |         device_type = "gpu"
 77 |         model.to(device)
 78 |         inputs.to(device)
 79 |     else:
 80 |         device = torch.device("cpu")
 81 | 
 82 | if device_type == processor:
 83 |     print(f"   ... Using device: {device_type}")
 84 | else:
 85 |     print(f"[WARN] detected device_type ({device_type}) does not match the configured processor ({processor})")
 86 | 
 87 | # 2. COMPILE THE MODEL
 88 | print('\nTracing model ...')
 89 | example_inputs = (
 90 |     torch.cat([inputs['input_ids']] * batch_size,0), 
 91 |     torch.cat([inputs['attention_mask']] * batch_size,0)
 92 | )
 93 | os.makedirs(f'traced-{model_name}', exist_ok=True)
 94 | torch.set_num_threads(6)
 95 | if 'inf' == processor:
 96 |     model_traced = torch.neuron.trace(model, 
 97 |                                   example_inputs, 
 98 |                                   verbose=1, 
 99 |                                   compiler_workdir=f'./traced-{model_name}/compile_wd_{processor}_bs{batch_size}_seq{sequence_length}_pc{pipeline_cores}',  
100 |                                   compiler_args = ['--neuroncore-pipeline-cores', str(pipeline_cores)])
101 | elif 'inf2' == processor:
102 |     model_traced = torch_neuronx.trace(model,
103 |                                   example_inputs)
104 | else:
105 |     model_traced = torch.jit.trace(model, example_inputs)
106 |     
107 | # 3. TEST THE COMPILED MODEL (Optional)        
108 | if test.lower() == 'true':
109 |     print("\nTesting traced model ...")
110 |     print(f"Question: {question}")
111 |     # Testing the traced model
112 |     answer_logits = model_traced(*example_inputs)
113 |     answer_start = answer_logits[0].argmax().item()
114 |     answer_end = answer_logits[1].argmax().item()+1
115 |     answer_txt = ""
116 |     if answer_end > answer_start:
117 |         answer_txt = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end]))
118 |     else:
119 |         answer_txt = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:]))
120 |     print(f'Model Answer: {answer_txt}')
121 | 
122 | # 4. SAVE THE COMPILED MODEL
123 | print('\nSaving traced model ...')
124 | model_path=f'./traced-{model_name}/{model_name}_bs{batch_size}_seq{sequence_length}_pc{pipeline_cores}_{processor}.pt'
125 | model_traced.save(model_path)
126 | 
127 | print(f'Done. Model saved as: {model_path}')
128 | 


--------------------------------------------------------------------------------
/3-pack/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG BASE_IMAGE
 2 | 
 3 | FROM $BASE_IMAGE
 4 | 
 5 | ARG MODEL_NAME
 6 | ARG MODEL_FILE_NAME
 7 | ARG PROCESSOR
 8 | 
 9 | LABEL description="Model $MODEL_NAME packed in a FastAPI Server container to run on $PROCESSOR"
10 | 
11 | RUN mkdir -p /app/server/models
12 | 
13 | COPY ./config.properties /app/config.properties
14 | 
15 | COPY  ./3-pack/fastapi-server.py /app/server/fastapi-server.py
16 | 
17 | COPY ./3-pack/run.sh /app/server/run.sh
18 | 
19 | COPY ./3-pack/requirements.txt /app/server/requirements.txt
20 | 
21 | COPY ./2-trace/traced-${MODEL_NAME}/${MODEL_FILE_NAME} /app/server/models
22 | 
23 | RUN pip install -r /app/server/requirements.txt
24 | 
25 | WORKDIR /app/server
26 | 
27 | EXPOSE 8080
28 | 
29 | CMD ["./run.sh"]


--------------------------------------------------------------------------------
/3-pack/fastapi-server.py:
--------------------------------------------------------------------------------
  1 | ######################################################################
  2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
  3 | # SPDX-License-Identifier: MIT-0                                     #
  4 | ######################################################################
  5 | 
  6 | from typing import Optional
  7 | from fastapi import FastAPI,logger,responses
  8 | from configparser import ConfigParser
  9 | import torch, os, logging
 10 | import importlib
 11 | import platform
 12 | 
 13 | global device
 14 | global processor
 15 | global device_type
 16 | global model
 17 | global tokenizer
 18 | global logger
 19 | global postprocess
 20 | global default_question, default_context
 21 | 
 22 | logger = logging.getLogger()
 23 | 
 24 | # Read static configuration from config.properties
 25 | logger.warning("\nParsing configuration ...")
 26 | path_prefix = os.path.dirname(__file__)
 27 | with open(path_prefix + '/../config.properties') as f:
 28 |     config_lines = '[global]\n' + f.read()
 29 |     f.close()
 30 | config = ConfigParser()
 31 | config.read_string(config_lines)
 32 | model_name = config['global']['huggingface_model_name']
 33 | tokenizer_class_name = config['global']['huggingface_tokenizer_class']
 34 | model_class_name = config['global']['huggingface_model_class']
 35 | sequence_length=config['global']['sequence_length']
 36 | processor=config['global']['processor']
 37 | pipeline_cores=config['global']['pipeline_cores']
 38 | batch_size=config['global']['batch_size']
 39 | default_question = "What does the little engine say"
 40 | default_context = """In the childrens story about the little engine a small locomotive is pulling a large load up a mountain.
 41 |     Since the load is heavy and the engine is small it is not sure whether it will be able to do the job. This is a story 
 42 |     about how an optimistic attitude empowers everyone to achieve more. In the story the little engine says: 'I think I can' as it is 
 43 |     pulling the heavy load all the way to the top of the mountain. On the way down it says: I thought I could."""
 44 | 
 45 | # Read runtime configuration from environment
 46 | postprocess=True
 47 | if (os.getenv("POSTPROCESS",'True').lower() in ['false','0']):
 48 |     postprocess=False
 49 | quiet=False
 50 | if (os.getenv("QUIET","False").lower() in ['true','1']):
 51 |     quiet=True
 52 | num_models=1
 53 | try:
 54 |     num_models=int(os.getenv("NUM_MODELS", '1'))
 55 | except ValueError:
 56 |     logger.warning(f"Failed to parse environment variable NUM_MODELS={os.getenv('NUM_MODELS')}")
 57 |     logger.warning("Please ensure if set NUM_MODELS is a numeric value. Assuming value of 1")
 58 | 
 59 | # Detect runtime device type inf1, inf2, gpu, cpu, or arm
 60 | device_type=""
 61 | 
 62 | try:
 63 |     import torch_neuron
 64 |     device_type="inf1"
 65 | except ImportError:
 66 |     logger.warning("Inf1 chip not detected")
 67 |     pass
 68 | try:
 69 |     import torch_neuronx
 70 |     device_type = 'inf2'
 71 | except ImportError:
 72 |     print('[WARN] Inf2 device not found')
 73 |     pass
 74 | 
 75 | 
 76 | if device_type in ['inf1', 'inf2']:
 77 |     pass
 78 | elif torch.cuda.is_available():
 79 |     device_type="gpu"
 80 |     device = torch.device("cuda")
 81 |     logger.warning(torch.cuda.get_device_name(0))
 82 | else:
 83 |     machine=platform.uname().machine
 84 |     device_type="cpu"
 85 |     if machine == 'aarch64':
 86 |         device_type="arm"
 87 |     device = torch.device("cpu")
 88 | 
 89 | if processor != device_type:
 90 |     logger.warning(f"Configured target processor {processor} differs from actual processor {device_type}")
 91 | logger.warning(f"Running models on processor: {device_type}")
 92 | 
 93 | 
 94 | # FastAPI server
 95 | app = FastAPI()
 96 | 
 97 | # Server healthcheck
 98 | @app.get("/")
 99 | async def read_root():
100 |     return {"Status": "Healthy"}
101 | 
102 | # Model inference API endpoint
103 | @app.get("/predictions/{model_id}")
104 | async def infer(model_id, seq_0: Optional[str] = default_question, seq_1: Optional[str] = default_context):
105 |     question=seq_0
106 |     context=seq_1
107 |     status=200
108 |     if model_id in models.keys():
109 |         if not quiet:
110 |             logger.warning(f"\nQuestion: {question}\n")
111 |         tokenizer=tokenizers[model_id]
112 |         encoded_input = tokenizer.encode_plus(question, context, return_tensors='pt', max_length=128, padding='max_length', truncation=True)
113 |         if processor=='gpu':
114 |             encoded_input.to(device)
115 |         model=models[model_id]
116 |         model_input = (encoded_input['input_ids'],  encoded_input['attention_mask'])
117 |         output=model(*model_input) # This is specific to Inferentia
118 |         answer_text = str(output[0])
119 |         if postprocess:
120 |             answer_start = torch.argmax(output[0])
121 |             answer_end = torch.argmax(output[1])+1
122 |             if (answer_end > answer_start):
123 |                 answer_text = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(encoded_input["input_ids"][0][answer_start:answer_end]))
124 |             else:
125 |                 answer_text = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(encoded_input["input_ids"][0][answer_start:]))
126 |         if not quiet:
127 |             logger.warning("\nAnswer: ")
128 |             logger.warning(answer_text)
129 |     else:
130 |         status=404
131 |         answer_text = f"Model {model_id} does not exist. Try a model name up to model{num_models-1}"
132 |         if not quiet:
133 |             logger.warning(answer_text)
134 |     return responses.JSONResponse(status_code=status, content={"detail": answer_text})
135 | 
136 | # Load models in memory and onto accelerator as needed
137 | model_suffix = "_bs"+batch_size+"_seq"+sequence_length+"_pc"+pipeline_cores+"_"+processor
138 | model_path=os.path.join(path_prefix,'models',model_name + model_suffix + ".pt")
139 | logger.warning(f"Loading {num_models} instances of pre-trained model {model_name} from path {model_path} ...")
140 | tokenizers={}
141 | models={}
142 | transformers = importlib.import_module("transformers")
143 | tokenizer_class = getattr(transformers, tokenizer_class_name)
144 | for i in range(num_models):
145 |     model_id = 'model' + str(i)
146 |     logger.warning(f"   {model_id} ...")
147 |     tokenizers[model_id]=tokenizer_class.from_pretrained(model_name)
148 |     models[model_id] = torch.jit.load(model_path)
149 |     if device_type=='gpu':
150 |         model=models[model_id]
151 |         model.to(device)
152 |     elif device_type in ['inf1', 'inf2']:
153 |         infer(model_id, default_question, default_context)
154 |         logger.warning("    ... warmup completed")
155 | 


--------------------------------------------------------------------------------
/3-pack/pull.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | # Pull model image from container registry
 9 | 
10 | if [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | elif [ -f ../config.properties ]; then
13 |     source ../config.properties
14 | else
15 |     echo "config.properties not found!"
16 | fi
17 | 
18 | CMD="docker pull ${registry}${model_image_name}${model_image_tag}"
19 | if [ ! "$verbose" == "false" ]; then
20 |     echo -e "\n${CMD}\n"
21 | fi
22 | eval "${CMD}"
23 | 
24 | 


--------------------------------------------------------------------------------
/3-pack/push.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | # Push model image to container registry
 9 | 
10 | if [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | elif [ -f ../config.properties ]; then
13 |     source ../config.properties
14 | else
15 |     echo "config.properties not found!"
16 | fi
17 | 
18 | ./login.sh
19 | # Create registry if needed
20 | IMAGE=${model_image_name}
21 | REGISTRY_COUNT=$(aws ecr describe-repositories | grep ${IMAGE} | wc -l)
22 | if [ "$REGISTRY_COUNT" == "0" ]; then
23 |     CMD="aws ecr create-repository --repository-name ${IMAGE} --region ${region}"
24 |     if [ ! "$verbose" == "false" ]; then
25 |         echo -e "\n${CMD}\n"
26 |     fi
27 |     eval "${CMD}"
28 | fi
29 | 
30 | CMD="docker push ${registry}${model_image_name}${model_image_tag}"
31 | if [ ! "$verbose" == "false" ]; then
32 |     echo -e "\n${CMD}\n"
33 | fi
34 | eval "${CMD}"
35 | 
36 | 


--------------------------------------------------------------------------------
/3-pack/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | uvicorn
3 | 


--------------------------------------------------------------------------------
/3-pack/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | # Uncomment the infinate loop below to start an idle container locally while developing or troubleshooting
 9 | #while true; do date; sleep 10; done
10 | 
11 | uvicorn fastapi-server:app --host 0.0.0.0 --port 8080
12 | 
13 | 


--------------------------------------------------------------------------------
/4-deploy/cpu-yaml.template:
--------------------------------------------------------------------------------
 1 | ---
 2 | kind: Service
 3 | apiVersion: v1
 4 | metadata:
 5 |   name: ${instance_name}
 6 |   namespace: ${namespace}
 7 |   labels:
 8 |     app: ${instance_name}
 9 | spec:
10 |   ports:
11 |   - name: preds
12 |     port: ${service_port}
13 |     targetPort: pod-port 
14 |   type: ClusterIP
15 |   selector:
16 |     app: ${instance_name}
17 | ---
18 | kind: Deployment
19 | apiVersion: apps/v1
20 | metadata:
21 |   name: ${instance_name}
22 |   namespace: ${namespace}
23 |   labels:
24 |     app: ${instance_name}
25 | spec:
26 |   replicas: 1
27 |   selector:
28 |     matchLabels:
29 |       app: ${instance_name}
30 |   template:
31 |     metadata:
32 |       labels:
33 |         app: ${instance_name}
34 |     spec:
35 |       nodeSelector:
36 |         node.kubernetes.io/instance-type: "${instance_type}"
37 |       containers:
38 |       - name: main
39 |         image: "${registry}${model_image_name}${model_image_tag}"
40 |         imagePullPolicy: Always
41 |         env:
42 |           - name: NUM_MODELS
43 |             value: "${num_models}"
44 |           - name: POSTPROCESS
45 |             value: "${postprocess}"
46 |           - name: QUIET
47 |             value: "${quiet}"
48 |         ports:
49 |         - name: pod-port
50 |           containerPort: 8080
51 |         resources:
52 |         # Use 'memory' setting in limits and requests to ensure that model pods get scheduled to nodes evenly
53 |           limits:
54 |             cpu: 1
55 |             #Total node memory resource is about 32 GB for c5.4xlarge and similar instance, adjust for other node types
56 |             #memory: "27000Mi"
57 |          #requests:
58 |             #Total node memory resource is about 32 GB for c5.4xlarge and similar instance, adjust for other node types
59 |             #memory: "27000Mi"
60 | 
61 | 


--------------------------------------------------------------------------------
/4-deploy/exec.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | echo ""
17 | echo "Runtime: $runtime"
18 | echo "Processor: $processor"
19 | 
20 | if [ "$runtime" == "docker" ]; then
21 |     CMD="docker exec -it ${app_name}-0 bash"
22 |     if [ ! "$verbose" == "false" ]; then
23 |         echo -e "\n${CMD}\n"
24 |     fi
25 |     eval "${CMD}"
26 | elif [ "$runtime" == "kubernetes" ]; then
27 |     CMD="kubectl -n ${namespace} exec -it $(kubectl -n ${namespace} get pod | grep ${app_name}-$1 | cut -d ' ' -f 1) -- bash"
28 |     if [ ! "$verbose" == "false" ]; then
29 |         echo -e "\n${CMD}\n"
30 |     fi
31 |     eval "${CMD}"
32 | else
33 |     echo "Runtime $runtime not recognized"
34 | fi
35 | 


--------------------------------------------------------------------------------
/4-deploy/generate-yaml.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | set -a
 9 | 
10 | if [ -f ../config.properties ]; then
11 |     source ../config.properties
12 | elif [ -f ./config.properties ]; then
13 |     source ./config.properties
14 | else
15 |     echo "config.properties not found!"
16 | fi
17 | 
18 | template=./${processor}-yaml.template
19 | prefix=${app_name}-
20 | instance_start=0
21 | instances=${num_servers}
22 | 
23 | if [ -d ./${app_dir} ]; then
24 |     rm -rf ./${app_dir}
25 | fi
26 | mkdir -p ./${app_dir}
27 | 
28 | instance=$instance_start
29 | while [ $instance -lt $instances ]
30 | do
31 | 	export instance_name=${prefix}${instance}
32 | 	echo "Generating ./${app_dir}/${instance_name}.yaml ..."
33 | 	CMD="cat $template | envsubst > ./${app_dir}/${instance_name}.yaml"
34 |         if [ ! "$verbose" == "false" ]; then
35 |             echo -e "\n${CMD}\n"
36 |         fi
37 |         eval "${CMD}"
38 | 	instance=$((instance+1))
39 | done
40 | 
41 | set +a
42 | 


--------------------------------------------------------------------------------
/4-deploy/gpu-yaml.template:
--------------------------------------------------------------------------------
 1 | ---
 2 | kind: Service
 3 | apiVersion: v1
 4 | metadata:
 5 |   name: ${instance_name}
 6 |   namespace: ${namespace}
 7 |   labels:
 8 |     app: ${instance_name}
 9 | spec:
10 |   ports:
11 |   - name: preds
12 |     port: ${service_port}
13 |     targetPort: pod-port 
14 |   type: ClusterIP
15 |   selector:
16 |     app: ${instance_name}
17 | ---
18 | kind: Deployment
19 | apiVersion: apps/v1
20 | metadata:
21 |   name: ${instance_name}
22 |   namespace: ${namespace}
23 |   labels:
24 |     app: ${instance_name}
25 | spec:
26 |   replicas: 1
27 |   selector:
28 |     matchLabels:
29 |       app: ${instance_name}
30 |   template:
31 |     metadata:
32 |       labels:
33 |         app: ${instance_name}
34 |     spec:
35 |       nodeSelector:
36 |         node.kubernetes.io/instance-type: "${instance_type}"
37 |       containers:
38 |       - name: main
39 |         image: "${registry}${model_image_name}${model_image_tag}"
40 |         imagePullPolicy: Always
41 |         env:
42 |           - name: NUM_MODELS
43 |             value: "${num_models}"
44 |           - name: POSTPROCESS
45 |             value: "${postprocess}"
46 |           - name: QUIET
47 |             value: "${quiet}"
48 |         ports:
49 |         - name: pod-port
50 |           containerPort: 8080
51 |         resources:
52 |           limits:
53 |             nvidia.com/gpu: 1
54 | 


--------------------------------------------------------------------------------
/4-deploy/graviton-yaml.template:
--------------------------------------------------------------------------------
 1 | ---
 2 | kind: Service
 3 | apiVersion: v1
 4 | metadata:
 5 |   name: ${instance_name}
 6 |   namespace: ${namespace}
 7 |   labels:
 8 |     app: ${instance_name}
 9 | spec:
10 |   ports:
11 |   - name: preds
12 |     port: ${service_port}
13 |     targetPort: pod-port 
14 |   type: ClusterIP
15 |   selector:
16 |     app: ${instance_name}
17 | ---
18 | kind: Deployment
19 | apiVersion: apps/v1
20 | metadata:
21 |   name: ${instance_name}
22 |   namespace: ${namespace}
23 |   labels:
24 |     app: ${instance_name}
25 | spec:
26 |   replicas: 1
27 |   selector:
28 |     matchLabels:
29 |       app: ${instance_name}
30 |   template:
31 |     metadata:
32 |       labels:
33 |         app: ${instance_name}
34 |     spec:
35 |       nodeSelector:
36 |         node.kubernetes.io/instance-type: "${instance_type}"
37 |       topologySpreadConstraints:
38 |         - maxSkew: 1
39 |           topologyKey: kubernetes.io/hostname
40 |           whenUnsatisfiable: DoNotSchedule
41 |           #nodeAffinityPolicy: Honor
42 |           labelSelector:
43 |             matchLabels:
44 |               app: ${instance_name}
45 |       containers:
46 |       - name: main
47 |         image: "${registry}${model_image_name}${model_image_tag}"
48 |         imagePullPolicy: Always
49 |         env:
50 |           - name: NUM_MODELS
51 |             value: "${num_models}"
52 |           - name: POSTPROCESS
53 |             value: "${postprocess}"
54 |           - name: QUIET
55 |             value: "${quiet}"
56 |         ports:
57 |         - name: pod-port
58 |           containerPort: 8080
59 |         resources:
60 |         #use limits and requests to ensure that certain number of model pods get scheduled per node
61 |           limits:
62 |             #Total node memory resource is about 32 GB for c7g.4xlarge and similar instances, adjust for other node types
63 |             memory: "27000Mi"
64 |           requests:
65 |             #Total node memory resource is about 32 GB for c7g.4xlarge and similar instances, adjust for other node types
66 |             memory: "27000Mi"
67 | 
68 | 


--------------------------------------------------------------------------------
/4-deploy/inf1-yaml.template:
--------------------------------------------------------------------------------
 1 | ---
 2 | kind: Service
 3 | apiVersion: v1
 4 | metadata:
 5 |   name: ${instance_name}
 6 |   namespace: ${namespace}
 7 |   labels:
 8 |     app: ${instance_name}
 9 | spec:
10 |   ports:
11 |     - name: model-server
12 |       port: ${service_port}
13 |       targetPort: pod-port
14 |   selector:
15 |     app: ${instance_name}
16 |     role: master
17 |   type: ClusterIP
18 | ---
19 | kind: Deployment
20 | apiVersion: apps/v1
21 | metadata:
22 |   name: ${instance_name}
23 |   namespace: ${namespace}
24 |   labels:
25 |     app: ${instance_name}
26 |     role: master
27 | spec:
28 |   replicas: 1 # Number of desired replicas. Increase to desired number.
29 |   selector:
30 |     matchLabels:
31 |       app: ${instance_name}
32 |       role: master
33 |   template:
34 |     metadata:
35 |       labels:
36 |         app: ${instance_name}
37 |         role: master
38 |     spec:
39 |       nodeSelector:
40 |         node.kubernetes.io/instance-type: "${instance_type}"
41 |       containers:
42 |         - name: main
43 |           image: "${registry}${model_image_name}${model_image_tag}"
44 |           env:
45 |             - name: NUM_MODELS
46 |               value: "${num_models}"
47 |             - name: POSTPROCESS
48 |               value: "${postprocess}"
49 |             - name: QUIET
50 |               value: "${quiet}"
51 |           imagePullPolicy: Always
52 |           ports:
53 |             - name: pod-port
54 |               containerPort: 8080
55 |           securityContext:
56 |             capabilities:
57 |               add:
58 |                 - IPC_LOCK
59 |           resources:
60 |             limits:
61 |               #hugepages-2Mi: 256Mi    # configure to 256 * desired number of Inferentia devices.
62 |               aws.amazon.com/neuron: 1  # desired number of Inferentia devices.
63 |             #requests:
64 |               #memory: 1024Mi          # Desired amount of memory. Should be larger than hugepages-2Mi limit.
65 | 


--------------------------------------------------------------------------------
/4-deploy/inf2-yaml.template:
--------------------------------------------------------------------------------
 1 | ---
 2 | kind: Service
 3 | apiVersion: v1
 4 | metadata:
 5 |   name: ${instance_name}
 6 |   namespace: ${namespace}
 7 |   labels:
 8 |     app: ${instance_name}
 9 | spec:
10 |   ports:
11 |     - name: model-server
12 |       port: ${service_port}
13 |       targetPort: pod-port
14 |   selector:
15 |     app: ${instance_name}
16 |     role: master
17 |   type: ClusterIP
18 | ---
19 | kind: Deployment
20 | apiVersion: apps/v1
21 | metadata:
22 |   name: ${instance_name}
23 |   namespace: ${namespace}
24 |   labels:
25 |     app: ${instance_name}
26 |     role: master
27 | spec:
28 |   replicas: 1 # Number of desired replicas. Increase to desired number.
29 |   selector:
30 |     matchLabels:
31 |       app: ${instance_name}
32 |       role: master
33 |   template:
34 |     metadata:
35 |       labels:
36 |         app: ${instance_name}
37 |         role: master
38 |     spec:
39 |       nodeSelector:
40 |         node.kubernetes.io/instance-type: "${instance_type}"
41 |       containers:
42 |         - name: main
43 |           image: "${registry}${model_image_name}${model_image_tag}"
44 |           env:
45 |             - name: NUM_MODELS
46 |               value: "${num_models}"
47 |             - name: POSTPROCESS
48 |               value: "${postprocess}"
49 |             - name: QUIET
50 |               value: "${quiet}"
51 |           imagePullPolicy: Always
52 |           ports:
53 |             - name: pod-port
54 |               containerPort: 8080
55 |           securityContext:
56 |             capabilities:
57 |               add:
58 |                 - IPC_LOCK
59 |           resources:
60 |           #use limits and requests to ensure that certain number of model pods get scheduled per node
61 |             limits:
62 |               #hugepages-2Mi: 256Mi    # configure to 256 * desired number of Inferentia devices.
63 |               aws.amazon.com/neuron: 1  # desired number of Inferentia devices.
64 |             #requests:
65 |               #memory: 1024Mi          # Desired amount of memory. Should be larger than hugepages-2Mi limit.
66 | 


--------------------------------------------------------------------------------
/4-deploy/logs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | echo ""
17 | echo "Runtime: $runtime"
18 | echo "Processor: $processor"
19 | 
20 | CMD=""
21 | if [ "$runtime" == "docker" ]; then
22 |     if [ "$num_servers" == "1" ]; then
23 |         CMD="docker logs -f ${app_name}-0"
24 |     else
25 |         if [ "$1" == "" ]; then
26 |             CMD="docker ps | grep ${app_name}- | cut -d ' ' -f 1 | xargs -L 1 docker logs"
27 |         else
28 |             CMD="docker logs -f ${app_name}-$1"
29 |         fi
30 |     fi
31 | elif [ "$runtime" == "kubernetes" ]; then
32 |     command -v kubetail > /dev/null
33 |     if [ "$?" == "1" ]; then
34 |         echo "kubetail not found"
35 |         echo "Please follow the instructions here https://github.com/johanhaleby/kubetail#installation, then try again"
36 |     else
37 |         if [ "$1" == "" ]; then
38 |             CMD="kubetail -n ${namespace} -f ${app_name}"
39 |         else
40 |             CMD="kubectl -n ${namespace} logs -f $(kubectl -n ${namespace} get pods | grep ${app_name}-$1 | cut -d ' ' -f 1)"
41 |         fi
42 |     fi
43 | else
44 |     echo "Runtime $runtime not recognized"
45 | fi
46 | 
47 | if [ ! "$verbose" == "false" ]; then
48 |     echo -e "\n${CMD}\n"
49 | fi
50 | eval "${CMD}"
51 | 
52 | 


--------------------------------------------------------------------------------
/4-deploy/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | echo ""
17 | echo "Runtime: $runtime"
18 | echo "Processor: $processor"
19 | 
20 | if [ "$runtime" == "docker" ]; then
21 |     server=0
22 |     while [ $server -lt $num_servers ]; do
23 | 	    run_opts="--name ${app_name}-${server} -e NUM_MODELS=$num_models -e POSTPROCESS=$postprocess -e QUIET=$quiet -P -v $(pwd)/../3-pack:/app/dev"    
24 |     	if [ "$processor" == "gpu" ]; then
25 |             run_opts="--gpus 0 ${run_opts}"
26 |     	fi
27 | 	if [ "$processor" == "inf" ]; then
28 | 	    run_opts="--device=/dev/neuron${server} ${run_opts}"
29 | 	fi
30 |     	CMD="docker run -d ${run_opts} ${registry}${model_image_name}${model_image_tag}"
31 |         if [ ! "$verbose" == "false" ]; then
32 |             echo -e "\n${CMD}\n"
33 |         fi
34 |         eval "${CMD}"
35 | 	server=$((server+1))
36 |     done
37 | elif [ "$runtime" == "kubernetes" ]; then
38 |     kubectl create namespace ${namespace} --dry-run=client -o yaml | kubectl apply -f -
39 |     ./generate-yaml.sh
40 |     CMD="kubectl apply -f ${app_dir}"
41 |     if [ ! "$verbose" == "false" ]; then
42 |         echo -e "\n${CMD}\n"
43 |     fi
44 |     eval "${CMD}"
45 | else
46 |     echo "Runtime $runtime not recognized"
47 | fi
48 | 


--------------------------------------------------------------------------------
/4-deploy/status.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | echo ""
17 | echo "Runtime: $runtime"
18 | echo "Processor: $processor"
19 | 
20 | if [ "$runtime" == "docker" ]; then
21 |     CMD="docker ps -a | grep ${app_name}"
22 |     if [ ! "$verbose" == "false" ]; then
23 |         echo -e "\n${CMD}\n"
24 |     fi
25 |     eval "${CMD}"
26 | elif [ "$runtime" == "kubernetes" ]; then
27 |     if [ "$1" == "" ]; then
28 |         CMD="kubectl -n ${namespace} get pods"
29 |         if [ ! "$verbose" == "false" ]; then
30 |             echo -e "\n${CMD}\n"
31 |         fi
32 |         echo ""
33 |         echo "Pods:"
34 |         eval "${CMD}"
35 |         CMD="kubectl -n ${namespace} get services"
36 |         if [ ! "$verbose" == "false" ]; then
37 |             echo -e "\n${CMD}\n"
38 |         fi
39 |         echo ""
40 |         echo "Services:"
41 |         eval "${CMD}"
42 |     else
43 |         CMD="kubectl -n ${namespace} get pod $(kubectl -n ${namespace} get pods | grep ${app_name}-$1 | cut -d ' ' -f 1) -o wide"
44 |         if [ ! "$verbose" == "false" ]; then
45 |             echo -e "\n${CMD}\n"
46 |         fi
47 |         echo ""
48 |         echo "Pod:"
49 |         eval "${CMD}"
50 |         CMD="kubectl -n ${namespace} get service ${app_name}-$1"
51 |         if [ ! "$verbose" == "false" ]; then
52 |             echo -e "\n${CMD}\n"
53 |         fi
54 |         echo ""
55 |         echo "Service:"
56 |         eval "${CMD}"
57 |     fi
58 | else
59 |     echo "Runtime $runtime not recognized"
60 | fi
61 | 


--------------------------------------------------------------------------------
/4-deploy/stop.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | echo ""
17 | echo "Runtime: $runtime"
18 | echo "Processor: $processor"
19 | 
20 | if [ "$runtime" == "docker" ]; then
21 |     server=0
22 |     while [ $server -lt $num_servers ]; do
23 |         CMD="docker rm -f ${app_name}-${server}"
24 |         if [ ! "$verbose" == "false" ]; then
25 |             echo -e "\n${CMD}\n"
26 |         fi
27 |         eval "${CMD}"
28 | 	server=$((server+1))
29 |     done
30 | elif [ "$runtime" == "kubernetes" ]; then
31 |     CMD="kubectl delete -f ${app_dir}"
32 |     if [ ! "$verbose" == "false" ]; then
33 |         echo -e "\n${CMD}\n"
34 |     fi
35 |     eval "${CMD}"
36 | else
37 |     echo "Runtime $runtime not recognized"
38 | fi
39 | 
40 | 


--------------------------------------------------------------------------------
/5-test/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG BASE_IMAGE
 2 | 
 3 | FROM $BASE_IMAGE
 4 | 
 5 | RUN mkdir -p /app/tests
 6 | 
 7 | COPY config.properties /app
 8 | 
 9 | ADD ./5-test/tests /app/tests
10 | 
11 | CMD ["bash","-c","while true; do date; sleep 10; done"]
12 | 


--------------------------------------------------------------------------------
/5-test/aggregate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | echo ""
17 | echo "Aggregated statistics ..."
18 | line_count=0
19 | throughput_sum=0
20 | p50_sum=0
21 | p90_sum=0
22 | p95_sum=0
23 | errors_total=0
24 | throughput_clients=$num_test_containers
25 | while IFS='' read -r line; do
26 |     line_count=$((line_count+1))
27 |     throughput=$(echo $line | awk '{print $4}' | sed -e "s/,//g" | sed -e "s/'//g")
28 |     p50=$(echo $line | awk '{print $6}' | sed -e "s/,//g" | sed -e "s/'//g")
29 |     p90=$(echo $line | awk '{print $8}' | sed -e "s/,//g" | sed -e "s/'//g")
30 |     p95=$(echo $line | awk '{print $10}' | sed -e "s/,//g" | sed -e "s/'//g")
31 |     errors=$(echo $line | awk '{print $12}' | sed -e "s/}//g" | sed -e "s/'//g")
32 |     throughput_sum=$( echo "$throughput_sum + $throughput" | bc )
33 |     p50_sum=$( echo "$p50_sum + $p50" | bc)
34 |     p90_sum=$( echo "$p90_sum + $p90" | bc)
35 |     p95_sum=$( echo "$p95_sum + $p95" | bc)
36 |     errors_total=$(echo "${errors_total} + $errors" | bc)
37 | done < $1
38 | echo 'Line count is:'$line_count
39 | echo 'Throughputsum is:' $throughput_sum
40 | 
41 | throughput_total=$(echo "scale=1; $throughput_clients * ($throughput_sum / $line_count)" | bc)
42 | p50_avg=$(echo "scale=3; $p50_sum / $line_count" | bc)
43 | p90_avg=$(echo "scale=3; $p90_sum / $line_count" | bc)
44 | p95_avg=$(echo "scale=3; $p95_sum / $line_count" | bc)
45 | printf "{ 'throughput_total': %.1f, 'p50_avg': %.3f, 'p90_avg': %.3f, 'p95_avg': %.3f, 'errors_total': %.0f }\n" "$(echo $throughput_total)" "$(echo $p50_avg)" "$(echo $p90_avg)" "$(echo $p95_avg)" "$(echo $errors_total)"
46 | 


--------------------------------------------------------------------------------
/5-test/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | #use CMD variable for better debugging
17 | CMD="docker build -t ${registry}${test_image_name}${test_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \
18 |              -f 5-test/Dockerfile ."
19 | if [ ! "$verbose" == "false" ]; then
20 |     echo -e "\n${CMD}\n"
21 | fi
22 | eval "${CMD}"
23 |     
24 | 


--------------------------------------------------------------------------------
/5-test/deployment-yaml.template:
--------------------------------------------------------------------------------
 1 | ---
 2 | kind: Deployment
 3 | apiVersion: apps/v1
 4 | metadata:
 5 |   name: ${instance_name}
 6 |   namespace: ${test_namespace}
 7 |   labels:
 8 |     app: ${instance_name}
 9 | spec:
10 |   replicas: 1
11 |   selector:
12 |     matchLabels:
13 |       app: ${instance_name}
14 |   template:
15 |     metadata:
16 |       labels:
17 |         app: ${instance_name}
18 |     spec:
19 |       nodeSelector:
20 |         node.kubernetes.io/instance-type: "${test_instance_type}"
21 |       containers:
22 |       - name: main
23 |         image: "${registry}${test_image_name}${test_image_tag}"
24 |         command: ["bash","-c","${cmd_pod}"]
25 |         imagePullPolicy: Always
26 |         env:
27 |         - name: runtime
28 |           value: "$runtime"
29 |         - name: num_servers
30 |           value: "$num_servers"
31 |         - name: num_models
32 |           value: "$num_models"
33 |         - name: app_name
34 |           value: "$app_name"
35 |         - name: namespace
36 |           value: "$namespace"
37 |         - name: num_requests
38 |           value: "$num_requests"
39 |         - name: request_frequency
40 |           value: "$request_frequency"
41 |         resources:
42 |           limits:
43 |             cpu: 1
44 | 
45 | 


--------------------------------------------------------------------------------
/5-test/exec.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | echo ""
17 | echo "Runtime: $runtime"
18 | echo "Processor: $processor"
19 | 
20 | if [ "$runtime" == "docker" ]; then
21 |     if [ "$num_test_containers" == "1" ]; then
22 |         CMD="docker exec -it ${test_image_name}-0 bash"
23 |     else
24 |         if [ "$1" == "" ]; then
25 |             echo "Please specify test container index to exec into. Defaulting to 0"
26 |             CMD="docker exec -it ${test_image_name}-0 bash"
27 |         else
28 |             CMD="docker exec -it ${test_image_name}-$1 bash"
29 |         fi
30 |     fi
31 | elif [ "$runtime" == "kubernetes" ]; then
32 |     if [ "$num_test_containers" == "1" ]; then
33 |         CMD="kubectl -n ${test_namespace} exec -it $(kubectl -n ${test_namespace} get pod | grep ${test_image_name}-0 | cut -d ' ' -f 1) -- bash"
34 |     else
35 |         if [ "$1" == "" ]; then
36 |             echo "Please specify test container index to exec into. Defaulting to 0."
37 |             CMD="kubectl -n ${test_namespace} exec -it $(kubectl -n ${test_namespace} get pod | grep ${test_image_name}-0 | cut -d ' ' -f 1) -- bash"
38 |         else
39 |             CMD="kubectl -n ${test_namespace} exec -it $(kubectl -n ${test_namespace} get pod | grep ${test_image_name}-$1 | cut -d ' ' -f 1) -- bash"
40 |         fi
41 |     fi
42 | else
43 |     echo "Runtime $runtime not recognized"
44 | fi
45 | if [ ! "$verbose" == "false" ]; then
46 |     echo -e "\n${CMD}\n"
47 | fi
48 | eval "${CMD}"
49 | 
50 | 


--------------------------------------------------------------------------------
/5-test/generate-yaml.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | set -a
 9 | 
10 | if [ -f ../config.properties ]; then
11 |     source ../config.properties
12 | elif [ -f ./config.properties ]; then
13 |     source ./config.properties
14 | else
15 |     echo "config.properties not found!"
16 | fi
17 | 
18 | prefix=${test_image_name}-
19 | instance_start=0
20 | instances=${num_test_containers}
21 | 
22 | if [ -d ./${test_dir} ]; then
23 |     rm -rf ./${test_dir}
24 | fi
25 | mkdir -p ./${test_dir}
26 | 
27 | if [ -f ./cmd_pod.properties ]; then
28 | 	source ./cmd_pod.properties
29 | 	rm -f ./cmd_pod.properties
30 | fi
31 | echo "cmd_pod=$cmd_pod"
32 | echo "template=$template"
33 | 
34 | instance=$instance_start
35 | while [ $instance -lt $instances ]
36 | do
37 | 	export instance_name=${prefix}${instance}
38 | 	echo "Generating ./${test_dir}/${instance_name}.yaml ..."
39 | 	CMD="cat $template | envsubst > ./${test_dir}/${instance_name}.yaml"
40 |         if [ ! "$verbose" == "false" ]; then
41 |             echo -e "\n${CMD}\n"
42 |         fi
43 |         eval "${CMD}"
44 | 	instance=$((instance+1))
45 | done
46 | 
47 | set +a
48 | 


--------------------------------------------------------------------------------
/5-test/job-yaml.template:
--------------------------------------------------------------------------------
 1 | ---
 2 | kind: Job
 3 | apiVersion: batch/v1
 4 | metadata:
 5 |   name: ${instance_name}
 6 |   namespace: ${test_namespace}
 7 |   labels:
 8 |     app: ${instance_name}
 9 | spec:
10 |   backoffLimit: 4
11 |   template:
12 |     metadata:
13 |       labels:
14 |         app: ${instance_name}
15 |     spec:
16 |       nodeSelector:
17 |         node.kubernetes.io/instance-type: "${test_instance_type}"
18 |       restartPolicy: Never
19 |       containers:
20 |       - name: main
21 |         image: "${registry}${test_image_name}${test_image_tag}"
22 |         command: ["bash","-c","${cmd_pod}"]
23 |         imagePullPolicy: Always
24 |         env:
25 |         - name: runtime
26 |           value: "$runtime"
27 |         - name: num_servers
28 |           value: "$num_servers"
29 |         - name: num_models
30 |           value: "$num_models"
31 |         - name: app_name
32 |           value: "$app_name"
33 |         - name: namespace
34 |           value: "$namespace"
35 |         - name: num_requests
36 |           value: "$num_requests"
37 |         - name: request_frequency
38 |           value: "$request_frequency"
39 |         resources:
40 |           requests:
41 |             cpu: 1
42 | 


--------------------------------------------------------------------------------
/5-test/logs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | echo ""
17 | echo "Runtime: $runtime"
18 | echo "Processor: $processor"
19 | 
20 | if [ "$runtime" == "docker" ]; then
21 |     if [ "$num_test_containers" == "1" ]; then
22 |         CMD="docker logs -f ${test_image_name}-0"
23 |     else
24 |         if [ "$1" == "" ]; then
25 |             CMD="docker ps -a | grep ${test_image_name}- | cut -d ' ' -f 1 | xargs -L 1 docker logs"
26 |         else
27 |             CMD="docker logs -f ${test_image_name}-$1"
28 |         fi
29 |     fi
30 | elif [ "$runtime" == "kubernetes" ]; then
31 |     if [ "$1" == "" ]; then
32 |         CMD="kubectl -n ${test_namespace} get pods | grep ${test_image_name}- | cut -d ' ' -f 1 | xargs -L 1 kubectl -n ${test_namespace} logs"
33 |     else
34 |         CMD="kubectl -n ${test_namespace} logs -f $(kubectl -n ${test_namespace} get pods | grep ${test_image_name}-$1 | cut -d ' ' -f 1)"
35 |     fi
36 | else
37 |     echo "Runtime $runtime not recognized"
38 | fi
39 | if [ ! "$verbose" == "false" ]; then
40 |     echo -e "\n${CMD}\n"
41 | fi
42 | eval "${CMD}"
43 | 
44 | 


--------------------------------------------------------------------------------
/5-test/pull.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | # Pull model image from container registry
 9 | 
10 | if [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | elif [ -f ../config.properties ]; then
13 |     source ../config.properties
14 | else
15 |     echo "config.properties not found!"
16 | fi
17 | 
18 | CMD="docker pull ${registry}${test_image_name}${test_image_tag}"
19 | if [ ! "$verbose" == "false" ]; then
20 |     echo -e "\n${CMD}\n"
21 | fi
22 | eval "${CMD}"
23 | 
24 | 


--------------------------------------------------------------------------------
/5-test/push.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | # Push model image to container registry
 9 | 
10 | if [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | elif [ -f ../config.properties ]; then
13 |     source ../config.properties
14 | else
15 |     echo "config.properties not found!"
16 | fi
17 | 
18 | ./login.sh
19 | # Create registry if needed
20 | IMAGE=${test_image_name}
21 | REGISTRY_COUNT=$(aws ecr describe-repositories | grep ${IMAGE} | wc -l)
22 | if [ "$REGISTRY_COUNT" == "0" ]; then
23 |     CMD="aws ecr create-repository --repository-name ${IMAGE} --region ${region}"
24 |     if [ ! "$verbose" == "false" ]; then
25 |         echo -e "\n${CMD}\n"
26 |     fi
27 |     eval "${CMD}"
28 | fi
29 | 
30 | CMD="docker push ${registry}${test_image_name}${test_image_tag}"
31 | if [ ! "$verbose" == "false" ]; then
32 |     echo -e "\n${CMD}\n"
33 | fi
34 | eval "${CMD}"
35 | 
36 | 


--------------------------------------------------------------------------------
/5-test/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | ######################################################################
  4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
  5 | # SPDX-License-Identifier: MIT-0                                     #
  6 | ######################################################################
  7 | 
  8 | if [ -f ../config.properties ]; then
  9 |     source ../config.properties
 10 | elif [ -f ./config.properties ]; then
 11 |     source ./config.properties
 12 | else
 13 |     echo "config.properties not found!"
 14 | fi
 15 | 
 16 | echo ""
 17 | echo "Runtime: $runtime"
 18 | echo ""
 19 | 
 20 | if [ "$runtime" == "docker" ]; then
 21 |     if [ "$1" == "bma" ]; then
 22 |         pushd ./5-test > /dev/null
 23 |         CMD="docker ps -a | grep ${test_image_name}- | cut -d ' ' -f 1 | xargs -L 1 docker logs | grep { | grep -v 0.0, | tee ./bmk-all.log"
 24 |         command -v bc > /dev/null
 25 |         if [ "$?" == "1" ]; then
 26 |             echo "bc not found"
 27 |             echo "Please 'sudo apt-get install -y bc' or 'sudo yum install -y bc', then try again"
 28 |         else
 29 |             if [ ! "$verbose" == "false" ]; then
 30 |                 echo -e "\n${CMD}\n"
 31 |             fi
 32 |             eval "${CMD}"
 33 |             ./aggregate.sh ./bmk-all.log
 34 |         fi
 35 |         rm -f ./bmk-all.log
 36 |         popd > /dev/null
 37 |     else
 38 |         server=0
 39 |         run_links=""
 40 |         while [ $server -lt $num_servers ]; do
 41 |             run_links="${run_links} --link ${app_name}-${server}:${app_name}-${server}"
 42 |             server=$((server+1))
 43 |         done
 44 |         test_container=0
 45 |         while [ $test_container -lt $num_test_containers ]; do
 46 |             run_opts="--name ${test_image_name}-${test_container} ${run_links}"
 47 |             CMD="docker run -d ${run_opts} ${registry}${test_image_name}${test_image_tag}"
 48 |             if [ "$1" == "seq" ]; then
 49 |                 CMD="$CMD bash -c 'pushd /app/tests && ./curl-seq-ip.sh'"
 50 |             elif [ "$1" == "rnd" ]; then
 51 |                 CMD="$CMD bash -c 'pushd /app/tests && ./curl-rnd-ip.sh'"
 52 |             elif [ "$1" == "bmk" ]; then
 53 |                 CMD="$CMD bash -c 'pushd /app/tests && ./benchmark.sh'"
 54 |             fi
 55 |             if [ ! "$verbose" == "false" ]; then
 56 |                 echo -e "\n${CMD}\n"
 57 |             fi
 58 |             eval "${CMD}"
 59 |             test_container=$((test_container+1))
 60 |         done
 61 |     fi
 62 | elif [ "$runtime" == "kubernetes" ]; then
 63 |     pushd ./5-test > /dev/null
 64 |     if [ "$1" == "bma" ]; then
 65 |         CMD="kubectl -n ${test_namespace} get pods | grep ${test_image_name}- | cut -d ' ' -f 1 | xargs -L 1 kubectl -n ${test_namespace} logs | grep { | grep -v 0.0, | tee ./bmk-all.log"
 66 |         command -v bc > /dev/null
 67 |         if [ "$?" == "1" ]; then
 68 |             echo "bc not found"
 69 |             echo "Please 'sudo apt-get install -y bc' or 'sudo yum install -y bc', then try again"
 70 |         else
 71 |             if [ ! "$verbose" == "false" ]; then
 72 |                 echo -e "\n${CMD}\n"
 73 |             fi
 74 |             eval "${CMD}"
 75 |             ./aggregate.sh ./bmk-all.log
 76 |         fi
 77 |         rm -f ./bmk-all.log
 78 |     else
 79 |         CMD="kubectl create namespace ${test_namespace} --dry-run=client -o yaml | kubectl apply -f -"
 80 |         if [ ! "$verbose" == "false" ]; then
 81 |             echo -e "\n${CMD}\n"
 82 |         fi
 83 |         eval "${CMD}"
 84 |         cmd_pod="while true; do date; sleep 10; done"
 85 |         template="./deployment-yaml.template"
 86 |         if [ "$1" == "seq" ]; then
 87 |             cmd_pod="pushd /app/tests && ./curl-seq-ip.sh"
 88 |             template="./job-yaml.template"
 89 |         elif [ "$1" == "rnd" ]; then
 90 |             cmd_pod="pushd /app/tests && ./curl-rnd-ip.sh"
 91 |             template="./job-yaml.template"
 92 |         elif [ "$1" == "bmk" ]; then
 93 |             cmd_pod="pushd /app/tests && ./benchmark.sh"
 94 |             template="./job-yaml.template"
 95 |          fi
 96 |         echo "export cmd_pod=\"$cmd_pod\"" > cmd_pod.properties
 97 |         echo "export template=$template" >> cmd_pod.properties
 98 |         eval "./generate-yaml.sh"
 99 |         CMD="kubectl apply -f ${test_dir}"
100 |         if [ ! "$verbose" == "false" ]; then
101 |             echo -e "\n${CMD}\n"
102 |         fi
103 |         eval "${CMD}"
104 |     fi
105 |     popd > /dev/null
106 | else
107 |     echo "Runtime $runtime not recognized"
108 | fi
109 | 


--------------------------------------------------------------------------------
/5-test/status.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | echo ""
17 | echo "Runtime: $runtime"
18 | echo "Processor: $processor"
19 | 
20 | if [ "$runtime" == "docker" ]; then
21 |     CMD="docker ps -a | grep ${test_image_name}-"
22 | elif [ "$runtime" == "kubernetes" ]; then
23 |     if [ "$1" == "" ]; then
24 |         echo ""
25 |         echo "Pods:"
26 |         CMD="kubectl -n ${test_namespace} get pods"
27 |     else
28 |         echo ""
29 |         echo "Pod:"
30 |         CMD="kubectl -n ${test_namespace} get pod $(kubectl -n ${test_namespace} get pods | grep ${test_image_name}-$1 | cut -d ' ' -f 1) -o wide"
31 |     fi
32 | else
33 |     echo "Runtime $runtime not recognized"
34 | fi
35 | if [ ! "$verbose" == "false" ]; then
36 |     echo -e "\n${CMD}\n"
37 | fi
38 | eval "${CMD}"
39 | 
40 | 


--------------------------------------------------------------------------------
/5-test/stop.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ -f ../config.properties ]; then
 9 |     source ../config.properties
10 | elif [ -f ./config.properties ]; then
11 |     source ./config.properties
12 | else
13 |     echo "config.properties not found!"
14 | fi
15 | 
16 | echo ""
17 | echo "Runtime: $runtime"
18 | echo "Processor: $processor"
19 | 
20 | if [ "$runtime" == "docker" ]; then
21 |     if [ "$1" == "" ]; then
22 |     test_container=0
23 |         while [ $test_container -lt $num_test_containers ]; do
24 |             CMD="docker rm -f ${test_image_name}-${test_container}"
25 |             if [ ! "$verbose" == "false" ]; then
26 |                 echo -e "\n${CMD}\n"
27 |             fi
28 |             eval "${CMD}"
29 |             test_container=$((test_container+1))
30 |         done
31 |     else
32 |         CMD="Docker rm -f ${test_image_name}-$1"
33 |         echo "$CMD"
34 |         eval "$CMD"
35 |     fi
36 | elif [ "$runtime" == "kubernetes" ]; then
37 |     pushd ./5-test > /dev/null
38 |     CMD="kubectl delete -f ${test_dir}"
39 |     if [ ! "$verbose" == "false" ]; then
40 |         echo -e "\n${CMD}\n"
41 |     fi
42 |     eval "${CMD}"
43 |     popd > /dev/null
44 | else
45 |     echo "Runtime $runtime not recognized"
46 | fi
47 | 


--------------------------------------------------------------------------------
/5-test/tests/benchmark.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ "$num_servers" == "" ]; then
 9 | 
10 |     echo "Configuring number of model servers from config.properties ..."
11 | 
12 |     if [ -f ../config.properties ]; then
13 |         source ../config.properties
14 |     elif [ -f ../../config.properties ]; then
15 |         source ../../config.properties
16 |     elif [ -f ./config.properties ]; then
17 |         source ./config.properties
18 |     else
19 |         echo "config.properties not found!"
20 |     fi
21 | else
22 |     echo "Number of model servers ($num_servers) configured from environment ..."
23 | fi
24 | 
25 | if [ "$runtime" == "docker" ]; then
26 |     python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX]:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns 
27 | elif [ "$runtime" == "kubernetes" ]; then 
28 |     python benchmark_client.py --num_thread 2 --url http://${app_name}-[INSTANCE_IDX].${namespace}.svc.cluster.local:8080/predictions/model[MODEL_IDX] --is_multi_instance --n_instance ${num_servers} --is_multi_model_per_instance --n_model_per_instance ${num_models} --latency_window_size 1000 --cache_dns 
29 | else
30 |     echo "Runtime $runtime not recognized"
31 | fi
32 | 


--------------------------------------------------------------------------------
/5-test/tests/benchmark_client.py:
--------------------------------------------------------------------------------
  1 | ######################################################################
  2 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
  3 | # SPDX-License-Identifier: MIT-0                                     #
  4 | ######################################################################
  5 | 
  6 | import os
  7 | import argparse
  8 | import time
  9 | import numpy as np
 10 | import requests
 11 | import sys
 12 | import random
 13 | from concurrent import futures
 14 | import socket
 15 | import traceback
 16 | from urllib.parse import urlparse
 17 | 
 18 | # from essential_generators import DocumentGenerator
 19 | # num_instance = 4
 20 | # num_model_per_instance = 10
 21 | # http://instance-[INSTANCE_IDX].scale.svc.cluser.local:8000/predictions/model-[MODEL_IDX]
 22 | # INSTANCE_IDX = 0 to 3
 23 | # MODEL_IDX = 0 to 9
 24 | 
 25 | if __name__ == '__main__':
 26 |     parser = argparse.ArgumentParser()
 27 |     parser.add_argument('--url', help='FastAPI model URL', type=str,
 28 |                         default=f'http://localhost:8080/predictions/model0')
 29 |     # parser.add_argument('--url', help='FastAPI model URL', type=str, default=f'http://instance[INSTANCE_IDX].scale.svc.cluser.local:8000/predictions/model[MODEL_IDX]')
 30 |     parser.add_argument('--num_thread', type=int, default=2, help='Number of threads invoking the model URL')
 31 |     # parser.add_argument('--sequence_length', type=int, default=512)
 32 |     parser.add_argument('--latency_window_size', type=int, default=250)
 33 |     parser.add_argument('--throughput_time', type=int, default=180)
 34 |     parser.add_argument('--throughput_interval', type=int, default=10)
 35 |     parser.add_argument('--is_multi_instance', default=False, action='store_true')
 36 |     parser.add_argument('--n_instance', required=False, type=int)
 37 |     parser.add_argument('--is_multi_model_per_instance', default=False, action='store_true')
 38 |     parser.add_argument('--n_model_per_instance', required=False, type=int)
 39 |     parser.add_argument('--post', default=False, action='store_true')
 40 |     parser.add_argument('--verbose', default=False, action='store_true')
 41 |     parser.add_argument('--cache_dns', default=False, action='store_true')
 42 | 
 43 |     args, leftovers = parser.parse_known_args()
 44 | 
 45 |     is_multi_instance = args.is_multi_instance
 46 |     n_instance = 0
 47 |     if is_multi_instance:
 48 |         n_instance = args.n_instance
 49 |     n_model_per_instance = 0
 50 |     is_multi_model_per_instance = args.is_multi_model_per_instance
 51 |     if is_multi_model_per_instance:
 52 |         n_model_per_instance = args.n_model_per_instance
 53 | 
 54 |     data = {'seq_0': "how many chapters the book has?",
 55 |             'seq_1': """The number 42 is, in The Hitchhiker's Guide to the Galaxy by Douglas Adams."""}
 56 |     live = True
 57 |     num_infer = 0
 58 |     latency_list = []
 59 |     ret_status_failure_list = []
 60 |     latency_map = {}
 61 |     ret_status_failure_map = {}
 62 |     dns_cache = ['']
 63 |     if is_multi_instance:
 64 |         dns_cache = ['']*n_instance
 65 | 
 66 |     def single_request(pred, feed_data):
 67 |         session = requests.Session()
 68 |         pred_replace = pred
 69 |         idx_instance = 0
 70 |         idx_model_per_instance = None
 71 |         if is_multi_instance:
 72 |             idx_instance = random.choice(range(n_instance))
 73 |             pred_replace = pred_replace.replace('[INSTANCE_IDX]', str(idx_instance))
 74 |         if is_multi_model_per_instance:
 75 |             idx_model_per_instance = random.choice(range(n_model_per_instance))
 76 |             pred_replace = pred_replace.replace('[MODEL_IDX]', str(idx_model_per_instance))
 77 |         print(args)
 78 |         if args.cache_dns:
 79 |             print('caching dns')
 80 |             print(pred_replace)
 81 |             hostip = dns_cache[idx_instance]
 82 |             urlparts = urlparse(pred_replace)
 83 |             if hostip == '':
 84 |                 hostname = urlparts.hostname
 85 |                 hostip = socket.gethostbyname(hostname)
 86 |                 dns_cache[idx_instance] = hostip
 87 |             port = ''
 88 |             if urlparts.port != None:
 89 |                 port = f":{urlparts.port}"
 90 |             pred_replace = f"{urlparts.scheme}://{hostip}{port}{urlparts.path}"
 91 |         if args.verbose:
 92 |             print(pred_replace)
 93 |         if args.post:
 94 |             result = session.post(pred_replace, data=feed_data)
 95 |         else:
 96 |             result = session.get(pred_replace)
 97 |         print(result)
 98 |         sys.stdout.flush()
 99 | 
100 | 
101 |     def one_thread(pred, feed_data):
102 |         global latency_list
103 |         global ret_status_failure_list
104 |         global latency_map
105 |         global num_infer
106 |         global live
107 |         global dns_cache
108 |         session = requests.Session()
109 |         while True:
110 |             start = time.time()
111 |             pred_replace = pred
112 |             idx_instance = 0
113 |             idx_model_per_instance = None
114 |             if is_multi_instance:
115 |                 idx_instance = random.choice(range(n_instance))
116 |                 pred_replace = pred_replace.replace('[INSTANCE_IDX]', str(idx_instance))
117 |             if is_multi_model_per_instance:
118 |                 idx_model_per_instance = random.choice(range(n_model_per_instance))
119 |                 pred_replace = pred_replace.replace('[MODEL_IDX]', str(idx_model_per_instance))
120 |             if args.cache_dns:
121 |                 hostip = dns_cache[idx_instance]
122 |                 urlparts = urlparse(pred_replace)
123 |                 if hostip == '':
124 |                     hostname = urlparts.hostname
125 |                     hostip = socket.gethostbyname(hostname)
126 |                     dns_cache[idx_instance] = hostip
127 |                 port = ''
128 |                 if urlparts.port != None:
129 |                     port = f":{urlparts.port}"
130 |                 pred_replace = f"{urlparts.scheme}://{hostip}{port}{urlparts.path}"
131 |             if args.post:
132 |                 result = session.post(pred_replace, data=feed_data)
133 |             else:
134 |                 result = session.get(pred_replace)
135 |             latency = time.time() - start
136 |             latency_list.append(latency)
137 | 
138 |             map_key = '%s_%s' % (idx_instance, idx_model_per_instance)
139 |             if map_key not in latency_map:
140 |                 latency_map[map_key] = []
141 |             latency_map[map_key].append(latency)
142 | 
143 |             if result.status_code != 200:
144 |                 ret_status_failure_list.append(result.status_code)
145 |                 if map_key not in ret_status_failure_map:
146 |                     ret_status_failure_map[map_key] = []
147 |                 ret_status_failure_map[map_key].append(result.status_code)
148 | 
149 | 
150 |             num_infer += 1
151 |             if not live:
152 |                 break
153 | 
154 |     def current_performance():
155 |         try:
156 |             last_num_infer = num_infer
157 |             for _ in range(args.throughput_time // args.throughput_interval):
158 |                 current_num_infer = num_infer
159 |                 throughput = (current_num_infer - last_num_infer) / args.throughput_interval
160 |                 p50 = 0.0
161 |                 p90 = 0.0
162 |                 p95 = 0.0
163 |                 if latency_list:
164 |                     p50 = np.percentile(latency_list[-args.latency_window_size:], 50)
165 |                     p90 = np.percentile(latency_list[-args.latency_window_size:], 90)
166 |                     p95 = np.percentile(latency_list[-args.latency_window_size:], 95)
167 | 
168 |                 dump_output = {
169 |                     'pid': os.getpid(),
170 |                     'throughput': throughput,
171 |                     'p50': '%.3f' % (p50),
172 |                     'p90': '%.3f' % (p90),
173 |                     'p95': '%.3f' % (p95),
174 |                     'errors': '%d'%(len(ret_status_failure_list))
175 |                 }
176 |                 print(dump_output)
177 |                 if args.verbose:
178 |                     # To prevent the error dictionary changed during iteration
179 |                     lm_key_list = list(latency_map.keys())
180 |                     print({'p90_%s' % x: '%0.3f' % (np.percentile(latency_map[x], 90)) for x in lm_key_list})
181 |                     print({'num_%s' % x: len(latency_map[x]) for x in lm_key_list})
182 |                     if(len(ret_status_failure_list) > 0):
183 |                         rs_key_list = list(ret_status_failure_map.keys())
184 |                         print(dict(zip(*np.unique(ret_status_failure_list, return_counts=True))))
185 |                         print({'error_%s'% x: dict(zip(*np.unique(ret_status_failure_map[x], return_counts=True))) for x in rs_key_list})
186 |                     print()
187 | 
188 |                 sys.stdout.flush()
189 |                 last_num_infer = current_num_infer
190 |                 time.sleep(args.throughput_interval)
191 |             global live
192 |             live = False
193 |         except:
194 |             traceback.print_exc()
195 | 
196 | 
197 |     # Single Request to debug the package being sent
198 |     single_request(args.url, data)
199 |     with futures.ThreadPoolExecutor(max_workers=args.num_thread + 1) as executor:
200 |         executor.submit(current_performance)
201 |         for _ in range(args.num_thread):
202 |             executor.submit(one_thread, args.url, data)
203 |             # executor.submit(one_thread, args.url)
204 | 


--------------------------------------------------------------------------------
/5-test/tests/clock.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ######################################################################
3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
4 | # SPDX-License-Identifier: MIT-0                                     #
5 | ######################################################################
6 | 
7 | ts=$(date +%s%N) ; $@ ; tt=$((($(date +%s%N) - $ts)/1000000)) ; echo ""; echo "Time elapsed: $tt milliseconds"


--------------------------------------------------------------------------------
/5-test/tests/curl-rnd-ip.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | if [ "$num_servers" == "" ]; then
 8 |     echo "Configuring number of model servers from config.properties ..."
 9 |     if [ -f ../config.properties ]; then
10 |         source ../config.properties
11 |     elif [ -f ../../config.properties ]; then
12 |         source ../../config.properties
13 |     elif [ -f ./config.properties ]; then
14 |         source ./config.properties
15 |     else
16 |         echo "config.properties not found!"
17 |     fi
18 | else
19 |     echo "Configured number of model servers ($num_servers) from environment"
20 | fi
21 | 
22 | server=0
23 | servers=$num_servers
24 | model=0
25 | models=$num_models
26 | 
27 | # get instance ip addresses
28 | rm -f  ./endpoint_ip.conf
29 | echo "runtime=$runtime"
30 | while [ $server -lt $servers ]
31 | do
32 | 	if [ "$runtime" == "docker" ]; then
33 | 		server_ip=$(cat /etc/hosts | grep ${app_name}-${server} | awk '{print $1}')
34 | 	elif [ "$runtime" == "kubernetes" ]; then
35 | 		server_ip=$(host ${app_name}-${server}.${namespace}.svc.cluster.local | grep "has address" | cut -d ' ' -f 4)
36 | 	fi
37 | 	echo $server_ip >> ./endpoint_ip.conf
38 | 	server=$((server+1))
39 | done
40 | 
41 | echo "Endpoints:"
42 | cat ./endpoint_ip.conf
43 | 
44 | mapfile -t server_ips < endpoint_ip.conf 
45 | 
46 | server_last_index=$((${#server_ips[@]}-1))
47 | model_last_index=$(($models-1))
48 | request=0
49 | while [ $request -lt $num_requests ]
50 | do
51 | 	server=$(shuf -i 0-${server_last_index} -n 1)
52 | 	server_ip=${server_ips[$server]}
53 | 	model=$(shuf -i 0-${model_last_index} -n 1)
54 | 	echo "Request: $request, Server: $server, IP: $server_ip,  Model: $model"
55 | 	./clock.sh curl http://$server_ip:8080/predictions/model$model
56 | 	sleep $request_frequency
57 | 	request=$((request+1))
58 | done
59 | 
60 | rm -f  ./endpoint_ip.conf
61 | 


--------------------------------------------------------------------------------
/5-test/tests/curl-seq-ip.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | if [ "$num_servers" == "" ]; then
 9 |     echo "Configuring number of model servers from config.properties ..."
10 |     if [ -f ../config.properties ]; then
11 |         source ../config.properties
12 |     elif [ -f ../../config.properties ]; then
13 |         source ../../config.properties
14 |     elif [ -f ./config.properties ]; then
15 |         source ./config.properties
16 |     else
17 |         echo "config.properties not found!"
18 |     fi
19 | else
20 |     echo "Configured number of model servers ($num_servers) from environment"
21 | fi
22 | 
23 | server=0
24 | servers=$num_servers
25 | model=0
26 | models=$num_models
27 | 
28 | # get server ip addresses
29 | rm -f  ./endpoint_ip.conf
30 | echo "runtime=$runtime"
31 | while [ $server -lt $servers ]
32 | do
33 | 	if [ "$runtime" == "docker" ]; then
34 | 		instance_ip=$(cat /etc/hosts | grep  ${app_name}-${server} | awk '{print $1}')
35 | 	elif [ "$runtime" == "kubernetes" ]; then
36 | 		#echo "host=${app_name}-${server}.${namespace}.svc.cluster.local"
37 | 		instance_ip=$(host ${app_name}-${server}.${namespace}.svc.cluster.local | grep "has address" | cut -d ' ' -f 4)
38 | 		#echo "instance_ip=$instance_ip"
39 | 	fi
40 | 	echo $instance_ip >> endpoint_ip.conf
41 | 	server=$((server+1))
42 | done
43 | 
44 | # call each model
45 | server=0
46 | request=0
47 | echo "Endpoints:"
48 | cat ./endpoint_ip.conf
49 | for endpoint_ip in $(cat ./endpoint_ip.conf)
50 | do
51 | 	while [ $model -lt $models ] 
52 | 	do
53 | 		echo "Request: $request, Server: $server, IP: $endpoint_ip, Model: $model"
54 | 		./clock.sh curl http://${endpoint_ip}:8080/predictions/model$model
55 | 		model=$((model+1))
56 | 		request=$((request+1))
57 | 		sleep $request_frequency
58 | 	done
59 | 	model=0
60 | 	server=$((server+1))
61 | done
62 | 
63 | rm -f  ./endpoint_ip.conf
64 | 


--------------------------------------------------------------------------------
/5-test/tests/loop.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | ######################################################################
4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
5 | # SPDX-License-Identifier: MIT-0                                     #
6 | ######################################################################
7 | 
8 | while sleep 0.01; do $@; done


--------------------------------------------------------------------------------
/6-remove/stack-delete.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pushd ..
 4 | if [ -f ./config.properties ]; then
 5 | 	source ./config.properties
 6 | fi
 7 | 
 8 | if [ ! "$verbose" == "false" ]; then
 9 | 	set -x
10 | fi
11 | 
12 | echo ""
13 | echo "Deleting node groups, IAM service account and EKS cluster eksctl-eks-inference-workshop ..."
14 | aws cloudformation delete-stack --stack-name eksctl-eks-inference-workshop-nodegroup-inf --region us-west-2
15 | aws cloudformation wait stack-delete-complete --stack-name eksctl-eks-inference-workshop-nodegroup-inf --region us-west-2
16 | aws cloudformation delete-stack --stack-name eksctl-eks-inference-workshop-nodegroup-cpu --region us-west-2
17 | aws cloudformation wait stack-delete-complete --stack-name eksctl-eks-inference-workshop-nodegroup-cpu --region us-west-2
18 | aws cloudformation delete-stack --stack-name eksctl-eks-inference-workshop-nodegroup-graviton --region us-west-2
19 | aws cloudformation wait stack-delete-complete --stack-name eksctl-eks-inference-workshop-nodegroup-graviton --region us-west-2
20 | aws cloudformation delete-stack --stack-name eksctl-eks-inference-workshop-addon-iamserviceaccount-kube-system-aws-node --region us-west-2
21 | aws cloudformation wait stack-delete-complete --stack-name eksctl-eks-inference-workshop-addon-iamserviceaccount-kube-system-aws-node --region us-west-2
22 | aws cloudformation delete-stack --stack-name eksctl-eks-inference-workshop-cluster --region us-west-2
23 | aws cloudformation wait stack-delete-complete --stack-name eksctl-eks-inference-workshop-cluster --region us-west-2
24 | 
25 | echo ""
26 | echo "Finished deletion of eksctl-eks-inference-workshop CF stack in us-west-2 region. Now deleting  EC2 Management Instance stack in your default region ..."
27 | aws cloudformation delete-stack --stack-name ManagementInstance
28 | aws cloudformation wait stack-delete-complete --stack-name ManagementInstance
29 | 
30 | set +x
31 | 
32 | echo ""
33 | echo "Cleanup of all ML Inference Guidance AWS Resources complete" 
34 | echo ""
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | CODEOWNERS @aws-solutions-library-samples/maintainers
2 | /.github/workflows/maintainer_workflows.yml @aws-solutions-library-samples/maintainers
3 | /.github/solutionid_validator.sh @aws-solutions-library-samples/maintainers
4 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing Guidelines
 2 | 
 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
 4 | documentation, we greatly value feedback and contributions from our community.
 5 | 
 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
 7 | information to effectively respond to your bug report or contribution.
 8 | 
 9 | 
10 | ## Reporting Bugs/Feature Requests
11 | 
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 | 
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 | 
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 | 
22 | 
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 | 
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 | 
30 | To send us a pull request, please:
31 | 
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 7 | the Software, and to permit persons to whom the Software is furnished to do so.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15 | 
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Guidance for Low Latency, High Throughput Inference using Efficient Compute on Amazon EKS
  2 | The [**guidance-for-machine-learning-inference-on-aws**](https://github.com/aws-solutions-library-samples/guidance-for-machine-learning-inference-on-aws) repository contains an end-to-end automation framework example for running model inference locally on Docker or at scale on Amazon EKS Kubernetes cluster. 
  3 | It supports EKS compute nodes based on CPU, GPU, AWS Graviton and AWS Inferentia processor architectures  and can pack multiple models in a single processor core for improved cost efficiency.
  4 | While this example focuses on one processor architecture at a time, iterating over the steps below for various CPU/GPU Efficient Compute and Inferentia architectures enables hybrid deployments where the best processor/accelerator is used to serve each model depending on its resource consumption profile.
  5 | In this sample repository, we use a [bert-base](https://huggingface.co/distilbert-base-multilingual-cased) NLP model from [huggingface.co](https://huggingface.co/), however the project structure and workflow is generic and can be adapted for use with other models.
  6 | 
  7 | <div align="center">
  8 | <img src="./low-latency-high-bandwidth-updated-architecture.jpg" width="90%">  
  9 | <br/>
 10 | Fig. 1 - Sample Amazon EKS cluster infrastructure and deploying, running and testing of ML Inference workloads
 11 | </div>
 12 | <br/>
 13 | 
 14 | The ML inference workloads in this project are deployed on the CPU, GPU, or Inferentia based EKS compute nodes as shown on Fig. 1. 
 15 | The control scripts may run in any location that has a full access to the cluster Kubernetes API. To eliminate latency concern related to the EKS cluster ingress, load tests run in pods deployed within the same cluster and send requests to the models directly through the cluster pod network.
 16 | 
 17 | <div align="left">
 18 | 
 19 | 1. The Amazon EKS cluster has several node groups, with one Amazon EC2 instance family for each node group. Each node group can support different instance types, such as CPU (C5,C6i, C7gn), GPU (G4dn), [AWS Inferentia](https://aws.amazon.com/machine-learning/inferentia/) (inf1, inf2) and can pack multiple models for each EKS node to maximize the number of served ML models that are running in a node group. Model bin packing is used to maximize compute and memory utilization of the Amazon EC2 instances in the cluster node groups.
 20 | 2. The natural language processing (NLP) open-source PyTorch model from [Hugging Face](https://huggingface.co/), serving application and ML framework dependencies, are built by users as container images use an automation framework. These images are uploaded to Amazon Elastic Container Registry - [Amazon ECR](https://aws.amazon.com/ecr/).
 21 | 3. Using the automation framework, the model container images are obtained from Amazon ECR and deployed to an [Amazon EKS cluster](https://aws.amazon.com/eks/) using generated deployment and service manifests through the Kubernetes API (exposed through Elastic Load Balancing (ELB)). Model deployments are customized for each deployment target EKS compute node instance type through settings in the central configuration [file](https://github.com/aws-solutions-library-samples/guidance-for-machine-learning-inference-on-aws/blob/main/config.properties). 
 22 | 4. Following the best practices of the separation of model data from containers that run it, the ML model microservice design allows it to scale out to a large number of models. In the sample project, model containers are pulling data from Amazon Simple Storage Service ([Amazon S3](https://aws.amazon.com)) and other public model data sources each time they are initialized. 
 23 | 5. Using the automation framework, the test container images are deployed to  an Amazon EKS cluster using generated deployment and service manifests through the Kubernetes API. Test deployments are customized for each deployment target EKS compute node instance type through settings in the central configuration [file](https://github.com/aws-solutions-library-samples/guidance-for-machine-learning-inference-on-aws/blob/main/config.properties). Load or scale testing is performed by sending simultaneous requests to the model service pool from test pods. Performance test results and metrics are obtained, recorded, and aggregated.  
 24 | </div>
 25 | <br/><br/>
 26 | <div align="center">
 27 | <a href="https://www.youtube.com/watch?v=g9XRhGhQhAE"><img src="./aws-do-inference-video.png" width="90%"></a>
 28 | </br>
 29 | Fig. 2 - ML Inference video walkthrough
 30 | </div>
 31 | <br/>
 32 | 
 33 | Please watch this end-to-end accelerated [video walkthrough](https://bit.ly/aws-do-inference-video) (7 min) or follow the instructions below to build and run your own inference solution.
 34 | 
 35 | ## Prerequisites
 36 | This sample can be run on a single machine using Docker, or at scale on a Amazon EKS cluster.
 37 | 
 38 | It is assumed that the following basic tools are present: [docker](https://docs.docker.com/get-docker/), [kubectl](https://kubernetes.io/docs/tasks/tools/), [envsubst](https://command-not-found.com/envsubst), [kubetail](https://github.com/johanhaleby/kubetail), [bc](https://howtoinstall.co/en/bc).
 39 | 
 40 | ## Operation
 41 | 
 42 | The project is operated through a set of action scripts as described below. To complete a full cycle from beginning-to-end, first configure the project, then follow steps 1 through 5 executing the corresponding action scripts. Each of the action scripts has a help screen, which can be invoked by passing "help" as argument: `<script>.sh help` 
 43 | 
 44 | ### Optional - Provision an EKS cluster with 3 node groups
 45 | To provision this "opinionated" EKS cluster infrastructure optimized for running this guidance, run the `./provision.sh` script.
 46 | Optionally, you can use an existing EKS cluster you have or provision a new one using one of [Terraform EKS blueprint](https://aws-ia.github.io/terraform-aws-eks-blueprints/) that would contains nodegroups of desired target instance types.
 47 | ```
 48 | ./provision.sh
 49 | ```
 50 | This command will execute a script that creates a CloudFormation stack which deploys an EC2 "management" instance in your default AWS region. That instance contains a *userData* script that provisions an EKS cluster in the **us-west-2** region, pre-defined per specification based on the following [template](https://github.com/aws-samples/aws-do-eks/blob/main/wd/conf/eksctl/yaml/eks-inference-workshop.yaml-template) which is a part of another Git repo project. 
 51 | After that EKS cluster is provisoned, it is fully acessible from that EC2 "management" instance and this repository is copied there as well, ready to proceed to next steps.
 52 | 
 53 | ### Configure
 54 | ```
 55 | ./config.sh
 56 | ```
 57 | A centralized configuration file `config.properties` contains all settings that are customizeable for the project.
 58 | This file comes pre-configured with reasonable defaults that work out of the box. To set the `processor` target or 
 59 | any other setting edit the config file, or execute the `config.sh` script. 
 60 | Configuration changes take effect immediately upon execution of the next action script.
 61 | 
 62 | ### 1. Build
 63 | ```
 64 | ./build.sh
 65 | ```
 66 | This step builds a base container for the selected processor. 
 67 | A base container is required for any of the subsequent steps. 
 68 | This step can be executed on any instance type, regardless of processor target. 
 69 | 
 70 | Optionally, if you'd like to push the base image to a container registry, execute `./build.sh push`.
 71 | Pushing the base image to a container registry is required if you are planning to run the test step against models deployed to Kubernetes. 
 72 | If you are using a private registry and you need to login before pushing, execute `./login.sh`. This script will login to AWS ECR, 
 73 | other private registry implementations can be added to the script as needed.
 74 | 
 75 | ### 2. Trace
 76 | ```
 77 | ./trace.sh
 78 | ```
 79 | Compiles the model into a TorchScript serialized graph file (`.pt`). This step requires the model to run on the target processor.
 80 | Therefore it is necessary to run this step on an instance that has the target processor available. 
 81 | 
 82 | Upon successful compilation, the model will be saved in a local folder named `trace-{model_name}`.
 83 | 
 84 | #### Note
 85 | It is recommended to use the [AWS Deep Learning AMI](https://docs.aws.amazon.com/dlami/latest/devguide/what-is-dlami.html) to launch the instance where your model will be traced.
 86 | 
 87 | * To trace a model for GPU, run the trace step on a [GPU instance](https://aws.amazon.com/ec2/instance-types/#Accelerated_Computing) launched with the [AWS DLAMI](https://docs.aws.amazon.com/dlami/latest/devguide/launch.html).
 88 | 
 89 | * To trace a model for Inferentia, run the trace step on an [Inferentia instance](https://aws.amazon.com/ec2/instance-types/#Accelerated_Computing) launched with the [AWS DLAMI with Neuron](https://docs.aws.amazon.com/dlami/latest/devguide/tutorial-inferentia-launching.html) and activate the [Neuron compiler conda environment](https://docs.aws.amazon.com/dlami/latest/devguide/tutorial-inferentia-pytorch-neuron.html)
 90 | 
 91 | 
 92 | ### 3. Pack
 93 | ```
 94 | ./pack.sh
 95 | ````
 96 | Packs the model in a container with FastAPI, also allowing for multiple models to be packed within the same container.
 97 | FastAPI is used as an example here for simplicity and performance, however it can be interchanged with any other model server. 
 98 | For the purpose of this project we pack several instances of the same model in the container, however a natural extension
 99 | of the same concept is to pack different models in the same container.
100 | 
101 | To push the model container image to a registry, execute `./pack.sh push`. 
102 | The model container must be pushed to a registry if you are deploying your models to Kubernetes.
103 | 
104 | ### 4. Deploy
105 | ```
106 | ./deploy.sh
107 | ```
108 | This script runs your models on the configured runtime. The project has built-in support for both
109 | local Docker runtimes and Kubernetes. The deploy script also has several sub-commands that facilitate
110 | the management of the full lifecycle of your model server containers.
111 | * `./deploy.sh run` - (default) runs model server containers
112 | * `./deploy.sh status [number]` - show container / pod / service status. Optionally show only specified instance number
113 | * `./deploy.sh logs [number]` - tail container logs. Optionally tail only specified instance number
114 | * `./deploy.sh exec <number>` - open bash into model server container with the specified instance number
115 | * `./deploy.sh stop` - stop and remove deployed model contaiers from runtime
116 | 
117 | ### 5. Test
118 | ```
119 | ./test.sh
120 | ```
121 | The test script helps run a number of tests against the model servers deployed in your runtime environment.
122 | * `./test.sh build` - build test container image
123 | * `./test.sh push` - push test image to container registry
124 | * `./test.sh pull` - pull the current test image from the container registry if one exists
125 | * `./test.sh run` - run a test client container instance for advanced testing and exploration
126 | * `./test.sh exec` - open shell in test container
127 | * `./test.sh status`- show status of test container
128 | * `./test.sh stop` - stop test container
129 | * `./test.sh help` - list the available test commands
130 | * `./test.sh run seq` - run sequential test. One request at a time submitted to each model server and model in sequential order.
131 | * `./test.sh run rnd` - run random test. One request at a time submitted to a randomly selected server and model at a preset frequency.
132 | * `./test.sh run bmk` - run benchmark test client to measure throughput and latency under load with random requests
133 | * `./test.sh run bma` - run benchmark analysis - aggregate and average stats from logs of all completed benchmark containers
134 | 
135 | ## Clean up
136 | 
137 | You can uninstall the sample code for this Guidance using the AWS Command Line Interface. You must also delete the EKS cluster if it was deployed using references from this Guidance, since removal of the scale testing framework does not automatically delete Cluster and its resources.
138 | 
139 | To stop or uninstall scale Inferencetest job(s), run the following command:
140 | ```shell
141 | ./test.sh stop
142 | ```
143 | It should delete all scale test pods and jobs from the specified EKS K8s namespace.
144 | 
145 | To stop or uninstall Inference model services, run the following command:
146 | ```shell
147 | ./deploy.sh stop
148 | ```
149 | It should delete all Model deployments, pods, and services from the specified EKS K8s namespace.
150 | 
151 | If you provisioned an EKS cluster when setting up your prerequisites for the project  as described in the "Optional - Provision an EKS cluster with 3 node groups" above, you can clean up the cluster and all resources associated with it by running this script:
152 | ```
153 | ./remove.sh
154 | ```
155 | It should delete EKS cluster compute node groups first, then IAM service account used in that cluster, then cluster itself and, finally, ManagementInstance EC2 instance via corresponding Cloud Formations. Sometimes you may need to run that command a few times as individual stack deletion commands may time out - that should not create any problem.
156 | 
157 | ## Security
158 | 
159 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
160 | 
161 | ## License
162 | 
163 | This library is licensed under the MIT-0 License. See the LICENSE file.
164 | 
165 | ## References
166 | 
167 | * [Huggingface](https://huggingface.co)
168 | * [AWS EKS](https://aws.amazon.com/eks)
169 | * [aws-do-eks](https://github.com/aws-samples/aws-do-eks)
170 | * [FastAPI](https://fastapi.tiangolo.com/)
171 | * [AWS GPU](https://aws.amazon.com/machine-learning/accelerate-machine-learning-P3/)
172 | * [AWS Inferentia](https://aws.amazon.com/machine-learning/inferentia/)
173 | * [Instance Selector](https://instances.vantage.sh/?selected=inf1.6xlarge)
174 | * [kubetail](https://github.com/johanhaleby/kubetail)
175 | * [envsubst](https://www.gnu.org/software/gettext/manual/gettext.html#envsubst-Invocation)
176 | * [AWS Machine Learning blog post](https://aws.amazon.com/blogs/machine-learning/serve-3000-deep-learning-models-on-amazon-eks-with-aws-inferentia-for-under-50-an-hour/)
177 | * [AWS Guidance for Low Latency, High Throughput Inference using Efficient Compute on Amazon EKS](https://bit.ly/aws-inference-guidance)
178 | 
179 | 


--------------------------------------------------------------------------------
/aws-do-inference-video.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-machine-learning-inference-on-aws/d0e7fd36e976a184b5fbbb3cd992ffeb9ed28e03/aws-do-inference-video.png


--------------------------------------------------------------------------------
/aws-do-inference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-machine-learning-inference-on-aws/d0e7fd36e976a184b5fbbb3cd992ffeb9ed28e03/aws-do-inference.png


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | print_help() {
 9 | 	echo ""
10 | 	echo "Usage: $0 [arg]"
11 | 	echo ""
12 | 	echo "   When no arguments are specified, this script builds a base container "
13 | 	echo "   for the processor type, configured in config.properties."
14 | 	echo "   Optionally, the script can push/pull the base image to/from a container registry."
15 | 	echo ""
16 | 	echo "   Available optional arguments:"
17 | 	echo "      push   - push base image to container registry"
18 | 	echo "      pull   - pull base image from container registry"
19 |         echo ""	
20 | }
21 | 
22 | 
23 | action=$1
24 | if [ "$action" == "" ]; then
25 | 	source ./config.properties
26 | 
27 | 	echo ""
28 | 	echo "Building base container ..."
29 | 	
30 | 	echo ""
31 | 	dockerfile=./1-build/Dockerfile-base-${processor}
32 | 	if [ -f $dockerfile ]; then
33 | 		echo "    ... base-${processor} ..."
34 |                 CMD="docker build -t ${registry}${base_image_name}${base_image_tag} -f $dockerfile ."
35 |         	if [ ! "$verbose" == "false" ]; then
36 |                 	echo -e "\n${CMD}\n"
37 |         	fi
38 |         	eval "${CMD}"
39 | 	else
40 | 		echo "Dockerfile $dockerfile was not found."
41 | 	        echo "Please ensure that processor is configured with a supported value in config.properties"
42 | 		exit 1
43 | 	fi
44 | elif [ "$action" == "push" ]; then
45 | 	./1-build/push.sh
46 | elif [ "$action" == "pull" ]; then
47 | 	./-build/pull.sh
48 | else 
49 | 	print_help
50 | fi
51 | 


--------------------------------------------------------------------------------
/config.properties:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # This file contains all customizable configuration items for the project
  4 | # core version to be used at re:Invent 23 builder sessions
  5 | 
  6 | ######################################################################
  7 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
  8 | # SPDX-License-Identifier: MIT-0                                     #
  9 | ######################################################################
 10 | 
 11 | # Project settings
 12 | verbose=true
 13 | 
 14 | # Model settings
 15 | huggingface_model_name=bert-base-multilingual-cased
 16 | huggingface_tokenizer_class=BertTokenizer
 17 | huggingface_model_class=BertForQuestionAnswering
 18 | 
 19 | # Compiler settings
 20 | # processor = cpu|gpu|inf1|inf2|graviton
 21 | processor=graviton
 22 | pipeline_cores=1
 23 | sequence_length=128
 24 | batch_size=1
 25 | test=True
 26 | 
 27 | # account is the current AWS user account. This setting is determined automatically.
 28 | account=$(aws sts get-caller-identity --query Account --output text)
 29 | 
 30 | # region is used to login if the registry is ecr 
 31 | region=us-west-2
 32 | 
 33 | # Container settings
 34 | # Default is the private ECR registry in the current AWS account.
 35 | # If registry is set, include the registry uri up to the image name, end the registry setting with /
 36 | # registry setting for locally built images uploaded to local ECR
 37 | registry=${account}.dkr.ecr.${region}.amazonaws.com/
 38 | # registry_type=ecr
 39 | registry_type=ecr
 40 | base_image_name=aws-do-inference-base
 41 | base_image_tag=:v15-${processor}
 42 | model_image_name=${huggingface_model_name}
 43 | model_image_tag=:v15-${processor}
 44 | 
 45 | # if using pre-built public ECR registry Model image (may require authentication) use the following settings for model_image
 46 | #registry=public.ecr.aws/a2u7h5w3/
 47 | #model_image_name=bert-base-workshop
 48 | #model_image_tag=:v15-${processor}
 49 | 
 50 | # Trace settings
 51 | # trace_opts_$processor is a processor-specific setting used by the docker run command in the trace.sh script
 52 | # This setting will be automatically assigned based on your processor value
 53 | trace_opts_cpu=""
 54 | trace_opts_gpu="--gpus 0"
 55 | trace_opts_inf1="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
 56 | trace_opts_inf2="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
 57 | trace_opts_graviton=""
 58 | 
 59 | # Deployment settings
 60 | # some of these settings apply only when the runtime is kubernetes
 61 | # runtime = docker | kubernetes
 62 | runtime=kubernetes
 63 | # number of models per model server
 64 | num_models=16
 65 | # quiet = False | True - sets whether the model server should print logs
 66 | quiet=False
 67 | # postprocess = True | False - sets whether tensors returned from model should be translated back to text or just returned
 68 | postprocess=True
 69 | # service_port=8080 - port on which model service will be exposed
 70 | service_port=8080
 71 | # Kubernetes-specific deployment settings
 72 | # instance_type = c5.xxx | g4dn.xlarge | g4dn.12xlarge | inf1.xlarge | inf2.8xlarge | c7g.4xlarge...
 73 | # A node group with the specified instance_type must exist in the cluster
 74 | # The instance type must have the processor configured above
 75 | # Example: processor=graviton, instance_type=c7g.4xlarge
 76 | instance_type=c7g.4xlarge
 77 | # num_servers - number of model servers to deploy
 78 | # note that more than one model server can run on a node with multiple cpu/gpu/inferentia chips.
 79 | # example: 4 model servers fit on one inf1.6xlarge instance as it has 4 inferentia chips.
 80 | num_servers=1
 81 | # Kubernetes namespace
 82 | namespace=mpi
 83 | # Kubernetes app name
 84 | app_name=${huggingface_model_name}-${processor}
 85 | app_dir=app-${app_name}-${instance_type}
 86 | 
 87 | # Test image settings - locally built images
 88 | #test_image_name=test-${huggingface_model_name}
 89 | #test_image_tag=:v15-cpu
 90 | 
 91 | #when using pre-built test image for CPU architecture available in public ECR registry (may require authentication): 
 92 | test_image_name=bert-base-workshop
 93 | test_image_tag=:test-v15-cpu
 94 | 
 95 | # request_frequency - time to sleep between two consecutive requests in curl tests
 96 | request_frequency=0.01
 97 | # Stop random request test after num_requests number of requests
 98 | num_requests=30
 99 | # Number of test containers to launch (default=1), use > 1 for scale testing
100 | num_test_containers=5
101 | # test_instance_type - when runtime is kubernetes, node instance type on which test pods will run
102 | test_instance_type=c5.4xlarge
103 | # test_namespace - when runtime is kubernetes, namespace where test pods will be created
104 | test_namespace=mpi
105 | # test_dir - when runtime is kubernetes, directory where test job/pod manifests are stored
106 | test_dir=app-${test_image_name}-${instance_type}
107 | 


--------------------------------------------------------------------------------
/config.properties_gpu_tests:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # This file contains all customizable configuration items for the project
  4 | # It is pre-configured for re:Invent 2023 builder session for the following basic settings:
  5 | # using publicly shared Model container images for Inferentia 2 and publicly shared Test container images
  6 | # replace the original config.properties file with this version to run Tests on c5.4xlarge nodes
  7 | 
  8 | ######################################################################
  9 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 10 | # SPDX-License-Identifier: MIT-0                                     #
 11 | ######################################################################
 12 | # Project settings
 13 | verbose=true
 14 | 
 15 | # Model settings
 16 | huggingface_model_name=bert-base-multilingual-cased
 17 | huggingface_tokenizer_class=BertTokenizer
 18 | huggingface_model_class=BertForQuestionAnswering
 19 | 
 20 | # Compiler settings
 21 | # processor = cpu|gpu|inf1|inf2|graviton
 22 | processor=gpu
 23 | pipeline_cores=1
 24 | sequence_length=128
 25 | batch_size=1
 26 | test=True
 27 | 
 28 | # account is the current AWS user account. This setting is determined automatically.
 29 | account=$(aws sts get-caller-identity --query Account --output text)
 30 | 
 31 | # region is used to login if the registry is ecr 
 32 | region=us-west-2
 33 | 
 34 | # Container settings
 35 | # Default is the private ECR registry in the current AWS account.
 36 | # If registry is set, include the registry uri up to the image name,
 37 | # end the registry setting with /
 38 | # registry=${account}.dkr.ecr.${region}.amazonaws.com/
 39 | # registry_type=ecr
 40 | registry_type=ecr
 41 | base_image_name=aws-do-inference-base
 42 | base_image_tag=:v15-${processor}
 43 | #model_image_name=${huggingface_model_name}
 44 | #model_image_tag=:v15-${processor}
 45 | 
 46 | # if using pre-built public registry model image (may require authentication) use the following settings
 47 | registry=public.ecr.aws/a2u7h5w3/
 48 | model_image_name=bert-base-workshop
 49 | model_image_tag=:v15-${processor}
 50 | 
 51 | # Trace settings
 52 | # trace_opts_$processor is a processor-specific setting used by the docker run command in the trace.sh script
 53 | # This setting will be automatically assigned based on your processor value
 54 | trace_opts_cpu=""
 55 | trace_opts_gpu="--gpus 0"
 56 | trace_opts_inf1="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
 57 | trace_opts_inf2="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
 58 | trace_opts_graviton=""
 59 | 
 60 | # Deployment settings
 61 | # some of these settings apply only when the runtime is kubernetes
 62 | # runtime = docker | kubernetes
 63 | runtime=kubernetes
 64 | # number of models per model server
 65 | num_models=16
 66 | # quiet = False | True - sets whether the model server should print logs
 67 | quiet=False
 68 | # postprocess = True | False - sets whether tensors returned from model should be translated back to text or just returned
 69 | postprocess=True
 70 | # service_port=8080 - port on which model service will be exposed
 71 | service_port=8080
 72 | # Kubernetes-specific deployment settings
 73 | # instance_type = c5.xxx | g4dn.xlarge | g4dn.12xlarge | inf1.xlarge | inf2.8xlarge | c7g.4xlarge...
 74 | # A node group with the specified instance_type must exist in the cluster
 75 | # The instance type must have the processor configured above
 76 | # Example: processor=graviton, instance_type=c7g.4xlarge
 77 | instance_type=g5.4xlarge
 78 | # num_servers - number of model servers to deploy
 79 | # note that more than one model server can run on a node with multiple cpu/gpu/inferentia chips.
 80 | # example: 4 model servers fit on one inf1.6xlarge instance as it has 4 inferentia chips.
 81 | num_servers=1
 82 | # Kubernetes namespace
 83 | namespace=mpi
 84 | # Kubernetes app name
 85 | app_name=${huggingface_model_name}-${processor}
 86 | app_dir=app-${app_name}-${instance_type}
 87 | 
 88 | # Local Test image settings
 89 | #test_image_name=test-${huggingface_model_name}
 90 | #test_image_tag=:v15-cpu
 91 | 
 92 | # when using pre-built test image available in public ECR registry (may require authentication): 
 93 | # public.ecr.aws/a2u7h5w3/bert-base-workshop:test-v15-cpu
 94 | test_image_name=bert-base-workshop
 95 | test_image_tag=:test-v15-cpu
 96 | 
 97 | # request_frequency - time to sleep between two consecutive requests in curl tests
 98 | request_frequency=0.01
 99 | # Stop random request test after num_requests number of requests
100 | num_requests=30
101 | # Number of test containers to launch (default=1), use > 1 for scale testing
102 | num_test_containers=10
103 | # test_instance_type - when runtime is kubernetes, node instance type on which test pods will run
104 | test_instance_type=c5.4xlarge
105 | # test_namespace - when runtime is kubernetes, namespace where test pods will be created
106 | test_namespace=mpi
107 | # test_dir - when runtime is kubernetes, directory where test job/pod manifests are stored
108 | test_dir=app-${test_image_name}-${instance_type}
109 | 


--------------------------------------------------------------------------------
/config.properties_graviton_tests:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # This file contains all customizable configuration items for the project
  4 | # It is pre-configured for re:Invent 2023 builder session for the following basic settings:
  5 | # using locally built and uploaded Model container images for Graviton and publicly shared Test container images
  6 | # replace the original config.properties file with this version to run Tests on c5.4xlarge nodes
  7 | 
  8 | ######################################################################
  9 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 10 | # SPDX-License-Identifier: MIT-0                                     #
 11 | ######################################################################
 12 | # Project settings
 13 | verbose=true
 14 | 
 15 | # Model settings
 16 | huggingface_model_name=bert-base-multilingual-cased
 17 | huggingface_tokenizer_class=BertTokenizer
 18 | huggingface_model_class=BertForQuestionAnswering
 19 | 
 20 | # Compiler settings
 21 | # processor = cpu|gpu|inf1|inf2|graviton
 22 | processor=graviton
 23 | pipeline_cores=1
 24 | sequence_length=128
 25 | batch_size=1
 26 | test=True
 27 | 
 28 | # account is the current AWS user account. This setting is determined automatically.
 29 | account=$(aws sts get-caller-identity --query Account --output text)
 30 | 
 31 | # region is used to login if the registry is ecr 
 32 | region=us-west-2
 33 | 
 34 | # Container settings
 35 | # Default is the private ECR registry in the current AWS account.
 36 | # If registry is set, include the registry uri up to the image name,
 37 | # end the registry setting with /
 38 | # registry=${account}.dkr.ecr.${region}.amazonaws.com/
 39 | # registry_type=ecr
 40 | registry_type=ecr
 41 | base_image_name=aws-do-inference-base
 42 | base_image_tag=:v15-${processor}
 43 | # model_image_name=${huggingface_model_name}
 44 | # model_image_tag=:v15-${processor}
 45 | 
 46 | # if using pre-built public registry image (may require authentication) use the following settings
 47 | registry=public.ecr.aws/a2u7h5w3/
 48 | model_image_name=bert-base-workshop
 49 | model_image_tag=:v15-${processor}
 50 | 
 51 | # Trace settings
 52 | # trace_opts_$processor is a processor-specific setting used by the docker run command in the trace.sh script
 53 | # This setting will be automatically assigned based on your processor value
 54 | trace_opts_cpu=""
 55 | trace_opts_gpu="--gpus 0"
 56 | trace_opts_inf1="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
 57 | trace_opts_inf2="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
 58 | trace_opts_graviton=""
 59 | 
 60 | # Deployment settings
 61 | # some of these settings apply only when the runtime is kubernetes
 62 | # runtime = docker | kubernetes
 63 | runtime=kubernetes
 64 | # number of models per model server
 65 | num_models=16
 66 | # quiet = False | True - sets whether the model server should print logs
 67 | quiet=False
 68 | # postprocess = True | False - sets whether tensors returned from model should be translated back to text or just returned
 69 | postprocess=True
 70 | # service_port=8080 - port on which model service will be exposed
 71 | service_port=8080
 72 | # Kubernetes-specific deployment settings
 73 | # instance_type = c5.xxx | g4dn.xlarge | g4dn.12xlarge | inf1.xlarge | inf2.8xlarge | c7g.4xlarge...
 74 | # A node group with the specified instance_type must exist in the cluster
 75 | # The instance type must have the processor configured above
 76 | # Example: processor=graviton, instance_type=c7g.4xlarge
 77 | instance_type=c7g.4xlarge
 78 | # num_servers - number of model servers to deploy
 79 | # note that more than one model server can run on a node with multiple cpu/gpu/inferentia chips.
 80 | # example: 4 model servers fit on one inf1.6xlarge instance as it has 4 inferentia chips.
 81 | num_servers=1
 82 | # Kubernetes namespace
 83 | namespace=mpi
 84 | # Kubernetes app name
 85 | app_name=${huggingface_model_name}-${processor}
 86 | app_dir=app-${app_name}-${instance_type}
 87 | 
 88 | # Test image settings
 89 | #test_image_name=test-${huggingface_model_name}
 90 | #test_image_tag=:v15-cpu
 91 | 
 92 | #when using pre-built test image available in public ECR registry (may require authentication): 
 93 | #registry=public.ecr.aws/a2u7h5w3/
 94 | test_image_name=bert-base-workshop
 95 | test_image_tag=:test-v15-cpu
 96 | 
 97 | # request_frequency - time to sleep between two consecutive requests in curl tests
 98 | request_frequency=0.01
 99 | # Stop random request test after num_requests number of requests
100 | num_requests=30
101 | # Number of test containers to launch (default=1), use > 1 for scale testing
102 | num_test_containers=10
103 | # test_instance_type - when runtime is kubernetes, node instance type on which test pods will run
104 | test_instance_type=c5.4xlarge
105 | # test_namespace - when runtime is kubernetes, namespace where test pods will be created
106 | test_namespace=mpi
107 | # test_dir - when runtime is kubernetes, directory where test job/pod manifests are stored
108 | test_dir=app-${test_image_name}-${instance_type}
109 | 


--------------------------------------------------------------------------------
/config.properties_inferentia_tests:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # This file contains all customizable configuration items for the project
  4 | # It is pre-configured for re:Invent 2023 builder session for the following basic settings:
  5 | # using publicly shared Model container images for Inferentia 2 and publicly shared Test container images
  6 | # replace the original config.properties file with this version to run Tests on c5.4xlarge nodes
  7 | 
  8 | ######################################################################
  9 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 10 | # SPDX-License-Identifier: MIT-0                                     #
 11 | ######################################################################
 12 | # Project settings
 13 | verbose=true
 14 | 
 15 | # Model settings
 16 | huggingface_model_name=bert-base-multilingual-cased
 17 | huggingface_tokenizer_class=BertTokenizer
 18 | huggingface_model_class=BertForQuestionAnswering
 19 | 
 20 | # Compiler settings
 21 | # processor = cpu|gpu|inf1|inf2|graviton
 22 | processor=inf2
 23 | pipeline_cores=1
 24 | sequence_length=128
 25 | batch_size=1
 26 | test=True
 27 | 
 28 | # account is the current AWS user account. This setting is determined automatically.
 29 | account=$(aws sts get-caller-identity --query Account --output text)
 30 | 
 31 | # region is used to login if the registry is ecr 
 32 | region=us-west-2
 33 | 
 34 | # Container settings
 35 | # Default is the private ECR registry in the current AWS account.
 36 | # If registry is set, include the registry uri up to the image name,
 37 | # end the registry setting with /
 38 | # registry=${account}.dkr.ecr.${region}.amazonaws.com/
 39 | # registry_type=ecr
 40 | registry_type=ecr
 41 | base_image_name=aws-do-inference-base
 42 | base_image_tag=:v15-${processor}
 43 | #model_image_name=${huggingface_model_name}
 44 | #model_image_tag=:v15-${processor}
 45 | 
 46 | # if using pre-built public registry model image (may require authentication) use the following settings
 47 | registry=public.ecr.aws/a2u7h5w3/
 48 | model_image_name=bert-base-workshop
 49 | model_image_tag=:v15-${processor}
 50 | 
 51 | # Trace settings
 52 | # trace_opts_$processor is a processor-specific setting used by the docker run command in the trace.sh script
 53 | # This setting will be automatically assigned based on your processor value
 54 | trace_opts_cpu=""
 55 | trace_opts_gpu="--gpus 0"
 56 | trace_opts_inf1="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
 57 | trace_opts_inf2="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
 58 | trace_opts_graviton=""
 59 | 
 60 | # Deployment settings
 61 | # some of these settings apply only when the runtime is kubernetes
 62 | # runtime = docker | kubernetes
 63 | runtime=kubernetes
 64 | # number of models per model server
 65 | num_models=6
 66 | # quiet = False | True - sets whether the model server should print logs
 67 | quiet=False
 68 | # postprocess = True | False - sets whether tensors returned from model should be translated back to text or just returned
 69 | postprocess=True
 70 | # service_port=8080 - port on which model service will be exposed
 71 | service_port=8080
 72 | # Kubernetes-specific deployment settings
 73 | # instance_type = c5.xxx | g4dn.xlarge | g4dn.12xlarge | inf1.xlarge | inf2.8xlarge | c7g.4xlarge...
 74 | # A node group with the specified instance_type must exist in the cluster
 75 | # The instance type must have the processor configured above
 76 | # Example: processor=graviton, instance_type=c7g.4xlarge
 77 | instance_type=inf2.xlarge
 78 | # num_servers - number of model servers to deploy
 79 | # note that more than one model server can run on a node with multiple cpu/gpu/inferentia chips.
 80 | # example: 4 model servers fit on one inf1.6xlarge instance as it has 4 inferentia chips.
 81 | num_servers=2
 82 | # Kubernetes namespace
 83 | namespace=mpi
 84 | # Kubernetes app name
 85 | app_name=${huggingface_model_name}-${processor}
 86 | app_dir=app-${app_name}-${instance_type}
 87 | 
 88 | # Local Test image settings
 89 | #test_image_name=test-${huggingface_model_name}
 90 | #test_image_tag=:v15-cpu
 91 | 
 92 | # when using pre-built test image available in public ECR registry (may require authentication): 
 93 | # public.ecr.aws/a2u7h5w3/bert-base-workshop:test-v15-cpu
 94 | test_image_name=bert-base-workshop
 95 | test_image_tag=:test-v15-cpu
 96 | 
 97 | # request_frequency - time to sleep between two consecutive requests in curl tests
 98 | request_frequency=0.01
 99 | # Stop random request test after num_requests number of requests
100 | num_requests=30
101 | # Number of test containers to launch (default=1), use > 1 for scale testing
102 | num_test_containers=10
103 | # test_instance_type - when runtime is kubernetes, node instance type on which test pods will run
104 | test_instance_type=c5.4xlarge
105 | # test_namespace - when runtime is kubernetes, namespace where test pods will be created
106 | test_namespace=mpi
107 | # test_dir - when runtime is kubernetes, directory where test job/pod manifests are stored
108 | test_dir=app-${test_image_name}-${instance_type}
109 | 


--------------------------------------------------------------------------------
/config.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | print_help() {
 9 | 	echo ""
10 | 	echo "Usage: $0"
11 | 	echo ""
12 | 	echo "   This script just opens the global configuration file (config.properties) in a text editor."
13 | 	echo "   By default we use vi, but this can be easily changed by modifying the script."
14 | 	echo "   Changes to the config file take effect with the next action script execution."
15 | 	echo ""
16 | }
17 | 
18 | if [ "$1" == "" ]; then
19 | 	CMD="vi ./config.properties"
20 | 	if [ ! "$verbose" == "false" ]; then
21 | 		echo -e "\n${CMD}\n"
22 | 	fi
23 | 	eval "${CMD}"
24 | else
25 | 	print_help
26 | fi
27 | 


--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | print_help() {
 9 |     echo ""
10 |     echo "Usage: $0 [arg]"
11 |     echo ""
12 |     echo "   This script deploys and manages the model servers on the configured runtime."
13 |     echo "   Both Docker for single host deployments and Kubernetes for cluster deployments are supported container runtimes."
14 |     echo "   If no arguments are specified, the default action (run) will be executed."
15 |     echo ""
16 |     echo "   Available optional arguments:"
17 |     echo "   run         - deploy the model servers to the configured runtime."
18 |     echo "   stop        - remove the model servers from the configured runtime."
19 |     echo "   status [id] - show current status of deployed model servers, optionally just for the specified server id."
20 |     echo "   logs [id]   - show model server logs for all servers, or only the specified server id."
21 |     echo "   exec <id>   - open bash shell into the container of the server with the specified id. Note the id is required." 
22 |     echo ""
23 | }
24 | 
25 | action=$1
26 | 
27 | if [ "$action" == "" ]
28 | then
29 |     action="run"
30 | fi
31 | 
32 | echo ""
33 | pushd ./4-deploy > /dev/null
34 | case "$action" in
35 |     "run")
36 |         ./run.sh
37 |         ;;
38 |     "stop")
39 |         ./stop.sh
40 |         ;;
41 |     "status")
42 |         ./status.sh $2
43 |         ;;
44 |     "logs")
45 |         ./logs.sh $2
46 |         ;;
47 |     "exec")
48 |         ./exec.sh $2
49 |         ;;
50 |     *)
51 | 	print_help
52 |         ;;
53 | esac
54 | popd > /dev/null
55 | echo ""
56 | 


--------------------------------------------------------------------------------
/login.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | print_help() {
 9 | 	echo ""
10 | 	echo "Usage: $0"
11 | 	echo ""
12 | 	echo "   This script assists with logging in to a private container registry."
13 | 	echo "   By default we use Amazon ECR, however the script can be extended to support other registries as needed."
14 | 	echo "   In order to login successfully, the environment in which this script is running, must be configured"
15 | 	echo "   with an IAM role allowing access to ECR in the target AWS account."
16 | 	echo ""
17 | }
18 | 
19 | if [ "$1" == "" ]; then
20 | 
21 | 	if [ -f ./config.properties ]; then
22 |     		source ./config.properties
23 | 	elif [ -f ../config.properties ]; then
24 |     		source ../config.properties
25 | 	else
26 |     		echo "config.properties not found!"
27 | 	fi
28 | 
29 | 	# Login to container registry
30 | 	case "$registry_type" in
31 |     		"ecr")
32 |         		echo "Logging in to $registry_type $registry ..."
33 |         		CMD="aws ecr get-login-password --region $region | docker login --username AWS --password-stdin $registry"
34 |         		if [ ! "$verbose" == "false" ]; then
35 |                 		echo -e "\n${CMD}\n"
36 |         		fi
37 |         		eval "${CMD}"
38 |         		;;
39 |     		*)
40 |         		echo "Login for registry_type=$registry_type is not implemented"
41 |         		;;
42 | 	esac
43 | else
44 | 	print_help
45 | fi
46 | 


--------------------------------------------------------------------------------
/low-latency-high-bandwidth-updated-architecture.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-machine-learning-inference-on-aws/d0e7fd36e976a184b5fbbb3cd992ffeb9ed28e03/low-latency-high-bandwidth-updated-architecture.jpg


--------------------------------------------------------------------------------
/low-latency-high-throughput-inference-on-amazon-eks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-machine-learning-inference-on-aws/d0e7fd36e976a184b5fbbb3cd992ffeb9ed28e03/low-latency-high-throughput-inference-on-amazon-eks.png


--------------------------------------------------------------------------------
/pack.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | print_help() {
 9 |     echo ""
10 |     echo "Usage: $0 [arg]"
11 |     echo ""
12 |     echo "   This script builds a model container using the traced/compiled model file and a model server."
13 |     echo "   By default we use FastAPI and hypercorn to serve models, but the same approach can be extended"
14 |     echo "   to other servers if needed. Optionally, this script can push/pull the model container"
15 |     echo "   to/from a container registry."
16 |     echo ""
17 |     echo "   Available optional arguments:"
18 |     echo "      push   - push model container to a registry"
19 |     echo "      pull   - pull model container from a registry"
20 |     echo ""
21 | }
22 | 
23 | source ./config.properties
24 | 
25 | action=$1
26 | 
27 | if [ "$action" == "" ]; then
28 |     model_file_name=${huggingface_model_name}_bs${batch_size}_seq${sequence_length}_pc${pipeline_cores}_${processor}.pt
29 |     
30 |     CMD="docker build -t ${registry}${model_image_name}${model_image_tag} --build-arg BASE_IMAGE=${registry}${base_image_name}${base_image_tag} \
31 |                  --build-arg MODEL_NAME=${huggingface_model_name} --build-arg MODEL_FILE_NAME=${model_file_name} --build-arg PROCESSOR=${processor} \
32 |                  -f 3-pack/Dockerfile ."
33 |     if [ ! "$verbose" == "false" ]; then
34 |         echo -e "\n${CMD}\n"
35 |     fi
36 |     eval "${CMD}"
37 | elif [ "$action" == "push" ]; then
38 |     ./3-pack/push.sh
39 | elif [ "$action" == "pull" ]; then
40 |     ./3-pack/pull.sh
41 | else
42 |     print_help
43 | fi
44 | 


--------------------------------------------------------------------------------
/provision.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | print_help() {
 9 | 	echo ""
10 | 	echo "Usage: $0 [runtime]"
11 | 	echo ""
12 | 	echo "   runtime - optional [docker|kubernetes}. Target runtime to provision."
13 |         echo "             if not specified, reads value from ./config.properties"	
14 | 	echo ""
15 | 	echo "   When runtime is kubernetes, this script uses CloudFormation to create a management instance"
16 | 	echo "   which is used to launch and access an EKS cluster, and can be used to complete"
17 | 	echo "   the remaining steps in this project as well."
18 | 	echo ""
19 | 	echo "   When runtime is docker, this script does not provision any additional resources."
20 |         echo ""	
21 | }
22 | 
23 | 
24 | runtime=$1
25 | if [ "$runtime" == "" ]; then
26 | 	source ./config.properties
27 | fi
28 | 
29 | if [ "$runtime" == "kubernetes" ]; then
30 | 	echo ""
31 | 	echo "Provisioning Management Instance and EKS cluster infrastructure ..."
32 | 	
33 | 	echo ""
34 | 	pushd ./0-provision
35 | 	./stack-create.sh
36 | 	popd
37 | elif [ "$runtime" == "docker" ]; then
38 |         echo ""
39 |         echo "No additional infrastructure is required for runtime: $runtime..."
40 |         echo ""       
41 | else 
42 | 	print_help
43 | fi
44 | 


--------------------------------------------------------------------------------
/remove.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | print_help() {
 9 | 	echo ""
10 | 	echo "Usage: $0 [runtime]"
11 | 	echo ""
12 | 	echo "   runtime - optional [docker|kubernetes}. Target runtime to remove."
13 |         echo "             if not specified, reads value from ./config.properties"	
14 | 	echo ""
15 | 	echo "   When runtime is kubernetes, this script uses CloudFormation to delete all stacks created by this project's provision script"
16 | 	echo ""
17 | 	echo "   When runtime is docker, this script does not remove any resources."
18 |         echo ""	
19 | }
20 | 
21 | 
22 | runtime=$1
23 | if [ "$runtime" == "" ]; then
24 | 	source ./config.properties
25 | fi
26 | 
27 | if [ "$runtime" == "kubernetes" ]; then
28 | 	echo ""
29 | 	echo "Removing EKS cluster and Management instance infrastructure ..."
30 | 	
31 | 	echo ""
32 | 	pushd ./6-remove
33 | 	./stack-delete.sh
34 | 	popd
35 | elif [ "$runtime" == "docker" ]; then
36 |         echo ""
37 |         echo "No additional project asset cleanup is required for runtime: $runtime..."
38 |         echo ""       
39 | else 
40 | 	print_help
41 | fi
42 | 
43 | 


--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | action=$1
 9 | 
10 | print_help() {
11 |     echo ""
12 |     echo "Usage: $0 <arg>"
13 |     echo ""
14 |     echo "   This script is used for building, deploying, executing, and managing performance and benchmark tests"
15 |     echo "   against model servers and models running on the configured container runtime."
16 |     echo ""
17 |     echo "   Available arguments:"
18 |     echo "   build      - build a test container image"
19 |     echo "   push       - push test image to container registry"
20 |     echo "   pull       - pull test image from container registry if available"
21 |     echo "   run [test] - if a test is not specified, the test container starts as a service in idle mode. "
22 |     echo "                A shell can be opened and tests located in /app/tests or others can be executed manually."
23 |     echo "                If a test is specified, then the test will be run as a job "
24 |     echo "                and the container will exit when the job is complete."
25 |     echo "   exec       - open shell in test container"
26 |     echo "   logs       - show logs of the test container"
27 |     echo "   status     - show status of test container"
28 |     echo "   stop       - stop test container"
29 |     echo ""
30 |     echo "       Available tests:"
31 |     echo "       seq - send a request to each model server and model sequentially"
32 |     echo "       rnd - send random requests to models"
33 |     echo "       bmk - run benchmark test clint to measure throughput and latency under load with random requests"
34 |     echo "       bma - run benchmark analysis - aggregate and average stats from logs of all completed benchmark containers"
35 |     echo "             It is required that bmk completes successfully before bma can produce proper statistics"
36 |     echo ""
37 |     echo "       Example:"
38 |     echo "       $0 run seq"
39 |     echo ""
40 | }
41 | 
42 | echo ""
43 | case "$action" in
44 |     "build")
45 |         ./5-test/build.sh
46 |         ;;
47 |     "push")
48 |         ./5-test/push.sh
49 |         ;;
50 |     "pull")
51 |         ./5-test/pull.sh
52 |         ;;
53 |     "exec")
54 |         ./5-test/exec.sh $2
55 |         ;;
56 |     "logs")
57 |         ./5-test/logs.sh $2
58 |         ;;
59 |     "status")
60 |         ./5-test/status.sh
61 |         ;;
62 |     "stop")
63 |         ./5-test/stop.sh $2
64 |         ;;
65 |     "run")
66 |         # $2 here is the name of the test to execute, if none specified, then test container will start in sleep mode
67 |         ./5-test/run.sh $2
68 |         ;;
69 |     *)
70 |         print_help
71 |         ;;
72 | esac
73 | echo ""
74 | 


--------------------------------------------------------------------------------
/trace.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ######################################################################
 4 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
 5 | # SPDX-License-Identifier: MIT-0                                     #
 6 | ######################################################################
 7 | 
 8 | print_help() {
 9 | 	echo ""
10 | 	echo "Usage: $0 "
11 | 	echo ""
12 | 	echo "   This script compiles/traces the model configured in config.properties and saves it locally as a .pt file"
13 | 	echo "   Tracing is supported on CPU, GPU, or Inferentia, however it must be done on a machine that has"
14 | 	echo "   the target processor chip available. Example: tracing a model for Inferentia must be done on an inf1 instance."
15 | 	echo ""
16 | }
17 | 
18 | 
19 | if [ "$1" == "" ]; then 
20 | 	source ./config.properties
21 | 	echo ""
22 | 	echo "Tracing model: $huggingface_model_name ..."
23 | 	
24 | 	dockerfile=./1-build/Dockerfile-base-${processor}
25 | 	echo ""
26 | 	if [ -f $dockerfile ]; then
27 | 		echo "   ... for processor: $processor ..."
28 | 		trace_opts=trace_opts_${processor}
29 |                 CMD="docker run ${!trace_opts} -it --rm -v $(pwd)/2-trace:/app/trace -v $(pwd)/config.properties:/app/config.properties ${registry}${base_image_name}${base_image_tag} bash -c 'cd /app/trace; python --version; python model-tracer.py'"
30 | 	        if [ ! "$verbose" == "false" ]; then
31 |                 	echo -e "\n${CMD}\n"
32 |         	fi
33 |         	eval "${CMD}"	
34 | 	else
35 | 		echo "Processor $processor is not supported. Please ensure the processor setting in config.properties is configured properly"
36 | 		exit 1
37 | 	fi
38 | else
39 | 	print_help
40 | fi
41 | 
42 | 


--------------------------------------------------------------------------------