├── .gitignore ├── Chalice ├── .chalice │ ├── config.json │ └── policy-dev.json ├── .gitignore ├── app.py ├── floppy.jpg ├── invoke.py ├── post.sh └── requirements.txt ├── CloudFormation └── sagemaker.yaml ├── CodePipeline ├── pipeline.yaml └── s3-launchstack.yaml ├── Docs └── sagemaker-pipeline.png ├── LICENSE.md ├── README.md └── Source ├── test.py └── training.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store -------------------------------------------------------------------------------- /Chalice/.chalice/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "app_name": "predictor", 4 | "autogen_policy": false, 5 | "environment_variables": { 6 | "ENDPOINT_NAME": "DEMO-imageclassification-ep--2018-04-23-19-55-49" 7 | }, 8 | "stages": { 9 | "dev": { 10 | "api_gateway_stage": "api" 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /Chalice/.chalice/policy-dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "VisualEditor0", 6 | "Effect": "Allow", 7 | "Action": [ 8 | "logs:CreateLogStream", 9 | "logs:PutLogEvents" 10 | ], 11 | "Resource": "arn:aws:logs:*:*:*" 12 | }, 13 | { 14 | "Sid": "VisualEditor1", 15 | "Effect": "Allow", 16 | "Action": "sagemaker:InvokeEndpoint", 17 | "Resource": "*" 18 | }, 19 | { 20 | "Sid": "VisualEditor2", 21 | "Effect": "Allow", 22 | "Action": "logs:CreateLogGroup", 23 | "Resource": "arn:aws:logs:*:*:*" 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /Chalice/.gitignore: -------------------------------------------------------------------------------- 1 | .chalice/deployments/ 2 | .chalice/venv/ 3 | -------------------------------------------------------------------------------- 
from chalice import Chalice
from chalice import BadRequestError
import base64, os, boto3, ast
import numpy as np

# Default number of categories returned: Caltech-256 has 256 object
# classes plus one "clutter" class, i.e. 257 probabilities per prediction.
DEFAULT_TOPK = 257

app = Chalice(app_name='predictor')
app.debug = True

# Build the SageMaker runtime client once at import time instead of on
# every request -- client construction is comparatively expensive.
_runtime = boto3.Session().client(service_name='sagemaker-runtime', region_name='us-east-1')


@app.route('/', methods=['POST'])
def index():
    """Classify a base64-encoded image via a SageMaker endpoint.

    Expects a JSON body of the form
    ``{"data": "<base64 image>", "topk": <positive int, optional>}``.

    Returns ``{"response": "<stringified list of (category, probability)>"}``
    holding the top-k categories in descending order of probability.
    Raises ``BadRequestError`` (HTTP 400) on malformed input or when the
    ENDPOINT_NAME environment variable is missing.
    """
    body = app.current_request.json_body

    # json_body is None when the request carried no (valid) JSON payload;
    # guard before using `in` so we return 400 rather than crash with 500.
    if not isinstance(body, dict) or 'data' not in body:
        raise BadRequestError('Missing image data')
    if 'ENDPOINT_NAME' not in os.environ:
        raise BadRequestError('Missing endpoint')

    try:
        image = base64.b64decode(body['data'])  # byte array
    except (ValueError, TypeError):
        # Surface a 400 instead of an unhandled 500 on bad base64 input.
        raise BadRequestError('Image data is not valid base64')

    endpoint = os.environ['ENDPOINT_NAME']

    # Client-supplied topk must be a positive integer; anything else would
    # break the log format below and silently distort the result slice.
    topk = body.get('topk', DEFAULT_TOPK)
    if not isinstance(topk, int) or isinstance(topk, bool) or topk < 1:
        raise BadRequestError('topk must be a positive integer')

    print("%s %d" % (endpoint, topk))

    response = _runtime.invoke_endpoint(EndpointName=endpoint, ContentType='application/x-image', Body=image)
    probs = response['Body'].read().decode()  # e.g. "[0.1, 0.9, ...]"

    probs = np.array(ast.literal_eval(probs))  # numpy array of floats

    # Indexes of the top-k probabilities, highest probability first.
    topk_indexes = probs.argsort()[::-1][:topk]

    # Category labels are 1-based, hence i + 1.
    topk_categories = [(i + 1, probs[i]) for i in topk_indexes]

    return {'response': str(topk_categories)}
"""Smoke-test script: send a local image to a deployed SageMaker endpoint
and print the raw prediction payload."""
import boto3

# Local test image and the deployed SageMaker endpoint to exercise.
file_name = 'floppy.jpg'
endpoint_name = 'DEMO-imageclassification-ep--2018-04-23-19-55-49'

# 'sagemaker-runtime' is the current service name; 'runtime.sagemaker' is a
# deprecated alias for the same API (and app.py already uses the new name).
runtime = boto3.Session().client(service_name='sagemaker-runtime', region_name='us-east-1')

with open(file_name, 'rb') as f:
    payload = f.read()

# invoke_endpoint accepts bytes directly; no bytearray conversion is needed.
response = runtime.invoke_endpoint(EndpointName=endpoint_name, ContentType='application/x-image', Body=payload)
print(response['Body'].read())
| Environment: 15 | Description: Current environment we are working in 16 | MaxLength: '64' 17 | MinLength: '1' 18 | Type: String 19 | ParentStackName: 20 | Description: Name of pipeline stack 21 | MaxLength: '64' 22 | MinLength: '1' 23 | Type: String 24 | SageMakerRole: 25 | Description: Name of SageMaker role 26 | MaxLength: '64' 27 | MinLength: '1' 28 | Type: String 29 | Timestamp: 30 | Description: Resource timestamp to prevent naming conflicts 31 | MaxLength: '64' 32 | MinLength: '1' 33 | Type: String 34 | Resources: 35 | Model: 36 | Type: "AWS::SageMaker::Model" 37 | Properties: 38 | ModelName: !Sub ${Environment}-${ParentStackName}-${CommitID}-${Timestamp} 39 | ExecutionRoleArn: !Sub ${SageMakerRole} 40 | PrimaryContainer: 41 | ModelDataUrl: !Sub s3://${BucketName}/${ParentStackName}-${CommitID}-${Timestamp}/output/model.tar.gz 42 | Image: 811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:latest 43 | Endpoint: 44 | Type: "AWS::SageMaker::Endpoint" 45 | DependsOn: EndpointConfig 46 | Properties: 47 | EndpointName: !Sub ${Environment}-${ParentStackName}-${CommitID}-${Timestamp} 48 | EndpointConfigName: !GetAtt EndpointConfig.EndpointConfigName 49 | EndpointConfig: 50 | Type: "AWS::SageMaker::EndpointConfig" 51 | DependsOn: Model 52 | Properties: 53 | EndpointConfigName: !Sub ${Environment}-${ParentStackName}-${CommitID}-${Timestamp} 54 | ProductionVariants: 55 | - ModelName: !GetAtt Model.ModelName 56 | VariantName: AllTraffic 57 | InitialInstanceCount: 1 58 | InstanceType: ml.t2.medium 59 | InitialVariantWeight: 1 -------------------------------------------------------------------------------- /CodePipeline/pipeline.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: "2010-09-09" 2 | Description: > 3 | CloudFormation SageMaker pipeline. 
4 | Parameters: 5 | Email: 6 | Description: The email address where CodePipeline sends pipeline notifications 7 | Type: String 8 | GitHubToken: 9 | NoEcho: true 10 | Description: Secret. It might look something like 9b189a1654643522561f7b3ebd44a1531a4287af OAuthToken with access to Repo. Go to https://github.com/settings/tokens 11 | Type: String 12 | GitHubUser: 13 | Default: stelligent 14 | Description: GitHub UserName 15 | Type: String 16 | Repo: 17 | Default: sagemaker-pipeline 18 | Description: GitHub Repo to pull from. Only the Name. not the URL 19 | Type: String 20 | Branch: 21 | Default: master 22 | Description: Branch to use from Repo. Only the Name. not the URL 23 | Type: String 24 | Metadata: 25 | AWS::CloudFormation::Interface: 26 | ParameterGroups: 27 | - Label: 28 | default: "Stack Settings" 29 | Parameters: 30 | - Email 31 | - GitHubToken 32 | - GitHubUser 33 | - Repo 34 | - Branch 35 | 36 | Resources: 37 | ArtifactStoreBucket: 38 | Type: AWS::S3::Bucket 39 | Properties: 40 | BucketName: !Sub ${AWS::StackName}-pipeline-artifact-store 41 | VersioningConfiguration: 42 | Status: Enabled 43 | 44 | DataBucket: 45 | Type: AWS::S3::Bucket 46 | Properties: 47 | BucketName: !Sub ${AWS::StackName}-pipeline-data 48 | VersioningConfiguration: 49 | Status: Enabled 50 | 51 | CodePipelineSNSTopic: 52 | Type: AWS::SNS::Topic 53 | Properties: 54 | TopicName: !Sub ${AWS::StackName}-pipeline-topic 55 | Subscription: 56 | - Endpoint: !Ref Email 57 | Protocol: email 58 | 59 | 60 | CFNNagProject: 61 | Type: AWS::CodeBuild::Project 62 | Properties: 63 | Name: !Sub ${AWS::StackName}-pipeline-cfnnag 64 | Description: Lints CloudFormation 65 | ServiceRole: !GetAtt CodeBuildRole.Arn 66 | Artifacts: 67 | Type: CODEPIPELINE 68 | Environment: 69 | Type: LINUX_CONTAINER 70 | ComputeType: BUILD_GENERAL1_SMALL 71 | Image: aws/codebuild/eb-ruby-2.3-amazonlinux-64:2.1.6 72 | Source: 73 | Type: CODEPIPELINE 74 | BuildSpec: !Sub | 75 | version: 0.2 76 | phases: 77 | install: 78 | 
commands: 79 | - gem install cfn-nag 80 | build: 81 | commands: 82 | - cfn_nag_scan --input-path CloudFormation/*.yaml 83 | TimeoutInMinutes: 30 84 | 85 | ModelTrainingProject: 86 | Type: AWS::CodeBuild::Project 87 | Properties: 88 | Name: !Sub ${AWS::StackName}-pipeline-modeltraining 89 | Description: Trains machine learning model using SageMaker 90 | ServiceRole: !GetAtt CodeBuildRole.Arn 91 | Artifacts: 92 | Type: CODEPIPELINE 93 | Environment: 94 | Type: LINUX_CONTAINER 95 | ComputeType: BUILD_GENERAL1_SMALL 96 | Image: aws/codebuild/eb-python-3.4-amazonlinux-64:2.1.6 97 | Source: 98 | Type: CODEPIPELINE 99 | BuildSpec: !Sub | 100 | version: 0.2 101 | phases: 102 | install: 103 | commands: 104 | - echo "Installing wget and boto3" 105 | - pip3 install wget 106 | - pip3 install boto3 107 | - printenv 108 | build: 109 | commands: 110 | - echo "Running training.py" 111 | - python3 Source/training.py "${SagemakerRole.Arn}" "${DataBucket}" "${AWS::StackName}" $CODEBUILD_RESOLVED_SOURCE_VERSION 112 | post_build: 113 | commands: 114 | - echo "Cleaning" 115 | - rm caltech* 116 | artifacts: 117 | files: 118 | - '**/*' 119 | TimeoutInMinutes: 30 120 | 121 | TestEndpointProjectQA: 122 | Type: AWS::CodeBuild::Project 123 | Properties: 124 | Name: !Sub ${AWS::StackName}-pipeline-test-qa 125 | Description: Checks if endpoint is working 126 | ServiceRole: !GetAtt CodeBuildRole.Arn 127 | Artifacts: 128 | Type: CODEPIPELINE 129 | Environment: 130 | Type: LINUX_CONTAINER 131 | ComputeType: BUILD_GENERAL1_SMALL 132 | Image: aws/codebuild/eb-python-3.4-amazonlinux-64:2.1.6 133 | Source: 134 | Type: CODEPIPELINE 135 | BuildSpec: !Sub | 136 | version: 0.2 137 | phases: 138 | install: 139 | commands: 140 | - echo "Installing wget and boto3" 141 | - pip3 install wget 142 | - pip3 install boto3 143 | - pip3 install numpy 144 | build: 145 | commands: 146 | - echo "Running test.py" 147 | - python3 Source/test.py "qa-${AWS::StackName}" "CloudFormation/configuration_qa.json" 148 | 
TimeoutInMinutes: 30 149 | 150 | TestEndpointProjectProd: 151 | Type: AWS::CodeBuild::Project 152 | Properties: 153 | Name: !Sub ${AWS::StackName}-pipeline-test-prod 154 | Description: Checks if endpoint is working 155 | ServiceRole: !GetAtt CodeBuildRole.Arn 156 | Artifacts: 157 | Type: CODEPIPELINE 158 | Environment: 159 | Type: LINUX_CONTAINER 160 | ComputeType: BUILD_GENERAL1_SMALL 161 | Image: aws/codebuild/eb-python-3.4-amazonlinux-64:2.1.6 162 | Source: 163 | Type: CODEPIPELINE 164 | BuildSpec: !Sub | 165 | version: 0.2 166 | phases: 167 | install: 168 | commands: 169 | - echo "Installing wget and boto3" 170 | - pip3 install wget 171 | - pip3 install boto3 172 | - pip3 install numpy 173 | build: 174 | commands: 175 | - echo "Running test.py" 176 | - python3 Source/test.py "prod-${AWS::StackName}" "CloudFormation/configuration_prod.json" 177 | TimeoutInMinutes: 30 178 | 179 | Pipeline: 180 | Type: AWS::CodePipeline::Pipeline 181 | Properties: 182 | ArtifactStore: 183 | Location: !Ref 'ArtifactStoreBucket' 184 | Type: S3 185 | DisableInboundStageTransitions: [] 186 | Name: !Ref 'AWS::StackName' 187 | RoleArn: !GetAtt [PipelineRole, Arn] 188 | Stages: 189 | - Name: Source 190 | Actions: 191 | - Name: Source 192 | ActionTypeId: 193 | Category: Source 194 | Owner: ThirdParty 195 | Provider: GitHub 196 | Version: '1' 197 | Configuration: 198 | Owner: !Ref 'GitHubUser' 199 | Repo: !Ref 'Repo' 200 | Branch: !Ref 'Branch' 201 | OAuthToken: !Ref 'GitHubToken' 202 | OutputArtifacts: 203 | - Name: src 204 | RunOrder: '1' 205 | - Name: Build_and_Train 206 | Actions: 207 | - Name: cfn-nag 208 | ActionTypeId: 209 | Category: Test 210 | Owner: AWS 211 | Provider: CodeBuild 212 | Version: '1' 213 | Configuration: 214 | ProjectName: !Ref 'CFNNagProject' 215 | InputArtifacts: 216 | - Name: src 217 | RunOrder: '1' 218 | - Name: ModelTraining 219 | ActionTypeId: 220 | Category: Build 221 | Owner: AWS 222 | Provider: CodeBuild 223 | Version: '1' 224 | Configuration: 225 | 
ProjectName: !Ref 'ModelTrainingProject' 226 | InputArtifacts: 227 | - Name: src 228 | OutputArtifacts: 229 | - Name: bld 230 | RunOrder: '2' 231 | - Name: QA 232 | Actions: 233 | - Name: LaunchEndpoint 234 | ActionTypeId: 235 | Category: Deploy 236 | Owner: AWS 237 | Provider: CloudFormation 238 | Version: '1' 239 | InputArtifacts: 240 | - Name: bld 241 | Configuration: 242 | ActionMode: REPLACE_ON_FAILURE 243 | RoleArn: !GetAtt [CFNRole, Arn] 244 | StackName: !Sub qa-${AWS::StackName} 245 | TemplatePath: !Sub "bld::CloudFormation/sagemaker.yaml" 246 | TemplateConfiguration: "bld::CloudFormation/configuration_qa.json" 247 | RunOrder: '1' 248 | - Name: TestEndpoint 249 | ActionTypeId: 250 | Category: Test 251 | Owner: AWS 252 | Provider: CodeBuild 253 | Version: '1' 254 | Configuration: 255 | ProjectName: !Ref 'TestEndpointProjectQA' 256 | InputArtifacts: 257 | - Name: bld 258 | RunOrder: '2' 259 | - Name: Production 260 | Actions: 261 | - Name: ApprovalGate 262 | ActionTypeId: 263 | Category: Approval 264 | Owner: AWS 265 | Provider: Manual 266 | Version: '1' 267 | Configuration: 268 | NotificationArn: !Ref CodePipelineSNSTopic 269 | CustomData: !Sub 'Do you want to push your changes to production?' 
270 | RunOrder: '1' 271 | - Name: LaunchEndpoint 272 | ActionTypeId: 273 | Category: Deploy 274 | Owner: AWS 275 | Provider: CloudFormation 276 | Version: '1' 277 | InputArtifacts: 278 | - Name: bld 279 | Configuration: 280 | ActionMode: REPLACE_ON_FAILURE 281 | RoleArn: !GetAtt [CFNRole, Arn] 282 | StackName: !Sub prod-${AWS::StackName} 283 | TemplatePath: !Sub "bld::CloudFormation/sagemaker.yaml" 284 | TemplateConfiguration: "bld::CloudFormation/configuration_prod.json" 285 | RunOrder: '2' 286 | - Name: TestEndpoint 287 | ActionTypeId: 288 | Category: Test 289 | Owner: AWS 290 | Provider: CodeBuild 291 | Version: '1' 292 | Configuration: 293 | ProjectName: !Ref 'TestEndpointProjectProd' 294 | InputArtifacts: 295 | - Name: bld 296 | RunOrder: '3' 297 | 298 | CFNRole: 299 | Type: AWS::IAM::Role 300 | Properties: 301 | RoleName: !Sub ${AWS::StackName}-cfn-role 302 | AssumeRolePolicyDocument: 303 | Statement: 304 | - Action: ['sts:AssumeRole'] 305 | Effect: Allow 306 | Principal: 307 | Service: [cloudformation.amazonaws.com] 308 | Version: '2012-10-17' 309 | Path: / 310 | Policies: 311 | - PolicyName: CloudFormationRole 312 | PolicyDocument: 313 | Version: '2012-10-17' 314 | Statement: 315 | - Action: 316 | - sagemaker:* 317 | - iam:PassRole 318 | - s3:* 319 | Effect: Allow 320 | Resource: '*' 321 | 322 | CodeBuildRole: 323 | Type: AWS::IAM::Role 324 | Properties: 325 | RoleName: !Sub ${AWS::StackName}-codebuild-role 326 | AssumeRolePolicyDocument: 327 | Statement: 328 | - Action: ['sts:AssumeRole'] 329 | Effect: Allow 330 | Principal: 331 | Service: [codebuild.amazonaws.com] 332 | Version: '2012-10-17' 333 | Path: / 334 | Policies: 335 | - PolicyName: UploadAccess 336 | PolicyDocument: 337 | Version: '2012-10-17' 338 | Statement: 339 | - Action: 340 | - codepipeline:* 341 | - sagemaker:* 342 | - s3:* 343 | - logs:CreateLogGroup 344 | - logs:CreateLogStream 345 | - logs:PutLogEvents 346 | Effect: Allow 347 | Resource: '*' 348 | - Action: 349 | - iam:PassRole 350 | 
Effect: Allow 351 | Resource: !Sub arn:aws:iam::${AWS::AccountId}:role/${AWS::StackName}-sagemaker-role 352 | 353 | PipelineRole: 354 | Type: AWS::IAM::Role 355 | Properties: 356 | RoleName: !Sub ${AWS::StackName}-pipeline-role 357 | AssumeRolePolicyDocument: 358 | Statement: 359 | - Action: ['sts:AssumeRole'] 360 | Effect: Allow 361 | Principal: 362 | Service: [codepipeline.amazonaws.com] 363 | Version: '2012-10-17' 364 | Path: / 365 | Policies: 366 | - PolicyName: CodePipelineAccess 367 | PolicyDocument: 368 | Version: '2012-10-17' 369 | Statement: 370 | - Action: 371 | - s3:* 372 | - codebuild:* 373 | - cloudformation:CreateStack 374 | - cloudformation:DescribeStacks 375 | - cloudformation:DeleteStack 376 | - cloudformation:UpdateStack 377 | - cloudformation:CreateChangeSet 378 | - cloudformation:ExecuteChangeSet 379 | - cloudformation:DeleteChangeSet 380 | - cloudformation:DescribeChangeSet 381 | - cloudformation:SetStackPolicy 382 | - iam:PassRole 383 | - sns:Publish 384 | Effect: Allow 385 | Resource: '*' 386 | 387 | SagemakerRole: 388 | Type: AWS::IAM::Role 389 | Properties: 390 | RoleName: !Sub ${AWS::StackName}-sagemaker-role 391 | AssumeRolePolicyDocument: 392 | Statement: 393 | - Action: ['sts:AssumeRole'] 394 | Effect: Allow 395 | Principal: 396 | Service: [sagemaker.amazonaws.com] 397 | Version: '2012-10-17' 398 | Path: / 399 | ManagedPolicyArns: 400 | - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess 401 | Policies: 402 | - PolicyName: S3Access 403 | PolicyDocument: 404 | Version: '2012-10-17' 405 | Statement: 406 | - Action: 407 | - s3:GetObject 408 | - s3:PutObject 409 | - s3:DeleteObject 410 | - s3:ListBucket 411 | Effect: Allow 412 | Resource: arn:aws:s3:::* 413 | 414 | Outputs: 415 | PipelineUrl: 416 | Value: !Sub https://console.aws.amazon.com/codepipeline/home?region=${AWS::Region}#/view/${Pipeline} 417 | Description: CodePipeline URL 418 | -------------------------------------------------------------------------------- 
/CodePipeline/s3-launchstack.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: "2010-09-09" 2 | Description: > 3 | Creates an S3 bucket that hosts GitHub from stelligent/sagemaker-pipeline to use for Launch Stack. 4 | Parameters: 5 | Email: 6 | Description: The email address where CodePipeline sends pipeline notifications 7 | Type: String 8 | GitHubToken: 9 | NoEcho: true 10 | Description: Secret. It might look something like 9b189a1654643522561f7b3ebd44a1531a4287af OAuthToken with access to Repo. Go to https://github.com/settings/tokens 11 | Type: String 12 | GitHubUser: 13 | Default: stelligent 14 | Description: GitHub UserName 15 | Type: String 16 | Repo: 17 | Default: sagemaker-pipeline 18 | Description: GitHub Repo to pull from. Only the Name. not the URL 19 | Type: String 20 | Branch: 21 | Default: master 22 | Description: Branch to use from Repo. Only the Name. not the URL 23 | Type: String 24 | Metadata: 25 | AWS::CloudFormation::Interface: 26 | ParameterGroups: 27 | - Label: 28 | default: "Stack Settings" 29 | Parameters: 30 | - Email 31 | - GitHubToken 32 | - GitHubUser 33 | - Repo 34 | - Branch 35 | 36 | Resources: 37 | ArtifactStoreBucket: 38 | Type: AWS::S3::Bucket 39 | Properties: 40 | BucketName: !Sub ${AWS::StackName}-pipeline-artifact-store 41 | VersioningConfiguration: 42 | Status: Enabled 43 | 44 | SiteBucket: 45 | Type: AWS::S3::Bucket 46 | DeletionPolicy: Delete 47 | Properties: 48 | AccessControl: PublicRead 49 | BucketName: !Sub ${AWS::StackName}-src 50 | WebsiteConfiguration: 51 | IndexDocument: index.html 52 | 53 | CodePipelineSNSTopic: 54 | Type: AWS::SNS::Topic 55 | Properties: 56 | TopicName: !Sub ${AWS::StackName}-pipeline-topic 57 | Subscription: 58 | - Endpoint: !Ref Email 59 | Protocol: email 60 | 61 | CodeBuildDeploySite: 62 | Type: AWS::CodeBuild::Project 63 | DependsOn: CodeBuildRole 64 | Properties: 65 | Name: !Sub ${AWS::StackName}-DeploySite 66 | Description: 
Deploy site to S3 67 | ServiceRole: !GetAtt CodeBuildRole.Arn 68 | Artifacts: 69 | Type: CODEPIPELINE 70 | Environment: 71 | Type: LINUX_CONTAINER 72 | ComputeType: BUILD_GENERAL1_SMALL 73 | Image: aws/codebuild/eb-ruby-2.3-amazonlinux-64:2.1.6 74 | Source: 75 | Type: CODEPIPELINE 76 | BuildSpec: !Sub | 77 | version: 0.1 78 | phases: 79 | post_build: 80 | commands: 81 | - aws s3 cp --recursive --acl public-read ./ s3://${AWS::StackName}-src/ 82 | artifacts: 83 | type: zip 84 | files: 85 | - '**/*' 86 | 87 | TimeoutInMinutes: 10 88 | 89 | 90 | Pipeline: 91 | Type: AWS::CodePipeline::Pipeline 92 | Properties: 93 | ArtifactStore: 94 | Location: !Ref 'ArtifactStoreBucket' 95 | Type: S3 96 | DisableInboundStageTransitions: [] 97 | Name: !Ref 'AWS::StackName' 98 | RoleArn: !GetAtt [PipelineRole, Arn] 99 | Stages: 100 | - Name: Source 101 | Actions: 102 | - Name: Source 103 | ActionTypeId: 104 | Category: Source 105 | Owner: ThirdParty 106 | Provider: GitHub 107 | Version: '1' 108 | Configuration: 109 | Owner: !Ref 'GitHubUser' 110 | Repo: !Ref 'Repo' 111 | Branch: !Ref 'Branch' 112 | OAuthToken: !Ref 'GitHubToken' 113 | OutputArtifacts: 114 | - Name: src 115 | RunOrder: '1' 116 | - Name: Build_and_Train 117 | Actions: 118 | - Name: CopyToS3 119 | ActionTypeId: 120 | Category: Build 121 | Owner: AWS 122 | Provider: CodeBuild 123 | Version: '1' 124 | Configuration: 125 | ProjectName: !Ref CodeBuildDeploySite 126 | InputArtifacts: 127 | - Name: src 128 | RunOrder: '1' 129 | 130 | CFNRole: 131 | Type: AWS::IAM::Role 132 | Properties: 133 | RoleName: !Sub ${AWS::StackName}-cfn-role 134 | AssumeRolePolicyDocument: 135 | Statement: 136 | - Action: ['sts:AssumeRole'] 137 | Effect: Allow 138 | Principal: 139 | Service: [cloudformation.amazonaws.com] 140 | Version: '2012-10-17' 141 | Path: / 142 | Policies: 143 | - PolicyName: CloudFormationRole 144 | PolicyDocument: 145 | Version: '2012-10-17' 146 | Statement: 147 | - Action: 148 | - sagemaker:* 149 | - iam:PassRole 150 | - s3:* 
151 | Effect: Allow 152 | Resource: '*' 153 | 154 | CodeBuildRole: 155 | Type: AWS::IAM::Role 156 | Properties: 157 | RoleName: !Sub ${AWS::StackName}-codebuild-role 158 | AssumeRolePolicyDocument: 159 | Statement: 160 | - Action: ['sts:AssumeRole'] 161 | Effect: Allow 162 | Principal: 163 | Service: [codebuild.amazonaws.com] 164 | Version: '2012-10-17' 165 | Path: / 166 | Policies: 167 | - PolicyName: UploadAccess 168 | PolicyDocument: 169 | Version: '2012-10-17' 170 | Statement: 171 | - Action: 172 | - codepipeline:* 173 | - sagemaker:* 174 | - s3:* 175 | - logs:CreateLogGroup 176 | - logs:CreateLogStream 177 | - logs:PutLogEvents 178 | Effect: Allow 179 | Resource: '*' 180 | - Action: 181 | - iam:PassRole 182 | Effect: Allow 183 | Resource: !Sub arn:aws:iam::${AWS::AccountId}:role/${AWS::StackName}-sagemaker-role 184 | 185 | PipelineRole: 186 | Type: AWS::IAM::Role 187 | Properties: 188 | RoleName: !Sub ${AWS::StackName}-pipeline-role 189 | AssumeRolePolicyDocument: 190 | Statement: 191 | - Action: ['sts:AssumeRole'] 192 | Effect: Allow 193 | Principal: 194 | Service: [codepipeline.amazonaws.com] 195 | Version: '2012-10-17' 196 | Path: / 197 | Policies: 198 | - PolicyName: CodePipelineAccess 199 | PolicyDocument: 200 | Version: '2012-10-17' 201 | Statement: 202 | - Action: 203 | - s3:* 204 | - codebuild:* 205 | - cloudformation:CreateStack 206 | - cloudformation:DescribeStacks 207 | - cloudformation:DeleteStack 208 | - cloudformation:UpdateStack 209 | - cloudformation:CreateChangeSet 210 | - cloudformation:ExecuteChangeSet 211 | - cloudformation:DeleteChangeSet 212 | - cloudformation:DescribeChangeSet 213 | - cloudformation:SetStackPolicy 214 | - iam:PassRole 215 | - sns:Publish 216 | Effect: Allow 217 | Resource: '*' 218 | 219 | Outputs: 220 | PipelineUrl: 221 | Value: !Sub https://console.aws.amazon.com/codepipeline/home?region=${AWS::Region}#/view/${Pipeline} 222 | Description: CodePipeline URL 223 | LaunchStackUrl: 224 | Value: !Sub 
https://console.aws.amazon.com/cloudformation/home?region=us-east-1#cstack=sn~sagemaker-stack|turl~https://s3.amazonaws.com/${AWS::StackName}-src/CodePipeline/pipeline.yaml 225 | Description: Location of CloudFormation Template used by Launch Stack for us-east-1 226 | LaunchStackMarkdownCode: 227 | Value: !Sub (https://s3.amazonaws.com/cloudformation-examples/cloudformation-launch-stack.png)](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#cstack=sn~sagemaker-stack|turl~https://s3.amazonaws.com/${AWS::StackName}-src/CodePipeline/pipeline.yaml) 228 | Description: Code snippet written in Markdown for launching a stack -------------------------------------------------------------------------------- /Docs/sagemaker-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stelligent/sagemaker-pipeline/a1875d832b8cd90ab54b6f44d765cf85073cabd9/Docs/sagemaker-pipeline.png -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2020 Stelligent Systems LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SageMaker Pipeline 2 | ## Introduction 3 | 4 | This is a sample solution using a SageMaker pipeline. This implementation could be useful for any organization trying to automate their use of Machine Learning. With an implementation like this, any inference is easy, and can simply be queried through an endpoint to receive the output of the model’s inference, tests can be automatically performed for QA, and ML code can be quickly updated to match needs. 5 | 6 | 7 | ## Prerequisites 8 | - **AWS account** – Follow these instructions to create an AWS Account: [Create an AWS Account](http://docs.aws.amazon.com/AmazonSimpleDB/latest/DeveloperGuide/AboutAWSAccounts.html) 9 | - **GitHub OAuth Token** – Follow these instructions to create an OAuth Token: [Create a GitHub OAuth Token](https://github.com/stelligent/devops-essentials/wiki/Prerequisites#create-an-oauth-token-in-github) 10 | 11 | 12 | ## Architecture and Implementation 13 | ### Architecture Diagram 14 | ![app-overview](Docs/sagemaker-pipeline.png) 15 | 16 | 17 | 18 | ### Components Details 19 | - [**AWS CloudFormation**](https://aws.amazon.com/cloudformation/) – This solution uses the CloudFormation Template language, in either YAML or JSON, to create each resource. 
20 | - [**AWS CodeBuild**](https://aws.amazon.com/codebuild/) – This solution uses CodeBuild to build the source code from GitHub 21 | - [**AWS CodePipeline**](https://aws.amazon.com/codepipeline/) – CodePipeline has various stages defined in CloudFormation which step through which actions must be taken in which order to go from source code to creation of the production endpoint. 22 | - [**AWS EC2**](https://aws.amazon.com/ec2/) – EC2 Instances are created in order to train the model as well as host the model to be accessed via and endpoint. 23 | - [**AWS SageMaker**](https://aws.amazon.com/sagemaker/) – This solution uses SageMaker to train the model to be used and host the model at an endpoint, where it can be accessed via HTTP/HTTPS requests 24 | - [**AWS IAM**](https://aws.amazon.com/iam/) – Separate Identity and Access Management (IAM) Roles are created for the pipeline, CodeBuild, and CloudFormation. 25 | - [**AWS SNS**](https://aws.amazon.com/sns/) – This solution uses a Simple Notification Service (SNS) Topic in order to approve movement into production after testing. 26 | - [**AWS S3**](https://aws.amazon.com/s3/) – Artifacts created throughout the pipeline as well as the data for the model is stored in an Simple Storage Service (S3) Bucket. 27 | 28 | 29 | ## CloudFormation Templates resources 30 | - **AWS CloudFormation** – [AWS::CloudFormation::Interface](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-cloudformation-interface.html) sets parameter group metadata. 31 | - **AWS CodeBuild** – [AWS::CodeBuild::Project](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-codebuild-project.html) uploads the project source code stored in GitHub to an S3 bucket. 
32 | - **AWS CodePipeline** – [AWS::CodePipeline::Pipeline](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-codepipeline-pipeline.html) – Easiest to create the Pipeline in the AWS Console, then use the get-pipeline CLI command to get the configuration in JSON to be placed into the CloudFormation Template. 33 | - **AWS EC2** – Instance type specified in [AWS::SageMaker::EndpointConfig](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-sagemaker-endpointconfig.html) 34 | - **AWS SageMaker** – [AWS::SageMaker::Model](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-sagemaker-model.html) – here the algorithm to be used by SageMaker is specified, as well as the source code to be submitted to once the model has been created; 35 | 36 | [AWS::SageMaker::Endpoint](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-sagemaker-endpoint.html) – this is the endpoint from which you can make requests; 37 | 38 | [AWS::SageMaker::EndpointConfig](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-sagemaker-endpointconfig.html)– here we specify key configurations for the endpoint, including the type of EC2 instance used, and can specify if we would like multiple endpoint models, e.g. for A-B testing, and similarly how much/what traffic we will direct to this endpoint. 39 | - **AWS IAM** – [AWS::IAM::Role](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-iam-role.html) – Make sure to specify only the necessary permissions for each role. 40 | - **AWS SNS** – [AWS::SNS::Topic](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-sns-topic.html) – sends a confirmation to the email specified as a parameter. 
41 | - **AWS S3** – [AWS::S3::Bucket](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-s3-bucket.html) – stores the model data and necessary artifacts 42 | 43 | 44 | ## Costs 45 | This section outlines cost considerations for running a SageMaker Pipeline. Running the default pipeline for 24 hours will cost roughly $1.76 including one training run, or $1.56 per day once the model is already trained. 46 | - **CloudFormation** – No Additional Cost 47 | - **CodeBuild** – Charges per minute used. First 100 minutes each month come at no charge. For information on pricing beyond the first 100 minutes, see [AWS CodeBuild Pricing](https://aws.amazon.com/codebuild/pricing/). 48 | - **CodePipeline** – "With AWS CodePipeline, there are no upfront fees or commitments. You pay only for what you use. AWS CodePipeline costs $1 per active pipeline* per month. To encourage experimentation, pipelines are free for the first 30 days after creation. An active pipeline is a pipeline that has existed for more than 30 days and has at least one code change that runs through it during the month. There is no charge for pipelines that have no new code changes running through them during the month. An active pipeline is not prorated for partial months." More can be found at [AWS CodePipeline Pricing](https://aws.amazon.com/codepipeline/pricing/). 49 | - **IAM** – No Additional Cost 50 | - **SageMaker** – Prices vary based on EC2 instance usage for Building in Notebook Instances, Model Hosting, and Model Training; each charged per hour of use. This example currently uses the `ml.p2.xlarge` for training and the `ml.t2.medium` instance for hosting. The cost for *training* with this instance is $1.26 an hour and $0.065 per hour for *hosting* with this instance. For more information, see [Amazon SageMaker Pricing](https://aws.amazon.com/sagemaker/pricing/). 51 | - **S3** – Prices Vary, depends on size of model/artifacts stored. 
For first 50 TB each month, costs only $0.023 per GB stored. For more information, see [Amazon S3 Pricing](https://aws.amazon.com/s3/pricing/). 52 | - **SNS** – Realistically No Cost – Free for first 1 million SNS requests and for first 1,000 Email Deliveries each month. 53 | 54 | 55 | ## Deployment Steps 56 | #### Step 1. Prepare an AWS Account 57 | Create your AWS account at [http://aws.amazon.com](http://aws.amazon.com) by following the instructions on the site. 58 | 59 | #### Step 2. Create a GitHub OAuth Token 60 | Create your token at [GitHub's Token Settings](https://github.com/settings/tokens), making sure to select scopes of **repo** and **admin:repo_hook**. After clicking **Generate Token**, make sure to save your OAuth Token in a secure location. The token will not be shown again. 61 | 62 | #### Step 3. Launch the Stack 63 | Click on the **Launch Stack** button below to launch the CloudFormation Stack to set up the SageMaker Pipeline. Before Launching, ensure all architecture, configuration, etc. is set as desired. 64 | 65 | **Stack Assumptions:** The pipeline stack assumes the following conditions, and may not function properly if they are not met: 66 | 1. The pipeline stack name is less than 20 characters long 67 | 2. The stack is launched in the US East (N. Virginia) Region (`us-east-1`). 68 | 69 | *NOTE: The URL for Launch Stack is automatically generated through a pipeline in one of Stelligent's AWS accounts.* 70 | 71 | [![Launch CFN stack](https://s3.amazonaws.com/cloudformation-examples/cloudformation-launch-stack.png)](https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/create/template?stackName=sagemaker-stack&templateURL=https://s3.amazonaws.com/sagemaker-pipeline-src/CodePipeline/pipeline.yaml) 72 | 73 | You can launch the same stack using the AWS CLI. 
Here's an example: 74 | 75 | `aws cloudformation create-stack --stack-name YOURSTACKNAME --template-body file:///home/ec2-user/environment/sagemaker-pipeline/CodePipeline/pipeline.yaml --parameters ParameterKey=Email,ParameterValue="youremailaddress@example.com" ParameterKey=GitHubToken,ParameterValue="YOURGITHUBTOKEN12345ab1234234" --capabilities CAPABILITY_NAMED_IAM` 76 | 77 | 78 | 79 | 80 | 81 | 82 | #### Step 4. Test and Approve the Deployment 83 | Once the deployment has passed automated QA testing, before proceeding with the production stage it sends an email notification (via SNS) for manual approval. At this time, you may run any additional tests on the endpoint before approving it to be deployed into production. 84 | 85 | #### Approximate Times: 86 | * **Full Pipeline**: 30 minutes 87 | * **Model Training:** 10 minutes 88 | * **Launch Endpoint:** 4 minutes 89 | 90 | ## Parameters 91 | 100 | 101 | 102 | Parameters | Description 103 | ---------- | ----------- 104 | Email | The email where CodePipeline will send SNS notifications. 105 | GitHubToken | A Secret OAuthToken with access to the GitHub repo. 106 | GitHubUser | GitHub Username. 107 | Repo | The name (not URL) of the GitHub repository to pull from. 108 | Branch | The name (not URL) of the GitHub repository’s branch to use. 109 | 110 | ## Using AWS Chalice to create an API in front of your SageMaker Endpoint 111 | 112 | To launch an endpoint using the provided Chalice project, all you have to do is run `chalice deploy` from the Chalice directory in this repo. One piece that will change depending on your endpoint is the `ENDPOINT_NAME` value in `.chalice/config.json`. 113 | 114 | Note this will create resources that you will manually have to delete. It will create an API Gateway, an IAM role, and a Lambda function.
115 | 116 | For more details checkout this blog, it's also the source for this code: https://medium.com/@julsimon/using-chalice-to-serve-sagemaker-predictions-a2015c02b033 117 | 118 | ## Summary 119 | After following the deployment steps, your pipeline should be up and running with a production SageMaker Endpoint that you can query to make inferences with your newly trained model! 120 | 121 | 123 | -------------------------------------------------------------------------------- /Source/test.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import wget 3 | import json 4 | import numpy as np 5 | import sys 6 | import time 7 | 8 | start = time.time() 9 | 10 | endpoint_name = sys.argv[1] 11 | configuration_file = sys.argv[2] 12 | 13 | with open(configuration_file) as f: 14 | data = json.load(f) 15 | 16 | commit_id = data["Parameters"]["CommitID"] 17 | timestamp = data["Parameters"]["Timestamp"] 18 | 19 | 20 | endpoint_name = endpoint_name + "-" + commit_id + "-" + timestamp 21 | 22 | runtime = boto3.client('runtime.sagemaker') 23 | 24 | wget.download("http://www.vision.caltech.edu/Image_Datasets/Caltech256/images/008.bathtub/008_0007.jpg", "test.jpg") 25 | 26 | 27 | with open("test.jpg", 'rb') as f: 28 | payload = f.read() 29 | payload = bytearray(payload) 30 | response = runtime.invoke_endpoint(EndpointName=endpoint_name, 31 | ContentType='application/x-image', 32 | Body=payload) 33 | result = response['Body'].read() 34 | # result will be in json format and convert it to ndarray 35 | result = json.loads(result.decode('utf-8')) 36 | # the result will output the probabilities for all classes 37 | # find the class with maximum probability and print the class index 38 | index = np.argmax(result) 39 | object_categories = ['ak47', 'american-flag', 'backpack', 'baseball-bat', 'baseball-glove', 'basketball-hoop', 'bat', 'bathtub', 'bear', 'beer-mug', 'billiards', 'binoculars', 'birdbath', 'blimp', 'bonsai-101', 
'boom-box', 'bowling-ball', 'bowling-pin', 'boxing-glove', 'brain-101', 'breadmaker', 'buddha-101', 'bulldozer', 'butterfly', 'cactus', 'cake', 'calculator', 'camel', 'cannon', 'canoe', 'car-tire', 'cartman', 'cd', 'centipede', 'cereal-box', 'chandelier-101', 'chess-board', 'chimp', 'chopsticks', 'cockroach', 'coffee-mug', 'coffin', 'coin', 'comet', 'computer-keyboard', 'computer-monitor', 'computer-mouse', 'conch', 'cormorant', 'covered-wagon', 'cowboy-hat', 'crab-101', 'desk-globe', 'diamond-ring', 'dice', 'dog', 'dolphin-101', 'doorknob', 'drinking-straw', 'duck', 'dumb-bell', 'eiffel-tower', 'electric-guitar-101', 'elephant-101', 'elk', 'ewer-101', 'eyeglasses', 'fern', 'fighter-jet', 'fire-extinguisher', 'fire-hydrant', 'fire-truck', 'fireworks', 'flashlight', 'floppy-disk', 'football-helmet', 'french-horn', 'fried-egg', 'frisbee', 'frog', 'frying-pan', 'galaxy', 'gas-pump', 'giraffe', 'goat', 'golden-gate-bridge', 'goldfish', 'golf-ball', 'goose', 'gorilla', 'grand-piano-101', 'grapes', 'grasshopper', 'guitar-pick', 'hamburger', 'hammock', 'harmonica', 'harp', 'harpsichord', 'hawksbill-101', 'head-phones', 'helicopter-101', 'hibiscus', 'homer-simpson', 'horse', 'horseshoe-crab', 'hot-air-balloon', 'hot-dog', 'hot-tub', 'hourglass', 'house-fly', 'human-skeleton', 'hummingbird', 'ibis-101', 'ice-cream-cone', 'iguana', 'ipod', 'iris', 'jesus-christ', 'joy-stick', 'kangaroo-101', 'kayak', 'ketch-101', 'killer-whale', 'knife', 'ladder', 'laptop-101', 'lathe', 'leopards-101', 'license-plate', 'lightbulb', 'light-house', 'lightning', 'llama-101', 'mailbox', 'mandolin', 'mars', 'mattress', 'megaphone', 'menorah-101', 'microscope', 'microwave', 'minaret', 'minotaur', 'motorbikes-101', 'mountain-bike', 'mushroom', 'mussels', 'necktie', 'octopus', 'ostrich', 'owl', 'palm-pilot', 'palm-tree', 'paperclip', 'paper-shredder', 'pci-card', 'penguin', 'people', 'pez-dispenser', 'photocopier', 'picnic-table', 'playing-card', 'porcupine', 'pram', 'praying-mantis', 'pyramid', 
'raccoon', 'radio-telescope', 'rainbow', 'refrigerator', 'revolver-101', 'rifle', 'rotary-phone', 'roulette-wheel', 'saddle', 'saturn', 'school-bus', 'scorpion-101', 'screwdriver', 'segway', 'self-propelled-lawn-mower', 'sextant', 'sheet-music', 'skateboard', 'skunk', 'skyscraper', 'smokestack', 'snail', 'snake', 'sneaker', 'snowmobile', 'soccer-ball', 'socks', 'soda-can', 'spaghetti', 'speed-boat', 'spider', 'spoon', 'stained-glass', 'starfish-101', 'steering-wheel', 'stirrups', 'sunflower-101', 'superman', 'sushi', 'swan', 'swiss-army-knife', 'sword', 'syringe', 'tambourine', 'teapot', 'teddy-bear', 'teepee', 'telephone-box', 'tennis-ball', 'tennis-court', 'tennis-racket', 'theodolite', 'toaster', 'tomato', 'tombstone', 'top-hat', 'touring-bike', 'tower-pisa', 'traffic-light', 'treadmill', 'triceratops', 'tricycle', 'trilobite-101', 'tripod', 't-shirt', 'tuning-fork', 'tweezer', 'umbrella-101', 'unicorn', 'vcr', 'video-projector', 'washing-machine', 'watch-101', 'waterfall', 'watermelon', 'welding-mask', 'wheelbarrow', 'windmill', 'wine-bottle', 'xylophone', 'yarmulke', 'yo-yo', 'zebra', 'airplanes-101', 'car-side-101', 'faces-easy-101', 'greyhound', 'tennis-shoes', 'toad', 'clutter'] 40 | print ("\n") 41 | print ("Result: label - " + object_categories[index] + ", probability - " + str(result[index])) 42 | 43 | end = time.time() 44 | seconds = end - start 45 | seconds = repr(seconds) 46 | print ("Time: " + seconds) -------------------------------------------------------------------------------- /Source/training.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import re 3 | import os 4 | import wget 5 | import time 6 | from time import gmtime, strftime 7 | import sys 8 | import json 9 | 10 | start = time.time() 11 | 12 | role = sys.argv[1] 13 | bucket = sys.argv[2] 14 | stack_name = sys.argv[3] 15 | commit_id = sys.argv[4] 16 | commit_id = commit_id[0:7] 17 | 18 | training_image = 
'811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:latest' 19 | timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime()) 20 | 21 | def download(url): 22 | filename = url.split("/")[-1] 23 | if not os.path.exists(filename): 24 | wget.download(url, filename) 25 | 26 | 27 | def upload_to_s3(channel, file): 28 | s3 = boto3.resource('s3') 29 | data = open(file, "rb") 30 | key = channel + '/' + file 31 | s3.Bucket(bucket).put_object(Key=key, Body=data) 32 | 33 | # caltech-256 34 | print ("Downloadng Training Data") 35 | download('http://data.mxnet.io/data/caltech-256/caltech-256-60-train.rec') 36 | upload_to_s3('train', 'caltech-256-60-train.rec') 37 | print ("Finished Downloadng Training Data") 38 | print ("Downloadng Testing Data") 39 | download('http://data.mxnet.io/data/caltech-256/caltech-256-60-val.rec') 40 | upload_to_s3('validation', 'caltech-256-60-val.rec') 41 | print ("Finished Downloadng Testing Data") 42 | 43 | print ("Setting Algorithm Settings") 44 | # The algorithm supports multiple network depth (number of layers). 
They are 18, 34, 50, 101, 152 and 200 45 | # For this training, we will use 18 layers 46 | num_layers = "18" 47 | # we need to specify the input image shape for the training data 48 | image_shape = "3,224,224" 49 | # we also need to specify the number of training samples in the training set 50 | # for caltech it is 15420 51 | num_training_samples = "15420" 52 | # specify the number of output classes 53 | num_classes = "257" 54 | # batch size for training 55 | mini_batch_size = "64" 56 | # number of epochs 57 | epochs = "2" 58 | # learning rate 59 | learning_rate = "0.01" 60 | 61 | s3 = boto3.client('s3') 62 | # create unique job name 63 | job_name = stack_name + "-" + commit_id + "-" + timestamp 64 | training_params = \ 65 | { 66 | # specify the training docker image 67 | "AlgorithmSpecification": { 68 | "TrainingImage": training_image, 69 | "TrainingInputMode": "File" 70 | }, 71 | "RoleArn": role, 72 | "OutputDataConfig": { 73 | "S3OutputPath": 's3://{}/'.format(bucket) 74 | }, 75 | "ResourceConfig": { 76 | "InstanceCount": 1, 77 | "InstanceType": "ml.p2.xlarge", 78 | "VolumeSizeInGB": 50 79 | }, 80 | "TrainingJobName": job_name, 81 | "HyperParameters": { 82 | "image_shape": image_shape, 83 | "num_layers": str(num_layers), 84 | "num_training_samples": str(num_training_samples), 85 | "num_classes": str(num_classes), 86 | "mini_batch_size": str(mini_batch_size), 87 | "epochs": str(epochs), 88 | "learning_rate": str(learning_rate) 89 | }, 90 | "StoppingCondition": { 91 | "MaxRuntimeInSeconds": 360000 92 | }, 93 | #Training data should be inside a subdirectory called "train" 94 | #Validation data should be inside a subdirectory called "validation" 95 | #The algorithm currently only supports fullyreplicated model (where data is copied onto each machine) 96 | "InputDataConfig": [ 97 | { 98 | "ChannelName": "train", 99 | "DataSource": { 100 | "S3DataSource": { 101 | "S3DataType": "S3Prefix", 102 | "S3Uri": 's3://{}/train/'.format(bucket), 103 | "S3DataDistributionType": 
"FullyReplicated" 104 | } 105 | }, 106 | "ContentType": "application/x-recordio", 107 | "CompressionType": "None" 108 | }, 109 | { 110 | "ChannelName": "validation", 111 | "DataSource": { 112 | "S3DataSource": { 113 | "S3DataType": "S3Prefix", 114 | "S3Uri": 's3://{}/validation/'.format(bucket), 115 | "S3DataDistributionType": "FullyReplicated" 116 | } 117 | }, 118 | "ContentType": "application/x-recordio", 119 | "CompressionType": "None" 120 | } 121 | ] 122 | } 123 | print('Training job name: {}'.format(job_name)) 124 | print('\nInput Data Location: {}'.format(training_params['InputDataConfig'][0]['DataSource']['S3DataSource'])) 125 | 126 | # create the Amazon SageMaker training job 127 | sagemaker = boto3.client(service_name='sagemaker') 128 | sagemaker.create_training_job(**training_params) 129 | 130 | # confirm that the training job has started 131 | status = sagemaker.describe_training_job(TrainingJobName=job_name)['TrainingJobStatus'] 132 | print('Training job current status: {}'.format(status)) 133 | 134 | try: 135 | # wait for the job to finish and report the ending status 136 | sagemaker.get_waiter('training_job_completed_or_stopped').wait(TrainingJobName=job_name) 137 | training_info = sagemaker.describe_training_job(TrainingJobName=job_name) 138 | status = training_info['TrainingJobStatus'] 139 | print("Training job ended with status: " + status) 140 | except: 141 | print('Training failed to start') 142 | # if exception is raised, that means it has failed 143 | message = sagemaker.describe_training_job(TrainingJobName=job_name)['FailureReason'] 144 | print('Training failed with the following error: {}'.format(message)) 145 | 146 | 147 | # creating configuration files so we can pass parameters to our sagemaker endpoint cloudformation 148 | 149 | config_data_qa = { 150 | "Parameters": 151 | { 152 | "BucketName": bucket, 153 | "CommitID": commit_id, 154 | "Environment": "qa", 155 | "ParentStackName": stack_name, 156 | "SageMakerRole": role, 157 | 
"Timestamp": timestamp 158 | } 159 | } 160 | 161 | config_data_prod = { 162 | "Parameters": 163 | { 164 | "BucketName": bucket, 165 | "CommitID": commit_id, 166 | "Environment": "prod", 167 | "ParentStackName": stack_name, 168 | "SageMakerRole": role, 169 | "Timestamp": timestamp 170 | } 171 | } 172 | 173 | 174 | json_config_data_qa = json.dumps(config_data_qa) 175 | json_config_data_prod = json.dumps(config_data_prod) 176 | 177 | f = open( './CloudFormation/configuration_qa.json', 'w' ) 178 | f.write(json_config_data_qa) 179 | f.close() 180 | 181 | f = open( './CloudFormation/configuration_prod.json', 'w' ) 182 | f.write(json_config_data_prod) 183 | f.close() 184 | 185 | end = time.time() 186 | print(end - start) 187 | --------------------------------------------------------------------------------