├── .gitignore ├── README.md ├── api-gateway ├── code │ └── app.py └── template.yaml ├── autopilot-job-lambda ├── check-autopilot-status │ ├── app.py │ └── requirements.txt ├── create-autopilot │ ├── app.py │ └── requirements.txt ├── template.yaml └── workflow-trigger │ ├── app.py │ └── requirements.txt ├── buildspec.yaml ├── buildspec_deploy_api.yaml ├── img ├── Machine-Learning-Recipe-with-Olalekan.jpg ├── stepfunctions_graph_horitontal.png └── stepfunctions_graph_vertical.png ├── requirements.txt └── workflow ├── __init__.py ├── main.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | venv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Automate the end-to-end AutoML lifecycle with Amazon SageMaker Autopilot on Amazon Step Functions 2 |

(CD4AutoML)

3 | 4 | 5 | ![Build Badge](https://codebuild.eu-west-1.amazonaws.com/badges?uuid=eyJlbmNyeXB0ZWREYXRhIjoiZ2RDdGJMTHNTYWw4bWk1TmFHZVR4QVRGUDhxeEt4R3V3Y0dabWNDM3cwTFRJMlhHSVFMNWE4YkQ3Y0hicU9Jb2gxQlFhQkZnK2I4dmJScGd3MGsxeHlFPSIsIml2UGFyYW1ldGVyU3BlYyI6IlZQQzgzQkk4WlRIWnBteGMiLCJtYXRlcmlhbFNldFNlcmlhbCI6MX0%3D&branch=master) 6 | 7 | ![Step Functions Graph Horizontal|CD4AutoML](./img/stepfunctions_graph_horitontal.png) 8 | 9 | This repository is a getting started/ready to use kit for deploying and running repeatable machine learning pipelines 10 | with Amazon SageMaker Autopilot. The project leverages with Amazon CodeBuild service integration with Amazon Step Functions 11 | to deploy an Amazon API Gateway for serving predictions. With this project, you can easily jump start your continuous 12 | integration and continuous deployment automated machine learning workflow with Amazon SageMaker Autopilot. 13 | 14 | 15 | ## Architecture 16 | ![End to end ML workflow with Amazon SageMaker Autopilot](./img/Machine-Learning-Recipe-with-Olalekan.jpg) 17 | 18 | This project is designed to get up and running with CD4AutoML (I coined this), much [CD4ML](https://martinfowler.com/articles/cd4ml.html) 19 | from [Martin Fowler's blogpost](https://martinfowler.com/articles/cd4ml.html). 20 | 21 | ### Technologies: 22 | - Amazon Cloudformation 23 | - Amazon Step Functions 24 | - Amazon CodeBuild 25 | - AWS Step Functions Data Science SDK 26 | - AWS Serverless Application Model 27 | - Amazon Lambda 28 | - Amazon API Gateway 29 | - Amazon SSM Parameter Store 30 | 31 | 32 | This project gets you out of the play/lab mode with Amazon SageMaker Autopilot into running real-life applications with Amazon SageMaker Autopilot. 33 | 34 | ### State machine Workflow 35 | The entire workflow is managed with AWS Step Functions Data Science SDK. 36 | Amazon Step Functions does not have service integration with Amazon SageMaker Autopilot out of the box. To manage this, I leveraged 37 | Amazon Lambda integration with Step Functions to periodically poll for Amazon SageMaker Autopilot job status. 38 | 39 | Once the AutoML job is completed, a model is created using the Amazon SageMaker Autopilot Inference Containers, and an Amazon 40 | SageMaker Endpoint is deployed. But there is more... 41 | 42 | On completion of the deployment of the Amazon SageMaker Endpoint, an Amazon CodeBuild Project state machine task is triggered 43 | which deploys our Amazon API Gateway with AWS Serverless Application Model. 44 | 45 | See workflow image below: 46 | 47 |
CD4AutoML
48 | 49 | ![Step Functions Graph Vertical|CD4AutoML](./img/stepfunctions_graph_vertical.png) 50 | 51 | 52 | ## Future Work 53 | I have plans to abstract away all deployment details and convert this into a Python Module or better put AutoML-as-a-Service. 54 | Users can either provide their Pandas DataFrame or local CSV/JSON data, and the service takes care of the rest. Users will 55 | get a secure REST API which they can make predictions in their applications. 56 | 57 | If you're interested in working on this together, feel free to reach out. Also feel free to extend this project as it suites 58 | you. Experiencing any challenges getting started, create an issue and I will have a look as soon as I can. 59 | 60 | 61 | ### TODO 62 | - [X] Add CloudWatch Schedule event trigger 63 | - [ ] Python Module 64 | - [ ] Convert project to AutoML-as-a-Service -------------------------------------------------------------------------------- /api-gateway/code/app.py: -------------------------------------------------------------------------------- 1 | from boto3 import client 2 | from os import getenv 3 | from json import loads, dumps 4 | 5 | 6 | SAGEMAKER_ENDPOINT = getenv('SAGEMAKER_ENDPOINT') 7 | SAGEMAKER_AUTOPILOT_TARGET_MODEL = getenv('SAGEMAKER_AUTOPILOT_TARGET_MODEL') 8 | sm_runtime = client('sagemaker-runtime') 9 | 10 | 11 | def respond(data, status=501): 12 | return { 13 | "headers": { 14 | "Access-Control-Allow-Headers": "Content-Type,Authorization,X-Amz-Date,X-Api-Key,X-Amz-Security-Token", 15 | "Access-Control-Allow-Methods": "OPTIONS,POST,PUT", 16 | "Access-Control-Allow-Origin": "*" 17 | }, 18 | "statusCode": status, 19 | "body": dumps(data) 20 | } 21 | 22 | 23 | def lambda_handler(event, context): 24 | """ 25 | :param event: 26 | :param context: 27 | :return: 28 | """ 29 | request_method = event['httpMethod'] 30 | request_body = loads(event['body']) 31 | 32 | if request_method == 'OPTIONS': 33 | return respond("This is an empty OPTIONS Request", 200) 34 | 35 | response = sm_runtime.invoke_endpoint( 36 | EndpointName=SAGEMAKER_ENDPOINT, 37 | ContentType='text/csv', 38 | Accept='text/csv', 39 | Body=request_body, 40 | TargetModel=SAGEMAKER_AUTOPILOT_TARGET_MODEL 41 | ) 42 | payload = { 43 | 'Prediction': response['Body'].read().decode("utf-8"), 44 | 'SageMakerEndpointName': SAGEMAKER_ENDPOINT 45 | } 46 | return respond(payload, 200) 47 | -------------------------------------------------------------------------------- /api-gateway/template.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Transform: AWS::Serverless-2016-10-31 3 | Description: > 4 | SAM Template for gram2shop-backend 5 | 6 | # More info about Globals: https://github.com/awslabs/serverless-application-model/blob/master/docs/globals.rst 7 | Globals: 8 | Function: 9 | Timeout: 30 10 | 11 | Parameters: 12 | SageMakerEndpoint: 13 | Type: String 14 | SageMakerAutopilotTargetModel: 15 | Type: String 16 | Description: 'Model to invoke in multi-model endpoint' 17 | 18 | 19 | Resources: 20 | SageMakerInferenceApi: 21 | Type: AWS::Serverless::Function 22 | Properties: 23 | CodeUri: code/ 24 | Handler: app.lambda_handler 25 | Runtime: python3.7 26 | Environment: 27 | Variables: 28 | SAGEMAKER_ENDPOINT: !Ref SageMakerEndpoint 29 | SAGEMAKER_AUTOPILOT_TARGET_MODEL: !Ref SageMakerAutopilotTargetModel 30 | Events: 31 | Request: 32 | Type: Api 33 | Properties: 34 | Path: /predictions 35 | Method: POST 36 | Options: 37 | Type: Api 38 | Properties: 39 | Path: /predictions 40 | Method: OPTIONS 41 | Policies: 42 | Version: '2012-10-17' 43 | Statement: 44 | - Effect: Allow 45 | Action: 46 | - sagemaker:InvokeEndpoint 47 | Resource: !Sub 'arn:aws:sagemaker:${AWS::Region}:${AWS::AccountId}:endpoint/${SageMakerEndpoint}' 48 | 49 | 50 | Outputs: 51 | SageMakerInferenceApi: 52 | Description: "API Gateway endpoint URL for Prod stage for ExtractRichMediaFunctionApi function" 53 | Value: !Sub "https://${ServerlessRestApi}.execute-api.${AWS::Region}.amazonaws.com/Prod/predictions" -------------------------------------------------------------------------------- /autopilot-job-lambda/check-autopilot-status/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | Amazon Step Functions Lambda Resource to check Amazon SageMaker AutoPilot job status 3 | """ 4 | 5 | from boto3 import client 6 | 7 | sm_client = client('sagemaker') 8 | 9 | 10 | def lambda_handler(event, context): 11 | """ 12 | :param event: 13 | :param context: 14 | :return: 15 | """ 16 | autopilot_job_name = event['AutopilotJobName'] 17 | print(f'Autopilot JOb Name: {autopilot_job_name}') 18 | response = sm_client.describe_auto_ml_job(AutoMLJobName=autopilot_job_name) 19 | job_status = response['AutoMLJobStatus'] 20 | job_sec_status = response['AutoMLJobSecondaryStatus'] 21 | print(f'Autopilot Job {autopilot_job_name} is currently in {job_status}') 22 | result = { 23 | 'AutopilotJobName': autopilot_job_name, 24 | 'AutopilotJobStatus': job_status, 25 | 'AutopilotSecondaryJobStatus': job_sec_status, 26 | 'FailureReason': response.get('FailureReason', None), 27 | 'MachineLearningTaskType': response.get('ProblemType', None) 28 | } 29 | if job_status == 'Completed': 30 | best_candidate = response['BestCandidate'] 31 | inference_containers = best_candidate['InferenceContainers'] 32 | multi_model_inference_containers = list(map(_set_multimodel_mode, inference_containers)) 33 | result['InferenceContainers'] = multi_model_inference_containers 34 | result['BestCandidateName'] = best_candidate['CandidateName'] 35 | return result 36 | 37 | 38 | def _set_multimodel_mode(inference_container: dict) -> dict: 39 | """ 40 | :param inference_container: 41 | :return: 42 | """ 43 | inference_container['Mode'] = 'MultiModel' 44 | return inference_container 45 | -------------------------------------------------------------------------------- /autopilot-job-lambda/check-autopilot-status/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OElesin/sagemaker-autopilot-step-functions/38e7679f1aff35973f9ce403795cab332c0f3dbd/autopilot-job-lambda/check-autopilot-status/requirements.txt -------------------------------------------------------------------------------- /autopilot-job-lambda/create-autopilot/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | Amazon Step Functions Lambda Resource to create Amazon SageMaker AutoPilot job 3 | """ 4 | from boto3 import client 5 | from time import gmtime, strftime 6 | 7 | sm_client = client('sagemaker') 8 | 9 | 10 | def lambda_handler(event, context): 11 | """ 12 | :param event: 13 | :param context: 14 | :return: 15 | """ 16 | print(event) 17 | timestamp_suffix = strftime('%d-%H-%M-%S', gmtime()) 18 | default_autopilot_job_name = f'aws-samples-autopilot-workflow-{timestamp_suffix}' 19 | configuration: dict = event['Configuration'] 20 | autopilot_job_name = configuration.get('AutoMLJobName', default_autopilot_job_name) 21 | input_data = configuration.get('S3InputData') 22 | job_execution_role = configuration.get('IamRole') 23 | target_column = configuration.get('TargetColumnName') 24 | output_path = configuration.get('S3OutputData') 25 | tags = configuration.get('Tags') 26 | autopilot_job_tags = generate_job_tags(tags) 27 | autopilot_job_config: dict = { 28 | 'CompletionCriteria': { 29 | 'MaxRuntimePerTrainingJobInSeconds': 600, 30 | 'MaxCandidates': 5, 31 | 'MaxAutoMLJobRuntimeInSeconds': 5400 32 | } 33 | } 34 | 35 | autopilot_input_data_config = [ 36 | { 37 | 'DataSource': { 38 | 'S3DataSource': { 39 | 'S3DataType': 'S3Prefix', 40 | 'S3Uri': input_data 41 | } 42 | }, 43 | 'TargetAttributeName': target_column 44 | } 45 | ] 46 | 47 | autopilot_output_data_config = { 48 | 'S3OutputPath': output_path 49 | } 50 | 51 | response = sm_client.create_auto_ml_job( 52 | AutoMLJobName=autopilot_job_name, 53 | InputDataConfig=autopilot_input_data_config, 54 | OutputDataConfig=autopilot_output_data_config, 55 | AutoMLJobConfig=autopilot_job_config, 56 | RoleArn=job_execution_role, 57 | Tags=autopilot_job_tags 58 | ) 59 | return { 60 | 'AutopilotJobName': autopilot_job_name, 61 | 'AutopilotJobArn': response['AutoMLJobArn'] 62 | } 63 | 64 | 65 | def generate_job_tags(raw_tags): 66 | """ 67 | :param raw_tags: 68 | :return: 69 | """ 70 | base_tags = [ 71 | { 72 | 'Key': 'provider', 73 | 'Value': 'elesin.olalekan@gmail.com' 74 | }, 75 | ] 76 | if raw_tags is None: 77 | return base_tags 78 | input_tags = [{'Key': key, 'Value': value} for key, value in raw_tags] 79 | return base_tags + input_tags 80 | -------------------------------------------------------------------------------- /autopilot-job-lambda/create-autopilot/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OElesin/sagemaker-autopilot-step-functions/38e7679f1aff35973f9ce403795cab332c0f3dbd/autopilot-job-lambda/create-autopilot/requirements.txt -------------------------------------------------------------------------------- /autopilot-job-lambda/template.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion : '2010-09-09' 2 | Transform: AWS::Serverless-2016-10-31 3 | Description: A sample SAM template for deploying Lambda functions. 4 | 5 | Parameters: 6 | ServiceBuildRoleArn: 7 | Type: String 8 | Description: 'IAM Role to be assumed by CodeBuild to run jobs' 9 | S3InputDataPath: 10 | Type: String 11 | Description: 'AWS S3 Input data path for SageMaker Autopilot model training' 12 | Default: 'Unknown' 13 | TargetColumnName: 14 | Type: String 15 | Description: 'Name of the target column to predict with SageMaker Autopilot' 16 | Default: 'Unknown' 17 | S3OutputPath: 18 | Type: String 19 | Description: 'S3 Path to save SageMaker Autopilot model artifacts' 20 | Default: 'Unknown' 21 | DeployedModelName: 22 | Type: String 23 | Description: | 24 | SageMaker Autopilot model name. This should be unique per account. The name is used for multi-model endpoint 25 | deployment 26 | Default: 'default-autopilot-workflow-model' 27 | 28 | Resources: 29 | CreateAutopilotJobFunction: 30 | Type: AWS::Serverless::Function 31 | Properties: 32 | Handler: app.lambda_handler 33 | FunctionName: 'CreateAutopilotJob' 34 | Description: 'Create SageMaker Autopilot Job' 35 | CodeUri: create-autopilot/ 36 | Runtime: python3.7 37 | Policies: 38 | - Version: '2012-10-17' 39 | Statement: 40 | - Effect: Allow 41 | Action: 42 | - lambda:InvokeFunction 43 | - lambda:InvokeAsync 44 | Resource: '*' 45 | - Effect: Allow 46 | Action: 47 | - sagemaker:CreateAutoMLJob 48 | - sagemaker:CreateTrainingJob 49 | - iam:PassRole 50 | Resource: '*' 51 | - Effect: Allow 52 | Action: 53 | - s3:CreateBucket 54 | - s3:PutObject 55 | Resource: arn:aws:s3:::sagemaker-* 56 | 57 | CheckAutopilotJobStatusFunction: 58 | Type: AWS::Serverless::Function 59 | Properties: 60 | Handler: app.lambda_handler 61 | CodeUri: check-autopilot-status/ 62 | Runtime: python3.7 63 | FunctionName: 'CheckAutopilotJobStatus' 64 | Description: 'Checks SageMaker Autopilot Job Status' 65 | Policies: 66 | Version: '2012-10-17' 67 | Statement: 68 | - Effect: Allow 69 | Action: 70 | - lambda:InvokeFunction 71 | - lambda:InvokeAsync 72 | Resource: '*' 73 | - Effect: Allow 74 | Action: 75 | - sagemaker:DescribeAutoMLJob 76 | - sagemaker:DescribeTrainingJob 77 | Resource: '*' 78 | 79 | StateMachineWorkflowTriggerFunction: 80 | Type: AWS::Serverless::Function 81 | Properties: 82 | Handler: app.lambda_handler 83 | CodeUri: workflow-trigger/ 84 | Runtime: python3.7 85 | FunctionName: 'StateMachineWorkflowTrigger' 86 | Description: 'Lambda function to trigger workflow on schedule' 87 | Environment: 88 | Variables: 89 | SAGEMAKER_EXECUTION_ROLE: !GetAtt SageMakerExecutionRole.Arn 90 | S3_INPUT_DATA_PATH: !Ref S3InputDataPath 91 | S3_OUTPUT_PATH: !Ref S3OutputPath 92 | TARGET_COLUMN_NAME: !Ref TargetColumnName 93 | DEPLOYED_MODEL_NAME: !Ref DeployedModelName 94 | Policies: 95 | Version: '2012-10-17' 96 | Statement: 97 | - Effect: Allow 98 | Action: 99 | - lambda:InvokeFunction 100 | - lambda:InvokeAsync 101 | Resource: '*' 102 | - Effect: Allow 103 | Action: 104 | - states:StartExecution 105 | Resource: '*' 106 | # Events: # uncomment for production deployment 107 | # JobSchedule: 108 | # Type: Schedule 109 | # Properties: 110 | # Schedule: 'rate(14 days)' 111 | # Name: CD4AutoMLSchedule 112 | # Description: test schedule 113 | # Enabled: False 114 | 115 | # AutopilotWorkflowBuildProject: 116 | # Type: AWS::CodeBuild::Project 117 | # Properties: 118 | # Name: AutopilotWorkflowBuild 119 | # ServiceRole: !Ref ServiceBuildRoleArn 120 | # Artifacts: 121 | # Type: NO_ARTIFACTS 122 | # Environment: 123 | # Type: LINUX_CONTAINER 124 | # ComputeType: BUILD_GENERAL1_SMALL 125 | # Image: aws/codebuild/standard:1.0 126 | # PrivilegedMode: true 127 | # Source: 128 | # Type: GITHUB 129 | # Location: https://github.com/OElesin/sagemaker-autopilot-step-functions.git 130 | # Triggers: 131 | # Webhook: true 132 | # FilterGroups: 133 | # - - Type: EVENT 134 | # Pattern: PUSH 135 | # - Type: HEAD_REF 136 | # Pattern: '^refs/heads/master$' 137 | # - Type: FILE_PATH 138 | # Pattern: 'README.md' 139 | # ExcludeMatchedPattern: true 140 | # Tags: 141 | # - Key: 'provider' 142 | # Value: 'elesin.olalekan@gmail.com' 143 | 144 | RestApiBuildProject: 145 | Type: AWS::CodeBuild::Project 146 | Properties: 147 | Name: RestApiBuildProject 148 | ServiceRole: !Ref ServiceBuildRoleArn 149 | BadgeEnabled: true 150 | Artifacts: 151 | Type: NO_ARTIFACTS 152 | Environment: 153 | Type: LINUX_CONTAINER 154 | ComputeType: BUILD_GENERAL1_SMALL 155 | Image: aws/codebuild/standard:1.0 156 | PrivilegedMode: true 157 | Source: 158 | Type: GITHUB 159 | Location: https://github.com/OElesin/sagemaker-autopilot-step-functions.git 160 | BuildSpec: buildspec_deploy_api.yaml 161 | TimeoutInMinutes: 10 162 | Tags: 163 | - Key: 'provider' 164 | Value: 'elesin.olalekan@gmail.com' 165 | 166 | WorkflowExecRole: 167 | Type: AWS::IAM::Role 168 | Properties: 169 | RoleName: WorkflowExecRole 170 | Path: '/' 171 | AssumeRolePolicyDocument: 172 | Version: '2012-10-17' 173 | Statement: 174 | - Action: 175 | - sts:AssumeRole 176 | Effect: Allow 177 | Principal: 178 | Service: 179 | - states.amazonaws.com 180 | ManagedPolicyArns: 181 | - arn:aws:iam::aws:policy/CloudWatchEventsFullAccess 182 | 183 | SageMakerExecutionRole: 184 | Type: AWS::IAM::Role 185 | Properties: 186 | RoleName: SageMakerExecutionRole 187 | Path: '/service-role/' 188 | AssumeRolePolicyDocument: 189 | Version: '2012-10-17' 190 | Statement: 191 | - Action: 192 | - sts:AssumeRole 193 | Effect: Allow 194 | Principal: 195 | Service: 196 | - sagemaker.amazonaws.com 197 | ManagedPolicyArns: 198 | - arn:aws:iam::aws:policy/AmazonS3FullAccess 199 | - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess 200 | 201 | WorkflowExecPolicy: 202 | Type: AWS::IAM::Policy 203 | Properties: 204 | PolicyName: 'WorkflowExecPolicy' 205 | PolicyDocument: 206 | Statement: 207 | - Effect: Allow 208 | Action: iam:PassRole 209 | Resource: '*' 210 | Condition: 211 | StringEquals: 212 | iam:PassedToService: sagemaker.amazonaws.com 213 | - Effect: Allow 214 | Action: 215 | - events:DescribeRule 216 | - events:PutRule 217 | - events:PutTargets 218 | Resource: 219 | - !Sub arn:aws:events:${AWS::Region}:${AWS::AccountId}:rule/StepFunctionsGetEventsForSageMakerTrainingJobsRule 220 | - Effect: Allow 221 | Action: 222 | - sagemaker:CreateModel 223 | - sagemaker:DeleteEndpointConfig 224 | - sagemaker:DescribeTrainingJob 225 | - sagemaker:CreateEndpoint 226 | - sagemaker:StopTrainingJob 227 | - sagemaker:CreateTrainingJob 228 | - sagemaker:UpdateEndpoint 229 | - sagemaker:CreateEndpointConfig 230 | - sagemaker:DeleteEndpoint 231 | Resource: 232 | - arn:aws:sagemaker:*:*:* 233 | - Effect: Allow 234 | Action: 235 | - lambda:InvokeFunction 236 | - lambda:InvokeAsync 237 | Resource: 238 | - !GetAtt CreateAutopilotJobFunction.Arn 239 | - !GetAtt CheckAutopilotJobStatusFunction.Arn 240 | - Effect: Allow 241 | Action: 242 | - codebuild:StartBuild 243 | - codebuild:StopBuild 244 | - codebuild:BatchGetBuilds 245 | - codebuild:BatchGetReports 246 | Resource: 247 | - !GetAtt RestApiBuildProject.Arn 248 | Roles: 249 | - !Ref WorkflowExecRole 250 | 251 | WorkflowRoleParameter: 252 | Type: AWS::SSM::Parameter 253 | Properties: 254 | Name: AutopilotWorkflowExecRole 255 | Type: String 256 | Value: !GetAtt WorkflowExecRole.Arn 257 | Description: 'AWS SageMaker Autopilot Step Functions workflow execution role arn.' 258 | 259 | RestApiBuildProjectParameter: 260 | Type: AWS::SSM::Parameter 261 | Properties: 262 | Name: RestApiBuildProject 263 | Type: String 264 | Value: !Ref RestApiBuildProject 265 | Description: 'CodeBuild Project Name for deploying REST API.' 266 | 267 | SageMakerExecutionRoleParameter: 268 | Type: AWS::SSM::Parameter 269 | Properties: 270 | Name: SageMakerExecutionRole 271 | Type: String 272 | Value: !GetAtt SageMakerExecutionRole.Arn 273 | Description: 'AWS SageMaker execution role arn.' 274 | 275 | SageMakerAutopilotModelName: 276 | Type: AWS::SSM::Parameter 277 | Properties: 278 | Name: SageMakerAutopilotModelName 279 | Type: String 280 | Value: !Ref DeployedModelName 281 | Description: 'SageMaker Autopilot model name. This should be unique per account. The name is used for multi-model endpoint deployment' 282 | 283 | Outputs: 284 | WorkflowExecArn: 285 | Value: !GetAtt WorkflowExecRole.Arn 286 | Export: 287 | Name: AutopilotWorkflowExecRole -------------------------------------------------------------------------------- /autopilot-job-lambda/workflow-trigger/app.py: -------------------------------------------------------------------------------- 1 | from boto3 import client 2 | from time import gmtime, strftime 3 | from os import getenv 4 | from json import dumps 5 | 6 | 7 | ssm_client = client('ssm') 8 | sfn_client = client('stepfunctions') 9 | sagemaker_exec_role = getenv('SAGEMAKER_EXECUTION_ROLE') 10 | s3_input_data_path = getenv('S3_INPUT_DATA_PATH') 11 | deployed_model_name = getenv('DEPLOYED_MODEL_NAME') 12 | s3_output_path = getenv('S3_OUTPUT_PATH') 13 | target_column_name = getenv('TARGET_COLUMN_NAME') 14 | 15 | state_machine_arn = ssm_client.get_parameter( 16 | Name='AutopilotStateMachineWorkflowArn', 17 | )['Parameter']['Value'] 18 | 19 | 20 | def lambda_handler(event, context): 21 | """ 22 | :param event: 23 | :param context: 24 | :return: 25 | """ 26 | timestamp_suffix = strftime('%d-%H-%M-%S', gmtime()) 27 | execution_input_dict = { 28 | 'AutoMLJobName': f'autopilot-workflow-job-{timestamp_suffix}', 29 | 'ModelName': deployed_model_name, 30 | 'EndpointName': f'autopilot-workflow-endpoint', 31 | 'S3InputData': s3_input_data_path, 32 | 'TargetColumnName': target_column_name, 33 | 'S3OutputData': s3_output_path, 34 | 'IamRole': sagemaker_exec_role, 35 | } 36 | execution_input = dumps(execution_input_dict) 37 | sfn_client.start_execution( 38 | stateMachineArn=state_machine_arn, 39 | input=execution_input 40 | ) 41 | -------------------------------------------------------------------------------- /autopilot-job-lambda/workflow-trigger/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OElesin/sagemaker-autopilot-step-functions/38e7679f1aff35973f9ce403795cab332c0f3dbd/autopilot-job-lambda/workflow-trigger/requirements.txt -------------------------------------------------------------------------------- /buildspec.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | env: 4 | variables: 5 | ARTIFACTS_BUCKET: galudy-visual-search-artifacts 6 | S3_WORKFLOW_PREFIX: sagemaker-autopilot-step-functions-workflow 7 | S3_API_PREFIX: sagemaker-autopilot-api-gateway 8 | WORKFLOW_STACK_NAME: sagemaker-autopilot-ml-workflow 9 | API_STACK_NAME: sagemaker-autopilot-api-gateway 10 | 11 | phases: 12 | install: 13 | runtime-versions: 14 | python: 3.7 15 | commands: 16 | - apt-get update 17 | - apt-get install libblas-dev liblapack-dev -y 18 | - pip install --upgrade pip boto3 19 | - pip install stepfunctions aws-sam-cli 20 | pre_build: 21 | commands: 22 | - echo "Run AWS SAM Build on workflow stack and API stack" 23 | - export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text) 24 | - export SERVICE_BUILD_ROLE="arn:aws:iam::${AWS_ACCOUNT_ID}:role/service-role/codebuild-Attires24-App-service-role" 25 | - echo "SAM package workflow template" 26 | - | 27 | sh -c """ 28 | cd autopilot-job-lambda 29 | sam build -b deploy/ 30 | sam package --template-file deploy/template.yaml \ 31 | --output-template-file packaged.template.yaml \ 32 | --s3-bucket ${ARTIFACTS_BUCKET} \ 33 | --s3-prefix ${S3_WORKFLOW_PREFIX} 34 | """ 35 | - echo "SAM package REST API template" 36 | build: 37 | commands: 38 | - echo Build started on `date` 39 | - | 40 | sh -c """ 41 | cd autopilot-job-lambda 42 | sam deploy --stack-name ${WORKFLOW_STACK_NAME} \ 43 | --template-file packaged.template.yaml \ 44 | --parameter-overrides S3ApiCodePath="${ARTIFACTS_BUCKET}/${S3_API_PREFIX}" ServiceBuildRoleArn=${SERVICE_BUILD_ROLE} \ 45 | --capabilities CAPABILITY_NAMED_IAM \ 46 | --no-fail-on-empty-changeset 47 | """ 48 | - echo Done Deploying Stack 49 | - echo "Deploying Step Functions State Machine" 50 | - | 51 | sh -c """ 52 | cd workflow/ 53 | python main.py 54 | """ 55 | - echo "Upload REST API Code to AWS S3" 56 | - aws s3 cp api-gateway "s3://${ARTIFACTS_BUCKET}/${S3_API_PREFIX}" --recursive 57 | post_build: 58 | commands: 59 | - echo "Build Completed" -------------------------------------------------------------------------------- /buildspec_deploy_api.yaml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | env: 4 | variables: 5 | LC_ALL: en_US.UTF-8 6 | LANG: en_US.UTF-8 7 | ARTIFACTS_BUCKET: galudy-visual-search-artifacts 8 | S3_API_PREFIX: sagemaker-autopilot-api-gateway 9 | API_STACK_NAME: sagemaker-autopilot-api-gateway 10 | 11 | phases: 12 | install: 13 | runtime-versions: 14 | python: 3.8 15 | commands: 16 | - pip install -r requirements.txt 17 | pre_build: 18 | commands: 19 | - echo "Run AWS SAM Build on workflow stack and API stack" 20 | - export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text) 21 | - echo "SAM package workflow template" 22 | - | 23 | sh -c """ 24 | cd api-gateway 25 | sam build -b deploy/ 26 | sam package --template-file deploy/template.yaml \ 27 | --output-template-file packaged.template.yaml \ 28 | --s3-bucket ${ARTIFACTS_BUCKET} \ 29 | --s3-prefix ${S3_API_PREFIX} 30 | """ 31 | - echo "SAM package REST API template" 32 | build: 33 | commands: 34 | - echo Build started on `date` 35 | - | 36 | sh -c """ 37 | cd api-gateway 38 | sam deploy --stack-name ${API_STACK_NAME} \ 39 | --template-file packaged.template.yaml \ 40 | --parameter-overrides SageMakerEndpoint=${SAGEMAKER_ENDPOINT} SageMakerAutopilotTargetModel=${SAGEMAKER_AUTOPILOT_TARGET_MODEL} \ 41 | --capabilities CAPABILITY_IAM \ 42 | --no-fail-on-empty-changeset 43 | """ 44 | post_build: 45 | commands: 46 | - echo "Build Completed" -------------------------------------------------------------------------------- /img/Machine-Learning-Recipe-with-Olalekan.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OElesin/sagemaker-autopilot-step-functions/38e7679f1aff35973f9ce403795cab332c0f3dbd/img/Machine-Learning-Recipe-with-Olalekan.jpg -------------------------------------------------------------------------------- /img/stepfunctions_graph_horitontal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OElesin/sagemaker-autopilot-step-functions/38e7679f1aff35973f9ce403795cab332c0f3dbd/img/stepfunctions_graph_horitontal.png -------------------------------------------------------------------------------- /img/stepfunctions_graph_vertical.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OElesin/sagemaker-autopilot-step-functions/38e7679f1aff35973f9ce403795cab332c0f3dbd/img/stepfunctions_graph_vertical.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | stepfunctions 3 | aws-sam-cli 4 | sagemaker -------------------------------------------------------------------------------- /workflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OElesin/sagemaker-autopilot-step-functions/38e7679f1aff35973f9ce403795cab332c0f3dbd/workflow/__init__.py -------------------------------------------------------------------------------- /workflow/main.py: -------------------------------------------------------------------------------- 1 | import sagemaker 2 | from stepfunctions.steps import LambdaStep, Wait, Choice, Task, Chain, ChoiceRule, \ 3 | Catch, Retry, Fail, EndpointConfigStep, EndpointStep 4 | from boto3 import client 5 | from stepfunctions.inputs import ExecutionInput 6 | from stepfunctions.workflow import Workflow 7 | from time import gmtime, strftime 8 | from sagemaker.model_monitor import DataCaptureConfig 9 | import utils 10 | 11 | sagemaker_session = sagemaker.Session() 12 | sagemaker_exec_role = utils.get_sagemaker_execution_role() 13 | sfn_client = client('stepfunctions') 14 | # define execution input 15 | execution_input = ExecutionInput(schema={ 16 | 'AutoMLJobName': str, 17 | 'ModelName': str, 18 | 'S3InputData': str, 19 | 'IamRole': str, 20 | 'TargetColumnName': str, 21 | 'S3OutputData': str, 22 | 'Tags': dict, 23 | 'EndpointName': str, 24 | 'EndpointConfigName': str 25 | 26 | }) 27 | 28 | # TODO: make this a notification 29 | workflow_failure = Fail( 30 | 'WorkflowFailed' 31 | ) 32 | 33 | # create autopilot lambda step 34 | create_autopilot_job_step = LambdaStep( 35 | 'StartAutopilotJob', 36 | parameters={ 37 | 'FunctionName': 'CreateAutopilotJob', 38 | 'Payload': { 39 | 'Configuration': { 40 | 'AutoMLJobName': execution_input['AutoMLJobName'], 41 | 'S3InputData': execution_input['S3InputData'], 42 | 'IamRole': execution_input['IamRole'], 43 | 'TargetColumnName': execution_input['TargetColumnName'], 44 | 'S3OutputData': execution_input['S3OutputData'], 45 | 'Tags': execution_input['Tags'] 46 | } 47 | } 48 | } 49 | ) 50 | 51 | create_autopilot_job_step.add_retry(Retry( 52 | error_equals=["States.TaskFailed"], 53 | interval_seconds=15, 54 | max_attempts=2, 55 | backoff_rate=4.0 56 | )) 57 | 58 | create_autopilot_job_step.add_catch(Catch( 59 | error_equals=["States.TaskFailed"], 60 | next_step=workflow_failure 61 | )) 62 | 63 | check_autopilot_job_status = LambdaStep( 64 | 'CheckAutopilotJobStatus', 65 | parameters={ 66 | 'FunctionName': 'CheckAutopilotJobStatus', 67 | 'Payload': { 68 | 'AutopilotJobName': create_autopilot_job_step.output()['Payload']['AutopilotJobName'] 69 | } 70 | } 71 | ) 72 | 73 | check_job_wait_state = Wait( 74 | state_id="Wait", 75 | seconds=360 76 | ) 77 | 78 | check_job_choice = Choice( 79 | state_id="IsAutopilotJobComplete" 80 | ) 81 | 82 | 83 | model_step = Task( 84 | 'CreateAutopilotModel', 85 | resource='arn:aws:states:::sagemaker:createModel', 86 | parameters={ 87 | 'Containers': check_autopilot_job_status.output()['Payload']['InferenceContainers'], 88 | 'ModelName': execution_input['ModelName'], 89 | 'ExecutionRoleArn': sagemaker_exec_role 90 | } 91 | ) 92 | 93 | endpoint_config_step = EndpointConfigStep( 94 | 'CreateModelEndpointConfig', 95 | endpoint_config_name=execution_input['EndpointConfigName'], 96 | model_name=execution_input['ModelName'], 97 | initial_instance_count=1, 98 | instance_type='ml.m4.xlarge', 99 | data_capture_config=DataCaptureConfig( 100 | enable_capture=True, 101 | sampling_percentage=100, 102 | ) 103 | ) 104 | 105 | 106 | endpoint_step = EndpointStep( 107 | 'UpdateModelEndpoint', 108 | endpoint_name=execution_input['EndpointName'], 109 | endpoint_config_name=execution_input['EndpointConfigName'], 110 | update=False 111 | ) 112 | 113 | # define Amazon CodeBuild Step Functions Task 114 | deploy_rest_api_task = Task( 115 | 'DeployRestAPI', 116 | resource='arn:aws:states:::codebuild:startBuild.sync', 117 | parameters={ 118 | 'ProjectName': utils.get_api_codebuild_project(), 119 | 'EnvironmentVariablesOverride': [ 120 | { 121 | 'Name': 'SAGEMAKER_ENDPOINT', 122 | 'Type': 'PLAIN_TEXT', 123 | 'Value': execution_input['EndpointName'] 124 | }, 125 | { 126 | 'Name': 'SAGEMAKER_AUTOPILOT_TARGET_MODEL', 127 | 'Type': 'PLAIN_TEXT', 128 | 'Value': '{}.tar.gz'.format(execution_input['ModelName']) 129 | } 130 | ] 131 | } 132 | ) 133 | 134 | # happy path 135 | model_and_endpoint_step = Chain([ 136 | model_step, 137 | endpoint_config_step, 138 | endpoint_step, 139 | deploy_rest_api_task 140 | ]) 141 | 142 | 143 | # define choice 144 | check_job_choice.add_choice( 145 | ChoiceRule.StringEquals( 146 | variable=check_autopilot_job_status.output()['Payload']['AutopilotJobStatus'], 147 | value='InProgress' 148 | ), 149 | next_step=check_autopilot_job_status 150 | ) 151 | 152 | check_job_choice.add_choice( 153 | ChoiceRule.StringEquals( 154 | variable=check_autopilot_job_status.output()['Payload']['AutopilotJobStatus'], 155 | value='Stopping' 156 | ), 157 | next_step=check_autopilot_job_status 158 | ) 159 | 160 | check_job_choice.add_choice( 161 | ChoiceRule.StringEquals( 162 | variable=check_autopilot_job_status.output()['Payload']['AutopilotJobStatus'], 163 | value='Failed' 164 | ), 165 | next_step=workflow_failure 166 | ) 167 | 168 | check_job_choice.add_choice( 169 | ChoiceRule.StringEquals( 170 | variable=check_autopilot_job_status.output()['Payload']['AutopilotJobStatus'], 171 | value='Stopped' 172 | ), 173 | next_step=workflow_failure 174 | ) 175 | 176 | check_job_choice.add_choice( 177 | ChoiceRule.StringEquals( 178 | variable=check_autopilot_job_status.output()['Payload']['AutopilotJobStatus'], 179 | value='Completed' 180 | ), 181 | next_step=model_and_endpoint_step 182 | ) 183 | 184 | workflow_definition = Chain([ 185 | create_autopilot_job_step, 186 | check_autopilot_job_status, 187 | check_job_wait_state, 188 | check_job_choice 189 | ]) 190 | 191 | autopilot_ml_workflow = Workflow( 192 | name="AutopilotStateMachineWorkflow", 193 | definition=workflow_definition, 194 | role=utils.get_workflow_role() 195 | ) 196 | 197 | try: 198 | state_machine_arn = autopilot_ml_workflow.create() 199 | except sfn_client.exceptions.StateMachineAlreadyExists as e: 200 | print(e.message) 201 | else: 202 | print("Updating workflow definition") 203 | state_machine_arn = autopilot_ml_workflow.update(workflow_definition) 204 | 205 | 206 | utils.save_state_machine_arn(state_machine_arn) 207 | 208 | timestamp_suffix = strftime('%d-%H-%M-%S', gmtime()) 209 | 210 | # Uncomment below when you're ready to execute workflow on deployment 211 | # autopilot_ml_workflow.execute( 212 | # inputs={ 213 | # 'AutoMLJobName': f'autopilot-workflow-job-{timestamp_suffix}', 214 | # 'ModelName': f'autopilot-workflow-{timestamp_suffix}-model', 215 | # 'EndpointConfigName': f'autopilot-workflow-{timestamp_suffix}-endpoint-config', 216 | # 'EndpointName': f'autopilot-workflow-{timestamp_suffix}-endpoint', 217 | # 'S3InputData': '', 218 | # 'TargetColumnName': '', 219 | # 'S3OutputData': '', 220 | # 'IamRole': sagemaker_exec_role, 221 | # } 222 | # ) 223 | 224 | -------------------------------------------------------------------------------- /workflow/utils.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | 3 | ssm_client = boto3.client('ssm') 4 | 5 | 6 | def get_workflow_role() -> str: 7 | """ 8 | :return: 9 | """ 10 | response = ssm_client.get_parameter( 11 | Name='AutopilotWorkflowExecRole', 12 | ) 13 | return response['Parameter']['Value'] 14 | 15 | 16 | def get_api_codebuild_project() -> str: 17 | """ 18 | :return: 19 | """ 20 | response = ssm_client.get_parameter( 21 | Name='RestApiBuildProject', 22 | ) 23 | return response['Parameter']['Value'] 24 | 25 | 26 | def get_sagemaker_execution_role(): 27 | """ 28 | Convert SageMaker Autopilot Inference Containers to PipelineModel 29 | :return: 30 | """ 31 | response = ssm_client.get_parameter( 32 | Name='SageMakerExecutionRole', 33 | ) 34 | return response['Parameter']['Value'] 35 | 36 | 37 | def save_state_machine_arn(state_machine_arn: str): 38 | """ 39 | Save state machine ARN to Amazon SSM Parameter Store 40 | :param state_machine_arn: 41 | :return: 42 | """ 43 | response = ssm_client.put_parameter( 44 | Name='AutopilotStateMachineWorkflowArn', 45 | Description='SageMaker Autopilot Step Function State machine ARN', 46 | Value=state_machine_arn, 47 | Type='String', 48 | Overwrite=True 49 | ) 50 | print(response) 51 | return None 52 | 53 | --------------------------------------------------------------------------------