├── modelbuild_pipeline ├── LICENSE ├── pipelines │ ├── __init__.py │ ├── customer_churn │ │ ├── __init__.py │ │ ├── evaluate.py │ │ ├── .ipynb_checkpoints │ │ │ ├── evaluate-checkpoint.py │ │ │ ├── preprocess-checkpoint.py │ │ │ └── pipeline-checkpoint.py │ │ ├── preprocess.py │ │ └── pipeline.py │ ├── .ipynb_checkpoints │ │ ├── __init__-checkpoint.py │ │ ├── __version__-checkpoint.py │ │ ├── _utils-checkpoint.py │ │ ├── get_pipeline_definition-checkpoint.py │ │ └── run_pipeline-checkpoint.py │ ├── __version__.py │ ├── _utils.py │ ├── get_pipeline_definition.py │ └── run_pipeline.py ├── img │ └── pipeline-full.png ├── setup.cfg ├── tests │ ├── test_pipelines.py │ └── .ipynb_checkpoints │ │ └── test_pipelines-checkpoint.py ├── tox.ini ├── setup.py ├── CONTRIBUTING.md └── README.md ├── images ├── K8.jpg ├── Cloud9.png ├── Architecture.jpg ├── c9attachrole.png ├── c9disableiam.png ├── createrole.png ├── Terraform-Org.png ├── Terraform-Vars.png ├── Terraform-init.png ├── c9instancerole.png ├── Sign_up_TFCloud.png ├── Terraform-Cloud-1.png ├── Terraform-Login.png ├── terraform_pipeline.jpg ├── Terraform-Workspace-1.png └── Terraform-Workspace-2.png ├── terraform ├── img │ └── terraform_pipeline.jpg ├── s3.tf ├── main.tf ├── events.tf ├── modeldeploy_hooks.tf ├── modelbuild_hooks.tf ├── modelbuild_buildspec.yml ├── variables.tf ├── modeldeploy_buildspec.yml ├── modelbuild_ci_pipeline.tf ├── modeldeploy_testbuild.tf ├── modelbuild_codebuild.tf ├── modeldeploy_codebuild.tf ├── modeldeploy_cd_pipline.tf └── iam_roles.tf ├── modeldeploy_pipeline ├── prod-config.json ├── staging-config.json ├── test │ ├── test_buildspec.yml │ ├── test_buildspec_singleaccount.yml │ ├── test.py │ └── test_singleaccount.py ├── fix_model_permission.py ├── endpoint-config-template.yml ├── README.md ├── setup.py └── build.py ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── CONTRIBUTING.md ├── README.md └── Notebooks ├── SageMaker_Customer_Churn_XGB_end2end.ipynb └── SageMaker_Customer_Churn_XGB_Pipeline.ipynb /modelbuild_pipeline/LICENSE: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/customer_churn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/.ipynb_checkpoints/__init__-checkpoint.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images/K8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/K8.jpg -------------------------------------------------------------------------------- /images/Cloud9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/Cloud9.png -------------------------------------------------------------------------------- /images/Architecture.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/Architecture.jpg -------------------------------------------------------------------------------- /images/c9attachrole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/c9attachrole.png -------------------------------------------------------------------------------- /images/c9disableiam.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/c9disableiam.png -------------------------------------------------------------------------------- /images/createrole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/createrole.png -------------------------------------------------------------------------------- /images/Terraform-Org.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/Terraform-Org.png -------------------------------------------------------------------------------- /images/Terraform-Vars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/Terraform-Vars.png -------------------------------------------------------------------------------- /images/Terraform-init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/Terraform-init.png -------------------------------------------------------------------------------- /images/c9instancerole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/c9instancerole.png -------------------------------------------------------------------------------- /images/Sign_up_TFCloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/Sign_up_TFCloud.png -------------------------------------------------------------------------------- /images/Terraform-Cloud-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/Terraform-Cloud-1.png -------------------------------------------------------------------------------- /images/Terraform-Login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/Terraform-Login.png -------------------------------------------------------------------------------- /images/terraform_pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/terraform_pipeline.jpg -------------------------------------------------------------------------------- /images/Terraform-Workspace-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/Terraform-Workspace-1.png -------------------------------------------------------------------------------- /images/Terraform-Workspace-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/images/Terraform-Workspace-2.png -------------------------------------------------------------------------------- /terraform/img/terraform_pipeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/terraform/img/terraform_pipeline.jpg -------------------------------------------------------------------------------- /terraform/s3.tf: -------------------------------------------------------------------------------- 1 | 2 | 3 | resource "aws_s3_bucket" "artifacts_bucket" { 4 | bucket = var.artifacts_bucket_name 5 | force_destroy = true 6 | } 7 | -------------------------------------------------------------------------------- /modelbuild_pipeline/img/pipeline-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-mlops-pipelines-terraform/HEAD/modelbuild_pipeline/img/pipeline-full.png -------------------------------------------------------------------------------- /modeldeploy_pipeline/prod-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "Parameters": { 3 | "StageName": "prod-0306", 4 | "EndpointInstanceCount": "1", 5 | "EndpointInstanceType": "ml.m5.large" 6 | } 7 | } -------------------------------------------------------------------------------- /modeldeploy_pipeline/staging-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "Parameters": { 3 | "StageName": "staging-0306", 4 | "EndpointInstanceCount": "1", 5 | "EndpointInstanceType": "ml.m5.large" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.terraform/** 2 | terraform.tfstate 3 | terraform.tfstate.backup 4 | .DS_Store 5 | petclinic/target/** 6 | terraform/.terraform.lock.hcl 7 | **/*.iml 8 | **/*/target 9 | .idea/ 10 | -------------------------------------------------------------------------------- /modelbuild_pipeline/setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | addopts = 3 | -vv 4 | testpaths = tests 5 | 6 | [aliases] 7 | test=pytest 8 | 9 | [metadata] 10 | description-file = README.md 11 | license_file = LICENSE 12 | 13 | [wheel] 14 | universal = 1 15 | -------------------------------------------------------------------------------- /modelbuild_pipeline/tests/test_pipelines.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.xfail 5 | def test_that_you_wrote_tests(): 6 | assert False, "No tests written" 7 | 8 | 9 | def test_pipelines_importable(): 10 | import pipelines # noqa: F401 11 | -------------------------------------------------------------------------------- /modelbuild_pipeline/tests/.ipynb_checkpoints/test_pipelines-checkpoint.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.xfail 5 | def test_that_you_wrote_tests(): 6 | assert False, "No tests written" 7 | 8 | 9 | def test_pipelines_importable(): 10 | import pipelines # noqa: F401 11 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/__version__.py: -------------------------------------------------------------------------------- 1 | """Metadata for the pipelines package.""" 2 | 3 | __title__ = "pipelines" 4 | __description__ = "pipelines - template package" 5 | __version__ = "0.0.1" 6 | __author__ = "" 7 | __author_email__ = "" 8 | __license__ = "Apache 2.0" 9 | __url__ = "" 10 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/.ipynb_checkpoints/__version__-checkpoint.py: -------------------------------------------------------------------------------- 1 | """Metadata for the pipelines package.""" 2 | 3 | __title__ = "pipelines" 4 | __description__ = "pipelines - template package" 5 | __version__ = "0.0.1" 6 | __author__ = "" 7 | __author_email__ = "" 8 | __license__ = "Apache 2.0" 9 | __url__ = "" 10 | -------------------------------------------------------------------------------- /terraform/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = var.region 3 | } 4 | 5 | provider "github" { 6 | token = var.github_token 7 | owner = var.repository_owner 8 | version = "~> 4.0.0" 9 | } 10 | 11 | provider "random" { 12 | version = "~> 3.0.0" 13 | } 14 | 15 | provider "template" { 16 | version = "~> 2.2.0" 17 | } 18 | 19 | terraform { 20 | cloud { 21 | organization = "" 22 | 23 | workspaces { 24 | name = "" 25 | } 26 | } 27 | } -------------------------------------------------------------------------------- /modeldeploy_pipeline/test/test_buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | phases: 4 | install: 5 | runtime-versions: 6 | python: 3.8 7 | build: 8 | commands: 9 | # Call the test python code 10 | - python test/test.py --import-build-config $CODEBUILD_SRC_DIR_BuildArtifact/staging-config-export.json --export-test-results ${EXPORT_TEST_RESULTS} 11 | # Show the test results file 12 | - cat ${EXPORT_TEST_RESULTS} 13 | 14 | artifacts: 15 | files: 16 | - ${EXPORT_TEST_RESULTS} 17 | -------------------------------------------------------------------------------- /modeldeploy_pipeline/test/test_buildspec_singleaccount.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | phases: 4 | install: 5 | runtime-versions: 6 | python: 3.8 7 | build: 8 | commands: 9 | # Call the test python code 10 | - python test/test.py --import-build-config $CODEBUILD_SRC_DIR_BuildArtifact/staging-config-export.json --export-test-results ${EXPORT_TEST_RESULTS} 11 | # Show the test results file 12 
| - cat ${EXPORT_TEST_RESULTS} 13 | 14 | artifacts: 15 | files: 16 | - ${EXPORT_TEST_RESULTS} 17 | -------------------------------------------------------------------------------- /terraform/events.tf: -------------------------------------------------------------------------------- 1 | resource "aws_cloudwatch_event_rule" "sm_model_registry_rule" { 2 | name = "sm-model-registry-event-rule" 3 | description = "Capture new model registry" 4 | 5 | event_pattern = < check only Python 3.7 compatible 31 | FI50, 32 | FI51, 33 | FI52, 34 | FI53, 35 | FI54, 36 | FI55, 37 | FI56, 38 | FI57, 39 | W503 40 | 41 | require-code = True 42 | 43 | [testenv] 44 | commands = 45 | pytest --cov=pipelines --cov-append {posargs} 46 | coverage report --fail-under=0 47 | deps = .[test] 48 | depends = 49 | {py36,py37,py38}: clean 50 | 51 | [testenv:flake8] 52 | skipdist = true 53 | skip_install = true 54 | deps = flake8 55 | commands = flake8 56 | 57 | [testenv:black-format] 58 | deps = black 59 | commands = 60 | black -l 100 ./ 61 | 62 | [testenv:black-check] 63 | deps = black 64 | commands = 65 | black -l 100 --check ./ 66 | 67 | [testenv:clean] 68 | skip_install = true 69 | deps = coverage 70 | commands = coverage erase 71 | 72 | [testenv:pydocstyle] 73 | deps = pydocstyle 74 | commands = 75 | pydocstyle pipelines 76 | -------------------------------------------------------------------------------- /modelbuild_pipeline/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import setuptools 3 | 4 | 5 | about = {} 6 | here = os.path.abspath(os.path.dirname(__file__)) 7 | with open(os.path.join(here, "pipelines", "__version__.py")) as f: 8 | exec(f.read(), about) 9 | 10 | 11 | with open("README.md", "r") as f: 12 | readme = f.read() 13 | 14 | 15 | required_packages = ["sagemaker"] 16 | extras = { 17 | "test": [ 18 | "black", 19 | "coverage", 20 | "flake8", 21 | "mock", 22 | "pydocstyle", 23 | "pytest", 24 | "pytest-cov", 25 | "sagemaker", 26 | "tox", 27 | ] 28 | } 29 | setuptools.setup( 30 | name=about["__title__"], 31 | description=about["__description__"], 32 | version=about["__version__"], 33 | author=about["__author__"], 34 | author_email=["__author_email__"], 35 | long_description=readme, 36 | long_description_content_type="text/markdown", 37 | url=about["__url__"], 38 | license=about["__license__"], 39 | packages=setuptools.find_packages(), 40 | include_package_data=True, 41 | python_requires=">=3.6", 42 | install_requires=required_packages, 43 | extras_require=extras, 44 | entry_points={ 45 | "console_scripts": [ 46 | "get-pipeline-definition=pipelines.get_pipeline_definition:main", 47 | "run-pipeline=pipelines.run_pipeline:main", 48 | ] 49 | }, 50 | classifiers=[ 51 | "Development Status :: 3 - Alpha", 52 | "Intended Audience :: Developers", 53 | "Natural Language :: English", 54 | "Programming Language :: Python", 55 | "Programming Language :: Python :: 3", 56 | "Programming Language :: Python :: 3.6", 57 | "Programming Language :: Python :: 3.7", 58 | "Programming Language :: Python :: 3.8", 59 | ], 60 | ) 61 | -------------------------------------------------------------------------------- /modeldeploy_pipeline/fix_model_permission.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import boto3 3 | import json 4 | import os 5 | import logging 6 | from botocore.exceptions import ClientError 7 | 8 | # this script is a workaround to fix some permission issues with the file 9 | # created for the 
model and stored in an S3 bucket 10 | 11 | s3_client = boto3.client('s3') 12 | sm_client = boto3.client('sagemaker') 13 | 14 | if __name__ == "__main__": 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--log-level", type=str, default=os.environ.get("LOGLEVEL", "INFO").upper()) 17 | parser.add_argument("--prod-config-file", type=str, default="prod-config-export.json") 18 | 19 | args, _ = parser.parse_known_args() 20 | 21 | # Configure logging to output the line number and message 22 | log_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 23 | logging.basicConfig(format=log_format, level=args.log_level) 24 | 25 | # first retrieve the name of the package that will be deployed 26 | model_package_name = None 27 | with open(args.prod_config_file, 'r') as f: 28 | for param in json.loads(f.read()): 29 | if param.get('ParameterKey') == 'ModelPackageName': 30 | model_package_name = param.get('ParameterValue') 31 | if model_package_name is None: 32 | raise Exception("Configuration file must include ModelPackageName parameter") 33 | 34 | # then, describe it to get the S3 URL of the model 35 | resp = sm_client.describe_model_package(ModelPackageName=model_package_name) 36 | model_data_url = resp['InferenceSpecification']['Containers'][0]['ModelDataUrl'] 37 | _,_,bucket_name,key = model_data_url.split('/', 3) 38 | 39 | # finally, copy the file to override the permissions 40 | with open('/tmp/model.tar.gz', 'wb') as data: 41 | s3_client.download_fileobj(bucket_name, key, data) 42 | with open('/tmp/model.tar.gz', 'rb') as data: 43 | s3_client.upload_fileobj(data, bucket_name, key) 44 | 45 | -------------------------------------------------------------------------------- /modeldeploy_pipeline/endpoint-config-template.yml: -------------------------------------------------------------------------------- 1 | Description: 2 | This template is built and deployed by the infrastructure pipeline in various stages (staging/production) as required. 3 | It specifies the resources that need to be created, like the SageMaker Endpoint. It can be extended to include resources like 4 | AutoScalingPolicy, API Gateway, etc,. as required. 5 | Parameters: 6 | SageMakerProjectName: 7 | Type: String 8 | Description: Name of the project 9 | MinLength: 1 10 | MaxLength: 32 11 | AllowedPattern: ^[a-zA-Z](-*[a-zA-Z0-9])* 12 | ModelExecutionRoleArn: 13 | Type: String 14 | Description: Execution role used for deploying the model. 15 | ModelPackageName: 16 | Type: String 17 | Description: The trained Model Package Name 18 | StageName: 19 | Type: String 20 | Description: 21 | The name for a project pipeline stage, such as Staging or Prod, for 22 | which resources are provisioned and deployed. 23 | EndpointInstanceCount: 24 | Type: Number 25 | Description: Number of instances to launch for the endpoint. 26 | MinValue: 1 27 | EndpointInstanceType: 28 | Type: String 29 | Description: The ML compute instance type for the endpoint. 
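# Note: the Description above says this template can be extended with resources such as
# an AutoScalingPolicy. The commented-out block below is a purely illustrative sketch of
# one possible extension (a scalable target for the endpoint variant); the logical name,
# the MaxCapacity value, and reliance on the Application Auto Scaling service-linked role
# are assumptions, not part of the original template.
#
# AutoScalingTarget:
#   Type: AWS::ApplicationAutoScaling::ScalableTarget
#   Properties:
#     MinCapacity: !Ref EndpointInstanceCount
#     MaxCapacity: 4
#     ResourceId: !Sub endpoint/${Endpoint.EndpointName}/variant/AllTraffic
#     ScalableDimension: sagemaker:variant:DesiredInstanceCount
#     ServiceNamespace: sagemaker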
30 | 31 | Resources: 32 | Model: 33 | Type: AWS::SageMaker::Model 34 | Properties: 35 | PrimaryContainer: 36 | ModelPackageName: !Ref ModelPackageName 37 | ExecutionRoleArn: !Ref ModelExecutionRoleArn 38 | 39 | EndpointConfig: 40 | Type: AWS::SageMaker::EndpointConfig 41 | Properties: 42 | ProductionVariants: 43 | - InitialInstanceCount: !Ref EndpointInstanceCount 44 | InitialVariantWeight: 1.0 45 | InstanceType: !Ref EndpointInstanceType 46 | ModelName: !GetAtt Model.ModelName 47 | VariantName: AllTraffic 48 | 49 | Endpoint: 50 | Type: AWS::SageMaker::Endpoint 51 | Properties: 52 | EndpointName: !Sub ${SageMakerProjectName}-${StageName} 53 | EndpointConfigName: !GetAtt EndpointConfig.EndpointConfigName 54 | -------------------------------------------------------------------------------- /terraform/modeldeploy_testbuild.tf: -------------------------------------------------------------------------------- 1 | 2 | resource "aws_codebuild_project" "tf_mlops_testbuild" { 3 | badge_enabled = false 4 | build_timeout = 60 5 | name = "tf-mlops-testbuild" 6 | queued_timeout = 480 7 | service_role = aws_iam_role.tf_mlops_role.arn 8 | tags = { 9 | Environment = var.env 10 | } 11 | 12 | artifacts { 13 | encryption_disabled = false 14 | name = "tf-mlops-testbuild-${var.env}" 15 | override_artifact_name = false 16 | packaging = "NONE" 17 | type = "CODEPIPELINE" 18 | } 19 | 20 | environment { 21 | compute_type = "BUILD_GENERAL1_SMALL" 22 | image = "aws/codebuild/amazonlinux2-x86_64-standard:2.0" 23 | image_pull_credentials_type = "CODEBUILD" 24 | privileged_mode = false 25 | type = "LINUX_CONTAINER" 26 | environment_variable { 27 | name = "environment" 28 | type = "PLAINTEXT" 29 | value = var.env 30 | } 31 | environment_variable { 32 | name = "SAGEMAKER_PROJECT_NAME" 33 | type = "PLAINTEXT" 34 | value = var.project_name 35 | } 36 | environment_variable { 37 | name = "SAGEMAKER_PROJECT_ID" 38 | type = "PLAINTEXT" 39 | value = var.project_id 40 | } 41 | environment_variable { 42 | name = "AWS_REGION" 43 | type = "PLAINTEXT" 44 | value = var.region 45 | } 46 | environment_variable { 47 | name = "BUILD_CONFIG" 48 | type = "PLAINTEXT" 49 | value = "staging-config-export.json" 50 | } 51 | environment_variable { 52 | name = "EXPORT_TEST_RESULTS" 53 | type = "PLAINTEXT" 54 | value = "test-results.json" 55 | } 56 | } 57 | 58 | logs_config { 59 | cloudwatch_logs { 60 | status = "ENABLED" 61 | } 62 | 63 | s3_logs { 64 | encryption_disabled = false 65 | status = "DISABLED" 66 | } 67 | } 68 | 69 | source { 70 | buildspec = "test/test_buildspec.yml" 71 | git_clone_depth = 0 72 | insecure_ssl = false 73 | report_build_status = false 74 | type = "CODEPIPELINE" 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/get_pipeline_definition.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | """A CLI to get pipeline definitions from pipeline modules.""" 14 | from __future__ import absolute_import 15 | 16 | import argparse 17 | import sys 18 | 19 | from pipelines._utils import get_pipeline_driver 20 | 21 | 22 | def main(): # pragma: no cover 23 | """The main harness that gets the pipeline definition JSON. 24 | 25 | Prints the json to stdout or saves to file. 26 | """ 27 | parser = argparse.ArgumentParser("Gets the pipeline definition for the pipeline script.") 28 | 29 | parser.add_argument( 30 | "-n", 31 | "--module-name", 32 | dest="module_name", 33 | type=str, 34 | help="The module name of the pipeline to import.", 35 | ) 36 | parser.add_argument( 37 | "-f", 38 | "--file-name", 39 | dest="file_name", 40 | type=str, 41 | default=None, 42 | help="The file to output the pipeline definition json to.", 43 | ) 44 | parser.add_argument( 45 | "-kwargs", 46 | "--kwargs", 47 | dest="kwargs", 48 | default=None, 49 | help="Dict string of keyword arguments for the pipeline generation (if supported)", 50 | ) 51 | args = parser.parse_args() 52 | 53 | if args.module_name is None: 54 | parser.print_help() 55 | sys.exit(2) 56 | 57 | try: 58 | pipeline = get_pipeline_driver(args.module_name, args.kwargs) 59 | content = pipeline.definition() 60 | if args.file_name: 61 | with open(args.file_name, "w") as f: 62 | f.write(content) 63 | else: 64 | print(content) 65 | except Exception as e: # pylint: disable=W0703 66 | print(f"Exception: {e}") 67 | sys.exit(1) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/.ipynb_checkpoints/get_pipeline_definition-checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | """A CLI to get pipeline definitions from pipeline modules.""" 14 | from __future__ import absolute_import 15 | 16 | import argparse 17 | import sys 18 | 19 | from pipelines._utils import get_pipeline_driver 20 | 21 | 22 | def main(): # pragma: no cover 23 | """The main harness that gets the pipeline definition JSON. 24 | 25 | Prints the json to stdout or saves to file. 
26 | """
27 | parser = argparse.ArgumentParser("Gets the pipeline definition for the pipeline script.")
28 |
29 | parser.add_argument(
30 | "-n",
31 | "--module-name",
32 | dest="module_name",
33 | type=str,
34 | help="The module name of the pipeline to import.",
35 | )
36 | parser.add_argument(
37 | "-f",
38 | "--file-name",
39 | dest="file_name",
40 | type=str,
41 | default=None,
42 | help="The file to output the pipeline definition json to.",
43 | )
44 | parser.add_argument(
45 | "-kwargs",
46 | "--kwargs",
47 | dest="kwargs",
48 | default=None,
49 | help="Dict string of keyword arguments for the pipeline generation (if supported)",
50 | )
51 | args = parser.parse_args()
52 |
53 | if args.module_name is None:
54 | parser.print_help()
55 | sys.exit(2)
56 |
57 | try:
58 | pipeline = get_pipeline_driver(args.module_name, args.kwargs)
59 | content = pipeline.definition()
60 | if args.file_name:
61 | with open(args.file_name, "w") as f:
62 | f.write(content)
63 | else:
64 | print(content)
65 | except Exception as e: # pylint: disable=W0703
66 | print(f"Exception: {e}")
67 | sys.exit(1)
68 |
69 |
70 | if __name__ == "__main__":
71 | main()
72 |
-------------------------------------------------------------------------------- /modeldeploy_pipeline/README.md: --------------------------------------------------------------------------------
1 | ## MLOps for SageMaker Endpoint Deployment
2 |
3 | The code is modified from this example:
4 | https://aws.amazon.com/blogs/machine-learning/building-automating-managing-and-scaling-ml-workflows-using-amazon-sagemaker-pipelines/
5 |
6 | This is a sample code repository demonstrating how you can organize your code for deploying a real-time inference Endpoint infrastructure.
7 |
8 | This code repository has the code to find the latest approved ModelPackage for the associated ModelPackageGroup and automatically deploy it to the Endpoint on detecting a change (`build.py`). This code repository also contains the CloudFormation template that defines the Endpoints as infrastructure. It also has configuration files associated with the `staging` and `prod` stages.
9 |
10 | Upon triggering a deployment, the CodePipeline pipeline will deploy 2 Endpoints - `staging` and `prod`. After the first deployment is completed, the CodePipeline waits for a manual approval step for promotion to the prod stage. You will need to go to the AWS CodePipeline console to complete this step.
11 |
12 | You own this code, and you can modify this template as you need it and add additional tests for your custom validation.
13 |
14 | A description of some of the artifacts is provided below:
15 |
16 |
17 | ## Layout of the SageMaker ModelDeploy Pipeline
18 | ```
19 | .
20 | ├── README.md
21 | ├── build.py
22 | ├── endpoint-config-template.yml
23 | ├── prod-config.json
24 | ├── staging-config.json
25 | └── test
26 | ├── test.py
27 | └── test_buildspec.yml
28 | ```
29 |
30 | `build.py`
31 | - this Python file contains code to get the latest approved package ARN and export the staging and prod configuration files. It is invoked from the Build stage. A minimal sketch of the package lookup is shown after the `staging-config.json` description below.
32 |
33 | `endpoint-config-template.yml`
34 | - this CloudFormation template file is packaged by the build step in the CodePipeline and is deployed in different stages.
35 |
36 | `staging-config.json`
37 | - this configuration file is used to customize `staging` stage in the pipeline. You can configure the instance type, instance count here.
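As noted in the `build.py` description above, the Build stage first looks up the most recently approved model package in the model package group. The snippet below is a minimal, illustrative sketch of that lookup only and is not the full `build.py`; the model package group name is a placeholder, and standard `boto3` credentials and region configuration are assumed.

```
# Illustrative sketch only -- not the actual build.py. The group name below is a
# placeholder; boto3 credentials and region are assumed to be configured.
import boto3

sm_client = boto3.client("sagemaker")


def get_latest_approved_package_arn(model_package_group_name):
    """Return the ARN of the most recently created approved model package."""
    response = sm_client.list_model_packages(
        ModelPackageGroupName=model_package_group_name,
        ModelApprovalStatus="Approved",
        SortBy="CreationTime",
        SortOrder="Descending",
        MaxResults=1,
    )
    packages = response["ModelPackageSummaryList"]
    if not packages:
        raise Exception(f"No approved model packages found for {model_package_group_name}")
    return packages[0]["ModelPackageArn"]


if __name__ == "__main__":
    print(get_latest_approved_package_arn("customer-churn-model-group"))  # placeholder name
```

The exported configuration files then carry this ARN as the `ModelPackageName` parameter; `fix_model_permission.py`, for example, reads `ModelPackageName` from `prod-config-export.json`, and `endpoint-config-template.yml` consumes it when the stack is deployed.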
38 | 39 | `prod-config.json` 40 | - this configuration file is used to customize `prod` stage in the pipeline. You can configure the instance type, instance count here. 41 | 42 | `test\test_buildspec.yml` 43 | - this file is used by the CodePipeline's `staging` stage to run the test code of the following python file 44 | 45 | `test\test.py` 46 | - this python file contains code to describe and invoke the staging endpoint. You can customize to add more tests here. 47 | -------------------------------------------------------------------------------- /terraform/modelbuild_codebuild.tf: -------------------------------------------------------------------------------- 1 | data "template_file" "buildspec" { 2 | template = file("modelbuild_buildspec.yml") 3 | vars = { 4 | env = var.env 5 | SAGEMAKER_PROJECT_NAME=var.project_name 6 | SAGEMAKER_PROJECT_ID=var.project_id 7 | ARTIFACT_BUCKET=var.artifacts_bucket_name 8 | SAGEMAKER_PIPELINE_ROLE_ARN=aws_iam_role.tf_mlops_role.arn 9 | AWS_REGION=var.region 10 | SAGEMAKER_PROJECT_NAME_ID="${var.project_name}-${var.project_id}" 11 | } 12 | } 13 | 14 | resource "aws_codebuild_project" "tf_mlops_modelbuild" { 15 | badge_enabled = false 16 | build_timeout = 60 17 | name = "tf-mlops-modelbuild" 18 | queued_timeout = 480 19 | service_role = aws_iam_role.tf_mlops_role.arn 20 | tags = { 21 | Environment = var.env 22 | } 23 | 24 | artifacts { 25 | encryption_disabled = false 26 | name = "tf-mlops-modelbuild-${var.env}" 27 | override_artifact_name = false 28 | packaging = "NONE" 29 | type = "CODEPIPELINE" 30 | } 31 | 32 | environment { 33 | compute_type = "BUILD_GENERAL1_SMALL" 34 | image = "aws/codebuild/amazonlinux2-x86_64-standard:2.0" 35 | image_pull_credentials_type = "CODEBUILD" 36 | privileged_mode = false 37 | type = "LINUX_CONTAINER" 38 | environment_variable { 39 | name = "environment" 40 | type = "PLAINTEXT" 41 | value = var.env 42 | } 43 | environment_variable { 44 | name = "SAGEMAKER_PROJECT_NAME" 45 | type = "PLAINTEXT" 46 | value = var.project_name 47 | } 48 | environment_variable { 49 | name = "SAGEMAKER_PROJECT_ID" 50 | type = "PLAINTEXT" 51 | value = var.project_id 52 | } 53 | environment_variable { 54 | name = "ARTIFACT_BUCKET" 55 | type = "PLAINTEXT" 56 | value = var.artifacts_bucket_name 57 | } 58 | environment_variable { 59 | name = "SAGEMAKER_PIPELINE_ROLE_ARN" 60 | type = "PLAINTEXT" 61 | value = aws_iam_role.tf_mlops_role.arn 62 | } 63 | environment_variable { 64 | name = "AWS_REGION" 65 | type = "PLAINTEXT" 66 | value = var.region 67 | } 68 | } 69 | 70 | logs_config { 71 | cloudwatch_logs { 72 | status = "ENABLED" 73 | } 74 | 75 | s3_logs { 76 | encryption_disabled = false 77 | status = "DISABLED" 78 | } 79 | } 80 | 81 | source { 82 | buildspec = data.template_file.buildspec.rendered 83 | git_clone_depth = 0 84 | insecure_ssl = false 85 | report_build_status = false 86 | type = "CODEPIPELINE" 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /modelbuild_pipeline/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Set up your dev env 4 | 5 | * install virtualenv and virtualenvwrapper globally 6 | * do anything you want to your .zshrc for virtualenv (readthedocs) 7 | * create a virtualenv using the latest stable python. 
8 | * enable the virtual env 9 | * install the package deps in dependencies 10 | 11 | on ubuntu 18.04, it's all something like this 12 | ``` 13 | ❯ pip3 install virtualenv 14 | 15 | ❯ pip3 install virtualenvwrapper 16 | 17 | # this is for when you need to make the virtualenv 18 | ❯ mkvirtualenv -p /usr/bin/python3.8 your-project 19 | 20 | # or, if the virtualenv is already there and you want to use it 21 | ❯ workon your-project 22 | 23 | # now that we're in our virtualenv, use the virtualenv pip to install the required packages 24 | ❯ pip install . 25 | 26 | # but wait! we want to be able to run tests, so go ahead and install the test dependencies too 27 | ❯ pip install .[test] 28 | ``` 29 | 30 | so after this, your virtualenv is ready to do all the fun stuff in a safe way 31 | 32 | ## Running basic script 33 | 34 | let's execute the command line script to get a pipeline definition from one of the pipeline scripts in the project. 35 | 36 | ``` 37 | ❯ workon your-project 38 | 39 | ❯ get-pipeline-definition --help 40 | usage: Gets the pipeline definition for the pipeline script. [-h] [-n MODULE_NAME] [-kwargs KWARGS] 41 | 42 | optional arguments: 43 | -h, --help show this help message and exit 44 | -n MODULE_NAME, --module-name MODULE_NAME 45 | The module name of the pipeline to import. 46 | -kwargs KWARGS, --kwargs KWARGS 47 | Dict string of keyword arguments for the pipeline generation (if supported) 48 | ``` 49 | 50 | ## Running tests 51 | 52 | start up your virtualenv again and let's get to testing 53 | 54 | ``` 55 | ❯ workon your-project 56 | 57 | ❯ python -m pytest 58 | ============================================================= test session starts ============================================================= 59 | cachedir: .pytest_cache 60 | plugins: cov-2.10.1 61 | collected 2 items 62 | 63 | tests/test_pipelines.py::test_that_you_wrote_tests XFAIL [ 50%] 64 | tests/test_pipelines.py::test_pipelines_importable PASSED [100%] 65 | 66 | ======================================================== 1 passed, 1 xfailed in 0.04s ========================================================= 67 | ``` 68 | 69 | w00t! there you go. have fun developing! 70 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/customer_churn/evaluate.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | """Evaluation script for measuring model accuracy.""" 14 | 15 | import json 16 | import os 17 | import tarfile 18 | import logging 19 | import pickle 20 | 21 | import pandas as pd 22 | import xgboost 23 | 24 | logger = logging.getLogger() 25 | logger.setLevel(logging.INFO) 26 | logger.addHandler(logging.StreamHandler()) 27 | 28 | # May need to import additional metrics depending on what you are measuring. 
29 | # See https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html 30 | from sklearn.metrics import classification_report, roc_auc_score, accuracy_score 31 | 32 | 33 | if __name__ == "__main__": 34 | model_path = "/opt/ml/processing/model/model.tar.gz" 35 | with tarfile.open(model_path) as tar: 36 | tar.extractall(path="..") 37 | 38 | logger.debug("Loading xgboost model.") 39 | model = pickle.load(open("xgboost-model", "rb")) 40 | 41 | print("Loading test input data") 42 | test_path = "/opt/ml/processing/test/test.csv" 43 | df = pd.read_csv(test_path, header=None) 44 | 45 | logger.debug("Reading test data.") 46 | y_test = df.iloc[:, 0].to_numpy() 47 | df.drop(df.columns[0], axis=1, inplace=True) 48 | X_test = xgboost.DMatrix(df.values) 49 | 50 | logger.info("Performing predictions against test data.") 51 | predictions = model.predict(X_test) 52 | 53 | print("Creating classification evaluation report") 54 | acc = accuracy_score(y_test, predictions.round()) 55 | auc = roc_auc_score(y_test, predictions.round()) 56 | 57 | # The metrics reported can change based on the model used, but it must be a specific name per (https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html) 58 | report_dict = { 59 | "binary_classification_metrics": { 60 | "accuracy": { 61 | "value": acc, 62 | "standard_deviation" : "NaN" 63 | }, 64 | "auc" : { 65 | "value" : auc, 66 | "standard_deviation": "NaN" 67 | }, 68 | }, 69 | } 70 | evaluation_output_path = '/opt/ml/processing/evaluation/evaluation.json' 71 | with open(evaluation_output_path, 'w') as f: 72 | f.write(json.dumps(report_dict)) 73 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/customer_churn/.ipynb_checkpoints/evaluate-checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | """Evaluation script for measuring model accuracy.""" 14 | 15 | import json 16 | import os 17 | import tarfile 18 | import logging 19 | import pickle 20 | 21 | import pandas as pd 22 | import xgboost 23 | 24 | logger = logging.getLogger() 25 | logger.setLevel(logging.INFO) 26 | logger.addHandler(logging.StreamHandler()) 27 | 28 | # May need to import additional metrics depending on what you are measuring. 
29 | # See https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html 30 | from sklearn.metrics import classification_report, roc_auc_score, accuracy_score 31 | 32 | 33 | if __name__ == "__main__": 34 | model_path = "/opt/ml/processing/model/model.tar.gz" 35 | with tarfile.open(model_path) as tar: 36 | tar.extractall(path="..") 37 | 38 | logger.debug("Loading xgboost model.") 39 | model = pickle.load(open("xgboost-model", "rb")) 40 | 41 | print("Loading test input data") 42 | test_path = "/opt/ml/processing/test/test.csv" 43 | df = pd.read_csv(test_path, header=None) 44 | 45 | logger.debug("Reading test data.") 46 | y_test = df.iloc[:, 0].to_numpy() 47 | df.drop(df.columns[0], axis=1, inplace=True) 48 | X_test = xgboost.DMatrix(df.values) 49 | 50 | logger.info("Performing predictions against test data.") 51 | predictions = model.predict(X_test) 52 | 53 | print("Creating classification evaluation report") 54 | acc = accuracy_score(y_test, predictions.round()) 55 | auc = roc_auc_score(y_test, predictions.round()) 56 | 57 | # The metrics reported can change based on the model used, but it must be a specific name per (https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html) 58 | report_dict = { 59 | "binary_classification_metrics": { 60 | "accuracy": { 61 | "value": acc, 62 | "standard_deviation" : "NaN" 63 | }, 64 | "auc" : { 65 | "value" : auc, 66 | "standard_deviation": "NaN" 67 | }, 68 | }, 69 | } 70 | evaluation_output_path = '/opt/ml/processing/evaluation/evaluation.json' 71 | with open(evaluation_output_path, 'w') as f: 72 | f.write(json.dumps(report_dict)) 73 | -------------------------------------------------------------------------------- /modeldeploy_pipeline/test/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | 6 | import boto3 7 | from botocore.exceptions import ClientError 8 | 9 | logger = logging.getLogger(__name__) 10 | sm_client = boto3.client("sagemaker") 11 | 12 | 13 | def invoke_endpoint(endpoint_name): 14 | """ 15 | Add custom logic here to invoke the endpoint and validate reponse 16 | """ 17 | return {"endpoint_name": endpoint_name, "success": True} 18 | 19 | 20 | def test_endpoint(endpoint_name): 21 | """ 22 | Describe the endpoint and ensure InSerivce, then invoke endpoint. Raises exception on error. 
23 | """ 24 | error_message = None 25 | try: 26 | # Ensure endpoint is in service 27 | response = sm_client.describe_endpoint(EndpointName=endpoint_name) 28 | status = response["EndpointStatus"] 29 | if status != "InService": 30 | error_message = f"SageMaker endpoint: {endpoint_name} status: {status} not InService" 31 | logger.error(error_message) 32 | raise Exception(error_message) 33 | 34 | # Output if endpoint has data capture enbaled 35 | endpoint_config_name = response["EndpointConfigName"] 36 | response = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name) 37 | if "DataCaptureConfig" in response and response["DataCaptureConfig"]["EnableCapture"]: 38 | logger.info(f"data capture enabled for endpoint config {endpoint_config_name}") 39 | 40 | # Call endpoint to handle 41 | return invoke_endpoint(endpoint_name) 42 | except ClientError as e: 43 | error_message = e.response["Error"]["Message"] 44 | logger.error(error_message) 45 | raise Exception(error_message) 46 | 47 | 48 | if __name__ == "__main__": 49 | parser = argparse.ArgumentParser() 50 | parser.add_argument("--log-level", type=str, default=os.environ.get("LOGLEVEL", "INFO").upper()) 51 | parser.add_argument("--import-build-config", type=str, required=True) 52 | parser.add_argument("--export-test-results", type=str, required=True) 53 | args, _ = parser.parse_known_args() 54 | 55 | # Configure logging to output the line number and message 56 | log_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 57 | logging.basicConfig(format=log_format, level=args.log_level) 58 | 59 | # Load the build config 60 | with open(args.import_build_config, "r") as f: 61 | config = json.load(f) 62 | 63 | # Get the endpoint name from sagemaker project name 64 | endpoint_name = "{}-{}".format( 65 | config["Parameters"]["SageMakerProjectName"], config["Parameters"]["StageName"] 66 | ) 67 | results = test_endpoint(endpoint_name) 68 | 69 | # Print results and write to file 70 | logger.debug(json.dumps(results, indent=4)) 71 | with open(args.export_test_results, "w") as f: 72 | json.dump(results, f, indent=4) 73 | -------------------------------------------------------------------------------- /modeldeploy_pipeline/test/test_singleaccount.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | 6 | import boto3 7 | from botocore.exceptions import ClientError 8 | 9 | logger = logging.getLogger(__name__) 10 | sm_client = boto3.client("sagemaker") 11 | 12 | 13 | def invoke_endpoint(endpoint_name): 14 | """ 15 | Add custom logic here to invoke the endpoint and validate reponse 16 | """ 17 | return {"endpoint_name": endpoint_name, "success": True} 18 | 19 | 20 | def test_endpoint(endpoint_name): 21 | """ 22 | Describe the endpoint and ensure InSerivce, then invoke endpoint. Raises exception on error. 
23 | """ 24 | error_message = None 25 | try: 26 | # Ensure endpoint is in service 27 | response = sm_client.describe_endpoint(EndpointName=endpoint_name) 28 | status = response["EndpointStatus"] 29 | if status != "InService": 30 | error_message = f"SageMaker endpoint: {endpoint_name} status: {status} not InService" 31 | logger.error(error_message) 32 | raise Exception(error_message) 33 | 34 | # Output if endpoint has data capture enbaled 35 | endpoint_config_name = response["EndpointConfigName"] 36 | response = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name) 37 | if "DataCaptureConfig" in response and response["DataCaptureConfig"]["EnableCapture"]: 38 | logger.info(f"data capture enabled for endpoint config {endpoint_config_name}") 39 | 40 | # Call endpoint to handle 41 | return invoke_endpoint(endpoint_name) 42 | except ClientError as e: 43 | error_message = e.response["Error"]["Message"] 44 | logger.error(error_message) 45 | raise Exception(error_message) 46 | 47 | 48 | if __name__ == "__main__": 49 | parser = argparse.ArgumentParser() 50 | parser.add_argument("--log-level", type=str, default=os.environ.get("LOGLEVEL", "INFO").upper()) 51 | parser.add_argument("--import-build-config", type=str, required=True) 52 | parser.add_argument("--export-test-results", type=str, required=True) 53 | args, _ = parser.parse_known_args() 54 | 55 | # Configure logging to output the line number and message 56 | log_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 57 | logging.basicConfig(format=log_format, level=args.log_level) 58 | 59 | # Load the build config 60 | with open(args.import_build_config, "r") as f: 61 | config = json.load(f) 62 | 63 | # Get the endpoint name from sagemaker project name 64 | endpoint_name = "{}-{}".format( 65 | config["Parameters"]["SageMakerProjectName"], config["Parameters"]["StageName"] 66 | ) 67 | results = test_endpoint(endpoint_name) 68 | 69 | # Print results and write to file 70 | logger.debug(json.dumps(results, indent=4)) 71 | with open(args.export_test_results, "w") as f: 72 | json.dump(results, f, indent=4) 73 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/customer_churn/preprocess.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | """Feature engineers the customer churn dataset.""" 14 | import argparse 15 | import logging 16 | import pathlib 17 | 18 | import boto3 19 | import numpy as np 20 | import pandas as pd 21 | 22 | logger = logging.getLogger() 23 | logger.setLevel(logging.INFO) 24 | logger.addHandler(logging.StreamHandler()) 25 | 26 | if __name__ == "__main__": 27 | logger.info("Starting preprocessing.") 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument("--input-data", type=str, required=True) 30 | args = parser.parse_args() 31 | 32 | base_dir = "/opt/ml/processing" 33 | pathlib.Path(f"{base_dir}/data").mkdir(parents=True, exist_ok=True) 34 | input_data = args.input_data 35 | print(input_data) 36 | bucket = input_data.split("/")[2] 37 | key = "/".join(input_data.split("/")[3:]) 38 | 39 | logger.info("Downloading data from bucket: %s, key: %s", bucket, key) 40 | fn = f"{base_dir}/data/raw-data.csv" 41 | s3 = boto3.resource("s3") 42 | s3.Bucket(bucket).download_file(key, fn) 43 | 44 | logger.info("Reading downloaded data.") 45 | 46 | # read in csv 47 | df = pd.read_csv(fn) 48 | 49 | # drop the "Phone" feature column 50 | df = df.drop(["Phone"], axis=1) 51 | 52 | # Change the data type of "Area Code" 53 | df["Area Code"] = df["Area Code"].astype(object) 54 | 55 | # Drop several other columns 56 | df = df.drop(["Day Charge", "Eve Charge", "Night Charge", "Intl Charge"], axis=1) 57 | 58 | # Convert categorical variables into dummy/indicator variables. 59 | model_data = pd.get_dummies(df) 60 | 61 | # Create one binary classification target column 62 | model_data = pd.concat( 63 | [ 64 | model_data["Churn?_True."], 65 | model_data.drop(["Churn?_False.", "Churn?_True."], axis=1), 66 | ], 67 | axis=1, 68 | ) 69 | 70 | # Split the data 71 | train_data, validation_data, test_data = np.split( 72 | model_data.sample(frac=1, random_state=1729), 73 | [int(0.7 * len(model_data)), int(0.9 * len(model_data))], 74 | ) 75 | 76 | pd.DataFrame(train_data).to_csv( 77 | f"{base_dir}/train/train.csv", header=False, index=False 78 | ) 79 | pd.DataFrame(validation_data).to_csv( 80 | f"{base_dir}/validation/validation.csv", header=False, index=False 81 | ) 82 | pd.DataFrame(test_data).to_csv( 83 | f"{base_dir}/test/test.csv", header=False, index=False 84 | ) 85 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/customer_churn/.ipynb_checkpoints/preprocess-checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 
13 | """Feature engineers the customer churn dataset.""" 14 | import argparse 15 | import logging 16 | import pathlib 17 | 18 | import boto3 19 | import numpy as np 20 | import pandas as pd 21 | 22 | logger = logging.getLogger() 23 | logger.setLevel(logging.INFO) 24 | logger.addHandler(logging.StreamHandler()) 25 | 26 | if __name__ == "__main__": 27 | logger.info("Starting preprocessing.") 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument("--input-data", type=str, required=True) 30 | args = parser.parse_args() 31 | 32 | base_dir = "/opt/ml/processing" 33 | pathlib.Path(f"{base_dir}/data").mkdir(parents=True, exist_ok=True) 34 | input_data = args.input_data 35 | print(input_data) 36 | bucket = input_data.split("/")[2] 37 | key = "/".join(input_data.split("/")[3:]) 38 | 39 | logger.info("Downloading data from bucket: %s, key: %s", bucket, key) 40 | fn = f"{base_dir}/data/raw-data.csv" 41 | s3 = boto3.resource("s3") 42 | s3.Bucket(bucket).download_file(key, fn) 43 | 44 | logger.info("Reading downloaded data.") 45 | 46 | # read in csv 47 | df = pd.read_csv(fn) 48 | 49 | # drop the "Phone" feature column 50 | df = df.drop(["Phone"], axis=1) 51 | 52 | # Change the data type of "Area Code" 53 | df["Area Code"] = df["Area Code"].astype(object) 54 | 55 | # Drop several other columns 56 | df = df.drop(["Day Charge", "Eve Charge", "Night Charge", "Intl Charge"], axis=1) 57 | 58 | # Convert categorical variables into dummy/indicator variables. 59 | model_data = pd.get_dummies(df) 60 | 61 | # Create one binary classification target column 62 | model_data = pd.concat( 63 | [ 64 | model_data["Churn?_True."], 65 | model_data.drop(["Churn?_False.", "Churn?_True."], axis=1), 66 | ], 67 | axis=1, 68 | ) 69 | 70 | # Split the data 71 | train_data, validation_data, test_data = np.split( 72 | model_data.sample(frac=1, random_state=1729), 73 | [int(0.7 * len(model_data)), int(0.9 * len(model_data))], 74 | ) 75 | 76 | pd.DataFrame(train_data).to_csv( 77 | f"{base_dir}/train/train.csv", header=False, index=False 78 | ) 79 | pd.DataFrame(validation_data).to_csv( 80 | f"{base_dir}/validation/validation.csv", header=False, index=False 81 | ) 82 | pd.DataFrame(test_data).to_csv( 83 | f"{base_dir}/test/test.csv", header=False, index=False 84 | ) 85 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. 
Details like these are incredibly useful:
16 |
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 |
22 |
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 |
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 |
30 | To send us a pull request, please:
31 |
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 |
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 |
42 |
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 |
46 |
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information, see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 |
52 |
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
55 |
56 |
57 | ## Licensing
58 |
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | -------------------------------------------------------------------------------- /modeldeploy_pipeline/setup.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | import argparse 6 | import boto3 7 | from botocore.exceptions import ClientError 8 | 9 | logger = logging.getLogger(__name__) 10 | sm_client = boto3.client("sagemaker") 11 | org_client = boto3.client("organizations") 12 | 13 | if __name__ == "__main__": 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("--log-level", type=str, default=os.environ.get("LOGLEVEL", "INFO").upper()) 16 | parser.add_argument("--sagemaker-project-id", type=str, required=True) 17 | parser.add_argument("--sagemaker-project-name", type=str, required=True) 18 | parser.add_argument("--model-package-group-name", type=str, required=True) 19 | parser.add_argument("--organizational-unit-staging-id", type=str, required=True) 20 | parser.add_argument("--organizational-unit-prod-id", type=str, required=True) 21 | 22 | args, _ = parser.parse_known_args() 23 | 24 | # Configure logging to output the line number and message 25 | log_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 26 | logging.basicConfig(format=log_format, level=args.log_level) 27 | model_package_group_arn = None 28 | # Create model package group if necessary 29 | try: 30 | # check if the model package group exists 31 | resp = sm_client.describe_model_package_group( 32 | ModelPackageGroupName=args.model_package_group_name) 33 | model_package_group_arn = resp['ModelPackageGroupArn'] 34 | except ClientError as e: 35 | if e.response['Error']['Code'] == 'ValidationException': 36 | # it doesn't exist, lets create a new one 37 | resp = sm_client.create_model_package_group( 38 | ModelPackageGroupName=args.model_package_group_name, 39 | ModelPackageGroupDescription="Multi account model group", 40 | Tags=[ 41 | {'Key': 'sagemaker:project-name', 'Value': args.sagemaker_project_name}, 42 | {'Key': 'sagemaker:project-id', 'Value': args.sagemaker_project_id}, 43 | ] 44 | ) 45 | model_package_group_arn = resp['ModelPackageGroupArn'] 46 | else: 47 | raise e 48 | staging_ou_id = args.organizational_unit_staging_id 49 | prod_ou_id = args.organizational_unit_prod_id 50 | 51 | # finally, we need to update the model package group policy 52 | # Get the account principals based on staging and prod ids 53 | staging_accounts = [i['Id'] for i in org_client.list_accounts_for_parent(ParentId=staging_ou_id)['Accounts']] 54 | prod_accounts = [i['Id'] for i in org_client.list_accounts_for_parent(ParentId=prod_ou_id)['Accounts']] 55 | # update the policy 56 | sm_client.put_model_package_group_policy( 57 | ModelPackageGroupName=args.model_package_group_name, 58 | ResourcePolicy=json.dumps({ 59 | 'Version': '2012-10-17', 60 | 'Statement': [{ 61 | 'Sid': 'Stmt1527884065456', 62 | 'Effect': 'Allow', 63 | 'Principal': {'AWS': ['arn:aws:iam::%s:root' % i for i in staging_accounts + prod_accounts] }, 64 | 'Action': 'sagemaker:CreateModel', 65 | 'Resource': '%s/*' % model_package_group_arn.replace('model-package-group', 'model-package') 66 | }] 67 | }) 68 | ) 69 | 70 | 71 | -------------------------------------------------------------------------------- /modelbuild_pipeline/README.md: -------------------------------------------------------------------------------- 1 | ## Layout of the SageMaker ModelBuild Project Template 2 | 3 | The template provides a starting point for bringing your SageMaker 
Pipeline development to production. 4 | 5 | ``` 6 | |-- CONTRIBUTING.md 7 | |-- pipelines 8 | | |-- customer_churn 9 | | | |-- evaluate.py 10 | | | |-- __init__.py 11 | | | |-- pipeline.py 12 | | | `-- preprocess.py 13 | | |-- get_pipeline_definition.py 14 | | |-- __init__.py 15 | | |-- run_pipeline.py 16 | | |-- _utils.py 17 | | `-- __version__.py 18 | |-- README.md 19 | |-- setup.cfg 20 | |-- setup.py 21 | |-- tests 22 | | `-- test_pipelines.py 23 | `-- tox.ini 24 | ``` 25 | ## Start here 26 | This is a sample code repository that demonstrates how you can organize your code for an ML business solution. This repository is created as part of creating a Project in SageMaker. 27 | 28 | In this example, we solve a customer churn prediction problem using a synthetic dataset (s3://sagemaker-sample-files/datasets/tabular/synthetic/churn.txt). The following sections provide an overview of how the code is organized and what you need to modify. In particular, `pipelines/customer_churn/pipeline.py` contains the core of the business logic for this problem: the code that expresses the ML steps involved in generating an ML model. You will also find the code that supports the preprocessing and evaluation steps in the `preprocess.py` and `evaluate.py` files respectively. 29 | 30 | Once you understand the code structure described below, you can inspect the code and start customizing it for your own business case. This is only sample code, and you own this repository for your business use case. Go ahead, modify the files, commit them, and watch the changes kick off the SageMaker pipelines in the CICD system. 31 | 32 | A description of some of the artifacts is provided below: 33 |

34 | Your CodeBuild execution instructions. This file contains the instructions needed to kick off an execution of the SageMaker Pipeline in the CICD system (via CodePipeline). You will see that this file has the fields defined for naming the Pipeline, the ModelPackageGroup, etc. You can customize them as required. 35 | 36 |
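If it helps to see what that kick-off amounts to in code, the sketch below mirrors the flow implemented in `pipelines/run_pipeline.py` (create or update the pipeline, then start and wait for an execution). The region, role ARN, and pipeline name are placeholders, not values taken from this repository.

```python
# Illustrative sketch only: roughly what the CICD build step triggers via
# pipelines/run_pipeline.py. Replace the placeholder region/role/names with your own.
import json

from pipelines.customer_churn.pipeline import get_pipeline

role_arn = "arn:aws:iam::111122223333:role/service-role/my-pipeline-role"  # placeholder

pipeline = get_pipeline(
    region="us-east-1",                                   # placeholder region
    role=role_arn,
    model_package_group_name="CustomerChurnPackageGroup",
    pipeline_name="CustomerChurnPipeline",                # placeholder pipeline name
    base_job_prefix="CustomerChurn",
)

# Print the rendered pipeline definition, then create/update and run the pipeline.
print(json.dumps(json.loads(pipeline.definition()), indent=2, sort_keys=True))
pipeline.upsert(role_arn=role_arn)
execution = pipeline.start()
execution.wait()
print(execution.list_steps())
```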

37 | Your pipeline artifacts, which include a pipeline module defining the required `get_pipeline` method that returns an instance of a SageMaker pipeline, a preprocessing script used for feature engineering, and a model evaluation script that measures the performance (for this customer churn example, classification accuracy) of the model trained by the pipeline. This is the core business logic; if you want to create your own folder, you can do so and implement the `get_pipeline` interface as illustrated here. 38 | 39 | ``` 40 | |-- pipelines 41 | | |-- customer_churn 42 | | | |-- evaluate.py 43 | | | |-- __init__.py 44 | | | |-- pipeline.py 45 | | | `-- preprocess.py 46 | 47 | ``` 48 |
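As a rough illustration of that interface, a custom pipeline folder only needs to expose a `get_pipeline` function with the same shape. The sketch below is a minimal skeleton, not the actual implementation in `pipelines/customer_churn/pipeline.py`; the parameter defaults are placeholders and the step definitions are left empty.

```python
# Minimal skeleton of the get_pipeline contract; illustrative only.
import boto3
import sagemaker.session
from sagemaker.workflow.parameters import ParameterString
from sagemaker.workflow.pipeline import Pipeline


def get_pipeline(
    region,
    role=None,
    default_bucket=None,
    model_package_group_name="MyPackageGroup",  # placeholder
    pipeline_name="MyPipeline",                 # placeholder
    base_job_prefix="My",                       # placeholder
):
    """Return a sagemaker.workflow.pipeline.Pipeline instance for the CI tooling to run."""
    boto_session = boto3.Session(region_name=region)
    sagemaker_session = sagemaker.session.Session(
        boto_session=boto_session, default_bucket=default_bucket
    )
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    input_data = ParameterString(name="InputDataUrl", default_value="s3://my-bucket/raw.csv")

    # Define your ProcessingStep / TrainingStep / ConditionStep objects here.
    steps = []

    return Pipeline(
        name=pipeline_name,
        parameters=[input_data],
        steps=steps,
        sagemaker_session=sagemaker_session,
    )
```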

49 | Utility modules for getting pipeline definition jsons and running pipelines (you do not typically need to modify these): 50 | 51 | ``` 52 | |-- pipelines 53 | | |-- get_pipeline_definition.py 54 | | |-- __init__.py 55 | | |-- run_pipeline.py 56 | | |-- _utils.py 57 | | `-- __version__.py 58 | ``` 59 |
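For orientation, the driver logic these utilities provide boils down to importing the named pipeline module and calling its `get_pipeline`. The actual `_utils.py` is not reproduced in this section, so treat the following as a plausible sketch rather than the shipped code.

```python
# Plausible sketch of a pipeline driver (NOT the actual pipelines/_utils.py).
import ast
import importlib


def convert_struct(str_struct=None):
    """Parse a dict/list literal passed on the command line; default to an empty dict."""
    return ast.literal_eval(str_struct) if str_struct else {}


def get_pipeline_driver(module_name, passed_args=None):
    """Import e.g. 'pipelines.customer_churn.pipeline' and build its pipeline.

    passed_args is expected to carry the keyword arguments get_pipeline needs,
    such as {"region": "us-east-1", "role": "<role-arn>"} (placeholders).
    """
    _module = importlib.import_module(module_name)
    kwargs = convert_struct(passed_args)
    return _module.get_pipeline(**kwargs)
```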

60 | Python package artifacts: 61 | ``` 62 | |-- setup.cfg 63 | |-- setup.py 64 | ``` 65 |
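The packaging files themselves are not reproduced in this section; a hypothetical minimal `setup.py` for the `pipelines` package might look like the following. The name, version, and dependency list are assumptions, not the repository's actual metadata.

```python
# Hypothetical minimal setup.py; metadata and dependencies below are assumptions.
import setuptools

setuptools.setup(
    name="customer-churn-pipelines",            # placeholder package name
    version="0.0.1",                            # placeholder version
    packages=setuptools.find_packages(),
    install_requires=["sagemaker", "boto3"],    # assumed runtime dependencies
    python_requires=">=3.6",
)
```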

66 | A stubbed testing module for testing your pipeline as you develop: 67 | ``` 68 | |-- tests 69 | | `-- test_pipelines.py 70 | ``` 71 |
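If you want to grow the stub into a real check, one low-effort option is to assert that the pipeline definition renders to valid JSON. The test below is only a sketch: it assumes AWS credentials and a valid execution role are available, so it is marked as skipped by default, and the role ARN is a placeholder.

```python
# Illustrative test sketch for tests/test_pipelines.py; requires AWS credentials.
import json

import pytest

from pipelines.customer_churn.pipeline import get_pipeline


@pytest.mark.skip(reason="needs AWS credentials and a real execution role ARN")
def test_pipeline_definition_is_valid_json():
    pipeline = get_pipeline(
        region="us-east-1",                                      # placeholder
        role="arn:aws:iam::111122223333:role/my-pipeline-role",  # placeholder
    )
    definition = json.loads(pipeline.definition())
    assert definition["Steps"], "expected at least one step in the pipeline definition"
```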

72 | The `tox` testing framework configuration: 73 | ``` 74 | `-- tox.ini 75 | ``` 76 | 77 | -------------------------------------------------------------------------------- /terraform/modeldeploy_codebuild.tf: -------------------------------------------------------------------------------- 1 | data "template_file" "deploybuildspec" { 2 | template = file("modeldeploy_buildspec.yml") 3 | vars = { 4 | env = var.env 5 | SAGEMAKER_PROJECT_NAME=var.project_name 6 | SAGEMAKER_PROJECT_ID=var.project_id 7 | ARTIFACT_BUCKET=var.artifacts_bucket_name 8 | MODEL_EXECUTION_ROLE_ARN=aws_iam_role.tf_mlops_role.arn 9 | AWS_REGION=var.region 10 | SOURCE_MODEL_PACKAGE_GROUP_NAME="${var.project_name}-${var.project_id}" 11 | EXPORT_TEMPLATE_NAME="template-export.yml" 12 | EXPORT_TEMPLATE_STAGING_CONFIG="staging-config-export.json" 13 | EXPORT_TEMPLATE_PROD_CONFIG="prod-config-export.json" 14 | 15 | } 16 | } 17 | 18 | resource "aws_codebuild_project" "tf_mlops_deploybuild" { 19 | badge_enabled = false 20 | build_timeout = 60 21 | name = "tf-mlops-deploybuild" 22 | queued_timeout = 480 23 | service_role = aws_iam_role.tf_mlops_role.arn 24 | tags = { 25 | Environment = var.env 26 | } 27 | 28 | artifacts { 29 | encryption_disabled = false 30 | name = "tf-mlops-deploybuild-${var.env}" 31 | override_artifact_name = false 32 | packaging = "NONE" 33 | type = "CODEPIPELINE" 34 | } 35 | 36 | environment { 37 | compute_type = "BUILD_GENERAL1_SMALL" 38 | image = "aws/codebuild/amazonlinux2-x86_64-standard:3.0" 39 | image_pull_credentials_type = "CODEBUILD" 40 | privileged_mode = false 41 | type = "LINUX_CONTAINER" 42 | environment_variable { 43 | name = "environment" 44 | type = "PLAINTEXT" 45 | value = var.env 46 | } 47 | environment_variable { 48 | name = "SAGEMAKER_PROJECT_NAME" 49 | type = "PLAINTEXT" 50 | value = var.project_name 51 | } 52 | environment_variable { 53 | name = "SAGEMAKER_PROJECT_ID" 54 | type = "PLAINTEXT" 55 | value = var.project_id 56 | } 57 | environment_variable { 58 | name = "ARTIFACT_BUCKET" 59 | type = "PLAINTEXT" 60 | value = var.artifacts_bucket_name 61 | } 62 | environment_variable { 63 | name = "MODEL_EXECUTION_ROLE_ARN" 64 | type = "PLAINTEXT" 65 | value = aws_iam_role.tf_mlops_role.arn 66 | } 67 | environment_variable { 68 | name = "SOURCE_MODEL_PACKAGE_GROUP_NAME" 69 | type = "PLAINTEXT" 70 | value = "${var.project_name}-${var.project_id}" 71 | } 72 | environment_variable { 73 | name = "AWS_REGION" 74 | type = "PLAINTEXT" 75 | value = var.region 76 | } 77 | environment_variable { 78 | name = "EXPORT_TEMPLATE_NAME" 79 | type = "PLAINTEXT" 80 | value = "template-export.yml" 81 | } 82 | environment_variable { 83 | name = "EXPORT_TEMPLATE_STAGING_CONFIG" 84 | type = "PLAINTEXT" 85 | value = "staging-config-export.json" 86 | } 87 | environment_variable { 88 | name = "EXPORT_TEMPLATE_PROD_CONFIG" 89 | type = "PLAINTEXT" 90 | value = "prod-config-export.json" 91 | } 92 | } 93 | 94 | logs_config { 95 | cloudwatch_logs { 96 | status = "ENABLED" 97 | } 98 | 99 | s3_logs { 100 | encryption_disabled = false 101 | status = "DISABLED" 102 | } 103 | } 104 | 105 | source { 106 | buildspec = data.template_file.deploybuildspec.rendered 107 | git_clone_depth = 0 108 | insecure_ssl = false 109 | report_build_status = false 110 | type = "CODEPIPELINE" 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/run_pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 
Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | """A CLI to create or update and run pipelines.""" 14 | from __future__ import absolute_import 15 | 16 | import argparse 17 | import json 18 | import sys 19 | 20 | from pipelines._utils import get_pipeline_driver, convert_struct 21 | 22 | 23 | def main(): # pragma: no cover 24 | """The main harness that creates or updates and runs the pipeline. 25 | 26 | Creates or updates the pipeline and runs it. 27 | """ 28 | parser = argparse.ArgumentParser( 29 | "Creates or updates and runs the pipeline for the pipeline script." 30 | ) 31 | 32 | parser.add_argument( 33 | "-n", 34 | "--module-name", 35 | dest="module_name", 36 | type=str, 37 | help="The module name of the pipeline to import.", 38 | ) 39 | parser.add_argument( 40 | "-kwargs", 41 | "--kwargs", 42 | dest="kwargs", 43 | default=None, 44 | help="Dict string of keyword arguments for the pipeline generation (if supported)", 45 | ) 46 | parser.add_argument( 47 | "-role-arn", 48 | "--role-arn", 49 | dest="role_arn", 50 | type=str, 51 | help="The role arn for the pipeline service execution role.", 52 | ) 53 | parser.add_argument( 54 | "-description", 55 | "--description", 56 | dest="description", 57 | type=str, 58 | default=None, 59 | help="The description of the pipeline.", 60 | ) 61 | parser.add_argument( 62 | "-tags", 63 | "--tags", 64 | dest="tags", 65 | default=None, 66 | help="""List of dict strings of '[{"Key": "string", "Value": "string"}, ..]'""", 67 | ) 68 | args = parser.parse_args() 69 | 70 | if args.module_name is None or args.role_arn is None: 71 | parser.print_help() 72 | sys.exit(2) 73 | tags = convert_struct(args.tags) 74 | 75 | try: 76 | pipeline = get_pipeline_driver(args.module_name, args.kwargs) 77 | print("###### Creating/updating a SageMaker Pipeline with the following definition:") 78 | parsed = json.loads(pipeline.definition()) 79 | print(json.dumps(parsed, indent=2, sort_keys=True)) 80 | 81 | upsert_response = pipeline.upsert( 82 | role_arn=args.role_arn, description=args.description, tags=tags 83 | ) 84 | print("\n###### Created/Updated SageMaker Pipeline: Response received:") 85 | print(upsert_response) 86 | 87 | execution = pipeline.start() 88 | print(f"\n###### Execution started with PipelineExecutionArn: {execution.arn}") 89 | 90 | print("Waiting for the execution to finish...") 91 | execution.wait() 92 | print("\n#####Execution completed. Execution step details:") 93 | 94 | print(execution.list_steps()) 95 | # Todo print the status? 96 | except Exception as e: # pylint: disable=W0703 97 | print(f"Exception: {e}") 98 | sys.exit(1) 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/.ipynb_checkpoints/run_pipeline-checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | """A CLI to create or update and run pipelines.""" 14 | from __future__ import absolute_import 15 | 16 | import argparse 17 | import json 18 | import sys 19 | 20 | from pipelines._utils import get_pipeline_driver, convert_struct 21 | 22 | 23 | def main(): # pragma: no cover 24 | """The main harness that creates or updates and runs the pipeline. 25 | 26 | Creates or updates the pipeline and runs it. 27 | """ 28 | parser = argparse.ArgumentParser( 29 | "Creates or updates and runs the pipeline for the pipeline script." 30 | ) 31 | 32 | parser.add_argument( 33 | "-n", 34 | "--module-name", 35 | dest="module_name", 36 | type=str, 37 | help="The module name of the pipeline to import.", 38 | ) 39 | parser.add_argument( 40 | "-kwargs", 41 | "--kwargs", 42 | dest="kwargs", 43 | default=None, 44 | help="Dict string of keyword arguments for the pipeline generation (if supported)", 45 | ) 46 | parser.add_argument( 47 | "-role-arn", 48 | "--role-arn", 49 | dest="role_arn", 50 | type=str, 51 | help="The role arn for the pipeline service execution role.", 52 | ) 53 | parser.add_argument( 54 | "-description", 55 | "--description", 56 | dest="description", 57 | type=str, 58 | default=None, 59 | help="The description of the pipeline.", 60 | ) 61 | parser.add_argument( 62 | "-tags", 63 | "--tags", 64 | dest="tags", 65 | default=None, 66 | help="""List of dict strings of '[{"Key": "string", "Value": "string"}, ..]'""", 67 | ) 68 | args = parser.parse_args() 69 | 70 | if args.module_name is None or args.role_arn is None: 71 | parser.print_help() 72 | sys.exit(2) 73 | tags = convert_struct(args.tags) 74 | 75 | try: 76 | pipeline = get_pipeline_driver(args.module_name, args.kwargs) 77 | print("###### Creating/updating a SageMaker Pipeline with the following definition:") 78 | parsed = json.loads(pipeline.definition()) 79 | print(json.dumps(parsed, indent=2, sort_keys=True)) 80 | 81 | upsert_response = pipeline.upsert( 82 | role_arn=args.role_arn, description=args.description, tags=tags 83 | ) 84 | print("\n###### Created/Updated SageMaker Pipeline: Response received:") 85 | print(upsert_response) 86 | 87 | execution = pipeline.start() 88 | print(f"\n###### Execution started with PipelineExecutionArn: {execution.arn}") 89 | 90 | print("Waiting for the execution to finish...") 91 | execution.wait() 92 | print("\n#####Execution completed. Execution step details:") 93 | 94 | print(execution.list_steps()) 95 | # Todo print the status? 
96 | except Exception as e: # pylint: disable=W0703 97 | print(f"Exception: {e}") 98 | sys.exit(1) 99 | 100 | 101 | if __name__ == "__main__": 102 | main() 103 | -------------------------------------------------------------------------------- /terraform/modeldeploy_cd_pipline.tf: -------------------------------------------------------------------------------- 1 | resource "aws_codepipeline" "sm_cd_pipeline" { 2 | name = "modeldeploy-pipeline" 3 | role_arn = aws_iam_role.tf_mlops_role.arn 4 | tags = { 5 | Environment = var.env 6 | } 7 | 8 | artifact_store { 9 | location = aws_s3_bucket.artifacts_bucket.bucket 10 | type = "S3" 11 | } 12 | 13 | stage { 14 | name = "Source" 15 | 16 | action { 17 | category = "Source" 18 | configuration = { 19 | "Branch" = var.repository_branch 20 | "Owner" = var.repository_owner 21 | "PollForSourceChanges" = "false" 22 | "Repo" = var.deploy_repository_name 23 | OAuthToken = var.github_token 24 | } 25 | 26 | input_artifacts = [] 27 | name = "Source" 28 | output_artifacts = [ 29 | "SourceArtifact", 30 | ] 31 | owner = "ThirdParty" 32 | provider = "GitHub" 33 | run_order = 1 34 | version = "1" 35 | } 36 | } 37 | 38 | stage { 39 | name = "Build" 40 | 41 | action { 42 | category = "Build" 43 | configuration = { 44 | "ProjectName" = "tf-mlops-deploybuild" 45 | } 46 | input_artifacts = [ 47 | "SourceArtifact", 48 | ] 49 | name = "Build" 50 | output_artifacts = [ 51 | "BuildArtifact", 52 | ] 53 | owner = "AWS" 54 | provider = "CodeBuild" 55 | run_order = 1 56 | version = "1" 57 | } 58 | } 59 | 60 | stage { 61 | name = "DeployStaging" 62 | 63 | action { 64 | category = "Deploy" 65 | configuration = { 66 | "ActionMode": "REPLACE_ON_FAILURE", 67 | "Capabilities": "CAPABILITY_NAMED_IAM", 68 | "RoleArn": aws_iam_role.tf_mlops_role.arn, 69 | "StackName": "sagemaker-${var.project_name}-${var.project_id}-deploy-staging", 70 | "TemplateConfiguration": "BuildArtifact::staging-config-export.json", 71 | "TemplatePath": "BuildArtifact::template-export.yml" 72 | 73 | } 74 | input_artifacts = [ 75 | "BuildArtifact", 76 | ] 77 | name = "DeployResourcesStaging" 78 | owner = "AWS" 79 | provider = "CloudFormation" 80 | run_order = 1 81 | version = "1" 82 | } 83 | 84 | action { 85 | category = "Build" 86 | configuration = { 87 | "ProjectName" = "tf-mlops-testbuild", 88 | "PrimarySource" = "SourceArtifact" 89 | } 90 | input_artifacts = [ 91 | "SourceArtifact","BuildArtifact" 92 | ] 93 | name = "TestStaging" 94 | output_artifacts = [ 95 | "TestArtifact", 96 | ] 97 | owner = "AWS" 98 | provider = "CodeBuild" 99 | run_order = 2 100 | version = "1" 101 | } 102 | 103 | action { 104 | category = "Approval" 105 | configuration = { 106 | "CustomData"= "Approve this model for Production" 107 | } 108 | name = "ApproveDeployment" 109 | owner = "AWS" 110 | provider = "Manual" 111 | run_order = 3 112 | version = "1" 113 | } 114 | } 115 | stage { 116 | name = "DeployProd" 117 | 118 | action { 119 | category = "Deploy" 120 | configuration = { 121 | "ActionMode": "CREATE_UPDATE", 122 | "RoleArn": aws_iam_role.tf_mlops_role.arn, 123 | "Capabilities": "CAPABILITY_NAMED_IAM", 124 | "StackName": "sagemaker-${var.project_name}-${var.project_id}-deploy-prod", 125 | "TemplateConfiguration": "BuildArtifact::prod-config-export.json", 126 | "TemplatePath": "BuildArtifact::template-export.yml" 127 | } 128 | input_artifacts = [ 129 | "BuildArtifact", 130 | ] 131 | name = "DeployResourcesProd" 132 | owner = "AWS" 133 | provider = "CloudFormation" 134 | run_order = 1 135 | version = "1" 136 | } 137 | } 138 | 139 | } 
140 | -------------------------------------------------------------------------------- /modeldeploy_pipeline/build.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import os 5 | 6 | import boto3 7 | from botocore.exceptions import ClientError 8 | 9 | logger = logging.getLogger(__name__) 10 | sm_client = boto3.client("sagemaker") 11 | 12 | 13 | def get_approved_package(model_package_group_name): 14 | """Gets the latest approved model package for a model package group. 15 | 16 | Args: 17 | model_package_group_name: The model package group name. 18 | 19 | Returns: 20 | The SageMaker Model Package ARN. 21 | """ 22 | try: 23 | # Get the latest approved model package 24 | response = sm_client.list_model_packages( 25 | ModelPackageGroupName=model_package_group_name, 26 | ModelApprovalStatus="Approved", 27 | SortBy="CreationTime", 28 | MaxResults=100, 29 | ) 30 | approved_packages = response["ModelPackageSummaryList"] 31 | 32 | # Fetch more packages if none returned with continuation token 33 | while len(approved_packages) == 0 and "NextToken" in response: 34 | logger.debug("Getting more packages for token: {}".format(response["NextToken"])) 35 | response = sm_client.list_model_packages( 36 | ModelPackageGroupName=model_package_group_name, 37 | ModelApprovalStatus="Approved", 38 | SortBy="CreationTime", 39 | MaxResults=100, 40 | NextToken=response["NextToken"], 41 | ) 42 | approved_packages.extend(response["ModelPackageSummaryList"]) 43 | 44 | # Return error if no packages found 45 | if len(approved_packages) == 0: 46 | error_message = ( 47 | f"No approved ModelPackage found for ModelPackageGroup: {model_package_group_name}" 48 | ) 49 | logger.error(error_message) 50 | raise Exception(error_message) 51 | 52 | # Return the pmodel package arn 53 | model_package_arn = approved_packages[0]["ModelPackageArn"] 54 | logger.info(f"Identified the latest approved model package: {model_package_arn}") 55 | return model_package_arn 56 | except ClientError as e: 57 | error_message = e.response["Error"]["Message"] 58 | logger.error(error_message) 59 | raise Exception(error_message) 60 | 61 | 62 | def extend_config(args, model_package_arn, stage_config): 63 | """ 64 | Extend the stage configuration with additional parameters and tags based. 
65 | """ 66 | # Verify that config has parameters and tags sections 67 | if not "Parameters" in stage_config or not "StageName" in stage_config["Parameters"]: 68 | raise Exception("Configuration file must include SageName parameter") 69 | if not "Tags" in stage_config: 70 | stage_config["Tags"] = {} 71 | # Create new params and tags 72 | new_params = { 73 | "SageMakerProjectName": args.sagemaker_project_name, 74 | "ModelPackageName": model_package_arn, 75 | "ModelExecutionRoleArn": args.model_execution_role, 76 | } 77 | new_tags = { 78 | "sagemaker:deployment-stage": stage_config["Parameters"]["StageName"], 79 | "sagemaker:project-id": args.sagemaker_project_id, 80 | "sagemaker:project-name": args.sagemaker_project_name, 81 | } 82 | return { 83 | "Parameters": {**stage_config["Parameters"], **new_params}, 84 | "Tags": {**stage_config.get("Tags", {}), **new_tags}, 85 | } 86 | 87 | 88 | if __name__ == "__main__": 89 | parser = argparse.ArgumentParser() 90 | parser.add_argument("--log-level", type=str, default=os.environ.get("LOGLEVEL", "INFO").upper()) 91 | parser.add_argument("--model-execution-role", type=str, required=True) 92 | parser.add_argument("--model-package-group-name", type=str, required=True) 93 | parser.add_argument("--sagemaker-project-id", type=str, required=True) 94 | parser.add_argument("--sagemaker-project-name", type=str, required=True) 95 | parser.add_argument("--import-staging-config", type=str, default="staging-config.json") 96 | parser.add_argument("--import-prod-config", type=str, default="prod-config.json") 97 | parser.add_argument("--export-staging-config", type=str, default="staging-config-export.json") 98 | parser.add_argument("--export-prod-config", type=str, default="prod-config-export.json") 99 | args, _ = parser.parse_known_args() 100 | 101 | # Configure logging to output the line number and message 102 | log_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s" 103 | logging.basicConfig(format=log_format, level=args.log_level) 104 | 105 | # Get the latest approved package 106 | model_package_arn = get_approved_package(args.model_package_group_name) 107 | 108 | # Write the staging config 109 | with open(args.import_staging_config, "r") as f: 110 | staging_config = extend_config(args, model_package_arn, json.load(f)) 111 | logger.debug("Staging config: {}".format(json.dumps(staging_config, indent=4))) 112 | with open(args.export_staging_config, "w") as f: 113 | json.dump(staging_config, f, indent=4) 114 | 115 | # Write the prod config 116 | with open(args.import_prod_config, "r") as f: 117 | prod_config = extend_config(args, model_package_arn, json.load(f)) 118 | logger.debug("Prod config: {}".format(json.dumps(prod_config, indent=4))) 119 | with open(args.export_prod_config, "w") as f: 120 | json.dump(prod_config, f, indent=4) 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## MLOps 3 | MLOps focuses on realizing the value from any data science activity through the adoption of DevOps and engineering best practices to build solutions and products that have an ML component at their core. It provides the standards, tools, and framework that supports the work of data science teams to take their ideas from the whiteboard to production in a timely, secure, traceable, and repeatable manner. 
4 | 5 | ## Benefits 6 | With this MLOps architecture, AWS partners can enable their customers to accelerate operationalizing ML models in production and achieve the benefits of automated machine learning lifecycle management, shortening the typical lifecycle for a significantly faster delivery time. 7 | 8 | ## Background 9 | The solution architecture here was built to address a particular use case (a people analytics attrition model), but the intention was to design it in such a way that it can be used generically across different ML problems. Of course, this will not meet all ML use-case requirements, but many of the components are key to almost all ML systems in AWS (S3 storage, SageMaker hyperparameter tuning, training, and deployment). It is intended as a starting point for establishing proper ML pipeline automation, with many reusable elements that can form the foundation for a mature ML system. 10 | 11 | ## Use Case 12 | This architecture describes using machine learning (ML) for the automated identification of unhappy customers, also known as customer churn prediction. Losing customers is costly for any business, so identifying unhappy customers early on gives you a chance to offer them incentives to stay. 13 | 14 | We use a familiar example of churn: leaving a mobile phone operator. If the provider knows that a customer is thinking of leaving, it can offer timely incentives, such as a phone upgrade or activating a new feature, and the customer may stick around. Incentives are often much more cost-effective than losing and reacquiring a customer. 15 | 16 | ## Solution Architecture 17 | 18 | 19 | 20 | The architecture shown above is an example of what an automated MLOps pipeline can look like in AWS. It is for a batch inference model deployment where a persistent endpoint is not required (although an endpoint is optionally available as part of the infrastructure build). It uses a serverless architecture, which has benefits in terms of cost-efficiency and ease of development. 21 | 22 | The key AWS services are: 23 | 24 | * **AWS CodePipeline** automates your software release process, allowing you to rapidly release new features to your users. With CodePipeline, you can quickly iterate on feedback and get new features to your users faster. 25 | * **AWS CodeBuild** eliminates the need to set up, patch, update, and manage your own build servers and software. There is no software to install or manage. 26 | * **Amazon S3** for initial data storage and data lakes, storing flat-file data extracts, source code, model objects, inference output, and metadata. 27 | * **Amazon SageMaker** helps you streamline the machine learning (ML) lifecycle by automating and standardizing MLOps practices across your organization. You can easily build, train, deploy, and manage ML models, whether it’s only a few, hundreds of thousands, or even millions. With purpose-built tools for ML lifecycle management and built-in integrations with other AWS services, you can boost the productivity of data scientists and ML engineers while maintaining high model accuracy and enhancing security and compliance. 28 | * **Amazon CloudWatch** for monitoring SageMaker tuning and training jobs. 29 | 30 | ## Why Terraform 31 | We have received a lot of feedback from a number of AWS strategic partners and customers about their MLOps use cases and discovered that they are using Terraform for their infrastructure automation.
As a result of this feedback, we decided to use Terraform to help them in their efforts. Here are additional reasons for using Terraform for MLOps use cases: 32 | * Terraform can define infrastructure and manage the infrastructure lifecycle directly. To use a SageMaker Studio project, a project template has to be created in advance and saved in Service Catalog, and the template needs to define the infrastructure using CloudFormation. 33 | * Terraform supports multi-cloud platforms. Users can provision services on multiple cloud platforms. 34 | * Terraform has workspaces, which make it easier to manage multiple environments. 35 | * Using Terraform can expedite the deployment process for teams that lack skills and experience in CloudFormation. 36 | 37 | ## Prerequisites 38 | 39 | * Terraform CLI installed on your local machine or cloud workspace 40 | * AWS CLI installed 41 | * An AWS account 42 | * Your AWS credentials 43 | * Two GitHub repositories: one for the build code (“modelbuild-pipeline”) and one for the deploy code (“modeldeploy-pipeline”). The repository names match those in the variable file. 44 | 45 | 46 | 47 | ## Setup Terraform Cloud 48 | Terraform Cloud is an application that manages Terraform runs in a consistent and reliable environment instead of on your local machine. It stores shared state and secret data, and connects to version control systems so that you and your team can work on infrastructure as code within your usual code workflow. It also has a private registry for sharing Terraform modules. 49 | 50 | 51 | 52 | ### Create an account 53 | 54 | Visit https://app.terraform.io/signup/account and follow the prompts to create a free Terraform Cloud account. 55 | 56 | 57 | 58 | When you sign up, you'll receive an email asking you to confirm your email address. Confirm your email address before moving on. 59 | 60 | ### Create an organization 61 | 62 | After you create your account, the Terraform Cloud web UI will prompt you to create a new organization. Your organization is free, and the members you add will be able to collaborate on your workspaces and share private modules. 63 | 64 | Enter your own `unique organization name` (here `'aws-terraform'` as an example) and an email address. You can use the same email address that you used for your account. 65 | 66 | 67 | 68 | ### Create a workspace 69 | 70 | Workspaces determine how Terraform Cloud organizes infrastructure. 71 | 72 | 73 | 74 | Click **CLI-driven workflow**. 75 | 76 | 77 | 78 | Enter `your own unique Workspace name` (here `aws-terraform-lambda-container` as an example) and click **Create workspace**. 79 | 80 | 81 | 82 | Copy the example code and paste it into the main.tf file. The organization and workspace names are those you created in the previous steps. 83 | ``` 84 | terraform { 85 | cloud { 86 | organization = "" 87 | 88 | workspaces { 89 | name = "" 90 | } 91 | } 92 | } 93 | ``` 94 | 95 | ### Configure AWS credentials 96 | 97 | Click the **Variables** tab at the top to create the **Terraform Variable** AWS_REGION and the **Environment Variables** AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY. 98 | 99 | 100 | 101 | ### Clone the repository to your local machine or cloud workspace 102 | ``` 103 | git clone https://github.com/aws-samples/aws-mlops-pipelines-terraform.git 104 | ``` 105 | There is a 'Notebooks' folder, which contains two Jupyter notebook examples. One uses the SageMaker workflow pipeline within the notebook. The other uses the SageMaker SDK without a pipeline.
106 | 107 | ### Push contents to your own GitHub repos 108 | Push the “modelbuild_pipeline” folder contents to your own GitHub repository “modelbuild-pipeline”. 109 | Push the “modeldeploy_pipeline” folder contents to your own GitHub repository “modeldeploy-pipeline”. 110 | 111 | ### Terraform login 112 | In your workspace or local machine, open a new terminal, go to the 'terraform' folder, and use the following commands: 113 | 114 | ``` 115 | cd ~/terraform 116 | terraform login 117 | ``` 118 | At the prompt, enter 'yes' and follow the instructions to generate a token. 119 | Terraform will store the token in your workspace or local machine. 120 | 121 | ### Provision infrastructure using Terraform Cloud 122 | Go to your Terraform code folder “terraform” and run the following commands. 123 | ``` 124 | terraform init 125 | terraform plan 126 | terraform apply 127 | ``` 128 | ### Finish 129 | Go to your AWS CodePipeline console, where you can check the pipeline's running progress. 130 | 131 | ### Inferencing 132 | 133 | To invoke and test the endpoint, copy 'test.csv' from the 'Notebooks' folder to your own notebook folder and run the following code within a notebook. 134 | 135 | ```python 136 | import pandas as pd 137 | import numpy as np 138 | import sagemaker 139 | import boto3 140 | from sagemaker import get_execution_role 141 | 142 | test_data = pd.read_csv('test.csv', header=None) 143 | testdata1 = test_data.iloc[0:1, 1:] 144 | 145 | runtime = boto3.client("sagemaker-runtime") 146 | Endpoint_name = "<your-endpoint-name>"  # update to your own endpoint name 147 | 148 | prediction = runtime.invoke_endpoint( 149 | EndpointName=Endpoint_name, 150 | Body=testdata1.to_csv(header=False, index=False).encode("utf-8"), 151 | ContentType="text/csv", 152 | Accept="text/csv", 153 | ) 154 | 155 | print(prediction["Body"].read()) 156 | ``` 157 | 158 | 159 | ### Cleaning up 160 | To avoid incurring future charges, please execute the following clean-up steps. Log in to the AWS Console and enter your credentials. 161 | 1. Select Services from the top menu and choose Amazon SageMaker. 162 | Go to Inference / Endpoints on the left-hand menu and click the radio button next to each endpoint. 163 | Select Actions and then Delete, then confirm the deletion by clicking the Delete button. 164 | 2. Select Services from the top menu and choose S3. 165 | Delete all S3 buckets created for this project. 166 | 3. To remove the AWS infrastructure, run the following command from your “terraform” folder: 167 | ``` 168 | terraform destroy 169 | ``` 170 | At the prompt, enter 'yes' and follow the instructions to destroy resources. 171 |
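As an optional complement to step 1 above, endpoints can also be removed programmatically. The snippet below is only a hedged sketch: the name filter is a placeholder, so double-check which endpoints it matches in your account before running it.

```python
# Sketch only: delete SageMaker endpoints whose names contain a placeholder filter.
import boto3

sm_client = boto3.client("sagemaker")

for endpoint in sm_client.list_endpoints(MaxResults=100)["Endpoints"]:
    name = endpoint["EndpointName"]
    if "tf-mlops" in name:  # placeholder filter; adjust to your own naming convention
        print(f"Deleting endpoint: {name}")
        sm_client.delete_endpoint(EndpointName=name)
```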
172 | -------------------------------------------------------------------------------- /terraform/iam_roles.tf: -------------------------------------------------------------------------------- 1 | 2 | resource "aws_iam_role" "tf_mlops_role" { 3 | assume_role_policy = jsonencode( 4 | { 5 | Statement = [ 6 | { 7 | Action = "sts:AssumeRole" 8 | Effect = "Allow" 9 | Principal = { 10 | Service = [ 11 | "firehose.amazonaws.com", 12 | "glue.amazonaws.com", 13 | "apigateway.amazonaws.com", 14 | "lambda.amazonaws.com", 15 | "events.amazonaws.com", 16 | "states.amazonaws.com", 17 | "sagemaker.amazonaws.com", 18 | "cloudformation.amazonaws.com", 19 | "codebuild.amazonaws.com", 20 | "codepipeline.amazonaws.com" 21 | ] 22 | } 23 | }, 24 | ] 25 | Version = "2012-10-17" 26 | } 27 | ) 28 | force_detach_policies = false 29 | max_session_duration = 3600 30 | name = "tf-mlops-role-${var.env}-1007" 31 | path = "/service-role/" 32 | tags = {} 33 | } 34 | 35 | resource "aws_iam_policy" "tf_mlops_policy" { 36 | description = "Policy used in trust relationship with CodeBuild (${var.env})" 37 | name = "tf-mlops-policy-${var.env}-1007" 38 | path = "/service-role/" 39 | policy = jsonencode( 40 | { 41 | Statement = [ 42 | { 43 | Action : [ 44 | "iam:PassRole" 45 | ], 46 | Resource : "*", 47 | Effect : "Allow" 48 | }, 49 | { 50 | "Effect" = "Allow", 51 | "Action" = [ 52 | "s3:*" 53 | ], 54 | "Resource" = [ 55 | "arn:aws:s3:::*", 56 | "arn:aws:s3:::*" 57 | ] 58 | }, 59 | { 60 | "Action" : [ 61 | "logs:CreateLogGroup", 62 | "logs:CreateLogStream", 63 | "logs:PutLogEvents" 64 | ], 65 | "Effect" : "Allow", 66 | "Resource" : "arn:aws:logs:*" 67 | }, 68 | 69 | { 70 | "Action": [ 71 | "codepipeline:StartPipelineExecution" 72 | ], 73 | "Resource": "arn:aws:codepipeline:*:*:*", 74 | "Effect": "Allow" 75 | }, 76 | { 77 | "Action": [ 78 | "events:DeleteRule", 79 | "events:DescribeRule", 80 | "events:PutRule", 81 | "events:PutTargets", 82 | "events:RemoveTargets" 83 | ], 84 | "Resource": [ 85 | "arn:aws:events:*:*:rule/*" 86 | ], 87 | "Effect": "Allow" 88 | }, 89 | { 90 | "Effect": "Allow", 91 | "Action": [ 92 | "sagemaker:*" 93 | ], 94 | "NotResource": [ 95 | "arn:aws:sagemaker:*:*:domain/*", 96 | "arn:aws:sagemaker:*:*:user-profile/*", 97 | "arn:aws:sagemaker:*:*:app/*", 98 | "arn:aws:sagemaker:*:*:flow-definition/*" 99 | ] 100 | }, 101 | { 102 | "Effect": "Allow", 103 | "Action": [ 104 | "sagemaker:CreatePresignedDomainUrl", 105 | "sagemaker:DescribeDomain", 106 | "sagemaker:ListDomains", 107 | "sagemaker:DescribeUserProfile", 108 | "sagemaker:ListUserProfiles", 109 | "sagemaker:*App", 110 | "sagemaker:ListApps" 111 | ], 112 | "Resource": "*" 113 | }, 114 | { 115 | "Effect": "Allow", 116 | "Action": "sagemaker:*", 117 | "Resource": [ 118 | "arn:aws:sagemaker:*:*:flow-definition/*" 119 | ], 120 | "Condition": { 121 | "StringEqualsIfExists": { 122 | "sagemaker:WorkteamType": [ 123 | "private-crowd", 124 | "vendor-crowd" 125 | ] 126 | } 127 | } 128 | }, 129 | { 130 | "Action": [ 131 | "cloudformation:CreateChangeSet", 132 | "cloudformation:CreateStack", 133 | "cloudformation:DescribeChangeSet", 134 | "cloudformation:DeleteChangeSet", 135 | "cloudformation:DeleteStack", 136 | "cloudformation:DescribeStacks", 137 | "cloudformation:ExecuteChangeSet", 138 | "cloudformation:SetStackPolicy", 139 | "cloudformation:UpdateStack" 140 | ], 141 | "Resource": "arn:aws:cloudformation:*:*:stack/*", 142 | "Effect": "Allow" 143 | }, 144 | { 145 | "Action": [ 146 | "codebuild:BatchGetBuilds", 147 | "codebuild:StartBuild" 148 | ], 149 | "Resource": 
[ 150 | "arn:aws:codebuild:*:*:project/*", 151 | "arn:aws:codebuild:*:*:build/*" 152 | ], 153 | "Effect": "Allow" 154 | }, 155 | { 156 | "Action": [ 157 | "states:DescribeExecution", 158 | "states:GetExecutionHistory", 159 | "states:StartExecution", 160 | "states:StopExecution", 161 | "states:UpdateStateMachine" 162 | ], 163 | "Resource": [ 164 | "arn:aws:states:*:*:statemachine:*sagemaker*", 165 | "arn:aws:states:*:*:execution:*sagemaker*:*" 166 | ], 167 | "Effect": "Allow" 168 | }, 169 | { 170 | "Effect": "Allow", 171 | "Action": [ 172 | "secretsmanager:DescribeSecret", 173 | "secretsmanager:GetSecretValue", 174 | "secretsmanager:CreateSecret" 175 | ], 176 | "Resource": [ 177 | "arn:aws:secretsmanager:*:*:secret:AmazonSageMaker-*" 178 | ] 179 | }, 180 | { 181 | "Effect": "Allow", 182 | "Action": [ 183 | "secretsmanager:DescribeSecret", 184 | "secretsmanager:GetSecretValue" 185 | ], 186 | "Resource": "*", 187 | "Condition": { 188 | "StringEquals": { 189 | "secretsmanager:ResourceTag/SageMaker": "true" 190 | } 191 | } 192 | }, 193 | { 194 | "Effect": "Allow", 195 | "Action": [ 196 | "s3:CreateBucket", 197 | "s3:GetBucketLocation", 198 | "s3:ListBucket", 199 | "s3:ListAllMyBuckets", 200 | "s3:GetBucketCors", 201 | "s3:PutBucketCors" 202 | ], 203 | "Resource": "*" 204 | }, 205 | { 206 | "Effect": "Allow", 207 | "Action": [ 208 | "s3:GetBucketAcl", 209 | "s3:PutObjectAcl" 210 | ], 211 | "Resource": [ 212 | "arn:aws:s3:::*SageMaker*", 213 | "arn:aws:s3:::*Sagemaker*", 214 | "arn:aws:s3:::*sagemaker*" 215 | ] 216 | }, 217 | { 218 | "Action": [ 219 | "ecr:BatchCheckLayerAvailability", 220 | "ecr:BatchGetImage", 221 | "ecr:Describe*", 222 | "ecr:GetAuthorizationToken", 223 | "ecr:GetDownloadUrlForLayer" 224 | ], 225 | "Resource": "*", 226 | "Effect": "Allow" 227 | }, 228 | { 229 | "Effect": "Allow", 230 | "Action": [ 231 | "ecr:BatchDeleteImage", 232 | "ecr:CompleteLayerUpload", 233 | "ecr:CreateRepository", 234 | "ecr:DeleteRepository", 235 | "ecr:InitiateLayerUpload", 236 | "ecr:PutImage", 237 | "ecr:UploadLayerPart" 238 | ], 239 | "Resource": [ 240 | "arn:aws:ecr:*:*:repository/sagemaker-*" 241 | ] 242 | }, 243 | { 244 | "Action": "iam:CreateServiceLinkedRole", 245 | "Effect": "Allow", 246 | "Resource": "arn:aws:iam::*:role/aws-service-role/sagemaker.application-autoscaling.amazonaws.com/AWSServiceRoleForApplicationAutoScaling_SageMakerEndpoint", 247 | "Condition": { 248 | "StringLike": { 249 | "iam:AWSServiceName": "sagemaker.application-autoscaling.amazonaws.com" 250 | } 251 | } 252 | }, 253 | { 254 | "Effect": "Allow", 255 | "Action": "iam:CreateServiceLinkedRole", 256 | "Resource": "*", 257 | "Condition": { 258 | "StringEquals": { 259 | "iam:AWSServiceName": "robomaker.amazonaws.com" 260 | } 261 | } 262 | }, 263 | { 264 | "Effect": "Allow", 265 | "Action": [ 266 | "sns:Subscribe", 267 | "sns:CreateTopic" 268 | ], 269 | "Resource": [ 270 | "arn:aws:sns:*:*:*SageMaker*", 271 | "arn:aws:sns:*:*:*Sagemaker*", 272 | "arn:aws:sns:*:*:*sagemaker*" 273 | ] 274 | }, 275 | { 276 | "Effect": "Allow", 277 | "Action": [ 278 | "iam:PassRole" 279 | ], 280 | "Resource": "arn:aws:iam::*:role/*AmazonSageMaker*", 281 | "Condition": { 282 | "StringEquals": { 283 | "iam:PassedToService": [ 284 | "glue.amazonaws.com", 285 | "robomaker.amazonaws.com", 286 | "states.amazonaws.com" 287 | ] 288 | } 289 | } 290 | }, 291 | { 292 | "Effect": "Allow", 293 | "Action": [ 294 | "iam:PassRole" 295 | ], 296 | "Resource": "arn:aws:iam::*:role/*", 297 | "Condition": { 298 | "StringEquals": { 299 | "iam:PassedToService": 
"sagemaker.amazonaws.com" 300 | } 301 | } 302 | } 303 | ] 304 | Version = "2012-10-17" 305 | } 306 | ) 307 | } 308 | 309 | 310 | resource "aws_iam_role_policy_attachment" "tf_mlops_policy_attachment" { 311 | role = aws_iam_role.tf_mlops_role.name 312 | policy_arn = aws_iam_policy.tf_mlops_policy.arn 313 | } 314 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/customer_churn/pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | """Example workflow pipeline script for CustomerChurn pipeline. 14 | 15 | . -RegisterModel 16 | . 17 | Process-> Train -> Evaluate -> Condition . 18 | . 19 | . -(stop) 20 | 21 | Implements a get_pipeline(**kwargs) method. 22 | """ 23 | 24 | import os 25 | 26 | import boto3 27 | import sagemaker 28 | import sagemaker.session 29 | from sagemaker.estimator import Estimator 30 | from sagemaker.inputs import TrainingInput 31 | from sagemaker.model_metrics import MetricsSource, ModelMetrics 32 | from sagemaker.processing import ProcessingInput, ProcessingOutput, ScriptProcessor 33 | from sagemaker.sklearn.processing import SKLearnProcessor 34 | from sagemaker.workflow.condition_step import ConditionStep 35 | from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo 36 | from sagemaker.workflow.functions import JsonGet 37 | from sagemaker.workflow.parameters import ParameterInteger, ParameterString 38 | from sagemaker.workflow.pipeline import Pipeline 39 | from sagemaker.workflow.properties import PropertyFile 40 | from sagemaker.workflow.step_collections import RegisterModel 41 | from sagemaker.workflow.steps import ProcessingStep, TrainingStep 42 | 43 | BASE_DIR = os.path.dirname(os.path.realpath(__file__)) 44 | 45 | 46 | def get_session(region, default_bucket): 47 | """Gets the sagemaker session based on the region. 48 | 49 | Args: 50 | region: the aws region to start the session 51 | default_bucket: the bucket to use for storing the artifacts 52 | 53 | Returns: 54 | `sagemaker.session.Session instance 55 | """ 56 | 57 | boto_session = boto3.Session(region_name=region) 58 | 59 | sagemaker_client = boto_session.client("sagemaker") 60 | runtime_client = boto_session.client("sagemaker-runtime") 61 | return sagemaker.session.Session( 62 | boto_session=boto_session, 63 | sagemaker_client=sagemaker_client, 64 | sagemaker_runtime_client=runtime_client, 65 | default_bucket=default_bucket, 66 | ) 67 | 68 | 69 | def get_pipeline( 70 | region, 71 | role=None, 72 | default_bucket=None, 73 | model_package_group_name="CustomerChurnPackageGroup", # Choose any name 74 | pipeline_name="CustomerChurnDemo-p-ewf8t7lvhivm", # You can find your pipeline name in the Studio UI (project -> Pipelines -> name) 75 | base_job_prefix="CustomerChurn", # Choose any name 76 | ): 77 | """Gets a SageMaker ML Pipeline instance working with on CustomerChurn data. 
78 | 79 | Args: 80 | region: AWS region to create and run the pipeline. 81 | role: IAM role to create and run steps and pipeline. 82 | default_bucket: the bucket to use for storing the artifacts 83 | 84 | Returns: 85 | an instance of a pipeline 86 | """ 87 | sagemaker_session = get_session(region, default_bucket) 88 | if role is None: 89 | role = sagemaker.session.get_execution_role(sagemaker_session) 90 | 91 | # Parameters for pipeline execution 92 | processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1) 93 | processing_instance_type = ParameterString( 94 | name="ProcessingInstanceType", default_value="ml.m5.xlarge" 95 | ) 96 | training_instance_type = ParameterString( 97 | name="TrainingInstanceType", default_value="ml.m5.xlarge" 98 | ) 99 | model_approval_status = ParameterString( 100 | name="ModelApprovalStatus", 101 | default_value="Approved", # ModelApprovalStatus can be set to a default of "Approved" if you don't want manual approval. 102 | ) 103 | input_data = ParameterString( 104 | name="InputDataUrl", 105 | default_value=f"s3://{sagemaker_session.default_bucket()}/sagemaker/DEMO-xgboost-churn/data/RawData.csv", # Change this to point to the s3 location of your raw input data. 106 | ) 107 | 108 | # Processing step for feature engineering 109 | sklearn_processor = SKLearnProcessor( 110 | framework_version="0.23-1", 111 | instance_type=processing_instance_type, 112 | instance_count=processing_instance_count, 113 | base_job_name=f"{base_job_prefix}/sklearn-CustomerChurn-preprocess", # choose any name 114 | sagemaker_session=sagemaker_session, 115 | role=role, 116 | ) 117 | step_process = ProcessingStep( 118 | name="CustomerChurnProcess", # choose any name 119 | processor=sklearn_processor, 120 | outputs=[ 121 | ProcessingOutput(output_name="train", source="/opt/ml/processing/train"), 122 | ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"), 123 | ProcessingOutput(output_name="test", source="/opt/ml/processing/test"), 124 | ], 125 | code=os.path.join(BASE_DIR, "preprocess.py"), 126 | job_arguments=["--input-data", input_data], 127 | ) 128 | 129 | # Training step for generating model artifacts 130 | model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/CustomerChurnTrain" 131 | image_uri = sagemaker.image_uris.retrieve( 132 | framework="xgboost", # we are using the Sagemaker built in xgboost algorithm 133 | region=region, 134 | version="1.0-1", 135 | py_version="py3", 136 | instance_type=training_instance_type, 137 | ) 138 | xgb_train = Estimator( 139 | image_uri=image_uri, 140 | instance_type=training_instance_type, 141 | instance_count=1, 142 | output_path=model_path, 143 | base_job_name=f"{base_job_prefix}/CustomerChurn-train", 144 | sagemaker_session=sagemaker_session, 145 | role=role, 146 | ) 147 | xgb_train.set_hyperparameters( 148 | objective="binary:logistic", 149 | num_round=50, 150 | max_depth=5, 151 | eta=0.2, 152 | gamma=4, 153 | min_child_weight=6, 154 | subsample=0.7, 155 | silent=0, 156 | ) 157 | step_train = TrainingStep( 158 | name="CustomerChurnTrain", 159 | estimator=xgb_train, 160 | inputs={ 161 | "train": TrainingInput( 162 | s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ 163 | "train" 164 | ].S3Output.S3Uri, 165 | content_type="text/csv", 166 | ), 167 | "validation": TrainingInput( 168 | s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ 169 | "validation" 170 | ].S3Output.S3Uri, 171 | content_type="text/csv", 172 | ), 173 | }, 174 | ) 175 | 
176 | # Processing step for evaluation 177 | script_eval = ScriptProcessor( 178 | image_uri=image_uri, 179 | command=["python3"], 180 | instance_type=processing_instance_type, 181 | instance_count=1, 182 | base_job_name=f"{base_job_prefix}/script-CustomerChurn-eval", 183 | sagemaker_session=sagemaker_session, 184 | role=role, 185 | ) 186 | evaluation_report = PropertyFile( 187 | name="EvaluationReport", 188 | output_name="evaluation", 189 | path="evaluation.json", 190 | ) 191 | step_eval = ProcessingStep( 192 | name="CustomerChurnEval", 193 | processor=script_eval, 194 | inputs=[ 195 | ProcessingInput( 196 | source=step_train.properties.ModelArtifacts.S3ModelArtifacts, 197 | destination="/opt/ml/processing/model", 198 | ), 199 | ProcessingInput( 200 | source=step_process.properties.ProcessingOutputConfig.Outputs[ 201 | "test" 202 | ].S3Output.S3Uri, 203 | destination="/opt/ml/processing/test", 204 | ), 205 | ], 206 | outputs=[ 207 | ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"), 208 | ], 209 | code=os.path.join(BASE_DIR, "evaluate.py"), 210 | property_files=[evaluation_report], 211 | ) 212 | 213 | # Register model step that will be conditionally executed 214 | model_metrics = ModelMetrics( 215 | model_statistics=MetricsSource( 216 | s3_uri="{}/evaluation.json".format( 217 | step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"] 218 | ), 219 | content_type="application/json", 220 | ) 221 | ) 222 | 223 | # Register model step that will be conditionally executed 224 | step_register = RegisterModel( 225 | name="CustomerChurnRegisterModel", 226 | estimator=xgb_train, 227 | model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts, 228 | content_types=["text/csv"], 229 | response_types=["text/csv"], 230 | inference_instances=["ml.t2.medium", "ml.m5.large"], 231 | transform_instances=["ml.m5.large"], 232 | model_package_group_name=model_package_group_name, 233 | approval_status=model_approval_status, 234 | model_metrics=model_metrics, 235 | ) 236 | 237 | # Condition step for evaluating model quality and branching execution 238 | cond_lte = ConditionGreaterThanOrEqualTo( # You can change the condition here 239 | left=JsonGet( 240 | step_name=step_eval.name, 241 | property_file=evaluation_report, 242 | json_path="binary_classification_metrics.accuracy.value", # This should follow the structure of your report_dict defined in the evaluate.py file. 243 | ), 244 | right=0.8, # You can change the threshold here 245 | ) 246 | step_cond = ConditionStep( 247 | name="CustomerChurnAccuracyCond", 248 | conditions=[cond_lte], 249 | if_steps=[step_register], 250 | else_steps=[], 251 | ) 252 | 253 | # Pipeline instance 254 | pipeline = Pipeline( 255 | name=pipeline_name, 256 | parameters=[ 257 | processing_instance_type, 258 | processing_instance_count, 259 | training_instance_type, 260 | model_approval_status, 261 | input_data, 262 | ], 263 | steps=[step_process, step_train, step_eval, step_cond], 264 | sagemaker_session=sagemaker_session, 265 | ) 266 | return pipeline 267 | -------------------------------------------------------------------------------- /modelbuild_pipeline/pipelines/customer_churn/.ipynb_checkpoints/pipeline-checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). You 4 | # may not use this file except in compliance with the License. 
A copy of 5 | # the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0/ 8 | # 9 | # or in the "license" file accompanying this file. This file is 10 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 11 | # ANY KIND, either express or implied. See the License for the specific 12 | # language governing permissions and limitations under the License. 13 | """Example workflow pipeline script for CustomerChurn pipeline. 14 | . -RegisterModel 15 | . 16 | Process-> Train -> Evaluate -> Condition . 17 | . 18 | . -(stop) 19 | Implements a get_pipeline(**kwargs) method. 20 | """ 21 | 22 | import os 23 | 24 | import boto3 25 | import sagemaker 26 | import sagemaker.session 27 | 28 | from sagemaker.estimator import Estimator 29 | from sagemaker.inputs import TrainingInput 30 | from sagemaker.processing import ( 31 | ProcessingInput, 32 | ProcessingOutput, 33 | ScriptProcessor, 34 | ) 35 | from sagemaker.sklearn.processing import SKLearnProcessor 36 | from sagemaker.workflow.conditions import ( 37 | ConditionGreaterThanOrEqualTo, 38 | ) 39 | from sagemaker.workflow.condition_step import ( 40 | ConditionStep, 41 | JsonGet, 42 | ) 43 | from sagemaker.model_metrics import ( 44 | MetricsSource, 45 | ModelMetrics, 46 | ) 47 | from sagemaker.workflow.parameters import ( 48 | ParameterInteger, 49 | ParameterString, 50 | ) 51 | from sagemaker.workflow.pipeline import Pipeline 52 | from sagemaker.workflow.properties import PropertyFile 53 | from sagemaker.workflow.steps import ( 54 | ProcessingStep, 55 | TrainingStep, 56 | ) 57 | from sagemaker.workflow.step_collections import RegisterModel 58 | 59 | 60 | BASE_DIR = os.path.dirname(os.path.realpath(__file__)) 61 | 62 | 63 | def get_session(region, default_bucket): 64 | """Gets the sagemaker session based on the region. 65 | Args: 66 | region: the aws region to start the session 67 | default_bucket: the bucket to use for storing the artifacts 68 | Returns: 69 | `sagemaker.session.Session instance 70 | """ 71 | 72 | boto_session = boto3.Session(region_name=region) 73 | 74 | sagemaker_client = boto_session.client("sagemaker") 75 | runtime_client = boto_session.client("sagemaker-runtime") 76 | return sagemaker.session.Session( 77 | boto_session=boto_session, 78 | sagemaker_client=sagemaker_client, 79 | sagemaker_runtime_client=runtime_client, 80 | default_bucket=default_bucket, 81 | ) 82 | 83 | 84 | def get_pipeline( 85 | region, 86 | role=None, 87 | default_bucket=None, 88 | model_package_group_name="CustomerChurnPackageGroup", # Choose any name 89 | pipeline_name="CustomerChurnDemo-p-ewf8t7lvhivm", # You can find your pipeline name in the Studio UI (project -> Pipelines -> name) 90 | base_job_prefix="CustomerChurn", # Choose any name 91 | ): 92 | """Gets a SageMaker ML Pipeline instance working with on CustomerChurn data. 93 | Args: 94 | region: AWS region to create and run the pipeline. 95 | role: IAM role to create and run steps and pipeline. 
96 | default_bucket: the bucket to use for storing the artifacts 97 | Returns: 98 | an instance of a pipeline 99 | """ 100 | sagemaker_session = get_session(region, default_bucket) 101 | if role is None: 102 | role = sagemaker.session.get_execution_role(sagemaker_session) 103 | 104 | # Parameters for pipeline execution 105 | processing_instance_count = ParameterInteger( 106 | name="ProcessingInstanceCount", default_value=1 107 | ) 108 | processing_instance_type = ParameterString( 109 | name="ProcessingInstanceType", default_value="ml.m5.xlarge" 110 | ) 111 | training_instance_type = ParameterString( 112 | name="TrainingInstanceType", default_value="ml.m5.xlarge" 113 | ) 114 | model_approval_status = ParameterString( 115 | name="ModelApprovalStatus", 116 | default_value="PendingManualApproval", # ModelApprovalStatus can be set to a default of "Approved" if you don't want manual approval. 117 | ) 118 | input_data = ParameterString( 119 | name="InputDataUrl", 120 | default_value=f"s3://sagemaker-us-east-1-544507241185/sagemaker/DEMO-xgboost-churn/data/RawData.csv", # Change this to point to the s3 location of your raw input data. 121 | ) 122 | 123 | # Processing step for feature engineering 124 | sklearn_processor = SKLearnProcessor( 125 | framework_version="0.23-1", 126 | instance_type=processing_instance_type, 127 | instance_count=processing_instance_count, 128 | base_job_name=f"{base_job_prefix}/sklearn-CustomerChurn-preprocess", # choose any name 129 | sagemaker_session=sagemaker_session, 130 | role=role, 131 | ) 132 | step_process = ProcessingStep( 133 | name="CustomerChurnProcess", # choose any name 134 | processor=sklearn_processor, 135 | outputs=[ 136 | ProcessingOutput(output_name="train", source="/opt/ml/processing/train"), 137 | ProcessingOutput( 138 | output_name="validation", source="/opt/ml/processing/validation" 139 | ), 140 | ProcessingOutput(output_name="test", source="/opt/ml/processing/test"), 141 | ], 142 | code=os.path.join(BASE_DIR, "preprocess.py"), 143 | job_arguments=["--input-data", input_data], 144 | ) 145 | 146 | # Training step for generating model artifacts 147 | model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/CustomerChurnTrain" 148 | image_uri = sagemaker.image_uris.retrieve( 149 | framework="xgboost", # we are using the Sagemaker built in xgboost algorithm 150 | region=region, 151 | version="1.0-1", 152 | py_version="py3", 153 | instance_type=training_instance_type, 154 | ) 155 | xgb_train = Estimator( 156 | image_uri=image_uri, 157 | instance_type=training_instance_type, 158 | instance_count=1, 159 | output_path=model_path, 160 | base_job_name=f"{base_job_prefix}/CustomerChurn-train", 161 | sagemaker_session=sagemaker_session, 162 | role=role, 163 | ) 164 | xgb_train.set_hyperparameters( 165 | objective="binary:logistic", 166 | num_round=50, 167 | max_depth=5, 168 | eta=0.2, 169 | gamma=4, 170 | min_child_weight=6, 171 | subsample=0.7, 172 | silent=0, 173 | ) 174 | step_train = TrainingStep( 175 | name="CustomerChurnTrain", 176 | estimator=xgb_train, 177 | inputs={ 178 | "train": TrainingInput( 179 | s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ 180 | "train" 181 | ].S3Output.S3Uri, 182 | content_type="text/csv", 183 | ), 184 | "validation": TrainingInput( 185 | s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ 186 | "validation" 187 | ].S3Output.S3Uri, 188 | content_type="text/csv", 189 | ), 190 | }, 191 | ) 192 | 193 | # Processing step for evaluation 194 | script_eval = ScriptProcessor( 195 | 
image_uri=image_uri, 196 | command=["python3"], 197 | instance_type=processing_instance_type, 198 | instance_count=1, 199 | base_job_name=f"{base_job_prefix}/script-CustomerChurn-eval", 200 | sagemaker_session=sagemaker_session, 201 | role=role, 202 | ) 203 | evaluation_report = PropertyFile( 204 | name="EvaluationReport", 205 | output_name="evaluation", 206 | path="evaluation.json", 207 | ) 208 | step_eval = ProcessingStep( 209 | name="CustomerChurnEval", 210 | processor=script_eval, 211 | inputs=[ 212 | ProcessingInput( 213 | source=step_train.properties.ModelArtifacts.S3ModelArtifacts, 214 | destination="/opt/ml/processing/model", 215 | ), 216 | ProcessingInput( 217 | source=step_process.properties.ProcessingOutputConfig.Outputs[ 218 | "test" 219 | ].S3Output.S3Uri, 220 | destination="/opt/ml/processing/test", 221 | ), 222 | ], 223 | outputs=[ 224 | ProcessingOutput( 225 | output_name="evaluation", source="/opt/ml/processing/evaluation" 226 | ), 227 | ], 228 | code=os.path.join(BASE_DIR, "evaluate.py"), 229 | property_files=[evaluation_report], 230 | ) 231 | 232 | # Model metrics built from the evaluation report, attached to the registered model 233 | model_metrics = ModelMetrics( 234 | model_statistics=MetricsSource( 235 | s3_uri="{}/evaluation.json".format( 236 | step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][ 237 | "S3Uri" 238 | ] 239 | ), 240 | content_type="application/json", 241 | ) 242 | ) 243 | 244 | # Register model step that will be conditionally executed 245 | step_register = RegisterModel( 246 | name="CustomerChurnRegisterModel", 247 | estimator=xgb_train, 248 | model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts, 249 | content_types=["text/csv"], 250 | response_types=["text/csv"], 251 | inference_instances=["ml.t2.medium", "ml.m5.large"], 252 | transform_instances=["ml.m5.large"], 253 | model_package_group_name=model_package_group_name, 254 | approval_status=model_approval_status, 255 | model_metrics=model_metrics, 256 | ) 257 | 258 | # Condition step for evaluating model quality and branching execution 259 | cond_lte = ConditionGreaterThanOrEqualTo( 260 | left=JsonGet( 261 | step=step_eval, 262 | property_file=evaluation_report, 263 | json_path="binary_classification_metrics.accuracy.value"), right=0.8 264 | ) 265 | step_cond = ConditionStep( 266 | name="CustomerChurnAccuracyCond", 267 | conditions=[cond_lte], 268 | if_steps=[step_register], 269 | else_steps=[], 270 | ) 271 | 272 | # Pipeline instance 273 | pipeline = Pipeline( 274 | name=pipeline_name, 275 | parameters=[ 276 | processing_instance_type, 277 | processing_instance_count, 278 | training_instance_type, 279 | model_approval_status, 280 | input_data, 281 | ], 282 | steps=[step_process, step_train, step_eval, step_cond], 283 | sagemaker_session=sagemaker_session, 284 | ) 285 | return pipeline -------------------------------------------------------------------------------- /Notebooks/SageMaker_Customer_Churn_XGB_end2end.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## SageMaker Model Building and Deployment " 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "\n", 15 | "In this notebook we show how to use Amazon SageMaker to develop, train, tune and deploy an XGBoost model. Synthetic customer churn data is used. 
\n", 16 | "\n", 17 | "The data is in AWS public S3 bucket: s3://sagemaker-sample-files/datasets/tabular/synthetic/churn.txt\n", 18 | "\n", 19 | "Sklearn Processor is used to process the raw data.\n", 20 | "\n", 21 | "* XGBoost https://sagemaker.readthedocs.io/en/stable/frameworks/xgboost/using_xgboost.html?highlight=xgboost\n", 22 | "* Doc https://sagemaker.readthedocs.io/en/stable/using_sklearn.html\n", 23 | "* SDK https://sagemaker.readthedocs.io/en/stable/sagemaker.sklearn.html\n", 24 | "* boto3 https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#client\n", 25 | " \n", 26 | "**This sample is provided for demonstration purposes, make sure to conduct appropriate testing if derivating this code for your own use-cases!**" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "%matplotlib inline\n", 36 | "import os\n", 37 | "import time\n", 38 | "import logging\n", 39 | "import pandas as pd\n", 40 | "import numpy as np\n", 41 | "import sagemaker\n", 42 | "import json\n", 43 | "import boto3\n", 44 | "from sagemaker import get_execution_role\n", 45 | "\n", 46 | "sm_client = boto3.client('sagemaker')" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# Retrieve the bucket\n", 56 | "sagemaker_session = sagemaker.Session()\n", 57 | "bucket = sagemaker_session.default_bucket() # this could also be a hard-coded bucket name\n", 58 | "region = sagemaker_session.boto_region_name\n", 59 | "print(region)\n", 60 | "role = get_execution_role()\n", 61 | "\n", 62 | "project_name = \"test_pro\"\n", 63 | "project_id = \"test_id\"\n", 64 | "#model_package_group_name = project_name\n", 65 | "print(f\"sagemaker role arn <{role}>\")\n", 66 | "\n", 67 | "assert(len(project_name) <= 15 ) # the project name should not have more than 15 chars" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "print(bucket)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "## Load Raw Data to S3" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "Load raw data from the public S3 bucket to your own S3 bucket." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "#load raw data to S3 bucket" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "!aws s3 cp s3://sagemaker-sample-files/datasets/tabular/synthetic/churn.txt s3://{bucket}/sagemaker/DEMO-xgboost-churn/data/RawData.csv " 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "## Prepare script to process raw data" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "Create preprocessing script. This script will be used by SageMaker process job instance to preocess raw data." 
123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "%%writefile preprocess.py\n", 132 | "\n", 133 | "\"\"\"Preprocess the customer churn dataset.\"\"\"\n", 134 | "\n", 135 | "import argparse\n", 136 | "import logging\n", 137 | "import pathlib\n", 138 | "\n", 139 | "import boto3\n", 140 | "import numpy as np\n", 141 | "import pandas as pd\n", 142 | "\n", 143 | "logger = logging.getLogger()\n", 144 | "logger.setLevel(logging.INFO)\n", 145 | "logger.addHandler(logging.StreamHandler())\n", 146 | "\n", 147 | "if __name__ == \"__main__\":\n", 148 | " logger.info(\"Starting preprocessing.\")\n", 149 | " parser = argparse.ArgumentParser()\n", 150 | " parser.add_argument(\"--input-data\", type=str, required=True)\n", 151 | " args = parser.parse_args()\n", 152 | "\n", 153 | " base_dir = \"/opt/ml/processing\"\n", 154 | " pathlib.Path(f\"{base_dir}/data\").mkdir(parents=True, exist_ok=True)\n", 155 | " input_data = args.input_data\n", 156 | " print(input_data)\n", 157 | " bucket = input_data.split(\"/\")[2]\n", 158 | " key = \"/\".join(input_data.split(\"/\")[3:])\n", 159 | "\n", 160 | " logger.info(\"Downloading data from bucket: %s, key: %s\", bucket, key)\n", 161 | " fn = f\"{base_dir}/data/raw-data.csv\"\n", 162 | " s3 = boto3.resource(\"s3\")\n", 163 | " s3.Bucket(bucket).download_file(key, fn)\n", 164 | "\n", 165 | " logger.info(\"Reading downloaded data.\")\n", 166 | "\n", 167 | " # read in csv\n", 168 | " df = pd.read_csv(fn)\n", 169 | "\n", 170 | " # drop the \"Phone\" feature column\n", 171 | " df = df.drop([\"Phone\"], axis=1)\n", 172 | "\n", 173 | " # Change the data type of \"Area Code\"\n", 174 | " df[\"Area Code\"] = df[\"Area Code\"].astype(object)\n", 175 | "\n", 176 | " # Drop several other columns\n", 177 | " df = df.drop([\"Day Charge\", \"Eve Charge\", \"Night Charge\", \"Intl Charge\"], axis=1)\n", 178 | "\n", 179 | " # Convert categorical variables into dummy/indicator variables.\n", 180 | " model_data = pd.get_dummies(df)\n", 181 | "\n", 182 | " # Create one binary classification target column\n", 183 | " model_data = pd.concat(\n", 184 | " [\n", 185 | " model_data[\"Churn?_True.\"],\n", 186 | " model_data.drop([\"Churn?_False.\", \"Churn?_True.\"], axis=1),\n", 187 | " ],\n", 188 | " axis=1,\n", 189 | " )\n", 190 | "\n", 191 | " # Split the data\n", 192 | " train_data, validation_data, test_data = np.split(\n", 193 | " model_data.sample(frac=1, random_state=1729),\n", 194 | " [int(0.7 * len(model_data)), int(0.9 * len(model_data))],\n", 195 | " )\n", 196 | "\n", 197 | " pd.DataFrame(train_data).to_csv(\n", 198 | " f\"{base_dir}/train/train.csv\", header=False, index=False\n", 199 | " )\n", 200 | " pd.DataFrame(validation_data).to_csv(\n", 201 | " f\"{base_dir}/validation/validation.csv\", header=False, index=False\n", 202 | " )\n", 203 | " pd.DataFrame(test_data).to_csv(\n", 204 | " f\"{base_dir}/test/test.csv\", header=False, index=False\n", 205 | " )\n" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "## Prepare data for model training" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "from sagemaker.workflow.parameters import ParameterInteger, ParameterString" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | 
"processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", 231 | "processing_instance_type = ParameterString(\n", 232 | " name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\"\n", 233 | " )\n", 234 | "\n", 235 | "training_instance_type = ParameterString(\n", 236 | " name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\"\n", 237 | " )\n", 238 | "\n", 239 | "model_approval_status = ParameterString(\n", 240 | " name=\"ModelApprovalStatus\",\n", 241 | " default_value=\"Approved\", # ModelApprovalStatus can be set to a default of \"Approved\" if you don't want manual approval.\n", 242 | " )\n", 243 | "\n", 244 | "input_data = ParameterString(\n", 245 | " name=\"InputDataUrl\",\n", 246 | " default_value=f\"s3://{sagemaker_session.default_bucket()}/sagemaker/DEMO-xgboost-churn/data/RawData.csv\", # Change this to point to the s3 location of your raw input data.\n", 247 | " )" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "SageMaker Process instance with sklearn image is used to process raw data." 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "from sagemaker.sklearn.processing import SKLearnProcessor\n", 264 | "\n", 265 | "sklearn_processor = SKLearnProcessor(\n", 266 | " framework_version=\"0.23-1\",\n", 267 | " role=role,\n", 268 | " instance_type=processing_instance_type,\n", 269 | " instance_count=processing_instance_count,\n", 270 | "\n", 271 | ")" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "Processed data is saved back to S3 bucket." 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "from sagemaker.processing import ProcessingInput, ProcessingOutput\n", 288 | "\n", 289 | "sklearn_processor.run(\n", 290 | " code=\"preprocess.py\", \n", 291 | " outputs=[\n", 292 | " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", 293 | " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", 294 | " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", 295 | " ],\n", 296 | " arguments=[\"--input-data\", input_data],\n", 297 | ")\n", 298 | "\n", 299 | "preprocessing_job_description = sklearn_processor.jobs[-1].describe()" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "## Model Training" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "Get training and validation data paths." 
314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "s3_input_train=preprocessing_job_description['ProcessingOutputConfig']['Outputs'][0]['S3Output']['S3Uri']" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "s3_input_validation=preprocessing_job_description['ProcessingOutputConfig']['Outputs'][1]['S3Output']['S3Uri']" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "Define XGBoost model" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "from sagemaker.inputs import TrainingInput\n", 348 | "\n", 349 | "content_type = \"csv\"\n", 350 | "train_input = TrainingInput(s3_input_train, content_type=content_type)\n", 351 | "validation_input = TrainingInput(s3_input_validation, content_type=content_type)\n" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "import sagemaker\n", 361 | "from sagemaker.serializers import CSVSerializer" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "prefix = 'sagemaker/xgboost_cutomer_churn'" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "container=sagemaker.image_uris.retrieve(\"xgboost\", region, \"1.2-1\")\n", 380 | "print(container)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [ 389 | "# initialize hyperparameters\n", 390 | "hyperparameters = {\n", 391 | " \"max_depth\":\"5\",\n", 392 | " \"eta\":\"0.2\",\n", 393 | " \"gamma\":\"4\",\n", 394 | " \"min_child_weight\":\"6\",\n", 395 | " \"subsample\":\"0.7\",\n", 396 | " \"objective\":\"binary:logistic\",\n", 397 | " \"num_round\":\"50\"}" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [ 406 | "xgb = sagemaker.estimator.Estimator(container,\n", 407 | " role, \n", 408 | " instance_count=1, \n", 409 | " instance_type='ml.m4.xlarge',\n", 410 | " hyperparameters=hyperparameters,\n", 411 | " output_path='s3://{}/{}/output'.format(bucket, prefix),\n", 412 | " sagemaker_session=sagemaker_session)" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "Train the XGboost model" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [ 428 | "xgb.fit({'train': train_input, 'validation': validation_input})" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "sm_boto3 = boto3.client(\"sagemaker\")" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": null, 443 | "metadata": {}, 444 | "outputs": [], 445 | "source": [ 446 | "artifact = sm_boto3.describe_training_job(\n", 447 | " TrainingJobName=xgb.latest_training_job.name\n", 448 | ")[\"ModelArtifacts\"][\"S3ModelArtifacts\"]\n", 449 | "\n", 450 | "print(\"Model artifact persisted at \" + 
artifact)" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "## Create Endpoint" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "Create an endpoint using SageMaker SDK" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "xgb_predictor = xgb.deploy(\n", 481 | "initial_instance_count = 1,\n", 482 | "instance_type = 'ml.m4.xlarge',\n", 483 | "serializer = CSVSerializer())" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "print(f'Endpoint name: {xgb_predictor.endpoint_name}')" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "metadata": {}, 498 | "source": [ 499 | "## Invoke Endpoint" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": {}, 506 | "outputs": [], 507 | "source": [ 508 | "test_data=pd.read_csv('test.csv',header=None)" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": null, 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [ 517 | "def predict(data, rows=500):\n", 518 | " split_array = np.array_split(data, int(data.shape[0] / float(rows) + 1))\n", 519 | " predictions = ''\n", 520 | " for array in split_array:\n", 521 | " predictions = ','.join([predictions, xgb_predictor.predict(array).decode('utf-8')])\n", 522 | "\n", 523 | " return np.fromstring(predictions[1:], sep=',')\n", 524 | "\n", 525 | "predictions = predict(test_data.to_numpy()[:1,1:])\n", 526 | "predictions" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": {}, 533 | "outputs": [], 534 | "source": [] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": null, 539 | "metadata": {}, 540 | "outputs": [], 541 | "source": [] 542 | } 543 | ], 544 | "metadata": { 545 | "instance_type": "ml.t3.medium", 546 | "kernelspec": { 547 | "display_name": "Python 3.9.13 64-bit", 548 | "language": "python", 549 | "name": "python3" 550 | }, 551 | "language_info": { 552 | "codemirror_mode": { 553 | "name": "ipython", 554 | "version": 3 555 | }, 556 | "file_extension": ".py", 557 | "mimetype": "text/x-python", 558 | "name": "python", 559 | "nbconvert_exporter": "python", 560 | "pygments_lexer": "ipython3", 561 | "version": "3.9.13" 562 | }, 563 | "vscode": { 564 | "interpreter": { 565 | "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" 566 | } 567 | } 568 | }, 569 | "nbformat": 4, 570 | "nbformat_minor": 4 571 | } 572 | -------------------------------------------------------------------------------- /Notebooks/SageMaker_Customer_Churn_XGB_Pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## SageMaker Model Building and Deployment using SageMaker Workflow Pipeline" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "\n", 15 | "In this notebook we show how to use Amazon SageMaker to develop, train, tune and deploy a XGBoost model. Sythetic customer churn data is used. 
\n", 16 | "\n", 17 | "The data is in AWS public S3 bucket: s3://sagemaker-sample-files/datasets/tabular/synthetic/churn.txt\n", 18 | "\n", 19 | "Sklearn Processor is used to process the raw data.\n", 20 | "\n", 21 | "* XGBoost https://sagemaker.readthedocs.io/en/stable/frameworks/xgboost/using_xgboost.html?highlight=xgboost\n", 22 | "* Doc https://sagemaker.readthedocs.io/en/stable/using_sklearn.html\n", 23 | "* SDK https://sagemaker.readthedocs.io/en/stable/sagemaker.sklearn.html\n", 24 | "* boto3 https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#client\n", 25 | " \n", 26 | "**This sample is provided for demonstration purposes, make sure to conduct appropriate testing if derivating this code for your own use-cases!**" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "%matplotlib inline\n", 36 | "import os\n", 37 | "import time\n", 38 | "import logging\n", 39 | "import pandas as pd\n", 40 | "import numpy as np\n", 41 | "import sagemaker\n", 42 | "import json\n", 43 | "import boto3\n", 44 | "from sagemaker import get_execution_role\n", 45 | "\n", 46 | "sm_client = boto3.client('sagemaker')" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from sagemaker.estimator import Estimator\n", 56 | "from sagemaker.inputs import TrainingInput\n", 57 | "from sagemaker.model_metrics import MetricsSource, ModelMetrics\n", 58 | "from sagemaker.processing import ProcessingInput, ProcessingOutput, ScriptProcessor\n", 59 | "from sagemaker.sklearn.processing import SKLearnProcessor\n", 60 | "from sagemaker.workflow.condition_step import ConditionStep\n", 61 | "from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo\n", 62 | "from sagemaker.workflow.functions import JsonGet\n", 63 | "from sagemaker.workflow.parameters import ParameterInteger, ParameterString\n", 64 | "from sagemaker.workflow.pipeline import Pipeline\n", 65 | "from sagemaker.workflow.properties import PropertyFile\n", 66 | "from sagemaker.workflow.step_collections import RegisterModel\n", 67 | "from sagemaker.workflow.steps import ProcessingStep, TrainingStep" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "# Retrieve the bucket\n", 77 | "sagemaker_session = sagemaker.Session()\n", 78 | "bucket = sagemaker_session.default_bucket() # this could also be a hard-coded bucket name\n", 79 | "region = sagemaker_session.boto_region_name\n", 80 | "print(region)\n", 81 | "role = get_execution_role()\n", 82 | "\n", 83 | "project_name = \"test_pro\"\n", 84 | "project_id = \"test_id\"\n", 85 | "#model_package_group_name = project_name\n", 86 | "print(f\"sagemaker role arn <{role}>\")\n", 87 | "\n", 88 | "assert(len(project_name) <= 15 ) # the project name should not have more than 15 chars" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "print(bucket)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## Load Raw Data to S3" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "Load raw data from the public S3 bucket to your own S3 bucket." 
112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "#load raw data to S3 bucket" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "!aws s3 cp s3://sagemaker-sample-files/datasets/tabular/synthetic/churn.txt s3://{bucket}/sagemaker/DEMO-xgboost-churn/data/RawData.csv " 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## Prepare script to be used by preprocessing job and model evaluation" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "Create preprocessing script. This script will be used by SageMaker process job instance to preocess raw data." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "%%writefile preprocess.py\n", 153 | "\n", 154 | "\"\"\"Preprocess the customer churn dataset.\"\"\"\n", 155 | "\n", 156 | "import argparse\n", 157 | "import logging\n", 158 | "import pathlib\n", 159 | "\n", 160 | "import boto3\n", 161 | "import numpy as np\n", 162 | "import pandas as pd\n", 163 | "\n", 164 | "logger = logging.getLogger()\n", 165 | "logger.setLevel(logging.INFO)\n", 166 | "logger.addHandler(logging.StreamHandler())\n", 167 | "\n", 168 | "if __name__ == \"__main__\":\n", 169 | " logger.info(\"Starting preprocessing.\")\n", 170 | " parser = argparse.ArgumentParser()\n", 171 | " parser.add_argument(\"--input-data\", type=str, required=True)\n", 172 | " args = parser.parse_args()\n", 173 | "\n", 174 | " base_dir = \"/opt/ml/processing\"\n", 175 | " pathlib.Path(f\"{base_dir}/data\").mkdir(parents=True, exist_ok=True)\n", 176 | " input_data = args.input_data\n", 177 | " print(input_data)\n", 178 | " bucket = input_data.split(\"/\")[2]\n", 179 | " key = \"/\".join(input_data.split(\"/\")[3:])\n", 180 | "\n", 181 | " logger.info(\"Downloading data from bucket: %s, key: %s\", bucket, key)\n", 182 | " fn = f\"{base_dir}/data/raw-data.csv\"\n", 183 | " s3 = boto3.resource(\"s3\")\n", 184 | " s3.Bucket(bucket).download_file(key, fn)\n", 185 | "\n", 186 | " logger.info(\"Reading downloaded data.\")\n", 187 | "\n", 188 | " # read in csv\n", 189 | " df = pd.read_csv(fn)\n", 190 | "\n", 191 | " # drop the \"Phone\" feature column\n", 192 | " df = df.drop([\"Phone\"], axis=1)\n", 193 | "\n", 194 | " # Change the data type of \"Area Code\"\n", 195 | " df[\"Area Code\"] = df[\"Area Code\"].astype(object)\n", 196 | "\n", 197 | " # Drop several other columns\n", 198 | " df = df.drop([\"Day Charge\", \"Eve Charge\", \"Night Charge\", \"Intl Charge\"], axis=1)\n", 199 | "\n", 200 | " # Convert categorical variables into dummy/indicator variables.\n", 201 | " model_data = pd.get_dummies(df)\n", 202 | "\n", 203 | " # Create one binary classification target column\n", 204 | " model_data = pd.concat(\n", 205 | " [\n", 206 | " model_data[\"Churn?_True.\"],\n", 207 | " model_data.drop([\"Churn?_False.\", \"Churn?_True.\"], axis=1),\n", 208 | " ],\n", 209 | " axis=1,\n", 210 | " )\n", 211 | "\n", 212 | " # Split the data\n", 213 | " train_data, validation_data, test_data = np.split(\n", 214 | " model_data.sample(frac=1, random_state=1729),\n", 215 | " [int(0.7 * len(model_data)), int(0.9 * len(model_data))],\n", 216 | " )\n", 217 | "\n", 218 | " pd.DataFrame(train_data).to_csv(\n", 219 | " 
f\"{base_dir}/train/train.csv\", header=False, index=False\n", 220 | " )\n", 221 | " pd.DataFrame(validation_data).to_csv(\n", 222 | " f\"{base_dir}/validation/validation.csv\", header=False, index=False\n", 223 | " )\n", 224 | " pd.DataFrame(test_data).to_csv(\n", 225 | " f\"{base_dir}/test/test.csv\", header=False, index=False\n", 226 | " )\n" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "### Prepare evaluation script" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "%%writefile evaluate.py\n", 243 | "\n", 244 | "\"\"\"Evaluation script for measuring model accuracy.\"\"\"\n", 245 | "\n", 246 | "import json\n", 247 | "import os\n", 248 | "import tarfile\n", 249 | "import logging\n", 250 | "import pickle\n", 251 | "\n", 252 | "import pandas as pd\n", 253 | "import xgboost\n", 254 | "\n", 255 | "logger = logging.getLogger()\n", 256 | "logger.setLevel(logging.INFO)\n", 257 | "logger.addHandler(logging.StreamHandler())\n", 258 | "\n", 259 | "# May need to import additional metrics depending on what you are measuring.\n", 260 | "# See https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html\n", 261 | "from sklearn.metrics import classification_report, roc_auc_score, accuracy_score\n", 262 | "\n", 263 | "\n", 264 | "if __name__ == \"__main__\":\n", 265 | " model_path = \"/opt/ml/processing/model/model.tar.gz\"\n", 266 | " with tarfile.open(model_path) as tar:\n", 267 | " tar.extractall(path=\"..\")\n", 268 | "\n", 269 | " logger.debug(\"Loading xgboost model.\")\n", 270 | " model = pickle.load(open(\"xgboost-model\", \"rb\"))\n", 271 | "\n", 272 | " print(\"Loading test input data\")\n", 273 | " test_path = \"/opt/ml/processing/test/test.csv\"\n", 274 | " df = pd.read_csv(test_path, header=None)\n", 275 | "\n", 276 | " logger.debug(\"Reading test data.\")\n", 277 | " y_test = df.iloc[:, 0].to_numpy()\n", 278 | " df.drop(df.columns[0], axis=1, inplace=True)\n", 279 | " X_test = xgboost.DMatrix(df.values)\n", 280 | "\n", 281 | " logger.info(\"Performing predictions against test data.\")\n", 282 | " predictions = model.predict(X_test)\n", 283 | "\n", 284 | " print(\"Creating classification evaluation report\")\n", 285 | " acc = accuracy_score(y_test, predictions.round())\n", 286 | " auc = roc_auc_score(y_test, predictions.round())\n", 287 | "\n", 288 | " # The metrics reported can change based on the model used, but it must be a specific name per (https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html)\n", 289 | " report_dict = {\n", 290 | " \"binary_classification_metrics\": {\n", 291 | " \"accuracy\": {\n", 292 | " \"value\": acc,\n", 293 | " \"standard_deviation\" : \"NaN\"\n", 294 | " },\n", 295 | " \"auc\" : {\n", 296 | " \"value\" : auc,\n", 297 | " \"standard_deviation\": \"NaN\"\n", 298 | " },\n", 299 | " },\n", 300 | " }\n", 301 | " evaluation_output_path = '/opt/ml/processing/evaluation/evaluation.json'\n", 302 | " with open(evaluation_output_path, 'w') as f:\n", 303 | " f.write(json.dumps(report_dict))\n", 304 | " \n" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "## Define Model Building Pipeline" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 
324 | "source": [ 325 | " Pipeline input parameters are listed below. " 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", 335 | "processing_instance_type = ParameterString(\n", 336 | " name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\"\n", 337 | " )\n", 338 | "\n", 339 | "training_instance_type = ParameterString(\n", 340 | " name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\"\n", 341 | " )\n", 342 | "\n", 343 | "model_approval_status = ParameterString(\n", 344 | " name=\"ModelApprovalStatus\",\n", 345 | " default_value=\"Approved\", # ModelApprovalStatus can be set to a default of \"Approved\" if you don't want manual approval.\n", 346 | " )\n", 347 | "\n", 348 | "input_data = ParameterString(\n", 349 | " name=\"InputDataUrl\",\n", 350 | " default_value=f\"s3://{sagemaker_session.default_bucket()}/sagemaker/DEMO-xgboost-churn/data/RawData.csv\", # Change this to point to the s3 location of your raw input data.\n", 351 | " )" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "model_package_group_name=\"CustomerChurnPackageGroup\" # Choose any name\n", 361 | "#pipeline_name=\"CustomerChurnDemoPipe2\" # \n", 362 | "base_job_prefix=\"CustomerChurn\" # Choose any name" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "from time import strftime,gmtime\n", 372 | "pipeline_name = 'CustomerChurn-Pipe-' + strftime(\"%M%S\", gmtime())" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "The below defines a SageMaker model buidling pipeline using workflow. 
" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "# Processing step for feature engineering\n", 389 | "sklearn_processor = SKLearnProcessor(\n", 390 | " framework_version=\"0.23-1\",\n", 391 | " instance_type=processing_instance_type,\n", 392 | " instance_count=processing_instance_count,\n", 393 | " base_job_name=f\"{base_job_prefix}/sklearn-CustomerChurn-preprocess\", # choose any name\n", 394 | " sagemaker_session=sagemaker_session,\n", 395 | " role=role,\n", 396 | ")\n", 397 | "step_process = ProcessingStep(\n", 398 | " name=\"CustomerChurnProcess\", # choose any name\n", 399 | " processor=sklearn_processor,\n", 400 | " outputs=[\n", 401 | " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", 402 | " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", 403 | " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", 404 | " ],\n", 405 | " code=(\"preprocess.py\"),\n", 406 | " job_arguments=[\"--input-data\", input_data],\n", 407 | ")\n", 408 | "# Training step for generating model artifacts\n", 409 | "model_path = f\"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/CustomerChurnTrain\"\n", 410 | "image_uri = sagemaker.image_uris.retrieve(\n", 411 | " framework=\"xgboost\", # we are using the Sagemaker built in xgboost algorithm\n", 412 | " region=region,\n", 413 | " version=\"1.0-1\",\n", 414 | " py_version=\"py3\",\n", 415 | " instance_type=training_instance_type,\n", 416 | ")\n", 417 | "xgb_train = Estimator(\n", 418 | " image_uri=image_uri,\n", 419 | " instance_type=training_instance_type,\n", 420 | " instance_count=1,\n", 421 | " output_path=model_path,\n", 422 | " base_job_name=f\"{base_job_prefix}/CustomerChurn-train\",\n", 423 | " sagemaker_session=sagemaker_session,\n", 424 | " role=role,\n", 425 | ")\n", 426 | "xgb_train.set_hyperparameters(\n", 427 | " objective=\"binary:logistic\",\n", 428 | " num_round=50,\n", 429 | " max_depth=5,\n", 430 | " eta=0.2,\n", 431 | " gamma=4,\n", 432 | " min_child_weight=6,\n", 433 | " subsample=0.7,\n", 434 | " silent=0,\n", 435 | ")\n", 436 | "step_train = TrainingStep(\n", 437 | " name=\"CustomerChurnTrain\",\n", 438 | " estimator=xgb_train,\n", 439 | " inputs={\n", 440 | " \"train\": TrainingInput(\n", 441 | " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", 442 | " \"train\"\n", 443 | " ].S3Output.S3Uri,\n", 444 | " content_type=\"text/csv\",\n", 445 | " ),\n", 446 | " \"validation\": TrainingInput(\n", 447 | " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\n", 448 | " \"validation\"\n", 449 | " ].S3Output.S3Uri,\n", 450 | " content_type=\"text/csv\",\n", 451 | " ),\n", 452 | " },\n", 453 | ")\n", 454 | "# Processing step for evaluation\n", 455 | "script_eval = ScriptProcessor(\n", 456 | " image_uri=image_uri,\n", 457 | " command=[\"python3\"],\n", 458 | " instance_type=processing_instance_type,\n", 459 | " instance_count=1,\n", 460 | " base_job_name=f\"{base_job_prefix}/script-CustomerChurn-eval\",\n", 461 | " sagemaker_session=sagemaker_session,\n", 462 | " role=role,\n", 463 | ")\n", 464 | "evaluation_report = PropertyFile(\n", 465 | " name=\"EvaluationReport\",\n", 466 | " output_name=\"evaluation\",\n", 467 | " path=\"evaluation.json\",\n", 468 | ")\n", 469 | "step_eval = ProcessingStep(\n", 470 | " name=\"CustomerChurnEval\",\n", 471 | " processor=script_eval,\n", 472 | " inputs=[\n", 473 | " 
ProcessingInput(\n", 474 | " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", 475 | " destination=\"/opt/ml/processing/model\",\n", 476 | " ),\n", 477 | " ProcessingInput(\n", 478 | " source=step_process.properties.ProcessingOutputConfig.Outputs[\n", 479 | " \"test\"\n", 480 | " ].S3Output.S3Uri,\n", 481 | " destination=\"/opt/ml/processing/test\",\n", 482 | " ),\n", 483 | " ],\n", 484 | " outputs=[\n", 485 | " ProcessingOutput(output_name=\"evaluation\", source=\"/opt/ml/processing/evaluation\"),\n", 486 | " ],\n", 487 | " code=(\"evaluate.py\"),\n", 488 | " property_files=[evaluation_report],\n", 489 | ")\n", 490 | "# Register model step that will be conditionally executed\n", 491 | "model_metrics = ModelMetrics(\n", 492 | " model_statistics=MetricsSource(\n", 493 | " s3_uri=\"{}/evaluation.json\".format(\n", 494 | " step_eval.arguments[\"ProcessingOutputConfig\"][\"Outputs\"][0][\"S3Output\"][\"S3Uri\"]\n", 495 | " ),\n", 496 | " content_type=\"application/json\",\n", 497 | " )\n", 498 | ")\n", 499 | "# Register model step that will be conditionally executed\n", 500 | "step_register = RegisterModel(\n", 501 | " name=\"CustomerChurnRegisterModel\",\n", 502 | " estimator=xgb_train,\n", 503 | " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", 504 | " content_types=[\"text/csv\"],\n", 505 | " response_types=[\"text/csv\"],\n", 506 | " inference_instances=[\"ml.t2.medium\", \"ml.m5.large\"],\n", 507 | " transform_instances=[\"ml.m5.large\"],\n", 508 | " model_package_group_name=model_package_group_name,\n", 509 | " approval_status=model_approval_status,\n", 510 | " model_metrics=model_metrics,\n", 511 | ")\n", 512 | "# Condition step for evaluating model quality and branching execution\n", 513 | "cond_lte = ConditionGreaterThanOrEqualTo( # You can change the condition here\n", 514 | " left=JsonGet(\n", 515 | " step_name=step_eval.name,\n", 516 | " property_file=evaluation_report,\n", 517 | " json_path=\"binary_classification_metrics.accuracy.value\", # This should follow the structure of your report_dict defined in the evaluate.py file.\n", 518 | " ),\n", 519 | " right=0.8, # You can change the threshold here\n", 520 | ")\n", 521 | "step_cond = ConditionStep(\n", 522 | " name=\"CustomerChurnAccuracyCond\",\n", 523 | " conditions=[cond_lte],\n", 524 | " if_steps=[step_register],\n", 525 | " else_steps=[],\n", 526 | ")\n", 527 | "\n", 528 | "# Pipeline instance\n", 529 | "pipeline = Pipeline(\n", 530 | " name=pipeline_name,\n", 531 | " parameters=[\n", 532 | " processing_instance_type,\n", 533 | " processing_instance_count,\n", 534 | " training_instance_type,\n", 535 | " model_approval_status,\n", 536 | " input_data,\n", 537 | " ],\n", 538 | " steps=[step_process, step_train, step_eval, step_cond],\n", 539 | " sagemaker_session=sagemaker_session,\n", 540 | ")" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": {}, 546 | "source": [ 547 | "## Create Pipeline" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": null, 553 | "metadata": {}, 554 | "outputs": [], 555 | "source": [ 556 | "from botocore.exceptions import ClientError, ValidationError\n", 557 | "\n", 558 | "try:\n", 559 | " response = pipeline.create(role_arn=role)\n", 560 | "except ClientError as e:\n", 561 | " error = e.response[\"Error\"]\n", 562 | " if error[\"Code\"] == \"ValidationError\" and \"Pipeline names must be unique within\" in error[\"Message\"]:\n", 563 | " print(error[\"Message\"])\n", 564 | " response = pipeline.describe()\n", 565 | " 
else:\n", 566 | " raise\n", 567 | "\n", 568 | "pipeline_arn = response[\"PipelineArn\"]\n", 569 | "sm_client.add_tags(\n", 570 | " ResourceArn=pipeline_arn,\n", 571 | " Tags=[\n", 572 | " {'Key': 'sagemaker:project-name', 'Value': project_name },\n", 573 | " {'Key': 'sagemaker:project-id', 'Value': project_id }\n", 574 | " ]\n", 575 | ")\n", 576 | "print(pipeline_arn)" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": null, 582 | "metadata": {}, 583 | "outputs": [], 584 | "source": [] 585 | }, 586 | { 587 | "cell_type": "markdown", 588 | "metadata": {}, 589 | "source": [ 590 | "## Run Pipeline" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": null, 596 | "metadata": {}, 597 | "outputs": [], 598 | "source": [ 599 | "start_response = pipeline.start()\n", 600 | "\n", 601 | "pipeline_execution_arn = start_response.arn\n", 602 | "print(pipeline_execution_arn)\n", 603 | "\n", 604 | "while True:\n", 605 | " resp = sm_client.describe_pipeline_execution(PipelineExecutionArn=pipeline_execution_arn)\n", 606 | " if resp['PipelineExecutionStatus'] == 'Executing':\n", 607 | " print('Running...')\n", 608 | " else:\n", 609 | " print(resp['PipelineExecutionStatus'], pipeline_execution_arn)\n", 610 | " break\n", 611 | " time.sleep(15)" 612 | ] 613 | }, 614 | { 615 | "cell_type": "markdown", 616 | "metadata": {}, 617 | "source": [ 618 | "## Approve the model to kick-off the deployment process" 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": null, 624 | "metadata": {}, 625 | "outputs": [], 626 | "source": [ 627 | "# list all packages and select the latest one\n", 628 | "packages = sm_client.list_model_packages(ModelPackageGroupName=model_package_group_name)['ModelPackageSummaryList']\n", 629 | "packages = sorted(packages, key=lambda x: x['CreationTime'], reverse=True)\n", 630 | "\n", 631 | "latest_model_package_arn = packages[0]['ModelPackageArn']" 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": null, 637 | "metadata": {}, 638 | "outputs": [], 639 | "source": [ 640 | "sm_client.list_model_packages(ModelPackageGroupName=model_package_group_name)['ModelPackageSummaryList']" 641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": null, 646 | "metadata": {}, 647 | "outputs": [], 648 | "source": [ 649 | "print(latest_model_package_arn)" 650 | ] 651 | }, 652 | { 653 | "cell_type": "markdown", 654 | "metadata": {}, 655 | "source": [ 656 | "## Approve model\n", 657 | "Approval permission controlled by IAM role" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "model_package_update_response = sm_client.update_model_package(\n", 667 | " ModelPackageArn=latest_model_package_arn,\n", 668 | " ModelApprovalStatus=\"Approved\",\n", 669 | ")" 670 | ] 671 | }, 672 | { 673 | "cell_type": "markdown", 674 | "metadata": {}, 675 | "source": [ 676 | "## Get Approved model" 677 | ] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": null, 682 | "metadata": {}, 683 | "outputs": [], 684 | "source": [ 685 | "model_details=sm_client.describe_model_package(ModelPackageName=latest_model_package_arn)" 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": null, 691 | "metadata": {}, 692 | "outputs": [], 693 | "source": [ 694 | "model_data=model_details['InferenceSpecification']['Containers'][0]['ModelDataUrl']" 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | 
"execution_count": null, 700 | "metadata": {}, 701 | "outputs": [], 702 | "source": [ 703 | "image_path=model_details['InferenceSpecification']['Containers'][0]['Image']" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": null, 709 | "metadata": {}, 710 | "outputs": [], 711 | "source": [ 712 | "model_data" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": null, 718 | "metadata": {}, 719 | "outputs": [], 720 | "source": [ 721 | "image_path" 722 | ] 723 | }, 724 | { 725 | "cell_type": "code", 726 | "execution_count": null, 727 | "metadata": {}, 728 | "outputs": [], 729 | "source": [ 730 | "mkdir pipeline_model" 731 | ] 732 | }, 733 | { 734 | "cell_type": "code", 735 | "execution_count": null, 736 | "metadata": {}, 737 | "outputs": [], 738 | "source": [ 739 | "!aws s3 cp {model_data} ./pipeline_model" 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": null, 745 | "metadata": {}, 746 | "outputs": [], 747 | "source": [ 748 | "import tarfile\n", 749 | "# open file\n", 750 | "file = tarfile.open('./pipeline_model/model.tar.gz')\n", 751 | " \n", 752 | "# extracting file\n", 753 | "file.extractall('./pipeline_model')\n", 754 | " \n", 755 | "file.close()" 756 | ] 757 | }, 758 | { 759 | "cell_type": "markdown", 760 | "metadata": {}, 761 | "source": [ 762 | "## Make prediction using Local Model" 763 | ] 764 | }, 765 | { 766 | "cell_type": "markdown", 767 | "metadata": {}, 768 | "source": [ 769 | "A model is created in the SageMaker notebook instance instead of using SageMaker endpoint instance. The local model in the notebook instance is used to check predictions within the notebook. " 770 | ] 771 | }, 772 | { 773 | "cell_type": "markdown", 774 | "metadata": {}, 775 | "source": [ 776 | "Steps:\n", 777 | "1. Install XGboost library on the Notebook instance if not installed. \n", 778 | "2. Load the trained model.\n", 779 | "3. Use test data to make predictions." 
780 | ] 781 | }, 782 | { 783 | "cell_type": "code", 784 | "execution_count": null, 785 | "metadata": {}, 786 | "outputs": [], 787 | "source": [ 788 | "!pip install xgboost" 789 | ] 790 | }, 791 | { 792 | "cell_type": "code", 793 | "execution_count": null, 794 | "metadata": {}, 795 | "outputs": [], 796 | "source": [ 797 | "import xgboost as xgb" 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": null, 803 | "metadata": {}, 804 | "outputs": [], 805 | "source": [ 806 | "import joblib\n", 807 | "loaded_model = joblib.load(\"./pipeline_model/xgboost-model\")" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": null, 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [ 816 | "test_data=pd.read_csv('test.csv',header=None)" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": null, 822 | "metadata": {}, 823 | "outputs": [], 824 | "source": [ 825 | "X=test_data.iloc[0:1,1:]" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "execution_count": null, 831 | "metadata": {}, 832 | "outputs": [], 833 | "source": [ 834 | "X.shape" 835 | ] 836 | }, 837 | { 838 | "cell_type": "code", 839 | "execution_count": null, 840 | "metadata": {}, 841 | "outputs": [], 842 | "source": [ 843 | "xgtest = xgb.DMatrix(X.values)" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": null, 849 | "metadata": {}, 850 | "outputs": [], 851 | "source": [ 852 | "loaded_model.predict(xgtest)" 853 | ] 854 | }, 855 | { 856 | "cell_type": "markdown", 857 | "metadata": {}, 858 | "source": [ 859 | "## Create Endpoint" 860 | ] 861 | }, 862 | { 863 | "cell_type": "markdown", 864 | "metadata": {}, 865 | "source": [ 866 | "Create Model and deploy an endpoint" 867 | ] 868 | }, 869 | { 870 | "cell_type": "code", 871 | "execution_count": null, 872 | "metadata": {}, 873 | "outputs": [], 874 | "source": [ 875 | "from time import strftime,gmtime\n", 876 | "model_name = 'CustomerChurn-model-' + strftime(\"%M%S\", gmtime())\n", 877 | "model_version_arn=latest_model_package_arn\n", 878 | "\n", 879 | "print(\"Model name : {}\".format(model_name))\n", 880 | "\n", 881 | "create_model_response = sm_client.create_model(\n", 882 | " ModelName = model_name,\n", 883 | " ExecutionRoleArn = role, \n", 884 | " PrimaryContainer = {\n", 885 | " \"ModelPackageName\": model_version_arn,\n", 886 | " \n", 887 | " }\n", 888 | " \n", 889 | ") \n", 890 | "print(\"Model arn : {}\".format(create_model_response[\"ModelArn\"]))" 891 | ] 892 | }, 893 | { 894 | "cell_type": "code", 895 | "execution_count": null, 896 | "metadata": {}, 897 | "outputs": [], 898 | "source": [ 899 | "#Create endpointconfig" 900 | ] 901 | }, 902 | { 903 | "cell_type": "code", 904 | "execution_count": null, 905 | "metadata": {}, 906 | "outputs": [], 907 | "source": [ 908 | "endpoint_config_name = 'Test-EndpointConfig-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", 909 | "print(endpoint_config_name)\n", 910 | "create_endpoint_config_response = sm_client.create_endpoint_config(\n", 911 | " EndpointConfigName = endpoint_config_name,\n", 912 | " ProductionVariants=[{\n", 913 | " 'InstanceType':'ml.t2.medium',\n", 914 | " 'InitialVariantWeight':1,\n", 915 | " 'InitialInstanceCount':1,\n", 916 | " 'ModelName':model_name,\n", 917 | " 'VariantName':'AllTraffic'}])\n" 918 | ] 919 | }, 920 | { 921 | "cell_type": "code", 922 | "execution_count": null, 923 | "metadata": {}, 924 | "outputs": [], 925 | "source": [ 926 | "#Deploy endpoint" 927 | ] 928 | }, 929 | { 930 | "cell_type": "code", 931 | 
"execution_count": null, 932 | "metadata": {}, 933 | "outputs": [], 934 | "source": [ 935 | "endpoint_name = 'Test-endpoint-' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", 936 | "print(\"EndpointName={}\".format(endpoint_name))\n", 937 | "\n", 938 | "create_endpoint_response = sm_client.create_endpoint(\n", 939 | " EndpointName=endpoint_name,\n", 940 | " EndpointConfigName=endpoint_config_name)\n", 941 | "print(create_endpoint_response['EndpointArn'])" 942 | ] 943 | }, 944 | { 945 | "cell_type": "code", 946 | "execution_count": null, 947 | "metadata": {}, 948 | "outputs": [], 949 | "source": [ 950 | "sm_client.describe_endpoint(EndpointName=endpoint_name)" 951 | ] 952 | }, 953 | { 954 | "cell_type": "code", 955 | "execution_count": null, 956 | "metadata": {}, 957 | "outputs": [], 958 | "source": [ 959 | "def wait_for_response(client, endpoint_name, poll_interval=30):\n", 960 | " ### Wait until the job finishes\n", 961 | " status = 'Creating'\n", 962 | " while(status == 'Creating'):\n", 963 | " response = client.describe_endpoint(EndpointName=endpoint_name)\n", 964 | " status = response['EndpointStatus']\n", 965 | " print('Creating job is still in status: {} ...'.format(status))\n", 966 | " if status == 'Failed':\n", 967 | " message = response['FailureReason']\n", 968 | " logging.info('Endpoint Creation failed with the following error: {}'.format(message))\n", 969 | " print('Endpoint failed with the following error: {}'.format(message))\n", 970 | " raise Exception('Creating Endpoint failed')\n", 971 | " logging.info(\"Creating job is still in status: \" + status)\n", 972 | " time.sleep(poll_interval)\n", 973 | "\n", 974 | " if status == 'InService':\n", 975 | " logging.info(\"Creating job ended with status: \" + status)\n", 976 | " print('Creating job ended with status: {}'.format(status))\n", 977 | " else:\n", 978 | " raise Exception('Creating job stopped')" 979 | ] 980 | }, 981 | { 982 | "cell_type": "code", 983 | "execution_count": null, 984 | "metadata": {}, 985 | "outputs": [], 986 | "source": [ 987 | "wait_for_response(sm_client, endpoint_name, poll_interval=30)" 988 | ] 989 | }, 990 | { 991 | "cell_type": "markdown", 992 | "metadata": {}, 993 | "source": [ 994 | "## Invoke Endpoint" 995 | ] 996 | }, 997 | { 998 | "cell_type": "markdown", 999 | "metadata": {}, 1000 | "source": [ 1001 | "Invoke the endpoint to make predictions." 
1002 | ] 1003 | }, 1004 | { 1005 | "cell_type": "code", 1006 | "execution_count": null, 1007 | "metadata": {}, 1008 | "outputs": [], 1009 | "source": [ 1010 | "test_data=pd.read_csv('test.csv',header=None)\n", 1011 | "testdata1=test_data.iloc[0:1,1:]" 1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": null, 1017 | "metadata": {}, 1018 | "outputs": [], 1019 | "source": [ 1020 | "runtime = boto3.client(\"sagemaker-runtime\")\n", 1021 | "Endpoint_name=endpoint_name" 1022 | ] 1023 | }, 1024 | { 1025 | "cell_type": "code", 1026 | "execution_count": null, 1027 | "metadata": {}, 1028 | "outputs": [], 1029 | "source": [ 1030 | "%time\n", 1031 | "# csv serialization\n", 1032 | "\n", 1033 | "prediction = runtime.invoke_endpoint(\n", 1034 | " EndpointName=Endpoint_name,\n", 1035 | " Body=testdata1.to_csv(header=False, index=False).encode(\"utf-8\"),\n", 1036 | " ContentType=\"text/csv\",\n", 1037 | " Accept= \"text/csv\",\n", 1038 | ")" 1039 | ] 1040 | }, 1041 | { 1042 | "cell_type": "code", 1043 | "execution_count": null, 1044 | "metadata": {}, 1045 | "outputs": [], 1046 | "source": [ 1047 | "print(prediction[\"Body\"].read())" 1048 | ] 1049 | }, 1050 | { 1051 | "cell_type": "code", 1052 | "execution_count": null, 1053 | "metadata": {}, 1054 | "outputs": [], 1055 | "source": [] 1056 | } 1057 | ], 1058 | "metadata": { 1059 | "instance_type": "ml.t3.medium", 1060 | "kernelspec": { 1061 | "display_name": "Python 3.9.13 64-bit", 1062 | "language": "python", 1063 | "name": "python3" 1064 | }, 1065 | "language_info": { 1066 | "codemirror_mode": { 1067 | "name": "ipython", 1068 | "version": 3 1069 | }, 1070 | "file_extension": ".py", 1071 | "mimetype": "text/x-python", 1072 | "name": "python", 1073 | "nbconvert_exporter": "python", 1074 | "pygments_lexer": "ipython3", 1075 | "version": "3.9.13" 1076 | }, 1077 | "vscode": { 1078 | "interpreter": { 1079 | "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" 1080 | } 1081 | } 1082 | }, 1083 | "nbformat": 4, 1084 | "nbformat_minor": 4 1085 | } 1086 | --------------------------------------------------------------------------------
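For reference, below is a minimal sketch (not part of the repository) of how the get_pipeline() function defined in modelbuild_pipeline/pipelines/customer_churn/pipeline.py could be driven from a local script or a CI job instead of SageMaker Studio. The region, account ID, role ARN, and bucket name are placeholder assumptions; only standard SageMaker SDK Pipeline methods (definition, upsert, start) are used.

# Hypothetical driver sketch -- not part of this repository.
# Assumes the modelbuild_pipeline package root is on PYTHONPATH; the region,
# role ARN, and bucket below are placeholders, not values from this repo.
import json

from pipelines.customer_churn.pipeline import get_pipeline

ROLE_ARN = "arn:aws:iam::111111111111:role/SageMakerExecutionRole"  # placeholder role

pipeline = get_pipeline(
    region="us-east-1",                       # placeholder region
    role=ROLE_ARN,
    default_bucket="my-sagemaker-artifacts",  # placeholder bucket
    model_package_group_name="CustomerChurnPackageGroup",
    pipeline_name="CustomerChurnDemo",
    base_job_prefix="CustomerChurn",
)

# Inspect the generated pipeline definition, then create/update and run it.
definition = json.loads(pipeline.definition())
print(len(definition["Steps"]), "steps in the pipeline definition")

pipeline.upsert(role_arn=ROLE_ARN)
execution = pipeline.start()
execution.wait()  # blocks until the execution reaches a terminal state
print(execution.describe()["PipelineExecutionStatus"])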