├── modules ├── sagemaker │ ├── output.tf │ ├── variables.tf │ ├── template │ │ └── sagemaker_instance_init.sh │ └── main.tf ├── s3 │ ├── outputs.tf │ ├── variables.tf │ └── main.tf └── iam │ ├── variables.tf │ ├── outputs.tf │ └── main.tf ├── main ├── terraform_backend.tf.template ├── variables.tf ├── terraform.tfvars.template └── main.tf ├── .gitignore ├── LICENSE ├── README.md └── source ├── scripts └── scikit_learn_script.py └── notebooks └── Scikit-learn_Estimator_Example_With_Terraform.ipynb /modules/sagemaker/output.tf: -------------------------------------------------------------------------------- 1 | variable "bucket_name" {} 2 | -------------------------------------------------------------------------------- /modules/s3/outputs.tf: -------------------------------------------------------------------------------- 1 | output "bucket_name" { 2 | value = aws_s3_bucket.notebook.id 3 | } 4 | 5 | -------------------------------------------------------------------------------- /modules/s3/variables.tf: -------------------------------------------------------------------------------- 1 | variable "notebook_bucket_name" {} 2 | variable "sagemaker_bucket_name" {} 3 | -------------------------------------------------------------------------------- /modules/sagemaker/variables.tf: -------------------------------------------------------------------------------- 1 | variable "sagemaker_notebook_name" {} 2 | variable "aws_iam_role" {} 3 | -------------------------------------------------------------------------------- /modules/iam/variables.tf: -------------------------------------------------------------------------------- 1 | variable "aws_region" {} 2 | variable "iam_name" {} 3 | variable "identifier" {} 4 | data "aws_caller_identity" "current" {} 5 | -------------------------------------------------------------------------------- /main/terraform_backend.tf.template: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = "0.12.6" 3 | backend "s3" { 4 | bucket = "" 5 | key = "sagemaker-sample/terraform.tfstate" 6 | region = "" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /main/variables.tf: -------------------------------------------------------------------------------- 1 | variable "aws_region" {} 2 | variable "aws_profile" {} 3 | variable "iam_name" {} 4 | variable "identifier" {} 5 | variable "notebook_bucket_name" {} 6 | variable "sagemaker_bucket_name" {} 7 | variable "sagemaker_notebook_name" {} 8 | -------------------------------------------------------------------------------- /modules/iam/outputs.tf: -------------------------------------------------------------------------------- 1 | output "iam_role_arn" { 2 | value = aws_iam_role.default.arn 3 | } 4 | 5 | output "iam_role_name" { 6 | value = aws_iam_role.default.name 7 | } 8 | 9 | output "policy_attachment_id" { 10 | value = aws_iam_role_policy_attachment.default.id 11 | } 12 | 13 | -------------------------------------------------------------------------------- /main/terraform.tfvars.template: -------------------------------------------------------------------------------- 1 | aws_region = "" 2 | aws_profile = "" 3 | iam_name = "" 4 | identifier = "sagemaker.amazonaws.com" 5 | notebook_bucket_name = "" 6 | sagemaker_bucket_name = "" 7 | sagemaker_notebook_name = "" 8 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files for more about ignoring files. 2 | # 3 | # If you find yourself ignoring temporary files generated by your text editor 4 | # or operating system, you probably want to add a global ignore instead: 5 | # git config --global core.excludesfile '~/.gitignore_global' 6 | 7 | # Ignore build files. 8 | */.terraform 9 | */.terraform/* 10 | */terraform_backend.tf 11 | */terraform.tfvars 12 | -------------------------------------------------------------------------------- /modules/sagemaker/template/sagemaker_instance_init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | cd /home/ec2-user/SageMaker 5 | aws s3 cp s3://${bucket_name}/sagemaker/sample/notebooks/Scikit-learn_Estimator_Example_With_Terraform.ipynb . 6 | aws s3 cp s3://${bucket_name}/sagemaker/sample/scripts/scikit_learn_script.py . 7 | 8 | ENVIRONMENT=python3 9 | NOTEBOOK_FILE=/home/ec2-user/SageMaker/Scikit-learn_Estimator_Example_With_Terraform.ipynb 10 | 11 | source /home/ec2-user/anaconda3/bin/activate "$ENVIRONMENT" 12 | nohup jupyter nbconvert "$NOTEBOOK_FILE" --ExecutePreprocessor.kernel_name=python3 --ExecutePreprocessor.timeout=1500 --execute 13 | source /home/ec2-user/anaconda3/bin/deactivate 14 | -------------------------------------------------------------------------------- /main/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = var.aws_region 3 | profile = var.aws_profile 4 | version = "2.23.0" 5 | } 6 | 7 | module "iam" { 8 | source = "../modules/iam" 9 | aws_region = var.aws_region 10 | 11 | iam_name = var.iam_name 12 | identifier = var.identifier 13 | } 14 | 15 | module "s3" { 16 | source = "../modules/s3" 17 | 18 | notebook_bucket_name = var.notebook_bucket_name 19 | sagemaker_bucket_name = var.sagemaker_bucket_name 20 | } 21 | 22 | module "sagemaker" { 23 | source = "../modules/sagemaker" 24 | 25 | sagemaker_notebook_name = var.sagemaker_notebook_name 26 | aws_iam_role = "${module.iam.iam_role_arn}" 27 | bucket_name = "${module.s3.bucket_name}" 28 | } 29 | 30 | -------------------------------------------------------------------------------- /modules/sagemaker/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_sagemaker_notebook_instance" "default" { 2 | name = var.sagemaker_notebook_name 3 | role_arn = var.aws_iam_role 4 | instance_type = "ml.t2.medium" 5 | lifecycle_config_name = aws_sagemaker_notebook_instance_lifecycle_configuration.default.name 6 | } 7 | 8 | data "template_file" "instance_init" { 9 | template = "${file("${path.module}/template/sagemaker_instance_init.sh")}" 10 | 11 | vars = { 12 | bucket_name = "${var.bucket_name}" 13 | } 14 | } 15 | 16 | resource "aws_sagemaker_notebook_instance_lifecycle_configuration" "default" { 17 | name = var.sagemaker_notebook_name 18 | on_start = "${base64encode(data.template_file.instance_init.rendered)}" 19 | } 20 | 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yuya Sugano 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without 
restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Machine Learning Infrastructure with Terraform 2 | 3 | This example shows how to set up an end-to-end demo architecture for predicting house prices on the Boston Housing dataset with machine learning, using `Amazon SageMaker` and `Terraform`. 4 | 5 | ## Terraform version 6 | 7 | Ensure your `Terraform` version is as follows (other `Terraform` versions may require some modifications): 8 | ```sh 9 | $ cd main 10 | $ terraform --version 11 | Terraform v0.12.6 12 | + provider.aws v2.23.0 13 | + provider.template v2.1.2 14 | ``` 15 | To download `Terraform`, visit https://releases.hashicorp.com/terraform/ 16 | 17 | ## Setup steps 18 | 19 | From the `main` folder: 20 | 1. Copy `terraform_backend.tf.template` to `terraform_backend.tf` and modify the values accordingly (a filled-in example is shown at the end of this README). You need to manually create an S3 bucket, or use an existing one, to store the Terraform state file. 21 | 2. Copy `terraform.tfvars.template` to `terraform.tfvars` and modify the input variables accordingly. You don't need to create the buckets specified here; `terraform apply` will create them. 22 | 3. Run the following: 23 | ```sh 24 | export AWS_PROFILE= 25 | 26 | terraform init 27 | terraform validate 28 | terraform plan -var-file=terraform.tfvars 29 | terraform apply -var-file=terraform.tfvars 30 | ``` 31 | 32 | ## Clean up 33 | 34 | ``` 35 | terraform plan -destroy -var-file=terraform.tfvars 36 | terraform destroy -var-file=terraform.tfvars 37 | ``` 38 | 39 | ## License 40 | 41 | This library is licensed under the MIT License (see the LICENSE file).
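## Example configuration

For reference, a `terraform_backend.tf` created from the template might look like the following. The bucket name and region are illustrative placeholders; substitute an existing S3 bucket of your own and the region you work in.

```
terraform {
  required_version = "0.12.6"
  backend "s3" {
    bucket = "my-terraform-state-bucket"
    key    = "sagemaker-sample/terraform.tfstate"
    region = "ap-northeast-1"
  }
}
```

A matching `terraform.tfvars` might look like this (again, all names are placeholders, and S3 bucket names must be globally unique; the two data buckets are created by `terraform apply`):

```
aws_region              = "ap-northeast-1"
aws_profile             = "default"
iam_name                = "sagemaker-sample-role"
identifier              = "sagemaker.amazonaws.com"
notebook_bucket_name    = "my-sagemaker-notebook-bucket"
sagemaker_bucket_name   = "my-sagemaker-data-bucket"
sagemaker_notebook_name = "sagemaker-sample-notebook"
```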
42 | -------------------------------------------------------------------------------- /modules/s3/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "notebook" { 2 | bucket = var.notebook_bucket_name 3 | force_destroy = true 4 | acl = "private" 5 | 6 | server_side_encryption_configuration { 7 | rule { 8 | apply_server_side_encryption_by_default { 9 | sse_algorithm = "AES256" 10 | } 11 | } 12 | } 13 | } 14 | 15 | resource "aws_s3_bucket" "sagemaker" { 16 | bucket = var.sagemaker_bucket_name 17 | force_destroy = true 18 | acl = "private" 19 | 20 | server_side_encryption_configuration { 21 | rule { 22 | apply_server_side_encryption_by_default { 23 | sse_algorithm = "AES256" 24 | } 25 | } 26 | } 27 | } 28 | 29 | resource "aws_s3_bucket_object" "notebook" { 30 | bucket = aws_s3_bucket.notebook.id 31 | key = "sagemaker/sample/notebooks/Scikit-learn_Estimator_Example_With_Terraform.ipynb" 32 | source = "${path.module}/../../source/notebooks/Scikit-learn_Estimator_Example_With_Terraform.ipynb" 33 | 34 | # The filemd5() function is available in Terraform 0.11.12 and later 35 | # For Terraform 0.11.11 and earlier, use the md5() function and the file() function: 36 | # etag = "${md5(file("path/to/file"))}" 37 | etag = "${filemd5("${path.module}/../../source/notebooks/Scikit-learn_Estimator_Example_With_Terraform.ipynb")}" 38 | } 39 | 40 | resource "aws_s3_bucket_object" "script" { 41 | bucket = aws_s3_bucket.notebook.id 42 | key = "sagemaker/sample/scripts/scikit_learn_script.py" 43 | source = "${path.module}/../../source/scripts/scikit_learn_script.py" 44 | 45 | # The filemd5() function is available in Terraform 0.11.12 and later 46 | # For Terraform 0.11.11 and earlier, use the md5() function and the file() function: 47 | # etag = "${md5(file("path/to/file"))}" 48 | etag = "${filemd5("${path.module}/../../source/scripts/scikit_learn_script.py")}" 49 | } 50 | 51 | -------------------------------------------------------------------------------- /modules/iam/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "default" { 2 | name = var.iam_name 3 | assume_role_policy = data.aws_iam_policy_document.assume_role.json 4 | } 5 | 6 | data "aws_iam_policy_document" "assume_role" { 7 | statement { 8 | actions = ["sts:AssumeRole"] 9 | 10 | principals { 11 | type = "Service" 12 | identifiers = [var.identifier] 13 | } 14 | } 15 | } 16 | 17 | resource "aws_iam_role_policy_attachment" "default" { 18 | role = aws_iam_role.default.name 19 | policy_arn = aws_iam_policy.default.arn 20 | } 21 | 22 | resource "aws_iam_policy" "default" { 23 | name = var.iam_name 24 | path = "/" 25 | description = "Policy for the Notebook Instance to manage training jobs, models and endpoints" 26 | policy = data.aws_iam_policy_document.sagemaker_role_policy.json 27 | } 28 | 29 | data "aws_iam_policy_document" "sagemaker_role_policy" { 30 | statement { 31 | effect = "Allow" 32 | actions = [ 33 | "s3:CreateBucket", 34 | "s3:GetBucketLocation", 35 | "s3:ListBucket", 36 | "s3:ListAllMyBuckets", 37 | "s3:GetObject", 38 | "s3:PutObject", 39 | "s3:DeleteObject", 40 | "s3:GetBucketCors", 41 | "s3:PutBucketCors" 42 | ] 43 | resources = [ 44 | "arn:aws:s3:::*" 45 | ] 46 | } 47 | 48 | statement { 49 | effect = "Allow" 50 | actions = [ 51 | "sagemaker:CreateTrainingJob", 52 | "sagemaker:DescribeTrainingJob", 53 | "sagemaker:CreateModel", 54 | "sagemaker:DescribeModel", 55 | "sagemaker:DeleteModel", 56 | 
"sagemaker:CreateEndpoint", 57 | "sagemaker:CreateEndpointConfig", 58 | "sagemaker:DescribeEndpoint", 59 | "sagemaker:DescribeEndpointConfig", 60 | "sagemaker:DeleteEndpoint" 61 | ] 62 | resources = [ 63 | "arn:aws:sagemaker:${var.aws_region}:${data.aws_caller_identity.current.account_id}:*" 64 | ] 65 | } 66 | 67 | statement { 68 | effect = "Allow" 69 | actions = [ 70 | "ecr:GetDownloadUrlForLayer", 71 | "ecr:BatchGetImage", 72 | "ecr:BatchCheckLayerAvailability" 73 | ] 74 | resources = [ 75 | "arn:aws:ecr:${var.aws_region}:${data.aws_caller_identity.current.account_id}:repository/*" 76 | ] 77 | } 78 | 79 | statement { 80 | effect = "Allow" 81 | actions = [ 82 | "ec2:CreateVpcEndpoint", 83 | "ec2:DescribeRouteTables" 84 | ] 85 | resources = [ 86 | "*" 87 | ] 88 | } 89 | 90 | statement { 91 | effect = "Allow" 92 | actions = [ 93 | "cloudwatch:PutMetricData", 94 | "cloudwatch:GetMetricData", 95 | "cloudwatch:GetMetricStatistics", 96 | "cloudwatch:ListMetrics" 97 | ] 98 | resources = [ 99 | "arn:aws:cloudwatch:${var.aws_region}:${data.aws_caller_identity.current.account_id}:*" 100 | ] 101 | } 102 | 103 | statement { 104 | effect = "Allow" 105 | actions = [ 106 | "logs:CreateLogGroup", 107 | "logs:CreateLogStream", 108 | "logs:DescribeLogStreams", 109 | "logs:GetLogEvents", 110 | "logs:PutLogEvents" 111 | ] 112 | resources = [ 113 | "arn:aws:logs:${var.aws_region}:${data.aws_caller_identity.current.account_id}:log-group:/aws/sagemaker/*" 114 | ] 115 | } 116 | 117 | statement { 118 | effect = "Allow" 119 | actions = ["iam:PassRole"] 120 | resources = [ 121 | "${aws_iam_role.default.arn}" 122 | ] 123 | condition { 124 | test = "StringEquals" 125 | variable = "iam:PassedToService" 126 | values = ["sagemaker.amazonaws.com"] 127 | } 128 | } 129 | 130 | statement { 131 | effect = "Allow" 132 | actions = ["iam:GetRole"] 133 | resources = [ 134 | "${aws_iam_role.default.arn}" 135 | ] 136 | } 137 | } 138 | 139 | -------------------------------------------------------------------------------- /source/scripts/scikit_learn_script.py: -------------------------------------------------------------------------------- 1 | ''' 2 | DERIVED FROM:https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/sklearn/README.rst 3 | Preparing the Scikit-learn training script 4 | Your Scikit-learn training script must be a Python 2.7 or 3.5 compatible source file. 5 | The training script is very similar to a training script you might run outside of SageMaker, 6 | but you can access useful properties about the training environment through various environment variables, 7 | such as 8 | - SM_MODEL_DIR: 9 | A string representing the path to the directory to write model artifacts to. 10 | These artifacts are uploaded to S3 for model hosting. 11 | - SM_OUTPUT_DATA_DIR: 12 | A string representing the filesystem path to write output artifacts to. 13 | Output artifacts may include checkpoints, graphs, and other files to save, 14 | not including model artifacts. These artifacts are compressed and uploaded 15 | to S3 to the same S3 prefix as the model artifacts. 16 | Supposing two input channels, 'train' and 'test', 17 | were used in the call to the Scikit-learn estimator's fit() method, 18 | the following will be set, following the format "SM_CHANNEL_[channel_name]": 19 | - SM_CHANNEL_TRAIN: 20 | A string representing the path to the directory containing data in the 'train' channel 21 | - SM_CHANNEL_TEST: 22 | Same as above, but for the 'test' channel. 
23 | A typical training script loads data from the input channels, 24 | configures training with hyperparameters, trains a model, 25 | and saves the model to model_dir so that it can be hosted later. 26 | Hyperparameters are passed to your script as arguments and can 27 | be retrieved with an argparse.ArgumentParser instance. 28 | For example, a training script might start with an if __name__ == '__main__': block that parses its arguments, as the script below does. 29 | Because SageMaker imports your training script, 30 | you should put your training code in a main guard (if __name__ == '__main__':) 31 | if you are using the same script to host your model, 32 | so that SageMaker does not inadvertently run your training code at the wrong point in execution. 33 | For more on training environment variables, please visit https://github.com/aws/sagemaker-containers. 34 | ''' 35 | 36 | import argparse 37 | import pandas as pd 38 | import os 39 | 40 | # GradientBoosting Regressor 41 | from sklearn.ensemble import GradientBoostingRegressor 42 | from sklearn.externals import joblib 43 | 44 | # Pipeline and StandardScaler 45 | from sklearn.preprocessing import StandardScaler 46 | from sklearn.pipeline import Pipeline 47 | 48 | if __name__ == '__main__': 49 | parser = argparse.ArgumentParser() 50 | 51 | # Hyperparameters are described here. In this simple example we include two hyperparameters. 52 | parser.add_argument('--learning_rate', type=float, default=0.1) 53 | parser.add_argument('--n_estimators', type=int, default=100) 54 | 55 | # SageMaker-specific arguments. Defaults are taken from the environment variables. 56 | parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR']) 57 | parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR']) 58 | parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN']) 59 | 60 | args = parser.parse_args() 61 | 62 | # Take the set of input files and read them all into a single pandas dataframe 63 | input_files = [os.path.join(args.train, file) for file in os.listdir(args.train)] 64 | if len(input_files) == 0: 65 | raise ValueError(('There are no files in {}.\n' + 66 | 'This usually indicates that the channel ({}) was incorrectly specified,\n' + 67 | 'the data specification in S3 was incorrectly specified or the role specified\n' + 68 | 'does not have permission to access the data.').format(args.train, "train")) 69 | raw_data = [pd.read_csv(file, header=None, engine="python") for file in input_files] 70 | train_data = pd.concat(raw_data) 71 | 72 | # Labels are in the last column; features are in the preceding columns 73 | train_y = train_data.iloc[:,-1] 74 | train_X = train_data.iloc[:,0:-1] 75 | 76 | # Read the hyperparameters passed to the job 77 | learning_rate = args.learning_rate 78 | n_estimators = args.n_estimators 79 | 80 | # Use scikit-learn's gradient boosting regressor to train the model.
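    # Note: hyperparameters arrive through the SKLearn estimator's
    # `hyperparameters` argument. The accompanying notebook passes none
    # (its training log shows "hyperparameters": {}), so the argparse
    # defaults above are used.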
81 | clf = GradientBoostingRegressor(learning_rate=learning_rate, n_estimators=n_estimators) 82 | clf = clf.fit(train_X, train_y) 83 | print(clf) 84 | 85 | # Persist the trained model so that it can be hosted later 86 | joblib.dump(clf, os.path.join(args.model_dir, "model.joblib")) 87 | 88 | def model_fn(model_dir): 89 | """Deserialize and return the fitted model 90 | 91 | Note that the file name here must match the one used when serializing the model in the main guard 92 | """ 93 | clf = joblib.load(os.path.join(model_dir, "model.joblib")) 94 | return clf 95 | -------------------------------------------------------------------------------- /source/notebooks/Scikit-learn_Estimator_Example_With_Terraform.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/html": [ 11 | "
\n", 12 | "\n", 25 | "\n", 26 | " \n", 27 | " \n", 28 | " \n", 29 | " \n", 30 | " \n", 31 | " \n", 32 | " \n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTAT
count506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000506.000000
mean3.61352411.36363611.1367790.0691700.5546956.28463468.5749013.7950439.549407408.23715418.455534356.67403212.653063
std8.60154523.3224536.8603530.2539940.1158780.70261728.1488612.1057108.707259168.5371162.16494691.2948647.141062
min0.0063200.0000000.4600000.0000000.3850003.5610002.9000001.1296001.000000187.00000012.6000000.3200001.730000
25%0.0820450.0000005.1900000.0000000.4490005.88550045.0250002.1001754.000000279.00000017.400000375.3775006.950000
50%0.2565100.0000009.6900000.0000000.5380006.20850077.5000003.2074505.000000330.00000019.050000391.44000011.360000
75%3.67708312.50000018.1000000.0000000.6240006.62350094.0750005.18842524.000000666.00000020.200000396.22500016.955000
max88.976200100.00000027.7400001.0000000.8710008.780000100.00000012.12650024.000000711.00000022.000000396.90000037.970000
\n", 175 | "
" 176 | ], 177 | "text/plain": [ 178 | " CRIM ZN INDUS CHAS NOX RM \\\n", 179 | "count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \n", 180 | "mean 3.613524 11.363636 11.136779 0.069170 0.554695 6.284634 \n", 181 | "std 8.601545 23.322453 6.860353 0.253994 0.115878 0.702617 \n", 182 | "min 0.006320 0.000000 0.460000 0.000000 0.385000 3.561000 \n", 183 | "25% 0.082045 0.000000 5.190000 0.000000 0.449000 5.885500 \n", 184 | "50% 0.256510 0.000000 9.690000 0.000000 0.538000 6.208500 \n", 185 | "75% 3.677083 12.500000 18.100000 0.000000 0.624000 6.623500 \n", 186 | "max 88.976200 100.000000 27.740000 1.000000 0.871000 8.780000 \n", 187 | "\n", 188 | " AGE DIS RAD TAX PTRATIO B \\\n", 189 | "count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \n", 190 | "mean 68.574901 3.795043 9.549407 408.237154 18.455534 356.674032 \n", 191 | "std 28.148861 2.105710 8.707259 168.537116 2.164946 91.294864 \n", 192 | "min 2.900000 1.129600 1.000000 187.000000 12.600000 0.320000 \n", 193 | "25% 45.025000 2.100175 4.000000 279.000000 17.400000 375.377500 \n", 194 | "50% 77.500000 3.207450 5.000000 330.000000 19.050000 391.440000 \n", 195 | "75% 94.075000 5.188425 24.000000 666.000000 20.200000 396.225000 \n", 196 | "max 100.000000 12.126500 24.000000 711.000000 22.000000 396.900000 \n", 197 | "\n", 198 | " LSTAT \n", 199 | "count 506.000000 \n", 200 | "mean 12.653063 \n", 201 | "std 7.141062 \n", 202 | "min 1.730000 \n", 203 | "25% 6.950000 \n", 204 | "50% 11.360000 \n", 205 | "75% 16.955000 \n", 206 | "max 37.970000 " 207 | ] 208 | }, 209 | "execution_count": 5, 210 | "metadata": {}, 211 | "output_type": "execute_result" 212 | } 213 | ], 214 | "source": [ 215 | "# Reading boston housing dataset\n", 216 | "from sklearn.datasets import load_boston\n", 217 | "from sklearn.model_selection import train_test_split\n", 218 | "import pandas as pd\n", 219 | "\n", 220 | "boston = load_boston()\n", 221 | "df = pd.DataFrame(boston.data, columns=boston.feature_names)\n", 222 | "df.describe() # describe dataset overview" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 7, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "# save files as csv\n", 232 | "import os\n", 233 | "\n", 234 | "WORK_DIRECTORY='data'\n", 235 | "os.makedirs('{}'.format(WORK_DIRECTORY), exist_ok=True)\n", 236 | "df.to_csv('{}/boston_housing.csv'.format(WORK_DIRECTORY), header=False, index=False)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 12, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stdout", 246 | "output_type": "stream", 247 | "text": [ 248 | "Execution role is arn:aws:iam::251344623468:role/service-role/AmazonSageMaker-ExecutionRole-20191017T203175\n", 249 | "Success - the MySageMakerInstance is in the ap-northeast-1.\n" 250 | ] 251 | } 252 | ], 253 | "source": [ 254 | "# S3 prefix\n", 255 | "bucket = 'sagemaker-bucket-sample-test'\n", 256 | "prefix = 'sagemaker/sample'\n", 257 | "\n", 258 | "# Import libraries\n", 259 | "from sagemaker import get_execution_role\n", 260 | "import boto3, sys, os\n", 261 | "import sagemaker\n", 262 | "\n", 263 | "sagemaker_session = sagemaker.Session()\n", 264 | "\n", 265 | "# Get a SageMaker-compatible role used by this Notebook Instance.\n", 266 | "role = get_execution_role()\n", 267 | "my_region = boto3.session.Session().region_name # set the region of the instance\n", 268 | "print(\"Execution role is \" + role)\n", 269 | "print(\"Success - the MySageMakerInstance is in the \" 
+ my_region + \".\")" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 13, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "S3 error: An error occurred (IllegalLocationConstraintException) when calling the CreateBucket operation: The unspecified location constraint is incompatible for the region specific endpoint this request was sent to.\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "s3 = boto3.resource('s3')\n", 287 | "\n", 288 | "try:\n", 289 | " if my_region == 'ap-northeast-1':\n", 290 | " s3.create_bucket(Bucket=bucket)\n", 291 | " else:\n", 292 | " s3.create_bucket(Bucket=bucket, CreateBucketConfiguration={'LocationConstraint': my_region})\n", 293 | " print('S3 bucket created successfully')\n", 294 | "except Exception as e:\n", 295 | " print('S3 error: ', e)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 14, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "Uploaded training data location: s3://sagemaker-getting-start-test/sagemaker/sample/data\n", 308 | "Training artifacts will be uploaded to: s3://sagemaker-getting-start-test/sagemaker/sample/output\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "# send data to S3.SageMaker will take training data from s3\n", 314 | "training_path = sagemaker_session.upload_data(path='{}/boston_housing.csv'.format(WORK_DIRECTORY), bucket=bucket, key_prefix=prefix)\n", 315 | "s3_train_data = 's3://{}/{}/{}'.format(bucket, prefix, WORK_DIRECTORY)\n", 316 | "print('Uploaded training data location: {}'.format(s3_train_data))\n", 317 | "\n", 318 | "output_location = 's3://{}/{}/output'.format(bucket, prefix)\n", 319 | "print('Training artifacts will be uploaded to: {}'.format(output_location))" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 15, 325 | "metadata": {}, 326 | "outputs": [ 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "Estimator object: \n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "# We use the Estimator from the SageMaker Python SDK\n", 337 | "from sagemaker.sklearn.estimator import SKLearn\n", 338 | "\n", 339 | "script_path = 'scikit_learn_script.py'\n", 340 | "\n", 341 | "# Initialise SDK\n", 342 | "sklearn_estimator = SKLearn(\n", 343 | " entry_point=script_path,\n", 344 | " role = role,\n", 345 | " train_instance_type=\"ml.c4.xlarge\",\n", 346 | " sagemaker_session=sagemaker_session,\n", 347 | " output_path=output_location\n", 348 | ")\n", 349 | "\n", 350 | "print(\"Estimator object: {}\".format(sklearn_estimator))" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 16, 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | "name": "stdout", 360 | "output_type": "stream", 361 | "text": [ 362 | "2020-01-13 06:19:35 Starting - Starting the training job...\n", 363 | "2020-01-13 06:19:37 Starting - Launching requested ML instances......\n", 364 | "2020-01-13 06:20:40 Starting - Preparing the instances for training...\n", 365 | "2020-01-13 06:21:18 Downloading - Downloading input data...\n", 366 | "2020-01-13 06:22:03 Training - Training image download completed. 
Training in progress.\n", 367 | "2020-01-13 06:22:03 Uploading - Uploading generated training model\u001b[34m2020-01-13 06:21:58,438 sagemaker-containers INFO Imported framework sagemaker_sklearn_container.training\u001b[0m\n", 368 | "\u001b[34m2020-01-13 06:21:58,440 sagemaker-containers INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", 369 | "\u001b[34m2020-01-13 06:21:58,450 sagemaker_sklearn_container.training INFO Invoking user training script.\u001b[0m\n", 370 | "\u001b[34m2020-01-13 06:21:58,782 sagemaker-containers INFO Module scikit_learn_script does not provide a setup.py. \u001b[0m\n", 371 | "\u001b[34mGenerating setup.py\u001b[0m\n", 372 | "\u001b[34m2020-01-13 06:21:58,782 sagemaker-containers INFO Generating setup.cfg\u001b[0m\n", 373 | "\u001b[34m2020-01-13 06:21:58,782 sagemaker-containers INFO Generating MANIFEST.in\u001b[0m\n", 374 | "\u001b[34m2020-01-13 06:21:58,783 sagemaker-containers INFO Installing module with the following command:\u001b[0m\n", 375 | "\u001b[34m/miniconda3/bin/python -m pip install . \u001b[0m\n", 376 | "\u001b[34mProcessing /opt/ml/code\u001b[0m\n", 377 | "\u001b[34mBuilding wheels for collected packages: scikit-learn-script\n", 378 | " Building wheel for scikit-learn-script (setup.py): started\n", 379 | " Building wheel for scikit-learn-script (setup.py): finished with status 'done'\n", 380 | " Created wheel for scikit-learn-script: filename=scikit_learn_script-1.0.0-py2.py3-none-any.whl size=8295 sha256=07fb54998da9c4d696b6abc931d34235002b0285255da2131d0051e86cb4d9e9\n", 381 | " Stored in directory: /tmp/pip-ephem-wheel-cache-vhkmjzek/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3\u001b[0m\n", 382 | "\u001b[34mSuccessfully built scikit-learn-script\u001b[0m\n", 383 | "\u001b[34mInstalling collected packages: scikit-learn-script\u001b[0m\n", 384 | "\u001b[34mSuccessfully installed scikit-learn-script-1.0.0\u001b[0m\n", 385 | "\u001b[34m2020-01-13 06:22:00,086 sagemaker-containers INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", 386 | "\u001b[34m2020-01-13 06:22:00,097 sagemaker-containers INFO Invoking user script\n", 387 | "\u001b[0m\n", 388 | "\u001b[34mTraining Env:\n", 389 | "\u001b[0m\n", 390 | "\u001b[34m{\n", 391 | " \"additional_framework_parameters\": {},\n", 392 | " \"channel_input_dirs\": {\n", 393 | " \"train\": \"/opt/ml/input/data/train\"\n", 394 | " },\n", 395 | " \"current_host\": \"algo-1\",\n", 396 | " \"framework_module\": \"sagemaker_sklearn_container.training:main\",\n", 397 | " \"hosts\": [\n", 398 | " \"algo-1\"\n", 399 | " ],\n", 400 | " \"hyperparameters\": {},\n", 401 | " \"input_config_dir\": \"/opt/ml/input/config\",\n", 402 | " \"input_data_config\": {\n", 403 | " \"train\": {\n", 404 | " \"TrainingInputMode\": \"File\",\n", 405 | " \"S3DistributionType\": \"FullyReplicated\",\n", 406 | " \"RecordWrapperType\": \"None\"\n", 407 | " }\n", 408 | " },\n", 409 | " \"input_dir\": \"/opt/ml/input\",\n", 410 | " \"is_master\": true,\n", 411 | " \"job_name\": \"sagemaker-scikit-learn-2020-01-13-06-19-35-519\",\n", 412 | " \"log_level\": 20,\n", 413 | " \"master_hostname\": \"algo-1\",\n", 414 | " \"model_dir\": \"/opt/ml/model\",\n", 415 | " \"module_dir\": \"s3://sagemaker-getting-start-test/sagemaker-scikit-learn-2020-01-13-06-19-35-519/source/sourcedir.tar.gz\",\n", 416 | " \"module_name\": \"scikit_learn_script\",\n", 417 | " \"network_interface_name\": \"eth0\",\n", 418 | " \"num_cpus\": 4,\n", 419 | " \"num_gpus\": 0,\n", 420 | " \"output_data_dir\": 
\"/opt/ml/output/data\",\n", 421 | " \"output_dir\": \"/opt/ml/output\",\n", 422 | " \"output_intermediate_dir\": \"/opt/ml/output/intermediate\",\n", 423 | " \"resource_config\": {\n", 424 | " \"current_host\": \"algo-1\",\n", 425 | " \"hosts\": [\n", 426 | " \"algo-1\"\n", 427 | " ],\n", 428 | " \"network_interface_name\": \"eth0\"\n", 429 | " },\n", 430 | " \"user_entry_point\": \"scikit_learn_script.py\"\u001b[0m\n", 431 | "\u001b[34m}\n", 432 | "\u001b[0m\n", 433 | "\u001b[34mEnvironment variables:\n", 434 | "\u001b[0m\n", 435 | "\u001b[34mSM_HOSTS=[\"algo-1\"]\u001b[0m\n", 436 | "\u001b[34mSM_NETWORK_INTERFACE_NAME=eth0\u001b[0m\n", 437 | "\u001b[34mSM_HPS={}\u001b[0m\n", 438 | "\u001b[34mSM_USER_ENTRY_POINT=scikit_learn_script.py\u001b[0m\n", 439 | "\u001b[34mSM_FRAMEWORK_PARAMS={}\u001b[0m\n", 440 | "\u001b[34mSM_RESOURCE_CONFIG={\"current_host\":\"algo-1\",\"hosts\":[\"algo-1\"],\"network_interface_name\":\"eth0\"}\u001b[0m\n", 441 | "\u001b[34mSM_INPUT_DATA_CONFIG={\"train\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}}\u001b[0m\n", 442 | "\u001b[34mSM_OUTPUT_DATA_DIR=/opt/ml/output/data\u001b[0m\n", 443 | "\u001b[34mSM_CHANNELS=[\"train\"]\u001b[0m\n", 444 | "\u001b[34mSM_CURRENT_HOST=algo-1\u001b[0m\n", 445 | "\u001b[34mSM_MODULE_NAME=scikit_learn_script\u001b[0m\n", 446 | "\u001b[34mSM_LOG_LEVEL=20\u001b[0m\n", 447 | "\u001b[34mSM_FRAMEWORK_MODULE=sagemaker_sklearn_container.training:main\u001b[0m\n", 448 | "\u001b[34mSM_INPUT_DIR=/opt/ml/input\u001b[0m\n", 449 | "\u001b[34mSM_INPUT_CONFIG_DIR=/opt/ml/input/config\u001b[0m\n", 450 | "\u001b[34mSM_OUTPUT_DIR=/opt/ml/output\u001b[0m\n", 451 | "\u001b[34mSM_NUM_CPUS=4\u001b[0m\n", 452 | "\u001b[34mSM_NUM_GPUS=0\u001b[0m\n", 453 | "\u001b[34mSM_MODEL_DIR=/opt/ml/model\u001b[0m\n", 454 | "\u001b[34mSM_MODULE_DIR=s3://sagemaker-getting-start-test/sagemaker-scikit-learn-2020-01-13-06-19-35-519/source/sourcedir.tar.gz\u001b[0m\n", 455 | "\u001b[34mSM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"train\":\"/opt/ml/input/data/train\"},\"current_host\":\"algo-1\",\"framework_module\":\"sagemaker_sklearn_container.training:main\",\"hosts\":[\"algo-1\"],\"hyperparameters\":{},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"train\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"sagemaker-scikit-learn-2020-01-13-06-19-35-519\",\"log_level\":20,\"master_hostname\":\"algo-1\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://sagemaker-getting-start-test/sagemaker-scikit-learn-2020-01-13-06-19-35-519/source/sourcedir.tar.gz\",\"module_name\":\"scikit_learn_script\",\"network_interface_name\":\"eth0\",\"num_cpus\":4,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1\",\"hosts\":[\"algo-1\"],\"network_interface_name\":\"eth0\"},\"user_entry_point\":\"scikit_learn_script.py\"}\u001b[0m\n", 456 | "\u001b[34mSM_USER_ARGS=[]\u001b[0m\n", 457 | "\u001b[34mSM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\u001b[0m\n", 458 | "\u001b[34mSM_CHANNEL_TRAIN=/opt/ml/input/data/train\u001b[0m\n", 459 | 
"\u001b[34mPYTHONPATH=/miniconda3/bin:/miniconda3/lib/python37.zip:/miniconda3/lib/python3.7:/miniconda3/lib/python3.7/lib-dynload:/miniconda3/lib/python3.7/site-packages\n", 460 | "\u001b[0m\n", 461 | "\u001b[34mInvoking script with the following command:\n", 462 | "\u001b[0m\n", 463 | "\u001b[34m/miniconda3/bin/python -m scikit_learn_script\n", 464 | "\n", 465 | "\u001b[0m\n", 466 | "\u001b[34m/miniconda3/lib/python3.7/site-packages/sklearn/externals/joblib/externals/cloudpickle/cloudpickle.py:47: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses\n", 467 | " import imp\u001b[0m\n", 468 | "\u001b[34mGradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,\n", 469 | " learning_rate=0.1, loss='ls', max_depth=3, max_features=None,\n", 470 | " max_leaf_nodes=None, min_impurity_decrease=0.0,\n", 471 | " min_impurity_split=None, min_samples_leaf=1,\n", 472 | " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", 473 | " n_estimators=100, n_iter_no_change=None, presort='auto',\n", 474 | " random_state=None, subsample=1.0, tol=0.0001,\n", 475 | " validation_fraction=0.1, verbose=0, warm_start=False)\u001b[0m\n", 476 | "\u001b[34m2020-01-13 06:22:01,491 sagemaker-containers INFO Reporting training SUCCESS\u001b[0m\n", 477 | "\n", 478 | "2020-01-13 06:22:10 Completed - Training job completed\n", 479 | "Training seconds: 52\n", 480 | "Billable seconds: 52\n" 481 | ] 482 | } 483 | ], 484 | "source": [ 485 | "# Run model training job\n", 486 | "sklearn_estimator.fit({'train': training_path})" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 17, 492 | "metadata": {}, 493 | "outputs": [ 494 | { 495 | "name": "stdout", 496 | "output_type": "stream", 497 | "text": [ 498 | "---------------------------------------------------------------------------!" 499 | ] 500 | } 501 | ], 502 | "source": [ 503 | "# Deploy an estimator and endpoint\n", 504 | "from sagemaker.predictor import csv_serializer, json_deserializer\n", 505 | "predictor = sklearn_estimator.deploy(initial_instance_count=1, instance_type=\"ml.m4.xlarge\", endpoint_name=\"sagemaker-terraform-test\")\n", 506 | "\n", 507 | "# Specify input and output formats.\n", 508 | "predictor.content_type = 'text/csv'\n", 509 | "predictor.serializer = csv_serializer\n", 510 | "predictor.deserializer = json_deserializer" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 18, 516 | "metadata": {}, 517 | "outputs": [], 518 | "source": [ 519 | "# predictor.delete_endpoint()" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "metadata": {}, 526 | "outputs": [], 527 | "source": [] 528 | } 529 | ], 530 | "metadata": { 531 | "kernelspec": { 532 | "display_name": "conda_python3", 533 | "language": "python", 534 | "name": "conda_python3" 535 | }, 536 | "language_info": { 537 | "codemirror_mode": { 538 | "name": "ipython", 539 | "version": 3 540 | }, 541 | "file_extension": ".py", 542 | "mimetype": "text/x-python", 543 | "name": "python", 544 | "nbconvert_exporter": "python", 545 | "pygments_lexer": "ipython3", 546 | "version": "3.6.5" 547 | } 548 | }, 549 | "nbformat": 4, 550 | "nbformat_minor": 2 551 | } 552 | --------------------------------------------------------------------------------