├── .DS_Store
├── .github
    └── workflows
    │   └── manual.yml
├── CODEOWNERS
├── LICENSE.md
├── README.md
├── lesson2
    ├── .DS_Store
    ├── .ipynb_checkpoints
    │   └── Lesson 2, Exercise 1 - Training Jobs-checkpoint.ipynb
    ├── Lesson 2, Exercise 1 - Training Jobs Solution.ipynb
    ├── Lesson 2, Exercise 1 - Training Jobs.ipynb
    ├── Lesson 2, Exercise 2 - Endpoints Solution.ipynb
    ├── Lesson 2, Exercise 2 - Endpoints.ipynb
    ├── Lesson 2, Exercise 3 - Batch Transform Solution.ipynb
    ├── Lesson 2, Exercise 3 - Batch Transform.ipynb
    ├── Lesson 2, Exercise 4 - Processing Job Solution.ipynb
    ├── Lesson 2, Exercise 4 - Processing Job.ipynb
    ├── Lesson 2, Exercise 5 - Tying it All Together .ipynb
    ├── Lesson 2, Exercise 5 - Tying it All Together Solution.ipynb
    ├── Toys_and_Games_5.json.zip
    ├── demo
    │   ├── Lesson 2, Lecture 1 Demo - Training Job.ipynb
    │   ├── Lesson 2, Lecture 2 Demo - Endpoint.ipynb
    │   ├── Lesson 2, Lecture 3 Demo - Batch Transform.ipynb
    │   ├── Lesson 2, Lecture 4 Demo - Processing Job.ipynb
    │   ├── demo_cli_script.sh
    │   ├── input_data_config.json
    │   └── readme.md
    ├── demo_boston_data
    │   ├── test.csv
    │   ├── train.csv
    │   └── validation.csv
    └── reviews_Musical_Instruments_5.json.zip
├── lesson3
    ├── .DS_Store
    ├── HelloBlazePreprocess.py
    ├── HelloBlazePreprocessLambda.py
    ├── Lesson 3, Exercise 1 - Lambda Solution.ipynb
    ├── Lesson 3, Exercise 2 - Invoking Lambda Functions Solution.ipynb
    ├── Lesson 3, Exercise 2 - Invoking Lambda Functions.ipynb
    ├── Lesson 3, Exercise 3 - Creating Workflows with Step Functions Solution.ipynb
    ├── Lesson 3, Exercise 3 - Creating Workflows with Step Functions.ipynb
    ├── Lesson 3, Exercise 4 - Tying it All Together Solution.ipynb
    ├── Lesson 3, Exercise 4 - Tying it All Together.ipynb
    ├── demo
    │   ├── Lesson 3, Lecture 2 Demo - Lambda.ipynb
    │   ├── Lesson 3, Lecture 3 Demo - Triggering Lambda.ipynb
    │   ├── Lesson 3, Lecture 4 Demo - Step Functions.ipynb
    │   └── readme.md
    └── reviews_Patio_Lawn_and_Garden_5.json.zip
├── lesson4
    ├── demos.ipynb
    ├── exercises-solutions.ipynb
    └── exercises-starters.ipynb
└── project
    └── starter.ipynb


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/udacity/udacity-nd009t-C2-Developing-ML-Workflow/d1928db95b27ca4dec5b73f460a357d9cdcea9d7/.DS_Store


--------------------------------------------------------------------------------
/.github/workflows/manual.yml:
--------------------------------------------------------------------------------
 1 | # Workflow to ensure whenever a Github PR is submitted, 
 2 | # a JIRA ticket gets created automatically. 
 3 | name: Manual Workflow
 4 | 
 5 | # Controls when the action will run. 
 6 | on:
 7 |   # Triggers the workflow on pull request events but only for the master branch
 8 |   pull_request_target:
 9 |     types: [assigned, opened, reopened]
10 | 
11 |   # Allows you to run this workflow manually from the Actions tab
12 |   workflow_dispatch:
13 | 
14 | jobs:
15 |   test-transition-issue:
16 |     name: Convert Github Issue to Jira Issue
17 |     runs-on: ubuntu-latest
18 |     steps:
19 |     - name: Checkout
20 |       uses: actions/checkout@master
21 | 
22 |     - name: Login
23 |       uses: atlassian/gajira-login@master
24 |       env:
25 |         JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
26 |         JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
27 |         JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
28 |         
29 |     - name: Create NEW JIRA ticket
30 |       id: create
31 |       uses: atlassian/gajira-create@master
32 |       with:
33 |         project: CONUPDATE
34 |         issuetype: Task
35 |         summary: |
36 |           Github PR - nd009t Machine Learning Engineering C2 Developing Your First ML Workflow | Repo: ${{ github.repository }}  | PR# ${{github.event.number}}
37 |         description: |
38 |            Repo link: https://github.com/${{ github.repository }}   
39 |            PR no. ${{ github.event.pull_request.number }} 
40 |            PR title: ${{ github.event.pull_request.title }}  
41 |            PR description: ${{ github.event.pull_request.description }}  
42 |            In addition, please resolve other issues, if any. 
43 |         fields: '{"components": [{"name":"nd009t - Machine Learning Engineer ND"}], "customfield_16449":"https://classroom.udacity.com/", "customfield_16450":"Resolve the PR", "labels": ["github"], "priority":{"id": "4"}}'
44 | 
45 |     - name: Log created issue
46 |       run: echo "Issue ${{ steps.create.outputs.issue }} was created"
47 | 


--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | *           @udacity/active-public-content


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Copyright © 2012 - 2020, Udacity, Inc.
 3 | 
 4 | Udacity hereby grants you a license in and to the Educational Content, including but not limited to homework assignments, programming assignments, code samples, and other educational materials and tools (as further described in the Udacity Terms of Use),  subject to, as modified herein, the terms and conditions of the Creative Commons Attribution-NonCommercial- NoDerivs 3.0 License located at http://creativecommons.org/licenses/by-nc-nd/4.0 and successor locations for such license (the "CC License") provided that, in each case, the Educational Content is specifically marked as being subject to the CC License.
 5 | Udacity expressly defines the following as falling outside the definition of "non-commercial":
 6 | (a) the sale or rental of (i) any part of the Educational Content, (ii) any derivative works based at least in part on the Educational Content, or (iii) any collective work that includes any part of the Educational Content;
 7 | (b) the sale of access or a link to any part of the Educational Content without first obtaining informed consent from the buyer (that the buyer is aware that the Educational Content, or such part thereof, is available at the Website free of charge);
 8 | (c) providing training, support, or editorial services that use or reference the Educational Content in exchange for a fee;
 9 | (d) the sale of advertisements, sponsorships, or promotions placed on the Educational Content, or any part thereof, or the sale of advertisements, sponsorships, or promotions on any website or blog containing any part of the Educational Material, including without limitation any "pop-up advertisements";
10 | (e) the use of Educational Content by a college, university, school, or other educational institution for instruction where tuition is charged; and
11 | (f) the use of Educational Content by a for-profit corporation or non-profit entity for internal professional development or training.
12 | 
13 | 
14 | 
15 | THE SERVICES AND ONLINE COURSES (INCLUDING ANY CONTENT) ARE PROVIDED "AS IS" AND "AS AVAILABLE" WITH NO REPRESENTATIONS OR WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. YOU ASSUME TOTAL RESPONSIBILITY AND THE ENTIRE RISK FOR YOUR USE OF THE SERVICES, ONLINE COURSES, AND CONTENT. WITHOUT LIMITING THE FOREGOING, WE DO NOT WARRANT THAT (A) THE SERVICES, WEBSITES, CONTENT, OR THE ONLINE COURSES WILL MEET YOUR REQUIREMENTS OR EXPECTATIONS OR ACHIEVE THE INTENDED PURPOSES, (B) THE WEBSITES OR THE ONLINE COURSES WILL NOT EXPERIENCE OUTAGES OR OTHERWISE BE UNINTERRUPTED, TIMELY, SECURE OR ERROR-FREE, (C) THE INFORMATION OR CONTENT OBTAINED THROUGH THE SERVICES, SUCH AS CHAT ROOM SERVICES, WILL BE ACCURATE, COMPLETE, CURRENT, ERROR- FREE, COMPLETELY SECURE OR RELIABLE, OR (D) THAT DEFECTS IN OR ON THE SERVICES OR CONTENT WILL BE CORRECTED. YOU ASSUME ALL RISK OF PERSONAL INJURY, INCLUDING DEATH AND DAMAGE TO PERSONAL PROPERTY, SUSTAINED FROM USE OF SERVICES.
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Udacity-Developing-Your-First-ML-Workflow
 2 | This is the Github repo for Udacity Developing your first ML workflow course. This repo contains the code for demos, exercises and the final project,
 3 | ## Folder Structure
 4 | This repo contains a folder for each lesson and one project folder.
 5 | 
 6 | ## Lessons Folder
 7 | Each lesson folder contains files for exercises and demos. The exercise code should contain instructions necessary for the exercises along with the solution. The demo code contains the files the instructor uses in the lesson demos.
 8 | 
 9 | ## Project Folder
10 | The project folder contains all files and instructions necessary for the project.
11 | 


--------------------------------------------------------------------------------
/lesson2/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/udacity/udacity-nd009t-C2-Developing-ML-Workflow/d1928db95b27ca4dec5b73f460a357d9cdcea9d7/lesson2/.DS_Store


--------------------------------------------------------------------------------
/lesson2/.ipynb_checkpoints/Lesson 2, Exercise 1 - Training Jobs-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "17d07dd2",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# UDACITY SageMaker Essentials: Training Job Exercise"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "f71ab7eb",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "Good job on your work so far! You've gotten an overview of building an ML Workflow in AWS. Now, it's time to practice your skills. In this exercise, you will be training a BlazingText model to help predict the helpfulness of Amazon reviews. The model & parameters have already been chosen for you; it's your task to properly upload the data necessary for the job and launch the training.  "
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 6,
 22 |    "id": "19aed147",
 23 |    "metadata": {},
 24 |    "outputs": [
 25 |     {
 26 |      "name": "stdout",
 27 |      "output_type": "stream",
 28 |      "text": [
 29 |       "arn:aws:iam::565094796913:role/execution_role\n"
 30 |      ]
 31 |     }
 32 |    ],
 33 |    "source": [
 34 |     "# Note for Huimin: Some of the setup code has been taken from the L6 curriculum. \n",
 35 |     "\n",
 36 |     "import os\n",
 37 |     "import boto3\n",
 38 |     "import sagemaker\n",
 39 |     "\n",
 40 |     "import pandas as pd\n",
 41 |     "import numpy as np\n",
 42 |     "\n",
 43 |     "\n",
 44 |     "\n",
 45 |     "# create a default bucket\n",
 46 |     "bucket = sagemaker.Session().default_bucket()\n",
 47 |     "prefix = \"sagemaker/UDACITY-amazon-blazingText\"\n",
 48 |     "\n",
 49 |     "# Define IAM role\n",
 50 |     "from sagemaker import get_execution_role\n",
 51 |     "\n",
 52 |     "try:\n",
 53 |     "    role = get_execution_role('sagemaker')\n",
 54 |     "except:\n",
 55 |     "    iam = boto3.client('iam')\n",
 56 |     "    # Had an issue with authentication, code is from https://github.com/aws-samples/aws-deepracer-workshops/issues/47\n",
 57 |     "    role = iam.get_role(RoleName='execution_role')['Role']['Arn']\n",
 58 |     "\n",
 59 |     "\n",
 60 |     "print(role)\n"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "id": "0aed289a",
 66 |    "metadata": {},
 67 |    "source": [
 68 |     "## Preprocessing"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "id": "0172dab3",
 74 |    "metadata": {},
 75 |    "source": [
 76 |     "## Upload Data"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "markdown",
 81 |    "id": "8b01555c",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "## Import SageMaker BlazingText"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "id": "5f427dd5",
 90 |    "metadata": {},
 91 |    "source": [
 92 |     "## Train SageMaker Model"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "id": "2acd8d0a",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "## Citations"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "id": "639d0bf9",
106 |    "metadata": {},
107 |    "source": [
108 |     "Ups and downs: Modeling the visual evolution of fashion trends with one-class collaborative filtering  \n",
109 |     "R. He, J. McAuley  \n",
110 |     "WWW, 2016\n",
111 |     "\n",
112 |     "\n",
113 |     "Image-based recommendations on styles and substitutes  \n",
114 |     "J. McAuley, C. Targett, J. Shi, A. van den Hengel  \n",
115 |     "SIGIR, 2015\n"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": null,
121 |    "id": "7a0a09bc",
122 |    "metadata": {},
123 |    "outputs": [],
124 |    "source": []
125 |   }
126 |  ],
127 |  "metadata": {
128 |   "kernelspec": {
129 |    "display_name": "Python 3 (ipykernel)",
130 |    "language": "python",
131 |    "name": "python3"
132 |   },
133 |   "language_info": {
134 |    "codemirror_mode": {
135 |     "name": "ipython",
136 |     "version": 3
137 |    },
138 |    "file_extension": ".py",
139 |    "mimetype": "text/x-python",
140 |    "name": "python",
141 |    "nbconvert_exporter": "python",
142 |    "pygments_lexer": "ipython3",
143 |    "version": "3.8.0"
144 |   }
145 |  },
146 |  "nbformat": 4,
147 |  "nbformat_minor": 5
148 | }
149 | 


--------------------------------------------------------------------------------
/lesson2/Lesson 2, Exercise 1 - Training Jobs Solution.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "fe94adfd",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Starter Code from Notebook\n"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": null,
 14 |    "id": "1bd309e9",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import os\n",
 19 |     "import boto3\n",
 20 |     "import sagemaker\n",
 21 |     "import json\n",
 22 |     "import zipfile\n",
 23 |     "\n",
 24 |     "import pandas as pd\n",
 25 |     "import numpy as np\n",
 26 |     "\n",
 27 |     "\n",
 28 |     "\n",
 29 |     "# Function below unzips the archive to the local directory. \n",
 30 |     "\n",
 31 |     "def unzip_data(input_data_path):\n",
 32 |     "    with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:\n",
 33 |     "        input_data_zip.extractall('.')\n",
 34 |     "\n",
 35 |     "# Input data is a file with a single JSON object per line with the following format: \n",
 36 |     "# {\n",
 37 |     "#  \"reviewerID\": <string>,\n",
 38 |     "#  \"asin\": <string>,\n",
 39 |     "#  \"reviewerName\" <string>,\n",
 40 |     "#  \"helpful\": [\n",
 41 |     "#    <int>, (indicating number of \"helpful votes\")\n",
 42 |     "#    <int>  (indicating total number of votes)\n",
 43 |     "#  ],\n",
 44 |     "#  \"reviewText\": \"<string>\",\n",
 45 |     "#  \"overall\": <int>,\n",
 46 |     "#  \"summary\": \"<string>\",\n",
 47 |     "#  \"unixReviewTime\": <int>,\n",
 48 |     "#  \"reviewTime\": \"<string>\"\n",
 49 |     "# }\n",
 50 |     "# \n",
 51 |     "# We are specifically interested in the fields \"helpful\" and \"reviewText\"\n",
 52 |     "#\n",
 53 |     "\n",
 54 |     "def label_data(input_data):\n",
 55 |     "    labeled_data = []\n",
 56 |     "    HELPFUL_LABEL = \"__label__1\"\n",
 57 |     "    UNHELPFUL_LABEL = \"__label__2\"\n",
 58 |     "     \n",
 59 |     "    for l in open(input_data, 'r'):\n",
 60 |     "        l_object = json.loads(l)\n",
 61 |     "        helpful_votes = float(l_object['helpful'][0])\n",
 62 |     "        total_votes = l_object['helpful'][1]\n",
 63 |     "        reviewText = l_object['reviewText']\n",
 64 |     "        if total_votes != 0:\n",
 65 |     "            if helpful_votes / total_votes > .5:\n",
 66 |     "                labeled_data.append(\" \".join([HELPFUL_LABEL, reviewText]))\n",
 67 |     "            elif helpful_votes / total_votes < .5:\n",
 68 |     "                labeled_data.append(\" \".join([UNHELPFUL_LABEL, reviewText]))\n",
 69 |     "          \n",
 70 |     "    return labeled_data\n",
 71 |     "\n",
 72 |     "\n",
 73 |     "# Labeled data is a list of sentences, starting with the label defined in label_data. \n",
 74 |     "\n",
 75 |     "def split_sentences(labeled_data):\n",
 76 |     "    split_sentences = []\n",
 77 |     "    for d in labeled_data:\n",
 78 |     "        label = d.split()[0]        \n",
 79 |     "        sentences = \" \".join(d.split()[1:]).split(\".\") # Initially split to separate label, then separate sentences\n",
 80 |     "        for s in sentences:\n",
 81 |     "            if s: # Make sure sentences isn't empty. Common w/ \"...\"\n",
 82 |     "                split_sentences.append(\" \".join([label, s]))\n",
 83 |     "    return split_sentences\n",
 84 |     "\n",
 85 |     "\n",
 86 |     "input_data  = unzip_data('Toys_and_Games_5.json.zip')\n",
 87 |     "labeled_data = label_data('Toys_and_Games_5.json')\n",
 88 |     "split_sentence_data = split_sentences(labeled_data)\n",
 89 |     "\n",
 90 |     "print(split_sentence_data[0:9])\n"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "markdown",
 95 |    "id": "a55b9518",
 96 |    "metadata": {},
 97 |    "source": [
 98 |     "## Exercise Solution: Upload Data"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": null,
104 |    "id": "980d3684",
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": [
108 |     "import boto3\n",
109 |     "from botocore.exceptions import ClientError\n",
110 |     "# Note: This solution implies that the bucket below has already been made and that you have access\n",
111 |     "# to that bucket. You would need to change the bucket below to a bucket that you have write\n",
112 |     "# premissions to. This will take time depending on your internet connection, the training file is ~ 40 mb\n",
113 |     "\n",
114 |     "BUCKET = \"udacity-sagemaker-solutiondata2021\"\n",
115 |     "\n",
116 |     "def cycle_data(fp, data):\n",
117 |     "    for d in data:\n",
118 |     "        fp.write(d + \"\\n\")\n",
119 |     "\n",
120 |     "def write_trainfile(split_sentence_data):\n",
121 |     "    train_path = \"hello_blaze_train\"\n",
122 |     "    with open(train_path, 'w') as f:\n",
123 |     "        cycle_data(f, split_sentence_data)\n",
124 |     "    return train_path\n",
125 |     "\n",
126 |     "def write_validationfile(split_sentence_data):\n",
127 |     "    validation_path = \"hello_blaze_validation\"\n",
128 |     "    with open(validation_path, 'w') as f:\n",
129 |     "        cycle_data(f, split_sentence_data)\n",
130 |     "    return validation_path \n",
131 |     "\n",
132 |     "def upload_file_to_s3(file_name, s3_prefix):\n",
133 |     "    object_name = os.path.join(s3_prefix, file_name)\n",
134 |     "    s3_client = boto3.client('s3')\n",
135 |     "    try:\n",
136 |     "        response = s3_client.upload_file(file_name, BUCKET, object_name)\n",
137 |     "    except ClientError as e:\n",
138 |     "        logging.error(e)\n",
139 |     "        return False\n",
140 |     "\n",
141 |     "s3_prefix = \"l2e1\"\n",
142 |     "\n",
143 |     "split_data_trainlen = int(len(split_sentence_data) * .9)\n",
144 |     "split_data_validationlen = int(len(split_sentence_data) * .1)\n",
145 |     "\n",
146 |     "\n",
147 |     "train_path = write_trainfile(split_sentence_data[:split_data_trainlen])\n",
148 |     "print(\"Training file written!\")\n",
149 |     "validation_path = write_validationfile(split_sentence_data[split_data_trainlen:])\n",
150 |     "print(\"Validation file written!\")\n",
151 |     "\n",
152 |     "upload_file_to_s3(train_path, s3_prefix)\n",
153 |     "print(\"Train file uploaded!\")\n",
154 |     "upload_file_to_s3(validation_path, s3_prefix)\n",
155 |     "print(\"Validation file uploaded!\")\n",
156 |     "\n",
157 |     "print(\" \".join([train_path, validation_path]))"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": null,
163 |    "id": "6de070b1",
164 |    "metadata": {},
165 |    "outputs": [],
166 |    "source": []
167 |   }
168 |  ],
169 |  "metadata": {
170 |   "kernelspec": {
171 |    "display_name": "Python 3 (ipykernel)",
172 |    "language": "python",
173 |    "name": "python3"
174 |   },
175 |   "language_info": {
176 |    "codemirror_mode": {
177 |     "name": "ipython",
178 |     "version": 3
179 |    },
180 |    "file_extension": ".py",
181 |    "mimetype": "text/x-python",
182 |    "name": "python",
183 |    "nbconvert_exporter": "python",
184 |    "pygments_lexer": "ipython3",
185 |    "version": "3.9.5"
186 |   }
187 |  },
188 |  "nbformat": 4,
189 |  "nbformat_minor": 5
190 | }
191 | 


--------------------------------------------------------------------------------
/lesson2/Lesson 2, Exercise 1 - Training Jobs.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "345c18af",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# UDACITY SageMaker Essentials: Training Job Exercise"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "5ea8b4d8",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "Good job on your work so far! You've gotten an overview of building an ML Workflow in AWS. Now, it's time to practice your skills. In this exercise, you will be training a BlazingText model to help predict the helpfulness of Amazon reviews. The model & parameters have already been chosen for you; it's your task to properly upload the data necessary for the job and launch the training.  "
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "id": "a9e6bfbd",
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "import os\n",
 27 |     "import boto3\n",
 28 |     "import sagemaker\n",
 29 |     "import json\n",
 30 |     "import zipfile\n",
 31 |     "\n",
 32 |     "import pandas as pd\n",
 33 |     "import numpy as np\n",
 34 |     "\n"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "id": "16340311",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "## Preprocessing"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "markdown",
 47 |    "id": "0d83e2cb",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "The data we'll be examining today is a collection of reviews for an assortment of toys and games found on Amazon. This data includes, but is not limited to, the text of the review itself as well as the number of user \"votes\" on whether or not the review was helpful. Today, we will be making a model that predicts the usefulness of a review, given only the text of the review. This is an example of a problem in the domain of supervised sentiment analysis; we are trying to extract something subjective from text given prior labeled text."
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "id": "ea38f892",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "Before we get started, we want to know what form of data is accepted in the algorithm we're using. We'll be using BlazingText, an implemention of Word2Vec optimized for SageMaker. In order for this optimization to be effective, data needs to be preprocessed to match the correct format. The documentation for this algorithm can be found here: https://docs.aws.amazon.com/sagemaker/latest/dg/blazingtext.html"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "markdown",
 63 |    "id": "59be0c47",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "We will be training under \"File Mode\", which requires us to do two things in preprocessing this data. First, we need to generate labels from the votes. For this exercise, if the majority of votes for a review is helpful, we will designate it \\_\\_label\\_\\_1, and if the majority of votes for a review is unhelpful, we will designate it \\_\\_label\\_\\_2. In the edge case where the values are equal, we will drop the review from consideration. Second, we need to separate the sentences, while keeping the original label for the review. These reviews will often consist of several sentences, and this algorithm is optimized to perform best on many small sentences rather than fewer larger paragraphs. We will separate these sentences by the character \".\"\n"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "id": "6b79bab4",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "(This process is obviously very naive, but we will get remarkable results even without a lot of finetuning!)"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "id": "09129d9c",
 80 |    "metadata": {},
 81 |    "source": [
 82 |     "This preprocessing is done for you in the cells below. Make sure you go through the code and understand what's being done in each step. "
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 3,
 88 |    "id": "52fb5427",
 89 |    "metadata": {},
 90 |    "outputs": [
 91 |     {
 92 |      "name": "stdout",
 93 |      "output_type": "stream",
 94 |      "text": [
 95 |       "['__label__1 Love the magnet easel', '__label__1  great for moving to different areas', '__label__1  Wish it had some sort of non skid pad on bottom though', '__label__1 Both sides are magnetic', \"__label__1  A real plus when you're entertaining more than one child\", '__label__1  The four-year old can find the letters for the words, while the two-year old can find the pictures the words spell', '__label__1  (I bought letters and magnetic pictures to go with this board)', '__label__1  Both grandkids liked it a lot, which means I like it a lot as well', '__label__1  Have not even introduced markers, as this will be used strictly as a magnetic board']\n"
 96 |      ]
 97 |     }
 98 |    ],
 99 |    "source": [
100 |     "import zipfile\n",
101 |     "\n",
102 |     "# Function below unzips the archive to the local directory. \n",
103 |     "\n",
104 |     "def unzip_data(input_data_path):\n",
105 |     "    with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:\n",
106 |     "        input_data_zip.extractall('.')\n",
107 |     "\n",
108 |     "# Input data is a file with a single JSON object per line with the following format: \n",
109 |     "# {\n",
110 |     "#  \"reviewerID\": <string>,\n",
111 |     "#  \"asin\": <string>,\n",
112 |     "#  \"reviewerName\" <string>,\n",
113 |     "#  \"helpful\": [\n",
114 |     "#    <int>, (indicating number of \"helpful votes\")\n",
115 |     "#    <int>  (indicating total number of votes)\n",
116 |     "#  ],\n",
117 |     "#  \"reviewText\": \"<string>\",\n",
118 |     "#  \"overall\": <int>,\n",
119 |     "#  \"summary\": \"<string>\",\n",
120 |     "#  \"unixReviewTime\": <int>,\n",
121 |     "#  \"reviewTime\": \"<string>\"\n",
122 |     "# }\n",
123 |     "# \n",
124 |     "# We are specifically interested in the fields \"helpful\" and \"reviewText\"\n",
125 |     "#\n",
126 |     "\n",
127 |     "def label_data(input_data):\n",
128 |     "    labeled_data = []\n",
129 |     "    HELPFUL_LABEL = \"__label__1\"\n",
130 |     "    UNHELPFUL_LABEL = \"__label__2\"\n",
131 |     "     \n",
132 |     "    for l in open(input_data, 'r'):\n",
133 |     "        l_object = json.loads(l)\n",
134 |     "        helpful_votes = float(l_object['helpful'][0])\n",
135 |     "        total_votes = l_object['helpful'][1]\n",
136 |     "        reviewText = l_object['reviewText']\n",
137 |     "        if total_votes != 0:\n",
138 |     "            if helpful_votes / total_votes > .5:\n",
139 |     "                labeled_data.append(\" \".join([HELPFUL_LABEL, reviewText]))\n",
140 |     "            elif helpful_votes / total_votes < .5:\n",
141 |     "                labeled_data.append(\" \".join([UNHELPFUL_LABEL, reviewText]))\n",
142 |     "          \n",
143 |     "    return labeled_data\n",
144 |     "\n",
145 |     "\n",
146 |     "# Labeled data is a list of sentences, starting with the label defined in label_data. \n",
147 |     "\n",
148 |     "def split_sentences(labeled_data):\n",
149 |     "    split_sentences = []\n",
150 |     "    for d in labeled_data:\n",
151 |     "        label = d.split()[0]        \n",
152 |     "        sentences = \" \".join(d.split()[1:]).split(\".\") # Initially split to separate label, then separate sentences\n",
153 |     "        for s in sentences:\n",
154 |     "            if s: # Make sure sentences isn't empty. Common w/ \"...\"\n",
155 |     "                split_sentences.append(\" \".join([label, s]))\n",
156 |     "    return split_sentences\n",
157 |     "\n",
158 |     "\n",
159 |     "input_data  = unzip_data('Toys_and_Games_5.json.zip')\n",
160 |     "labeled_data = label_data('Toys_and_Games_5.json')\n",
161 |     "split_sentence_data = split_sentences(labeled_data)\n",
162 |     "\n",
163 |     "print(split_sentence_data[0:9])\n"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "id": "478252a7",
169 |    "metadata": {},
170 |    "source": [
171 |     "## Exercise: Upload Data"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "markdown",
176 |    "id": "0724810e",
177 |    "metadata": {},
178 |    "source": [
179 |     "Your first responsibility is to separate that `split_sentence_data` into a `training_file` and a `validation_file`. Have the training file make up 90% of the data, and have the validation file make up 10% of the data. Careful that the data doesn't overlap! (This will result in overfitting, which might result in nice validation metrics, but crummy generalization.)\n",
180 |     "\n",
181 |     "Using the methodology of your choice, upload these files to S3. (In practice, it's important to know how to do this through the console, programatically, and through the CLI. If you're feeling frisky, try all 3!) If you're doing this programatically, the Boto3 documentation would be a good reference. https://boto3.amazonaws.com/v1/documentation/api/latest/index.html\n",
182 |     "\n",
183 |     "The BUCKET will be the name of the bucket you wish to upload it to. The s3_prefix will be the name of the desired 'file-path' that you upload your file to within the bucket. For example, if you wanted to upload a file to:\n",
184 |     "\n",
185 |     "\"s3://example-bucket/1/2/3/example.txt\n",
186 |     "\n",
187 |     "The \"BUCKET\" will be 'example-bucket', and the s3_prefix would be '1/2/3'\n",
188 |     "\n",
189 |     "The code below shows you how to upload it programatically."
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": 9,
195 |    "id": "6820e8f5",
196 |    "metadata": {},
197 |    "outputs": [
198 |     {
199 |      "name": "stdout",
200 |      "output_type": "stream",
201 |      "text": [
202 |       " \n"
203 |      ]
204 |     }
205 |    ],
206 |    "source": [
207 |     "import boto3\n",
208 |     "from botocore.exceptions import ClientError\n",
209 |     "# Note: This section implies that the bucket below has already been made and that you have access\n",
210 |     "# to that bucket. You would need to change the bucket below to a bucket that you have write\n",
211 |     "# premissions to. This will take time depending on your internet connection, the training file is ~ 40 mb\n",
212 |     "\n",
213 |     "BUCKET = \"CHANGE THIS\"\n",
214 |     "s3_prefix = \"CHANGE THIS\"\n",
215 |     "\n",
216 |     "\n",
217 |     "def cycle_data(fp, data):\n",
218 |     "    for d in data:\n",
219 |     "        fp.write(d + \"\\n\")\n",
220 |     "\n",
221 |     "def write_trainfile(split_sentence_data):\n",
222 |     "    train_path = \"hello_blaze_train\"\n",
223 |     "    with open(train_path, 'w') as f:\n",
224 |     "        cycle_data(f, split_sentence_data)\n",
225 |     "    return train_path\n",
226 |     "\n",
227 |     "def write_validationfile(split_sentence_data):\n",
228 |     "    validation_path = \"hello_blaze_validation\"\n",
229 |     "    with open(validation_path, 'w') as f:\n",
230 |     "        cycle_data(f, split_sentence_data)\n",
231 |     "    return validation_path \n",
232 |     "\n",
233 |     "def upload_file_to_s3(file_name, s3_prefix):\n",
234 |     "    object_name = os.path.join(s3_prefix, file_name)\n",
235 |     "    s3_client = boto3.client('s3')\n",
236 |     "    try:\n",
237 |     "        response = s3_client.upload_file(file_name, BUCKET, object_name)\n",
238 |     "    except ClientError as e:\n",
239 |     "        logging.error(e)\n",
240 |     "        return False\n",
241 |     "    \n",
242 |     "# Split the data\n",
243 |     "split_data_trainlen = int(len(split_sentence_data) * .9)\n",
244 |     "split_data_validationlen = int(len(split_sentence_data) * .1)\n",
245 |     "\n",
246 |     "# Todo: write the training file\n",
247 |     "train_path = write_trainfile()\n",
248 |     "print(\"Training file written!\")\n",
249 |     "\n",
250 |     "# Todo: write the validation file\n",
251 |     "validation_path = write_validationfile()\n",
252 |     "print(\"Validation file written!\")\n",
253 |     "\n",
254 |     "upload_file_to_s3(train_path, s3_prefix)\n",
255 |     "print(\"Train file uploaded!\")\n",
256 |     "upload_file_to_s3(validation_path, s3_prefix)\n",
257 |     "print(\"Validation file uploaded!\")\n",
258 |     "\n",
259 |     "print(\" \".join([train_path, validation_path]))"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "markdown",
264 |    "id": "f103ba51",
265 |    "metadata": {},
266 |    "source": [
267 |     "## Exercise: Train SageMaker Model"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "markdown",
272 |    "id": "3cd8b876",
273 |    "metadata": {},
274 |    "source": [
275 |     "Believe it or not, you're already almost done! Part of the appeal of SageMaker is that AWS has already done the heavy implementation lifting for you. Launch a \"BlazingText\" training job from the SageMaker console. You can do so by searching \"SageMaker\", and navigating to Training Jobs on the left hand side. After selecting \"Create Training Job\", perform the following steps:\n",
276 |     "* Select \"BlazingText\" from the algorithms available. \n",
277 |     "* Specify the \"file\" input mode of training. \n",
278 |     "* Under \"resource configuration\", select the \"ml.m5.large\" instance type. Specify 5 additional GBs of memory. \n",
279 |     "* Set a stopping condition for 15 minutes. \n",
280 |     "* Under hyperparameters, set \"mode\" to \"supervised\"\n",
281 |     "* Under input_data configuration, input the S3 path to your training and validation datasets under the \"train\" and \"validation\" channels. You will need to create a channel named \"validation\".  \n",
282 |     "* Specify an output path in the same bucket that you uploaded training and validation data. \n"
283 |    ]
284 |   },
285 |   {
286 |    "cell_type": "markdown",
287 |    "id": "44a48c1d",
288 |    "metadata": {},
289 |    "source": [
290 |     "If successful, you should see a training job launch in the UI. Go grab a coffee, this will take a little bit of time. If there was a failure, you should see it there. Googling the error should direct "
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "id": "c2604071",
296 |    "metadata": {},
297 |    "source": [
298 |     "## Citations"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "markdown",
303 |    "id": "429bb72f",
304 |    "metadata": {},
305 |    "source": [
306 |     "Ups and downs: Modeling the visual evolution of fashion trends with one-class collaborative filtering  \n",
307 |     "R. He, J. McAuley  \n",
308 |     "WWW, 2016\n",
309 |     "\n",
310 |     "\n",
311 |     "Image-based recommendations on styles and substitutes  \n",
312 |     "J. McAuley, C. Targett, J. Shi, A. van den Hengel  \n",
313 |     "SIGIR, 2015\n"
314 |    ]
315 |   }
316 |  ],
317 |  "metadata": {
318 |   "kernelspec": {
319 |    "display_name": "Python 3 (ipykernel)",
320 |    "language": "python",
321 |    "name": "python3"
322 |   },
323 |   "language_info": {
324 |    "codemirror_mode": {
325 |     "name": "ipython",
326 |     "version": 3
327 |    },
328 |    "file_extension": ".py",
329 |    "mimetype": "text/x-python",
330 |    "name": "python",
331 |    "nbconvert_exporter": "python",
332 |    "pygments_lexer": "ipython3",
333 |    "version": "3.9.5"
334 |   }
335 |  },
336 |  "nbformat": 4,
337 |  "nbformat_minor": 5
338 | }
339 | 


--------------------------------------------------------------------------------
/lesson2/Lesson 2, Exercise 2 - Endpoints Solution.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Exercise: Deploy Model Solution"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 2,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "name": "stdout",
 17 |      "output_type": "stream",
 18 |      "text": [
 19 |       "['The product does exactly as it should and is quite affordable', 'I did not realized it was double screened until it arrived, so it was even better than I had expected', \"As an added bonus, one of the screens carries a small hint of the smell of an old grape candy I used to buy, so for reminiscent's sake, I cannot stop putting the pop filter next to my nose and smelling it after recording\", ' :DIf you needed a pop filter, this will work just as well as the expensive ones, and it may even come with a pleasing aroma like mine did!Buy this product! :]', 'The primary job of this device is to block the breath that would otherwise produce a popping sound, while allowing your voice to pass through with no noticeable reduction of volume or high frequencies', ' The double cloth filter blocks the pops and lets the voice through with no coloration', ' The metal clamp mount attaches to the mike stand secure enough to keep it attached', ' The goose neck needs a little coaxing to stay where you put it', 'Monster makes a wide array of cables, including some that are very high end']\n"
 20 |      ]
 21 |     }
 22 |    ],
 23 |    "source": [
 24 |     "import boto3\n",
 25 |     "import json\n",
 26 |     "import sagemaker\n",
 27 |     "import zipfile\n",
 28 |     "\n",
 29 |     "# Function below unzips the archive to the local directory. \n",
 30 |     "\n",
 31 |     "def unzip_data(input_data_path):\n",
 32 |     "    with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:\n",
 33 |     "        input_data_zip.extractall('.')\n",
 34 |     "\n",
 35 |     "# Input data is a file with a single JSON object per line with the following format: \n",
 36 |     "# {\n",
 37 |     "#  \"reviewerID\": <string>,\n",
 38 |     "#  \"asin\": <string>,\n",
 39 |     "#  \"reviewerName\" <string>,\n",
 40 |     "#  \"helpful\": [\n",
 41 |     "#    <int>, (indicating number of \"helpful votes\")\n",
 42 |     "#    <int>  (indicating total number of votes)\n",
 43 |     "#  ],\n",
 44 |     "#  \"reviewText\": \"<string>\",\n",
 45 |     "#  \"overall\": <int>,\n",
 46 |     "#  \"summary\": \"<string>\",\n",
 47 |     "#  \"unixReviewTime\": <int>,\n",
 48 |     "#  \"reviewTime\": \"<string>\"\n",
 49 |     "# }\n",
 50 |     "# \n",
 51 |     "# We are specifically interested in the fields \"helpful\" and \"reviewText\"\n",
 52 |     "#\n",
 53 |     "\n",
 54 |     "def label_data(input_data):\n",
 55 |     "    labeled_data = []\n",
 56 |     "    HELPFUL_LABEL = \"__label__1\"\n",
 57 |     "    UNHELPFUL_LABEL = \"__label__2\"\n",
 58 |     "     \n",
 59 |     "    for l in open(input_data, 'r'):\n",
 60 |     "        l_object = json.loads(l)\n",
 61 |     "        helpful_votes = float(l_object['helpful'][0])\n",
 62 |     "        total_votes = l_object['helpful'][1]\n",
 63 |     "        reviewText = l_object['reviewText']\n",
 64 |     "        if total_votes != 0:\n",
 65 |     "            if helpful_votes / total_votes > .5:\n",
 66 |     "                labeled_data.append(\" \".join([HELPFUL_LABEL, reviewText]))\n",
 67 |     "            elif helpful_votes / total_votes < .5:\n",
 68 |     "                labeled_data.append(\" \".join([UNHELPFUL_LABEL, reviewText]))\n",
 69 |     "          \n",
 70 |     "    return labeled_data\n",
 71 |     "\n",
 72 |     "\n",
 73 |     "# Labeled data is a list of sentences, starting with the label defined in label_data. \n",
 74 |     "\n",
 75 |     "def split_sentences(labeled_data):\n",
 76 |     "    new_split_sentences = []\n",
 77 |     "    for d in labeled_data:       \n",
 78 |     "        sentences = \" \".join(d.split()[1:]).split(\".\") # Initially split to separate label, then separate sentences\n",
 79 |     "        for s in sentences:\n",
 80 |     "            if s: # Make sure sentences isn't empty. Common w/ \"...\"\n",
 81 |     "                new_split_sentences.append(s)\n",
 82 |     "    return new_split_sentences\n",
 83 |     "\n",
 84 |     "\n",
 85 |     "unzip_data('reviews_Musical_Instruments_5.json.zip')\n",
 86 |     "labeled_data = label_data('reviews_Musical_Instruments_5.json')\n",
 87 |     "new_split_sentence_data = split_sentences(labeled_data)\n",
 88 |     "\n",
 89 |     "print(new_split_sentence_data[0:9])"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": 13,
 95 |    "metadata": {},
 96 |    "outputs": [
 97 |     {
 98 |      "name": "stdout",
 99 |      "output_type": "stream",
100 |      "text": [
101 |       "-------------!"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "from sagemaker import get_execution_role\n",
107 |     "from sagemaker.model import Model\n",
108 |     "from sagemaker import image_uris\n",
109 |     "\n",
110 |     "role = get_execution_role()\n",
111 |     "\n",
112 |     "image_uri = image_uris.retrieve(framework='blazingtext',region='us-east-1')\n",
113 |     "\n",
114 |     "model_data = \"s3://udacity-sagemaker-solutiondata2021/l2e1/model_artifact/hello-blaze2021-2/output/model.tar.gz\"\n",
115 |     "\n",
116 |     "model = Model(image_uri=image_uri, model_data=model_data, role=role)\n",
117 |     "\n",
118 |     "predictor = model.deploy(initial_instance_count=1, instance_type=\"ml.m5.large\")"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "## Exercise: Evaluate Data Solution"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 18,
131 |    "metadata": {},
132 |    "outputs": [
133 |     {
134 |      "name": "stdout",
135 |      "output_type": "stream",
136 |      "text": [
137 |       "{\"instances\": [\"The product does exactly as it should and is quite affordable\", \"I did not realized it was double screened until it arrived, so it was even better than I had expected\", \"As an added bonus, one of the screens carries a small hint of the smell of an old grape candy I used to buy, so for reminiscent's sake, I cannot stop putting the pop filter next to my nose and smelling it after recording\"]}\n"
138 |      ]
139 |     }
140 |    ],
141 |    "source": [
142 |     "from sagemaker.predictor import Predictor\n",
143 |     "import json\n",
144 |     "\n",
145 |     "predictor = Predictor(\"blazingtext-2021-08-11-21-22-32-902\")\n",
146 |     "\n",
147 |     "example_sentences = new_split_sentence_data[0:5]\n",
148 |     "\n",
149 |     "payload = {\"instances\": example_sentences}\n",
150 |     "\n",
151 |     "print(json.dumps(payload))\n",
152 |     "\n",
153 |     "predictions = json.loads(predictor.predict(json.dumps(payload), initial_args={'ContentType': 'application/json'}))"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 22,
159 |    "metadata": {},
160 |    "outputs": [],
161 |    "source": [
162 |     "predictor.delete_endpoint()"
163 |    ]
164 |   }
165 |  ],
166 |  "metadata": {
167 |   "instance_type": "ml.t3.medium",
168 |   "kernelspec": {
169 |    "display_name": "Python 3 (MXNet 1.6 Python 3.6 CPU Optimized)",
170 |    "language": "python",
171 |    "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/mxnet-1.6-cpu-py36"
172 |   },
173 |   "language_info": {
174 |    "codemirror_mode": {
175 |     "name": "ipython",
176 |     "version": 3
177 |    },
178 |    "file_extension": ".py",
179 |    "mimetype": "text/x-python",
180 |    "name": "python",
181 |    "nbconvert_exporter": "python",
182 |    "pygments_lexer": "ipython3",
183 |    "version": "3.6.13"
184 |   }
185 |  },
186 |  "nbformat": 4,
187 |  "nbformat_minor": 5
188 | }
189 | 


--------------------------------------------------------------------------------
/lesson2/Lesson 2, Exercise 2 - Endpoints.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# UDACITY SageMaker Essentials: Endpoint Exercise"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "In the last exercise, you trained a BlazingText supervised sentiment analysis model. (Let's call this model HelloBlaze.) You've recently learned about how we can take a model we've previously trained and generate an endpoint that we can call to efficently evaluate new data. Here, we'll put what we've learned into practice. You will take HelloBlaze and use it to create an endpoint. Then, you'll evaluate some sample data on that model to see how well the model we've trained generalizes. (Sentiment analysis is a notoriously difficult problem, so we'll keep our expectations modest.)"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "import boto3\n",
 24 |     "import json\n",
 25 |     "import sagemaker\n",
 26 |     "import zipfile"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "markdown",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "## Understanding Exercise: Preprocessing Data (again)"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "Before we start, we're going to do preprocessing on a new set of data that we'll be evaluating on HelloBlaze. We won't keep track of the labels here, we're just seeing how we could potentially evaluate new data using an existing model. This code should be very familiar, and requires no modification. Something to note: it is getting tedious to have to manually process the data ourselves whenever we want to do something with our model. We are also doing this on our local machine. Can you think of potential limitations and dangers to the preprocessing setup we currently have? Keep this in mind when we move on to our lesson about batch-transform jobs.  "
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 10,
 46 |    "metadata": {},
 47 |    "outputs": [
 48 |     {
 49 |      "name": "stdout",
 50 |      "output_type": "stream",
 51 |      "text": [
 52 |       "['The product does exactly as it should and is quite affordable', 'I did not realized it was double screened until it arrived, so it was even better than I had expected', \"As an added bonus, one of the screens carries a small hint of the smell of an old grape candy I used to buy, so for reminiscent's sake, I cannot stop putting the pop filter next to my nose and smelling it after recording\", ' :DIf you needed a pop filter, this will work just as well as the expensive ones, and it may even come with a pleasing aroma like mine did!Buy this product! :]', 'The primary job of this device is to block the breath that would otherwise produce a popping sound, while allowing your voice to pass through with no noticeable reduction of volume or high frequencies', ' The double cloth filter blocks the pops and lets the voice through with no coloration', ' The metal clamp mount attaches to the mike stand secure enough to keep it attached', ' The goose neck needs a little coaxing to stay where you put it', 'Monster makes a wide array of cables, including some that are very high end']\n"
 53 |      ]
 54 |     }
 55 |    ],
 56 |    "source": [
 57 |     "# Function below unzips the archive to the local directory. \n",
 58 |     "\n",
 59 |     "def unzip_data(input_data_path):\n",
 60 |     "    with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:\n",
 61 |     "        input_data_zip.extractall('.')\n",
 62 |     "\n",
 63 |     "# Input data is a file with a single JSON object per line with the following format: \n",
 64 |     "# {\n",
 65 |     "#  \"reviewerID\": <string>,\n",
 66 |     "#  \"asin\": <string>,\n",
 67 |     "#  \"reviewerName\" <string>,\n",
 68 |     "#  \"helpful\": [\n",
 69 |     "#    <int>, (indicating number of \"helpful votes\")\n",
 70 |     "#    <int>  (indicating total number of votes)\n",
 71 |     "#  ],\n",
 72 |     "#  \"reviewText\": \"<string>\",\n",
 73 |     "#  \"overall\": <int>,\n",
 74 |     "#  \"summary\": \"<string>\",\n",
 75 |     "#  \"unixReviewTime\": <int>,\n",
 76 |     "#  \"reviewTime\": \"<string>\"\n",
 77 |     "# }\n",
 78 |     "# \n",
 79 |     "# We are specifically interested in the fields \"helpful\" and \"reviewText\"\n",
 80 |     "#\n",
 81 |     "\n",
 82 |     "def label_data(input_data):\n",
 83 |     "    labeled_data = []\n",
 84 |     "    HELPFUL_LABEL = \"__label__1\"\n",
 85 |     "    UNHELPFUL_LABEL = \"__label__2\"\n",
 86 |     "     \n",
 87 |     "    for l in open(input_data, 'r'):\n",
 88 |     "        l_object = json.loads(l)\n",
 89 |     "        helpful_votes = float(l_object['helpful'][0])\n",
 90 |     "        total_votes = l_object['helpful'][1]\n",
 91 |     "        reviewText = l_object['reviewText']\n",
 92 |     "        if total_votes != 0:\n",
 93 |     "            if helpful_votes / total_votes > .5:\n",
 94 |     "                labeled_data.append(\" \".join([HELPFUL_LABEL, reviewText]))\n",
 95 |     "            elif helpful_votes / total_votes < .5:\n",
 96 |     "                labeled_data.append(\" \".join([UNHELPFUL_LABEL, reviewText]))\n",
 97 |     "          \n",
 98 |     "    return labeled_data\n",
 99 |     "\n",
100 |     "\n",
101 |     "# Labeled data is a list of sentences, starting with the label defined in label_data. \n",
102 |     "\n",
103 |     "def split_sentences(labeled_data):\n",
104 |     "    new_split_sentences = []\n",
105 |     "    for d in labeled_data:       \n",
106 |     "        sentences = \" \".join(d.split()[1:]).split(\".\") # Initially split to separate label, then separate sentences\n",
107 |     "        for s in sentences:\n",
108 |     "            if s: # Make sure sentences isn't empty. Common w/ \"...\"\n",
109 |     "                new_split_sentences.append(s)\n",
110 |     "    return new_split_sentences\n",
111 |     "\n",
112 |     "\n",
113 |     "unzip_data('reviews_Musical_Instruments_5.json.zip')\n",
114 |     "labeled_data = label_data('reviews_Musical_Instruments_5.json')\n",
115 |     "new_split_sentence_data = split_sentences(labeled_data)\n",
116 |     "\n",
117 |     "print(new_split_sentence_data[0:9])"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "markdown",
122 |    "metadata": {},
123 |    "source": [
124 |     "## Exercise: Deploy Model"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "Once you have your model, it's trivially easy to create an endpoint. All you need to do is initialize a \"model\" object, and call the deploy method. Fill in the method below with the proper addresses and an endpoint will be created, serving your model. Once this is done, confirm that the endpoint is live by consulting the SageMaker Console. You'll see this under \"Endpoints\" in the \"Inference\" menu on the left-hand side. If done correctly, this will take a while to get instantiated. \n",
132 |     "\n",
133 |     "You will need the following methods: \n",
134 |     "\n",
135 |     "* You'll need `image_uris.retrieve` method to determine the image uri to get a BlazingText docker image uri https://sagemaker.readthedocs.io/en/stable/api/utility/image_uris.html\n",
136 |     "* You'll need a `model_data` to pass the S3 location of a SageMaker model data\n",
137 |     "* You'll need to use the `Model` object https://sagemaker.readthedocs.io/en/stable/api/inference/model.html\n",
138 |     "* You'll need to the get execution role. \n",
139 |     "* You'll need to use the `deploy` method of the model object, using a single instance of \"ml.m5.large\""
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 5,
145 |    "metadata": {},
146 |    "outputs": [],
147 |    "source": [
148 |     "from sagemaker import get_execution_role\n",
149 |     "from sagemaker.model import Model\n",
150 |     "from sagemaker import image_uris\n",
151 |     "\n",
152 |     "# get the execution role\n",
153 |     "role =\n",
154 |     "# get the image using the \"blazingtext\" framework and your region\n",
155 |     "image_uri = \n",
156 |     "# get the S3 location of a SageMaker model data\n",
157 |     "model_data = \n",
158 |     "# define a model object\n",
159 |     "model =\n",
160 |     "# deploy the model using a single instance of \"ml.m5.large\"\n",
161 |     "predictor = "
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "markdown",
166 |    "metadata": {},
167 |    "source": [
168 |     "## Exercise: Evaluate Data"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "markdown",
173 |    "metadata": {},
174 |    "source": [
175 |     "Alright, we now have an easy way to evaluate our data! You will want to interact with the endpoint using the predictor interface: https://sagemaker.readthedocs.io/en/stable/api/inference/predictors.html\n",
176 |     "\n",
177 |     "Predictor is not the endpoint itself, but instead is an interface that we can use to easily interact with our deployed model. Your task is to take `new_split_sentence_data` and evaluate it using the predictor.  \n",
178 |     "\n",
179 |     "Note that the BlazingText supports \"application/json\" as the content-type for inference and the model expects a payload that contains a list of sentences with the key as “instances”.\n",
180 |     "\n",
181 |     "The method you'll need to call is highlighted below.\n",
182 |     "\n",
183 |     "Another recommendation: try evaluating a subset of the data before evaluating all of the data. This will make debugging significantly faster."
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": 8,
189 |    "metadata": {},
190 |    "outputs": [],
191 |    "source": [
192 |     "from sagemaker.predictor import Predictor\n",
193 |     "import json\n",
194 |     "\n",
195 |     "predictor = \n",
196 |     "\n",
197 |     "# load the first five reviews from new_split_sentence_data\n",
198 |     "example_sentences = \n",
199 |     "\n",
200 |     "payload = {\"instances\": example_sentences}\n",
201 |     "\n",
202 |     "print(json.dumps(payload))\n",
203 |     "\n",
204 |     "# make predictions using the \"predict\" method. Set initial_args to {'ContentType': 'application/json'}\n",
205 |     "predictions = \n",
206 |     "\n",
207 |     "print(predictions)"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "markdown",
212 |    "metadata": {},
213 |    "source": [
214 |     "## Make sure you stop/delete the endpoint after completing the exercise to avoid cost."
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": null,
220 |    "metadata": {},
221 |    "outputs": [],
222 |    "source": [
223 |     "predictor.delete_endpoint()"
224 |    ]
225 |   }
226 |  ],
227 |  "metadata": {
228 |   "kernelspec": {
229 |    "display_name": "Python 3",
230 |    "language": "python",
231 |    "name": "python3"
232 |   },
233 |   "language_info": {
234 |    "codemirror_mode": {
235 |     "name": "ipython",
236 |     "version": 3
237 |    },
238 |    "file_extension": ".py",
239 |    "mimetype": "text/x-python",
240 |    "name": "python",
241 |    "nbconvert_exporter": "python",
242 |    "pygments_lexer": "ipython3",
243 |    "version": "3.6.3"
244 |   }
245 |  },
246 |  "nbformat": 4,
247 |  "nbformat_minor": 5
248 | }
249 | 


--------------------------------------------------------------------------------
/lesson2/Lesson 2, Exercise 3 - Batch Transform Solution.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "f1b85b0c",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Exercise Solution: Upload (again, again) to S3"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": null,
 14 |    "id": "4c80c3c6",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import boto3\n",
 19 |     "import json\n",
 20 |     "import os\n",
 21 |     "import zipfile\n",
 22 |     "from botocore.exceptions import ClientError\n",
 23 |     "\n",
 24 |     "# Input the s3 bucket\n",
 25 |     "BUCKET = \"udacity-sagemaker-solutiondata2021\"\n",
 26 |     "# Input the s3 prefix\n",
 27 |     "s3_prefix = \"l2e3\"\n",
 28 |     "# Input the the file to write the data to\n",
 29 |     "file_name = \"music_instruments_reviews.txt\"\n",
 30 |     "\n",
 31 |     "\n",
 32 |     "def unzip_data(input_data_path):\n",
 33 |     "    with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:\n",
 34 |     "        input_data_zip.extractall('.')\n",
 35 |     "\n",
 36 |     "def split_sentences(input_data):\n",
 37 |     "    split_sentences = []\n",
 38 |     "    for l in open(input_data, 'r'):\n",
 39 |     "        l_object = json.loads(l)\n",
 40 |     "        helpful_votes = float(l_object['helpful'][0])\n",
 41 |     "        total_votes = l_object['helpful'][1]\n",
 42 |     "        if total_votes != 0 and helpful_votes/total_votes != .5:  # Filter out same data as prior jobs. \n",
 43 |     "            reviewText = l_object['reviewText']\n",
 44 |     "            sentences = reviewText.split(\".\") \n",
 45 |     "            for s in sentences:\n",
 46 |     "                if s: # Make sure sentences isn't empty. Common w/ \"...\"\n",
 47 |     "                    split_sentences.append(s)\n",
 48 |     "    return split_sentences\n",
 49 |     "\n",
 50 |     "# Format the data as  {'source': 'THIS IS A SAMPLE SENTENCE'}\n",
 51 |     "# And write the data into a file\n",
 52 |     "def cycle_data(fp, data):\n",
 53 |     "    for d in data:\n",
 54 |     "        fp.write(json.dumps({'source':d}) + '\\n')\n",
 55 |     "\n",
 56 |     "# upload the data to s3\n",
 57 |     "def upload_file_to_s3(file_name, s3_prefix):\n",
 58 |     "    object_name = os.path.join(s3_prefix, file_name)\n",
 59 |     "    s3_client = boto3.client('s3')\n",
 60 |     "    try:\n",
 61 |     "        response = s3_client.upload_file(file_name, BUCKET, object_name)\n",
 62 |     "    except ClientError as e:\n",
 63 |     "        logging.error(e)\n",
 64 |     "        return False\n",
 65 |     "\n",
 66 |     "# Unzip archive\n",
 67 |     "unzip_data('reviews_Musical_Instruments_5.json.zip')\n",
 68 |     "\n",
 69 |     "# Preprocess reviews_Musical_Instruments_5.json\n",
 70 |     "sentences = split_sentences('reviews_Musical_Instruments_5.json')\n",
 71 |     "\n",
 72 |     "# Write data to a file and upload it to s3.   \n",
 73 |     "with open(file_name, 'w') as f:\n",
 74 |     "    cycle_data(f, sentences)\n",
 75 |     "\n",
 76 |     "upload_file_to_s3(file_name, s3_prefix)\n",
 77 |     "\n",
 78 |     "# Get the s3 path for the data\n",
 79 |     "batch_transform_input_path = \"s3://\" +  \"/\".join([BUCKET, s3_prefix, file_name])\n",
 80 |     "\n",
 81 |     "print(batch_transform_input_path)\n"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "id": "8f8b7c95",
 87 |    "metadata": {},
 88 |    "source": [
 89 |     "## Exercise Solution: Use Batch Transform "
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "id": "9c501488",
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "from sagemaker import get_execution_role\n",
100 |     "from sagemaker.model import Model\n",
101 |     "from sagemaker import image_uris\n",
102 |     "\n",
103 |     "role = get_execution_role()\n",
104 |     "\n",
105 |     "image_uri = image_uris.retrieve(framework='blazingtext',region='us-west-2')\n",
106 |     "\n",
107 |     "model_data = \"s3://udacity-sagemaker-solutiondata2021/l2e1/model_artifact/hello-blaze2021-2/output/model.tar.gz\"\n",
108 |     "\n",
109 |     "batch_transform_output_path = \"s3://udacity-sagemaker-solutiondata2021/l2e3/batchtransform_output\"\n",
110 |     "\n",
111 |     "model = Model(image_uri=image_uri, model_data=model_data, role=role)\n",
112 |     "\n",
113 |     "transformer = model.transformer(\n",
114 |     "    instance_count=1, \n",
115 |     "    instance_type='ml.m4.xlarge', \n",
116 |     "    output_path=batch_transform_output_path\n",
117 |     "    \n",
118 |     ")\n",
119 |     "\n",
120 |     "transformer.transform(\n",
121 |     "    data=batch_transform_input_path, \n",
122 |     "    data_type='S3Prefix',\n",
123 |     "    content_type='application/jsonlines', \n",
124 |     "    split_type='Line'\n",
125 |     ")\n",
126 |     "\n",
127 |     "transformer.wait()"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": null,
133 |    "id": "632223bc",
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": []
137 |   }
138 |  ],
139 |  "metadata": {
140 |   "kernelspec": {
141 |    "display_name": "Python 3 (ipykernel)",
142 |    "language": "python",
143 |    "name": "python3"
144 |   },
145 |   "language_info": {
146 |    "codemirror_mode": {
147 |     "name": "ipython",
148 |     "version": 3
149 |    },
150 |    "file_extension": ".py",
151 |    "mimetype": "text/x-python",
152 |    "name": "python",
153 |    "nbconvert_exporter": "python",
154 |    "pygments_lexer": "ipython3",
155 |    "version": "3.9.5"
156 |   }
157 |  },
158 |  "nbformat": 4,
159 |  "nbformat_minor": 5
160 | }
161 | 


--------------------------------------------------------------------------------
/lesson2/Lesson 2, Exercise 3 - Batch Transform.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "6e342763",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# UDACITY SageMaker Essentials: Batch Transform"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "035b3eee",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "In the last exercise, we asked you to reflect on the disadvantages of having to perform preprocessing on a local machine. In addition to those disadvantages, such as user error and hardware limitations, you may have also encountered another frustration in submitting a large amount of data to an endpoint. There may be network limitations on your end, there may be security/privacy concerns, and there might be an obvious performance advantage in parallelism that may be difficult to implement. "
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "id": "1fc794fe",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "Batch transform essentially addresses all of these issues. The primary use case for this is to make an inference on a dataset rather than making many individual calls to an endpoint. AWS SageMaker, similar to other tools that we encountered, does the heavy implementation lifting of reading data and splitting the burden among instances. All that's required of us is to give batch transform the correct directions to the data we want to submit. "
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "id": "ae54cdc1",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "Alas, this dataset is unfortunately not quite in the correct format to be properly digested by batch transform. Although this tool is capable of digesting lists of json objects, it is not capable of the processing operations that we would ideally perform on it. So, yet again, we must preprocess data. "
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "id": "887aa98f",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "## Exercise: Preprocess (again, again) and upload to S3"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "id": "692a223d",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "The cell below provides you two functions. The `split_sentences` preprocesses the reviews and you should be very familiar with function. Remember that the BlazingText expects a input with JSON format, the `cycle_data` formats the review to the following: {'source': 'THIS IS A SAMPLE SENTENCE'} and writes it into a file.\n",
 49 |     "\n",
 50 |     "Using the cell to complete the following tasks:\n",
 51 |     "* preprecessing reviews_Musical_Instruments_5.json \n",
 52 |     "* upload the file consisting of the data to s3"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 3,
 58 |    "id": "03829953",
 59 |    "metadata": {},
 60 |    "outputs": [
 61 |     {
 62 |      "name": "stdout",
 63 |      "output_type": "stream",
 64 |      "text": [
 65 |       "//\n"
 66 |      ]
 67 |     }
 68 |    ],
 69 |    "source": [
 70 |     "import boto3\n",
 71 |     "import json\n",
 72 |     "import os\n",
 73 |     "import zipfile\n",
 74 |     "\n",
 75 |     "# Todo: Input the s3 bucket\n",
 76 |     "s3_bucket = \"CHANGE THIS\"\n",
 77 |     "\n",
 78 |     "# Todo: Input the s3 prefix\n",
 79 |     "s3_prefix = \"CHANGE THIS\"\n",
 80 |     "\n",
 81 |     "# Todo: Input the the file to write the data to\n",
 82 |     "file_name = \"CHANGE THIS\"\n",
 83 |     "\n",
 84 |     "# Function below unzips the archive to the local directory. \n",
 85 |     "\n",
 86 |     "def unzip_data(input_data_path):\n",
 87 |     "    with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:\n",
 88 |     "        input_data_zip.extractall('.')\n",
 89 |     "\n",
 90 |     "\n",
 91 |     "def split_sentences(input_data):\n",
 92 |     "    split_sentences = []\n",
 93 |     "    for l in open(input_data, 'r'):\n",
 94 |     "        l_object = json.loads(l)\n",
 95 |     "        helpful_votes = float(l_object['helpful'][0])\n",
 96 |     "        total_votes = l_object['helpful'][1]\n",
 97 |     "        if total_votes != 0 and helpful_votes/total_votes != .5:  # Filter out same data as prior jobs. \n",
 98 |     "            reviewText = l_object['reviewText']\n",
 99 |     "            sentences = reviewText.split(\".\") \n",
100 |     "            for s in sentences:\n",
101 |     "                if s: # Make sure sentences isn't empty. Common w/ \"...\"\n",
102 |     "                    split_sentences.append(s)\n",
103 |     "    return split_sentences\n",
104 |     "\n",
105 |     "# Format the data as {'source': 'THIS IS A SAMPLE SENTENCE'}\n",
106 |     "# And write the data into a file\n",
107 |     "def cycle_data(fp, data):\n",
108 |     "    for d in data:\n",
109 |     "        fp.write(json.dumps({'source':d}) + '\\n')\n",
110 |     "\n",
111 |     "# Todo: write a function to upload the data to s3\n",
112 |     "def upload_file_to_s3(file_name, s3_prefix):\n",
113 |     "    return\n",
114 |     "\n",
115 |     "\n",
116 |     "# Unzips file.\n",
117 |     "unzip_data('reviews_Musical_Instruments_5.json.zip')\n",
118 |     "\n",
119 |     "# Todo: preprocess reviews_Musical_Instruments_5.json \n",
120 |     "sentences = split_sentences('')\n",
121 |     "\n",
122 |     "# Write data to a file and upload it to s3.\n",
123 |     "with open(file_name, 'w') as f:\n",
124 |     "    cycle_data(f, sentences)\n",
125 |     "\n",
126 |     "upload_file_to_s3(file_name, s3_prefix)\n",
127 |     "\n",
128 |     "# Get the s3 path for the data\n",
129 |     "batch_transform_input_path = \"s3://\" + \"/\".join([s3_bucket, s3_prefix, file_name])\n",
130 |     "\n",
131 |     "print(batch_transform_input_path)"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "markdown",
136 |    "id": "5724938c",
137 |    "metadata": {},
138 |    "source": [
139 |     "## Exercise: Use Batch Transform to perform an inference on the dataset"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "markdown",
144 |    "id": "b3b34d68",
145 |    "metadata": {},
146 |    "source": [
147 |     "We utilize batch transform through a transformer object. Similar to how we initialized a predictor object in the last exercise, complete the code below to initialize a transformer object and launch a transform job.   \n",
148 |     "\n",
149 |     "You will need the following:\n",
150 |     "\n",
151 |     "* Similar to last exercise, you will need to get a BlazingText image uri from AWS. The methodology you use to do so should be identical to the last exercise.  \n",
152 |     "* You will need to instantiate a \"model\" object.\n",
153 |     "* You will need to call the \"transformer\" method on the model object to create a transformer. We suggest using 1 instance of ml.m4.xlarge. If this isn't available in your region, feel free to use another instance, such as ml.m5.large\n",
154 |     "* You will need to use this transformer on the data we uploaded to s3. You will be able to do so by inserting an \"S3Prefix\" data_type and a \"application/jsonlines\" content_type, split by \"Line\".\n",
155 |     "\n",
156 |     "Consult the following documentation: https://sagemaker.readthedocs.io/en/stable/api/inference/transformer.html\n",
157 |     "\n",
158 |     "End-to-end, this process should take about 5 minutes on the whole dataset. While developing, consider uploading a subset of the data to s3, and evaluate on that instead. \n"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": null,
164 |    "id": "94c8c613",
165 |    "metadata": {},
166 |    "outputs": [],
167 |    "source": [
168 |     "from sagemaker import get_execution_role\n",
169 |     "from sagemaker.model import Model\n",
170 |     "from sagemaker import image_uris\n",
171 |     "\n",
172 |     "# Get the execution role\n",
173 |     "\n",
174 |     "role = \n",
175 |     "\n",
176 |     "# Get the image uri using the \"blazingtext\" algorithm in your region. \n",
177 |     "\n",
178 |     "image_uri = \n",
179 |     "\n",
180 |     "# Get the model artifact from S3\n",
181 |     "\n",
182 |     "model_data = \n",
183 |     "\n",
184 |     "# Get the s3 path for the batch transform data\n",
185 |     "\n",
186 |     "batch_transform_output_path = \n",
187 |     "\n",
188 |     "# Define a model object\n",
189 |     "\n",
190 |     "model = \n",
191 |     "\n",
192 |     "# Define a transformer object, using a single instance ml.m4.xlarge. Specify an output path to your s3 bucket. \n",
193 |     "\n",
194 |     "transformer = \n",
195 |     "\n",
196 |     "# Call the transform method. Set content_type='application/jsonlines', split_type='Line'\n",
197 |     "\n",
198 |     "transformer.transform(\n",
199 |     "    data=, \n",
200 |     "    data_type=,\n",
201 |     "    content_type=, \n",
202 |     "    split_type=\n",
203 |     ")\n",
204 |     "\n",
205 |     "transformer.wait()\n"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "markdown",
210 |    "id": "f34ab01a",
211 |    "metadata": {},
212 |    "source": [
213 |     "## Exercise: Sanity Check - Are Results the Same? "
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "id": "1a68319e",
219 |    "metadata": {},
220 |    "source": [
221 |     "The results of the inference should be printed to the s3 path specified in batch_transform_output_path. We have evaluated the same data on the same model, so if all is done correctly on both exercises, the inferences should be the same. Compare the first five or so inferences on the last exercise and on this exercise to confirm this. "
222 |    ]
223 |   }
224 |  ],
225 |  "metadata": {
226 |   "kernelspec": {
227 |    "display_name": "Python 3 (ipykernel)",
228 |    "language": "python",
229 |    "name": "python3"
230 |   },
231 |   "language_info": {
232 |    "codemirror_mode": {
233 |     "name": "ipython",
234 |     "version": 3
235 |    },
236 |    "file_extension": ".py",
237 |    "mimetype": "text/x-python",
238 |    "name": "python",
239 |    "nbconvert_exporter": "python",
240 |    "pygments_lexer": "ipython3",
241 |    "version": "3.9.5"
242 |   }
243 |  },
244 |  "nbformat": 4,
245 |  "nbformat_minor": 5
246 | }
247 | 


--------------------------------------------------------------------------------
/lesson2/Lesson 2, Exercise 4 - Processing Job Solution.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Preprocessing (for the final time!)"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import sklearn\n",
 17 |     "import boto3\n",
 18 |     "import jsonm"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "%%writefile HelloBlazePreprocess.py\n",
 28 |     "\n",
 29 |     "import json\n",
 30 |     "import zipfile\n",
 31 |     "\n",
 32 |     "# Function below unzips the archive to the local directory. \n",
 33 |     "\n",
 34 |     "def unzip_data(input_data_path):\n",
 35 |     "    with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:\n",
 36 |     "        input_data_zip.extractall('.')\n",
 37 |     "        return input_data_zip.namelist()[0]\n",
 38 |     "\n",
 39 |     "# Input data is a file with a single JSON object per line with the following format: \n",
 40 |     "# {\n",
 41 |     "#  \"reviewerID\": <string>,\n",
 42 |     "#  \"asin\": <string>,\n",
 43 |     "#  \"reviewerName\" <string>,\n",
 44 |     "#  \"helpful\": [\n",
 45 |     "#    <int>, (indicating number of \"helpful votes\")\n",
 46 |     "#    <int>  (indicating total number of votes)\n",
 47 |     "#  ],\n",
 48 |     "#  \"reviewText\": \"<string>\",\n",
 49 |     "#  \"overall\": <int>,\n",
 50 |     "#  \"summary\": \"<string>\",\n",
 51 |     "#  \"unixReviewTime\": <int>,\n",
 52 |     "#  \"reviewTime\": \"<string>\"\n",
 53 |     "# }\n",
 54 |     "# \n",
 55 |     "# We are specifically interested in the fields \"helpful\" and \"reviewText\"\n",
 56 |     "#\n",
 57 |     "\n",
 58 |     "def label_data(input_data):\n",
 59 |     "    labeled_data = []\n",
 60 |     "    HELPFUL_LABEL = \"__label__1\"\n",
 61 |     "    UNHELPFUL_LABEL = \"__label__2\"\n",
 62 |     "     \n",
 63 |     "    for l in open(input_data, 'r'):\n",
 64 |     "        l_object = json.loads(l)\n",
 65 |     "        helpful_votes = float(l_object['helpful'][0])\n",
 66 |     "        total_votes = l_object['helpful'][1]\n",
 67 |     "        reviewText = l_object['reviewText']\n",
 68 |     "        if total_votes != 0:\n",
 69 |     "            if helpful_votes / total_votes > .5:\n",
 70 |     "                labeled_data.append(\" \".join([HELPFUL_LABEL, reviewText]))\n",
 71 |     "            elif helpful_votes / total_votes < .5:\n",
 72 |     "                labeled_data.append(\" \".join([UNHELPFUL_LABEL, reviewText]))\n",
 73 |     "          \n",
 74 |     "    return labeled_data\n",
 75 |     "\n",
 76 |     "# Labeled data is a list of sentences, starting with the label defined in label_data. \n",
 77 |     "\n",
 78 |     "def split_sentences(labeled_data):\n",
 79 |     "    new_split_sentences = []\n",
 80 |     "    for d in labeled_data:\n",
 81 |     "        label = d.split()[0]        \n",
 82 |     "        sentences = \" \".join(d.split()[1:]).split(\".\") # Initially split to separate label, then separate sentences\n",
 83 |     "        for s in sentences:\n",
 84 |     "            if s: # Make sure sentences isn't empty. Common w/ \"...\"\n",
 85 |     "                new_split_sentences.append(\" \".join([label, s]))\n",
 86 |     "    return new_split_sentences\n",
 87 |     "\n",
 88 |     "def write_data(data, train_path, test_path, proportion):\n",
 89 |     "    border_index = int(proportion * len(data))\n",
 90 |     "    train_f = open(train_path, 'w')\n",
 91 |     "    test_f = open(test_path, 'w')\n",
 92 |     "    index = 0\n",
 93 |     "    for d in data:\n",
 94 |     "        if index < border_index:\n",
 95 |     "            train_f.write(d + '\\n')\n",
 96 |     "        else:\n",
 97 |     "            test_f.write(d + '\\n')\n",
 98 |     "        index += 1\n",
 99 |     "\n",
100 |     "if __name__ == \"__main__\":\n",
101 |     "    unzipped_path = unzip_data('/opt/ml/processing/input/Toys_and_Games_5.json.zip')\n",
102 |     "    labeled_data = label_data(unzipped_path)\n",
103 |     "    new_split_sentence_data = split_sentences(labeled_data)\n",
104 |     "    write_data(new_split_sentence_data, '/opt/ml/processing/output/train/hello_blaze_train_scikit', '/opt/ml/processing/output/test/hello_blaze_test_scikit', .9)"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "markdown",
109 |    "metadata": {},
110 |    "source": [
111 |     "## Exercise: Upload unprocessed data - Solution. "
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 6,
117 |    "metadata": {},
118 |    "outputs": [
119 |     {
120 |      "name": "stdout",
121 |      "output_type": "stream",
122 |      "text": [
123 |       "s3://udacity-sagemaker-solutiondata2021/l2e4/Toys_and_Games_5.json.zip\n"
124 |      ]
125 |     }
126 |    ],
127 |    "source": [
128 |     "import os \n",
129 |     "import boto3\n",
130 |     "\n",
131 |     "BUCKET = \"udacity-sagemaker-solutiondata2021\"\n",
132 |     "s3_prefix = \"l2e4\"\n",
133 |     "item_name = \"Toys_and_Games_5.json.zip\"\n",
134 |     "\n",
135 |     "def upload_file_to_s3(file_name):\n",
136 |     "    object_name = os.path.join(s3_prefix, file_name)\n",
137 |     "    s3_client = boto3.client('s3')\n",
138 |     "    try:\n",
139 |     "        response = s3_client.upload_file(file_name, BUCKET, object_name)\n",
140 |     "    except ClientError as e:\n",
141 |     "        logging.error(e)\n",
142 |     "        return False\n",
143 |     "\n",
144 |     "upload_file_to_s3(item_name)\n",
145 |     "\n",
146 |     "source_path = \"s3://\" + \"/\".join([BUCKET, s3_prefix, item_name])\n",
147 |     "print(source_path)"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "metadata": {},
153 |    "source": [
154 |     "## Exercise: Launch a processing job through the SciKitLearn interface - Solution"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 9,
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "name": "stdout",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "\n",
167 |       "Job Name:  sagemaker-scikit-learn-2021-08-12-05-31-06-758\n",
168 |       "Inputs:  [{'InputName': 'input-1', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://udacity-sagemaker-solutiondata2021/l2e4/Toys_and_Games_5.json.zip', 'LocalPath': '/opt/ml/processing/input', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-565094796913/sagemaker-scikit-learn-2021-08-12-05-31-06-758/input/code/HelloBlazePreprocess.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]\n",
169 |       "Outputs:  [{'OutputName': 'output-1', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-565094796913/sagemaker-scikit-learn-2021-08-12-05-31-06-758/output/output-1', 'LocalPath': '/opt/ml/processing/output/train', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'output-2', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-565094796913/sagemaker-scikit-learn-2021-08-12-05-31-06-758/output/output-2', 'LocalPath': '/opt/ml/processing/output/test', 'S3UploadMode': 'EndOfJob'}}]\n",
170 |       "...........................\n",
171 |       ".."
172 |      ]
173 |     }
174 |    ],
175 |    "source": [
176 |     "from sagemaker import get_execution_role\n",
177 |     "from sagemaker.sklearn.processing import SKLearnProcessor\n",
178 |     "from sagemaker.processing import ProcessingInput, ProcessingOutput\n",
179 |     "\n",
180 |     "role = get_execution_role()\n",
181 |     "\n",
182 |     "sklearn_processor = SKLearnProcessor(framework_version='0.20.0',\n",
183 |     "                                     role=role,\n",
184 |     "                                     instance_type='ml.m5.large',\n",
185 |     "                                     instance_count=1)\n",
186 |     "\n",
187 |     "sklearn_processor.run(code='HelloBlazePreprocess.py',\n",
188 |     "                        inputs=[ProcessingInput(\n",
189 |     "                        source=source_path,\n",
190 |     "                        destination='/opt/ml/processing/input')],\n",
191 |     "                      outputs=[ProcessingOutput(source='/opt/ml/processing/output/train'),\n",
192 |     "                               ProcessingOutput(source='/opt/ml/processing/output/test')]\n",
193 |     "                     )"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "markdown",
198 |    "metadata": {},
199 |    "source": [
200 |     "## Exercise: Sanity Check\n",
201 |     "\n",
202 |     "Use the method below to find the input and output path"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": 10,
208 |    "metadata": {},
209 |    "outputs": [
210 |     {
211 |      "data": {
212 |       "text/plain": [
213 |        "{'ProcessingInputs': [{'InputName': 'input-1',\n",
214 |        "   'AppManaged': False,\n",
215 |        "   'S3Input': {'S3Uri': 's3://udacity-sagemaker-solutiondata2021/l2e4/Toys_and_Games_5.json.zip',\n",
216 |        "    'LocalPath': '/opt/ml/processing/input',\n",
217 |        "    'S3DataType': 'S3Prefix',\n",
218 |        "    'S3InputMode': 'File',\n",
219 |        "    'S3DataDistributionType': 'FullyReplicated',\n",
220 |        "    'S3CompressionType': 'None'}},\n",
221 |        "  {'InputName': 'code',\n",
222 |        "   'AppManaged': False,\n",
223 |        "   'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-565094796913/sagemaker-scikit-learn-2021-08-12-05-31-06-758/input/code/HelloBlazePreprocess.py',\n",
224 |        "    'LocalPath': '/opt/ml/processing/input/code',\n",
225 |        "    'S3DataType': 'S3Prefix',\n",
226 |        "    'S3InputMode': 'File',\n",
227 |        "    'S3DataDistributionType': 'FullyReplicated',\n",
228 |        "    'S3CompressionType': 'None'}}],\n",
229 |        " 'ProcessingOutputConfig': {'Outputs': [{'OutputName': 'output-1',\n",
230 |        "    'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-565094796913/sagemaker-scikit-learn-2021-08-12-05-31-06-758/output/output-1',\n",
231 |        "     'LocalPath': '/opt/ml/processing/output/train',\n",
232 |        "     'S3UploadMode': 'EndOfJob'},\n",
233 |        "    'AppManaged': False},\n",
234 |        "   {'OutputName': 'output-2',\n",
235 |        "    'S3Output': {'S3Uri': 's3://sagemaker-us-west-2-565094796913/sagemaker-scikit-learn-2021-08-12-05-31-06-758/output/output-2',\n",
236 |        "     'LocalPath': '/opt/ml/processing/output/test',\n",
237 |        "     'S3UploadMode': 'EndOfJob'},\n",
238 |        "    'AppManaged': False}]},\n",
239 |        " 'ProcessingJobName': 'sagemaker-scikit-learn-2021-08-12-05-31-06-758',\n",
240 |        " 'ProcessingResources': {'ClusterConfig': {'InstanceCount': 1,\n",
241 |        "   'InstanceType': 'ml.m5.large',\n",
242 |        "   'VolumeSizeInGB': 30}},\n",
243 |        " 'StoppingCondition': {'MaxRuntimeInSeconds': 86400},\n",
244 |        " 'AppSpecification': {'ImageUri': '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3',\n",
245 |        "  'ContainerEntrypoint': ['python3',\n",
246 |        "   '/opt/ml/processing/input/code/HelloBlazePreprocess.py']},\n",
247 |        " 'RoleArn': 'arn:aws:iam::565094796913:role/execution_role',\n",
248 |        " 'ProcessingJobArn': 'arn:aws:sagemaker:us-west-2:565094796913:processing-job/sagemaker-scikit-learn-2021-08-12-05-31-06-758',\n",
249 |        " 'ProcessingJobStatus': 'Completed',\n",
250 |        " 'ProcessingEndTime': datetime.datetime(2021, 8, 11, 22, 35, 32, 897000, tzinfo=tzlocal()),\n",
251 |        " 'ProcessingStartTime': datetime.datetime(2021, 8, 11, 22, 35, 13, 834000, tzinfo=tzlocal()),\n",
252 |        " 'LastModifiedTime': datetime.datetime(2021, 8, 11, 22, 35, 33, 238000, tzinfo=tzlocal()),\n",
253 |        " 'CreationTime': datetime.datetime(2021, 8, 11, 22, 31, 9, 15000, tzinfo=tzlocal()),\n",
254 |        " 'ResponseMetadata': {'RequestId': '8d7258b4-df16-4a15-9d28-5684ce0a7f23',\n",
255 |        "  'HTTPStatusCode': 200,\n",
256 |        "  'HTTPHeaders': {'x-amzn-requestid': '8d7258b4-df16-4a15-9d28-5684ce0a7f23',\n",
257 |        "   'content-type': 'application/x-amz-json-1.1',\n",
258 |        "   'content-length': '1968',\n",
259 |        "   'date': 'Thu, 12 Aug 2021 05:40:02 GMT'},\n",
260 |        "  'RetryAttempts': 0}}"
261 |       ]
262 |      },
263 |      "execution_count": 10,
264 |      "metadata": {},
265 |      "output_type": "execute_result"
266 |     }
267 |    ],
268 |    "source": [
269 |     "sklearn_processor.jobs[-1].describe()"
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": null,
275 |    "metadata": {},
276 |    "outputs": [],
277 |    "source": []
278 |   }
279 |  ],
280 |  "metadata": {
281 |   "kernelspec": {
282 |    "display_name": "Python 3",
283 |    "language": "python",
284 |    "name": "python3"
285 |   },
286 |   "language_info": {
287 |    "codemirror_mode": {
288 |     "name": "ipython",
289 |     "version": 3
290 |    },
291 |    "file_extension": ".py",
292 |    "mimetype": "text/x-python",
293 |    "name": "python",
294 |    "nbconvert_exporter": "python",
295 |    "pygments_lexer": "ipython3",
296 |    "version": "3.6.3"
297 |   }
298 |  },
299 |  "nbformat": 4,
300 |  "nbformat_minor": 5
301 | }
302 | 


--------------------------------------------------------------------------------
/lesson2/Lesson 2, Exercise 4 - Processing Job.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# UDACITY SageMaker Essentials: Processing Job Exercise"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "In prior exercises, we've been running and rerunning the same preprocessing job over and over again. For cleanly formatted data, it's possible to do some preprocessing utilizing batch transform. However, if slightly more sophisticated processing is needed, we would want to do so through a processing job. Finally, after beating around the bush for a few exercises, we're finally going offload the preprocessing step of our data!"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 4,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "import sklearn\n",
 24 |     "import boto3\n",
 25 |     "import json"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "## Preprocessing (for the final time!)"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {},
 38 |    "source": [
 39 |     "The cell below should be very familiar to you by now. This cell will write the preprocessing code to a file called \"HelloBlazePreprocess.py\". This code will be utilized by AWS via a SciKitLearn processing interface to process our data. "
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 5,
 45 |    "metadata": {},
 46 |    "outputs": [
 47 |     {
 48 |      "name": "stdout",
 49 |      "output_type": "stream",
 50 |      "text": [
 51 |       "Overwriting HelloBlazePreprocess.py\n"
 52 |      ]
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "%%writefile HelloBlazePreprocess.py\n",
 57 |     "\n",
 58 |     "import json\n",
 59 |     "import zipfile\n",
 60 |     "\n",
 61 |     "# Function below unzips the archive to the local directory. \n",
 62 |     "\n",
 63 |     "def unzip_data(input_data_path):\n",
 64 |     "    with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:\n",
 65 |     "        input_data_zip.extractall('.')\n",
 66 |     "        return input_data_zip.namelist()[0]\n",
 67 |     "\n",
 68 |     "# Input data is a file with a single JSON object per line with the following format: \n",
 69 |     "# {\n",
 70 |     "#  \"reviewerID\": <string>,\n",
 71 |     "#  \"asin\": <string>,\n",
 72 |     "#  \"reviewerName\" <string>,\n",
 73 |     "#  \"helpful\": [\n",
 74 |     "#    <int>, (indicating number of \"helpful votes\")\n",
 75 |     "#    <int>  (indicating total number of votes)\n",
 76 |     "#  ],\n",
 77 |     "#  \"reviewText\": \"<string>\",\n",
 78 |     "#  \"overall\": <int>,\n",
 79 |     "#  \"summary\": \"<string>\",\n",
 80 |     "#  \"unixReviewTime\": <int>,\n",
 81 |     "#  \"reviewTime\": \"<string>\"\n",
 82 |     "# }\n",
 83 |     "# \n",
 84 |     "# We are specifically interested in the fields \"helpful\" and \"reviewText\"\n",
 85 |     "#\n",
 86 |     "\n",
 87 |     "def label_data(input_data):\n",
 88 |     "    labeled_data = []\n",
 89 |     "    HELPFUL_LABEL = \"__label__1\"\n",
 90 |     "    UNHELPFUL_LABEL = \"__label__2\"\n",
 91 |     "     \n",
 92 |     "    for l in open(input_data, 'r'):\n",
 93 |     "        l_object = json.loads(l)\n",
 94 |     "        helpful_votes = float(l_object['helpful'][0])\n",
 95 |     "        total_votes = l_object['helpful'][1]\n",
 96 |     "        reviewText = l_object['reviewText']\n",
 97 |     "        if total_votes != 0:\n",
 98 |     "            if helpful_votes / total_votes > .5:\n",
 99 |     "                labeled_data.append(\" \".join([HELPFUL_LABEL, reviewText]))\n",
100 |     "            elif helpful_votes / total_votes < .5:\n",
101 |     "                labeled_data.append(\" \".join([UNHELPFUL_LABEL, reviewText]))\n",
102 |     "          \n",
103 |     "    return labeled_data\n",
104 |     "\n",
105 |     "\n",
106 |     "# Labeled data is a list of sentences, starting with the label defined in label_data. \n",
107 |     "\n",
108 |     "def split_sentences(labeled_data):\n",
109 |     "    new_split_sentences = []\n",
110 |     "    for d in labeled_data:\n",
111 |     "        label = d.split()[0]        \n",
112 |     "        sentences = \" \".join(d.split()[1:]).split(\".\") # Initially split to separate label, then separate sentences\n",
113 |     "        for s in sentences:\n",
114 |     "            if s: # Make sure sentences isn't empty. Common w/ \"...\"\n",
115 |     "                new_split_sentences.append(\" \".join([label, s]))\n",
116 |     "    return new_split_sentences\n",
117 |     "\n",
118 |     "def write_data(data, train_path, test_path, proportion):\n",
119 |     "    border_index = int(proportion * len(data))\n",
120 |     "    train_f = open(train_path, 'w')\n",
121 |     "    test_f = open(test_path, 'w')\n",
122 |     "    index = 0\n",
123 |     "    for d in data:\n",
124 |     "        if index < border_index:\n",
125 |     "            train_f.write(d + '\\n')\n",
126 |     "        else:\n",
127 |     "            test_f.write(d + '\\n')\n",
128 |     "        index += 1\n",
129 |     "\n",
130 |     "if __name__ == \"__main__\":\n",
131 |     "    unzipped_path = unzip_data('/opt/ml/processing/input/Toys_and_Games_5.json.zip')\n",
132 |     "    labeled_data = label_data(unzipped_path)\n",
133 |     "    new_split_sentence_data = split_sentences(labeled_data)\n",
134 |     "    write_data(new_split_sentence_data, '/opt/ml/processing/output/train/hello_blaze_train_scikit', '/opt/ml/processing/output/test/hello_blaze_test_scikit', .9)"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {},
140 |    "source": [
141 |     "## Exercise: Upload unprocessed data to s3"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "markdown",
146 |    "metadata": {},
147 |    "source": [
148 |     "No more local preprocessing! Upload the **raw zipped** Toys_and_Games dataset to s3. "
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 5,
154 |    "metadata": {},
155 |    "outputs": [
156 |     {
157 |      "name": "stdout",
158 |      "output_type": "stream",
159 |      "text": [
160 |       "//\n"
161 |      ]
162 |     }
163 |    ],
164 |    "source": [
165 |     "# Todo\n",
166 |     "s3_bucket = \"\"\n",
167 |     "s3_prefix = \"\"\n",
168 |     "file_name = \"\"\n",
169 |     "\n",
170 |     "def upload_file_to_s3():\n",
171 |     "    return \n",
172 |     "\n",
173 |     "source_path = \"/\".join([s3_bucket, s3_prefix, file_name])\n",
174 |     "print(source_path)"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "markdown",
179 |    "metadata": {},
180 |    "source": [
181 |     "## Exercise: Launch a processing job through the SciKitLearn interface. "
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "markdown",
186 |    "metadata": {},
187 |    "source": [
188 |     "We'll be utilizing the SKLearnProcessor object from SageMaker to launch a processing job. Here is some information you'll need to complete the exercise: \n",
189 |     "\n",
190 |     "* You will need to use the SKLearnProcessor object. \n",
191 |     "* You will need 1 instance of ml.m5.large. You can specify this programatically. \n",
192 |     "* You will need an execution role.  \n",
193 |     "\n",
194 |     "* You will need to call the \"run\" method on the SKLearnProcessor object.\n",
195 |     "> * You will need to specify the preprocessing code\n",
196 |     "> * the S3 path of the unprocessed data\n",
197 |     "> * a 'local' directory path for the input to be downloaded into\n",
198 |     "> * 'local' directory paths for where you expect the output to be.\n",
199 |     "\n",
200 |     "you will need to make use of the ProcessingInput and ProcessingOutput features. Review the preprocessing code for where the output is going to go, and where it expects the input to come from.  \n",
201 |     "* It is highly recommended that you consult the documentation to help you implement this. https://docs.aws.amazon.com/sagemaker/latest/dg/processing-job.html\n",
202 |     "\n",
203 |     "Remember that, conceptually, you are creating a server that our code will be run from. This server will download data, execute code that we specify, and upload data to s3. \n",
204 |     "\n",
205 |     "If done successfully, you should see a processing job launch in the SageMaker console. To see it, go to the \"processing\" drop-down menu on the left-hand side and select \"processing jobs.\" Wait until the job is finished. "
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": null,
211 |    "metadata": {},
212 |    "outputs": [],
213 |    "source": [
214 |     "from sagemaker import get_execution_role\n",
215 |     "from sagemaker.sklearn.processing import SKLearnProcessor\n",
216 |     "from sagemaker.processing import ProcessingInput, ProcessingOutput\n",
217 |     "\n",
218 |     "# Get role\n",
219 |     "\n",
220 |     "role = \n",
221 |     "\n",
222 |     "# Create an SKLearnProcessor. Set framework_version='0.20.0'.\n",
223 |     "\n",
224 |     "sklearn_processor = \n",
225 |     "\n",
226 |     "# Start a run job. You will pass in as parameters the local location of the processing code, \n",
227 |     "# a processing input object, two processing output objects. The paths that you pass in here are directories, \n",
228 |     "# not the files themselves. Check the preprocessing code for a hint about what these directories should be. \n",
229 |     "\n",
230 |     "sklearn_processor.run(code= , # preprocessing code\n",
231 |     "                      inputs=[ProcessingInput(\n",
232 |     "                          source = , # the S3 path of the unprocessed data\n",
233 |     "                          destination= , # a 'local' directory path for the input to be downloaded into\n",
234 |     "                      )],\n",
235 |     "                      outputs=[ProcessingOutput(source= ),# a 'local' directory path for where you expect the output for train data to be\n",
236 |     "                               ProcessingOutput(source= )]) # a 'local' directory path for where you expect the output for test data to be "
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "markdown",
241 |    "metadata": {},
242 |    "source": [
243 |     "## Exercise: Sanity Check & Reflection. "
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "markdown",
248 |    "metadata": {},
249 |    "source": [
250 |     "If all goes well, processed data should have been uploaded to S3. If you're having trouble locating the uri, check the `jobs` attribute of the SKLearnProcessor object. "
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": null,
256 |    "metadata": {},
257 |    "outputs": [],
258 |    "source": [
259 |     "sklearn_processor.jobs[-1].describe()"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "markdown",
264 |    "metadata": {},
265 |    "source": [
266 |     "Download these datasets and compare them to the datasets that we locally processed. The exact sentences in the training & the test sets may vary depending on your implementation, but the same number of sentences should be present in each job, and there should be one label and one sentence per line.  \n",
267 |     "\n",
268 |     "\n",
269 |     "Once you've confirmed that the data was accurately processed, take a step back and reflect on what you've done. You've created the individual components necessary to process data, train data, and perform inference on both individual instances of data and large datasets. What are we missing if we wanted to provide a live, continuous service? Keep this question in mind as we move on to designing workflows. "
270 |    ]
271 |   }
272 |  ],
273 |  "metadata": {
274 |   "kernelspec": {
275 |    "display_name": "Python 3",
276 |    "language": "python",
277 |    "name": "python3"
278 |   },
279 |   "language_info": {
280 |    "codemirror_mode": {
281 |     "name": "ipython",
282 |     "version": 3
283 |    },
284 |    "file_extension": ".py",
285 |    "mimetype": "text/x-python",
286 |    "name": "python",
287 |    "nbconvert_exporter": "python",
288 |    "pygments_lexer": "ipython3",
289 |    "version": "3.6.3"
290 |   }
291 |  },
292 |  "nbformat": 4,
293 |  "nbformat_minor": 5
294 | }
295 | 


--------------------------------------------------------------------------------
/lesson2/Toys_and_Games_5.json.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/udacity/udacity-nd009t-C2-Developing-ML-Workflow/d1928db95b27ca4dec5b73f460a357d9cdcea9d7/lesson2/Toys_and_Games_5.json.zip


--------------------------------------------------------------------------------
/lesson2/demo/Lesson 2, Lecture 1 Demo - Training Job.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "7a44fa08",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# UDACITY SageMaker Essentials: Training Job Demo"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": null,
 14 |    "id": "dfc247b8",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import sagemaker\n",
 19 |     "from sagemaker import get_execution_role\n",
 20 |     "from sagemaker import image_uris\n",
 21 |     "from sagemaker.predictor import csv_serializer\n",
 22 |     "\n",
 23 |     "session = sagemaker.Session()\n",
 24 |     "\n",
 25 |     "role = get_execution_role()\n",
 26 |     "\n",
 27 |     "# If you're following along, you'll need to upload these datasets to your own bucket in S3. \n",
 28 |     "\n",
 29 |     "test_location = 's3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/test.csv'\n",
 30 |     "val_location = 's3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/validation.csv'\n",
 31 |     "train_location = 's3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/train.csv'\n",
 32 |     "\n",
 33 |     "# We use this prefix to help us determine where the output will go. \n",
 34 |     "\n",
 35 |     "prefix = 's3://sagemaker-us-west-2-565094796913/'\n",
 36 |     "\n",
 37 |     "# We need to get the location of the container. \n",
 38 |     "\n",
 39 |     "container = image_uris.retrieve('xgboost', session.boto_region_name, version='latest')\n",
 40 |     "\n",
 41 |     "# Now that we know which container to use, we can construct the estimator object.\n",
 42 |     "xgb = sagemaker.estimator.Estimator(container, # The image name of the training container\n",
 43 |     "                                    role,      # The IAM role to use (our current role in this case)\n",
 44 |     "                                    instance_count=1, # The number of instances to use for training\n",
 45 |     "                                    instance_type='ml.m4.xlarge', # The type of instance to use for training\n",
 46 |     "                                    output_path='s3://{}/{}/output'.format(session.default_bucket(), prefix),\n",
 47 |     "                                                                        # Where to save the output (the model artifacts)\n",
 48 |     "                                    sagemaker_session=session) # The current SageMaker session\n",
 49 |     "             \n",
 50 |     "# These hyperparameters are beyond the scope of this course, but you can research the algoirthm here: \n",
 51 |     "# https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html    \n",
 52 |     "    \n",
 53 |     "xgb.set_hyperparameters(max_depth=5,\n",
 54 |     "                        eta=0.2,\n",
 55 |     "                        gamma=4,\n",
 56 |     "                        min_child_weight=6,\n",
 57 |     "                        subsample=0.8,\n",
 58 |     "                        objective='reg:linear',\n",
 59 |     "                        early_stopping_rounds=10,\n",
 60 |     "                        num_round=200)\n",
 61 |     "                        \n",
 62 |     "s3_input_train = sagemaker.inputs.TrainingInput(s3_data=train_location, content_type='csv')\n",
 63 |     "s3_input_validation = sagemaker.inputs.TrainingInput(s3_data=val_location, content_type='csv')\n",
 64 |     "\n",
 65 |     "# The fit method launches the training job. \n",
 66 |     "\n",
 67 |     "xgb.fit({'train': s3_input_train, 'validation': s3_input_validation})\n"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": null,
 73 |    "id": "304639d6",
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": []
 77 |   }
 78 |  ],
 79 |  "metadata": {
 80 |   "kernelspec": {
 81 |    "display_name": "Python 3 (ipykernel)",
 82 |    "language": "python",
 83 |    "name": "python3"
 84 |   },
 85 |   "language_info": {
 86 |    "codemirror_mode": {
 87 |     "name": "ipython",
 88 |     "version": 3
 89 |    },
 90 |    "file_extension": ".py",
 91 |    "mimetype": "text/x-python",
 92 |    "name": "python",
 93 |    "nbconvert_exporter": "python",
 94 |    "pygments_lexer": "ipython3",
 95 |    "version": "3.9.5"
 96 |   }
 97 |  },
 98 |  "nbformat": 4,
 99 |  "nbformat_minor": 5
100 | }
101 | 


--------------------------------------------------------------------------------
/lesson2/demo/Lesson 2, Lecture 2 Demo - Endpoint.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "f3b2110b",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "# UDACITY: Endpoint Demo"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": null,
14 |    "id": "57950aab",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "from sagemaker import get_execution_role\n",
19 |     "from sagemaker.model import Model\n",
20 |     "from sagemaker import image_uris\n",
21 |     "\n",
22 |     "role = get_execution_role()\n",
23 |     "\n",
24 |     "# You'll need to confirm that this region is located in the same place as the S3 uri of your training job.\n",
25 |     "# (Check the upper right-hand side of the console.)\n",
26 |     "\n",
27 |     "image_uri = image_uris.retrieve(framework='xgboost',region='us-west-2', version='latest')\n",
28 |     "\n",
29 |     "# You'll need to replace this model data with the output S3 uri of your training job. \n",
30 |     "\n",
31 |     "model_data = \"s3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/output/xgboost-2021-08-31-23-02-30-970/output/model.tar.gz\"\n",
32 |     "\n",
33 |     "model = Model(image_uri=image_uri, model_data=model_data, role=role)\n",
34 |     "\n",
35 |     "predictor = model.deploy(initial_instance_count=1, instance_type=\"ml.m5.large\")\n"
36 |    ]
37 |   }
38 |  ],
39 |  "metadata": {
40 |   "kernelspec": {
41 |    "display_name": "Python 3 (ipykernel)",
42 |    "language": "python",
43 |    "name": "python3"
44 |   },
45 |   "language_info": {
46 |    "codemirror_mode": {
47 |     "name": "ipython",
48 |     "version": 3
49 |    },
50 |    "file_extension": ".py",
51 |    "mimetype": "text/x-python",
52 |    "name": "python",
53 |    "nbconvert_exporter": "python",
54 |    "pygments_lexer": "ipython3",
55 |    "version": "3.8.0"
56 |   }
57 |  },
58 |  "nbformat": 4,
59 |  "nbformat_minor": 5
60 | }
61 | 


--------------------------------------------------------------------------------
/lesson2/demo/Lesson 2, Lecture 3 Demo - Batch Transform.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "fe050337",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "# UDACITY: Batch Transform Demo"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": null,
14 |    "id": "4112f374",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "from sagemaker import get_execution_role\n",
19 |     "from sagemaker.model import Model\n",
20 |     "from sagemaker import image_uris\n",
21 |     "\n",
22 |     "role = get_execution_role()\n",
23 |     "\n",
24 |     "# You'll need to confirm that this region is located in the same place as the S3 uri of your training job.\n",
25 |     "# (Check the upper right-hand side of the console.)\n",
26 |     "\n",
27 |     "image_uri = image_uris.retrieve(framework='xgboost',region='us-west-2', version='latest')\n",
28 |     "\n",
29 |     "# You'll need to replace this with the output uri of a training job. \n",
30 |     "\n",
31 |     "model_data = \"s3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/output/xgboost-2021-08-31-23-02-30-970/output/model.tar.gz\"\n",
32 |     "\n",
33 |     "# You'll need to replace this with the desired output of your batch transform job. \n",
34 |     "\n",
35 |     "batch_transform_output_path = \"s3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/test_batch_output-2\"\n",
36 |     "\n",
37 |     "model = Model(image_uri=image_uri, model_data=model_data, role=role)\n",
38 |     "\n",
39 |     "transformer = model.transformer(\n",
40 |     "    instance_count=1,\n",
41 |     "    instance_type='ml.m4.xlarge',\n",
42 |     "    output_path=batch_transform_output_path\n",
43 |     ")\n",
44 |     "\n",
45 |     "# You'll need to replace the output data with your S3 uri of your dataset in S3. \n",
46 |     "\n",
47 |     "transformer.transform(\n",
48 |     "    data=\"s3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/test.csv\",\n",
49 |     "    data_type='S3Prefix',\n",
50 |     "    content_type='text/csv',\n",
51 |     "    split_type='Line'\n",
52 |     ")\n"
53 |    ]
54 |   }
55 |  ],
56 |  "metadata": {
57 |   "kernelspec": {
58 |    "display_name": "Python 3 (ipykernel)",
59 |    "language": "python",
60 |    "name": "python3"
61 |   },
62 |   "language_info": {
63 |    "codemirror_mode": {
64 |     "name": "ipython",
65 |     "version": 3
66 |    },
67 |    "file_extension": ".py",
68 |    "mimetype": "text/x-python",
69 |    "name": "python",
70 |    "nbconvert_exporter": "python",
71 |    "pygments_lexer": "ipython3",
72 |    "version": "3.8.0"
73 |   }
74 |  },
75 |  "nbformat": 4,
76 |  "nbformat_minor": 5
77 | }
78 | 


--------------------------------------------------------------------------------
/lesson2/demo/Lesson 2, Lecture 4 Demo - Processing Job.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "c8f391ef",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "# UDACITY: Processing Job Demo"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": null,
14 |    "id": "ff2295f7",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "%%writefile xgboost_process_script.py\n",
19 |     "\n",
20 |     "# Execute this cell first to write this script to your local directory. \n",
21 |     "\n",
22 |     "import pandas\n",
23 |     "\n",
24 |     "# This method filters out the column at index 1, which is the crime data. \n",
25 |     "\n",
26 |     "def filter_crime_data(input_data_path):\n",
27 |     "    with open(input_data_path, 'r') as f:\n",
28 |     "        df = pandas.read_csv(f)\n",
29 |     "    df.drop(df.columns[[1]], axis=1)\n",
30 |     "    return df\n",
31 |     "\n",
32 |     "# The main method takes in data at '/opt/ml/processing/input/data/train.csv' \n",
33 |     "# and outputs it as a csv to '/opt/ml/processing/output/data_processed'\n",
34 |     "\n",
35 |     "if __name__ == \"__main__\":\n",
36 |     "    filtered_data = filter_crime_data('/opt/ml/processing/input/data/train.csv')\n",
37 |     "    filtered_data.to_csv('/opt/ml/processing/output/data_processed')\n",
38 |     "\n"
39 |    ]
40 |   },
41 |   {
42 |    "cell_type": "code",
43 |    "execution_count": null,
44 |    "id": "08e4c2ca",
45 |    "metadata": {},
46 |    "outputs": [],
47 |    "source": [
48 |     "import boto3\n",
49 |     "\n",
50 |     "from sagemaker import get_execution_role\n",
51 |     "from sagemaker.sklearn.processing import SKLearnProcessor\n",
52 |     "from sagemaker.processing import ProcessingInput, ProcessingOutput\n",
53 |     "\n",
54 |     "role = get_execution_role()\n",
55 |     "\n",
56 |     "sklearn_processor = SKLearnProcessor(framework_version='0.20.0',\n",
57 |     "                                     role=role,\n",
58 |     "                                     instance_type='ml.m5.large',\n",
59 |     "                                     instance_count=1)\n",
60 |     "\n",
61 |     "\n",
62 |     "# You will need to replace the 'source' code with the location of the dataset you want to process. \n",
63 |     "\n",
64 |     "sklearn_processor.run(code='xgboost_process_script.py',\n",
65 |     "                        inputs=[ProcessingInput(\n",
66 |     "                        source='s3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/train.csv',\n",
67 |     "                        destination='/opt/ml/processing/input/data/')],\n",
68 |     "                      outputs=[ProcessingOutput(source='/opt/ml/processing/output')]\n",
69 |     "                     )\n"
70 |    ]
71 |   }
72 |  ],
73 |  "metadata": {
74 |   "kernelspec": {
75 |    "display_name": "Python 3 (ipykernel)",
76 |    "language": "python",
77 |    "name": "python3"
78 |   },
79 |   "language_info": {
80 |    "codemirror_mode": {
81 |     "name": "ipython",
82 |     "version": 3
83 |    },
84 |    "file_extension": ".py",
85 |    "mimetype": "text/x-python",
86 |    "name": "python",
87 |    "nbconvert_exporter": "python",
88 |    "pygments_lexer": "ipython3",
89 |    "version": "3.8.0"
90 |   }
91 |  },
92 |  "nbformat": 4,
93 |  "nbformat_minor": 5
94 | }
95 | 


--------------------------------------------------------------------------------
/lesson2/demo/demo_cli_script.sh:
--------------------------------------------------------------------------------
1 | aws sagemaker create-training-job --training-job-name  xgboost-cli-demo --role-arn arn:aws:iam::565094796913:role/execution_role --algorithm-specification TrainingImage=433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1,TrainingInputMode=File --input-data-config file://input_data_config.json  --output-data-config S3OutputPath=s3://sagemaker-us-west-2-565094796913/cli-output --resource-config InstanceType='ml.m4.xlarge',InstanceCount=1,VolumeSizeInGB=10  --stopping-condition MaxRuntimeInSeconds=60
2 | 


--------------------------------------------------------------------------------
/lesson2/demo/input_data_config.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "ChannelName": "train",
 4 |     "DataSource": {
 5 |       "S3DataSource": {
 6 |         "S3DataType": "S3Prefix",
 7 |         "S3Uri": "s3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/train.csv"
 8 |       }
 9 |     },
10 |     "ContentType": "csv",
11 |     "InputMode": "File"
12 |     
13 |   }, 
14 |   { 
15 |     "ChannelName": "validation",
16 |     "DataSource": {
17 |       "S3DataSource": {
18 |         "S3DataType": "S3Prefix",
19 |         "S3Uri": "s3://sagemaker-us-west-2-565094796913/boston-xgboost-HL/validation.csv"
20 |       }
21 |     },
22 |     "ContentType": "csv",
23 |     "InputMode": "File"
24 |     
25 |   }
26 | ]
27 | 


--------------------------------------------------------------------------------
/lesson2/demo/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lesson2/demo_boston_data/test.csv:
--------------------------------------------------------------------------------
  1 | 7.83932,0.0,18.1,0.0,0.655,6.209,65.4,2.9634,24.0,666.0,20.2,396.9,13.22
  2 | 0.46296,0.0,6.2,0.0,0.504,7.412,76.9,3.6715,8.0,307.0,17.4,376.14,5.25
  3 | 0.5405,20.0,3.97,0.0,0.575,7.47,52.6,2.872,5.0,264.0,13.0,390.3,3.16
  4 | 0.15445,25.0,5.13,0.0,0.453,6.145,29.2,7.8148,8.0,284.0,19.7,390.68,6.86
  5 | 0.03738,0.0,5.19,0.0,0.515,6.31,38.5,6.4584,5.0,224.0,20.2,389.4,6.75
  6 | 0.08265,0.0,13.92,0.0,0.437,6.127,18.4,5.5027,4.0,289.0,16.0,396.9,8.58
  7 | 0.11069,0.0,13.89,1.0,0.55,5.951,93.8,2.8893,5.0,276.0,16.4,396.9,17.92
  8 | 0.22438,0.0,9.69,0.0,0.585,6.027,79.7,2.4982,6.0,391.0,19.2,396.9,14.33
  9 | 0.08707,0.0,12.83,0.0,0.437,6.14,45.8,4.0905,5.0,398.0,18.7,386.96,10.27
 10 | 8.71675,0.0,18.1,0.0,0.693,6.471,98.8,1.7257,24.0,666.0,20.2,391.98,17.12
 11 | 13.5222,0.0,18.1,0.0,0.631,3.863,100.0,1.5106,24.0,666.0,20.2,131.42,13.33
 12 | 0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48
 13 | 0.01709,90.0,2.02,0.0,0.41,6.728,36.1,12.1265,5.0,187.0,17.0,384.46,4.5
 14 | 0.17446,0.0,10.59,1.0,0.489,5.96,92.1,3.8771,4.0,277.0,18.6,393.25,17.27
 15 | 0.08829,12.5,7.87,0.0,0.524,6.012,66.6,5.5605,5.0,311.0,15.2,395.6,12.43
 16 | 0.07165,0.0,25.65,0.0,0.581,6.004,84.1,2.1974,2.0,188.0,19.1,377.67,14.27
 17 | 0.03932,0.0,3.41,0.0,0.489,6.405,73.9,3.0921,2.0,270.0,17.8,393.55,8.2
 18 | 1.15172,0.0,8.14,0.0,0.538,5.701,95.0,3.7872,4.0,307.0,21.0,358.77,18.35
 19 | 0.12083,0.0,2.89,0.0,0.445,8.069,76.0,3.4952,2.0,276.0,18.0,396.9,4.21
 20 | 0.52014,20.0,3.97,0.0,0.647,8.398,91.5,2.2885,5.0,264.0,13.0,386.86,5.91
 21 | 12.8023,0.0,18.1,0.0,0.74,5.854,96.6,1.8956,24.0,666.0,20.2,240.52,23.79
 22 | 0.11432,0.0,8.56,0.0,0.52,6.781,71.3,2.8561,5.0,384.0,20.9,395.58,7.67
 23 | 0.21038,20.0,3.33,0.0,0.4429,6.812,32.2,4.1007,5.0,216.0,14.9,396.9,4.85
 24 | 0.32264,0.0,21.89,0.0,0.624,5.942,93.5,1.9669,4.0,437.0,21.2,378.25,16.9
 25 | 0.09065,20.0,6.96,1.0,0.464,5.92,61.5,3.9175,3.0,223.0,18.6,391.34,13.65
 26 | 6.65492,0.0,18.1,0.0,0.713,6.317,83.0,2.7344,24.0,666.0,20.2,396.9,13.99
 27 | 0.12269,0.0,6.91,0.0,0.448,6.069,40.0,5.7209,3.0,233.0,17.9,389.39,9.55
 28 | 7.36711,0.0,18.1,0.0,0.679,6.193,78.1,1.9356,24.0,666.0,20.2,96.73,21.52
 29 | 15.1772,0.0,18.1,0.0,0.74,6.152,100.0,1.9142,24.0,666.0,20.2,9.32,26.45
 30 | 51.1358,0.0,18.1,0.0,0.597,5.757,100.0,1.413,24.0,666.0,20.2,2.6,10.11
 31 | 25.9406,0.0,18.1,0.0,0.679,5.304,89.1,1.6475,24.0,666.0,20.2,127.36,26.64
 32 | 8.64476,0.0,18.1,0.0,0.693,6.193,92.6,1.7912,24.0,666.0,20.2,396.9,15.17
 33 | 0.33045,0.0,6.2,0.0,0.507,6.086,61.5,3.6519,8.0,307.0,17.4,376.75,10.88
 34 | 11.9511,0.0,18.1,0.0,0.659,5.608,100.0,1.2852,24.0,666.0,20.2,332.09,12.13
 35 | 0.2896,0.0,9.69,0.0,0.585,5.39,72.9,2.7986,6.0,391.0,19.2,396.9,21.14
 36 | 0.12744,0.0,6.91,0.0,0.448,6.77,2.9,5.7209,3.0,233.0,17.9,385.41,4.84
 37 | 0.06888,0.0,2.46,0.0,0.488,6.144,62.2,2.5979,3.0,193.0,17.8,396.9,9.45
 38 | 12.0482,0.0,18.1,0.0,0.614,5.648,87.6,1.9512,24.0,666.0,20.2,291.55,14.1
 39 | 0.09512,0.0,12.83,0.0,0.437,6.286,45.0,4.5026,5.0,398.0,18.7,383.23,8.94
 40 | 0.26938,0.0,9.9,0.0,0.544,6.266,82.8,3.2628,4.0,304.0,18.4,393.39,7.9
 41 | 0.04294,28.0,15.04,0.0,0.464,6.249,77.3,3.615,4.0,270.0,18.2,396.9,10.59
 42 | 0.25387,0.0,6.91,0.0,0.448,5.399,95.3,5.87,3.0,233.0,17.9,396.9,30.81
 43 | 0.09378,12.5,7.87,0.0,0.524,5.889,39.0,5.4509,5.0,311.0,15.2,390.5,15.71
 44 | 6.80117,0.0,18.1,0.0,0.713,6.081,84.4,2.7175,24.0,666.0,20.2,396.9,14.7
 45 | 0.10659,80.0,1.91,0.0,0.413,5.936,19.5,10.5857,4.0,334.0,22.0,376.04,5.57
 46 | 0.14231,0.0,10.01,0.0,0.547,6.254,84.2,2.2565,6.0,432.0,17.8,388.74,10.45
 47 | 0.06617,0.0,3.24,0.0,0.46,5.868,25.8,5.2146,4.0,430.0,16.9,382.44,9.97
 48 | 1.19294,0.0,21.89,0.0,0.624,6.326,97.7,2.271,4.0,437.0,21.2,396.9,12.26
 49 | 5.82115,0.0,18.1,0.0,0.713,6.513,89.9,2.8016,24.0,666.0,20.2,393.82,10.29
 50 | 0.19073,22.0,5.86,0.0,0.431,6.718,17.5,7.8265,7.0,330.0,19.1,393.74,6.56
 51 | 0.15098,0.0,10.01,0.0,0.547,6.021,82.6,2.7474,6.0,432.0,17.8,394.51,10.3
 52 | 23.6482,0.0,18.1,0.0,0.671,6.38,96.2,1.3861,24.0,666.0,20.2,396.9,23.69
 53 | 0.54452,0.0,21.89,0.0,0.624,6.151,97.9,1.6687,4.0,437.0,21.2,396.9,18.46
 54 | 0.06899,0.0,25.65,0.0,0.581,5.87,69.7,2.2577,2.0,188.0,19.1,389.15,14.37
 55 | 0.17331,0.0,9.69,0.0,0.585,5.707,54.0,2.3817,6.0,391.0,19.2,396.9,12.01
 56 | 0.10612,30.0,4.93,0.0,0.428,6.095,65.1,6.3361,6.0,300.0,16.6,394.62,12.4
 57 | 0.09266,34.0,6.09,0.0,0.433,6.495,18.4,5.4917,7.0,329.0,16.1,383.61,8.67
 58 | 0.03548,80.0,3.64,0.0,0.392,5.876,19.1,9.2203,1.0,315.0,16.4,395.18,9.25
 59 | 9.33889,0.0,18.1,0.0,0.679,6.38,95.6,1.9682,24.0,666.0,20.2,60.72,24.08
 60 | 0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
 61 | 0.13554,12.5,6.07,0.0,0.409,5.594,36.8,6.498,4.0,345.0,18.9,396.9,13.09
 62 | 0.53412,20.0,3.97,0.0,0.647,7.52,89.4,2.1398,5.0,264.0,13.0,388.37,7.26
 63 | 0.0351,95.0,2.68,0.0,0.4161,7.853,33.2,5.118,4.0,224.0,14.7,392.78,3.81
 64 | 1.13081,0.0,8.14,0.0,0.538,5.713,94.1,4.233,4.0,307.0,21.0,360.17,22.6
 65 | 1.34284,0.0,19.58,0.0,0.605,6.066,100.0,1.7573,5.0,403.0,14.7,353.89,6.43
 66 | 0.03551,25.0,4.86,0.0,0.426,6.167,46.7,5.4007,4.0,281.0,19.0,390.64,7.51
 67 | 0.67191,0.0,8.14,0.0,0.538,5.813,90.3,4.682,4.0,307.0,21.0,376.88,14.81
 68 | 0.0837,45.0,3.44,0.0,0.437,7.185,38.9,4.5667,5.0,398.0,15.2,396.9,5.39
 69 | 4.54192,0.0,18.1,0.0,0.77,6.398,88.0,2.5182,24.0,666.0,20.2,374.56,7.79
 70 | 0.01501,80.0,2.01,0.0,0.435,6.635,29.7,8.344,4.0,280.0,17.0,390.94,5.99
 71 | 10.6718,0.0,18.1,0.0,0.74,6.459,94.8,1.9879,24.0,666.0,20.2,43.06,23.98
 72 | 2.77974,0.0,19.58,0.0,0.871,4.903,97.8,1.3459,5.0,403.0,14.7,396.9,29.29
 73 | 0.05646,0.0,12.83,0.0,0.437,6.232,53.7,5.0141,5.0,398.0,18.7,386.4,12.34
 74 | 0.26363,0.0,8.56,0.0,0.52,6.229,91.2,2.5451,5.0,384.0,20.9,391.23,15.55
 75 | 0.01965,80.0,1.76,0.0,0.385,6.23,31.5,9.0892,1.0,241.0,18.2,341.6,12.93
 76 | 0.17134,0.0,10.01,0.0,0.547,5.928,88.2,2.4631,6.0,432.0,17.8,344.91,15.76
 77 | 4.0974,0.0,19.58,0.0,0.871,5.468,100.0,1.4118,5.0,403.0,14.7,396.9,26.42
 78 | 15.8744,0.0,18.1,0.0,0.671,6.545,99.1,1.5192,24.0,666.0,20.2,396.9,21.08
 79 | 0.06664,0.0,4.05,0.0,0.51,6.546,33.1,3.1323,5.0,296.0,16.6,390.96,5.33
 80 | 0.02055,85.0,0.74,0.0,0.41,6.383,35.7,9.1876,2.0,313.0,17.3,396.9,5.77
 81 | 0.20746,0.0,27.74,0.0,0.609,5.093,98.0,1.8226,4.0,711.0,20.1,318.43,29.68
 82 | 0.19802,0.0,10.59,0.0,0.489,6.182,42.4,3.9454,4.0,277.0,18.6,393.63,9.47
 83 | 0.01096,55.0,2.25,0.0,0.389,6.453,31.9,7.3073,1.0,300.0,15.3,394.72,8.23
 84 | 18.811,0.0,18.1,0.0,0.597,4.628,100.0,1.5539,24.0,666.0,20.2,28.79,34.37
 85 | 0.02187,60.0,2.93,0.0,0.401,6.8,9.9,6.2196,1.0,265.0,15.6,393.37,5.03
 86 | 0.05059,0.0,4.49,0.0,0.449,6.389,48.0,4.7794,3.0,247.0,18.5,396.9,9.62
 87 | 0.40202,0.0,9.9,0.0,0.544,6.382,67.2,3.5325,4.0,304.0,18.4,395.21,10.36
 88 | 1.49632,0.0,19.58,0.0,0.871,5.404,100.0,1.5916,5.0,403.0,14.7,341.6,13.28
 89 | 22.5971,0.0,18.1,0.0,0.7,5.0,89.5,1.5184,24.0,666.0,20.2,396.9,31.99
 90 | 0.03306,0.0,5.19,0.0,0.515,6.059,37.3,4.8122,5.0,224.0,20.2,396.14,8.51
 91 | 0.04741,0.0,11.93,0.0,0.573,6.03,80.8,2.505,1.0,273.0,21.0,396.9,7.88
 92 | 0.537,0.0,6.2,0.0,0.504,5.981,68.1,3.6715,8.0,307.0,17.4,378.35,11.65
 93 | 0.61154,20.0,3.97,0.0,0.647,8.704,86.9,1.801,5.0,264.0,13.0,389.7,5.12
 94 | 0.1415,0.0,6.91,0.0,0.448,6.169,6.6,5.7209,3.0,233.0,17.9,383.37,5.81
 95 | 1.05393,0.0,8.14,0.0,0.538,5.935,29.3,4.4986,4.0,307.0,21.0,386.85,6.58
 96 | 7.75223,0.0,18.1,0.0,0.713,6.301,83.7,2.7831,24.0,666.0,20.2,272.21,16.23
 97 | 0.51183,0.0,6.2,0.0,0.507,7.358,71.6,4.148,8.0,307.0,17.4,390.07,4.73
 98 | 0.44791,0.0,6.2,1.0,0.507,6.726,66.5,3.6519,8.0,307.0,17.4,360.2,8.05
 99 | 0.05083,0.0,5.19,0.0,0.515,6.316,38.1,6.4584,5.0,224.0,20.2,389.71,5.68
100 | 0.06724,0.0,3.24,0.0,0.46,6.333,17.2,5.2146,4.0,430.0,16.9,375.21,7.34
101 | 0.22188,20.0,6.96,1.0,0.464,7.691,51.8,4.3665,3.0,223.0,18.6,390.77,6.58
102 | 67.9208,0.0,18.1,0.0,0.693,5.683,100.0,1.4254,24.0,666.0,20.2,384.97,22.98
103 | 0.17142,0.0,6.91,0.0,0.448,5.682,33.8,5.1004,3.0,233.0,17.9,396.9,10.21
104 | 0.21124,12.5,7.87,0.0,0.524,5.631,100.0,6.0821,5.0,311.0,15.2,386.63,29.93
105 | 0.11425,0.0,13.89,1.0,0.55,6.373,92.4,3.3633,5.0,276.0,16.4,393.74,10.5
106 | 0.06129,20.0,3.33,1.0,0.4429,7.645,49.7,5.2119,5.0,216.0,14.9,377.07,3.01
107 | 0.07886,80.0,4.95,0.0,0.411,7.148,27.7,5.1167,4.0,245.0,19.2,396.9,3.56
108 | 0.10153,0.0,12.83,0.0,0.437,6.279,74.5,4.0522,5.0,398.0,18.7,373.66,11.97
109 | 0.12816,12.5,6.07,0.0,0.409,5.885,33.0,6.498,4.0,345.0,18.9,396.9,8.79
110 | 2.81838,0.0,18.1,0.0,0.532,5.762,40.3,4.0983,24.0,666.0,20.2,392.92,10.42
111 | 0.06211,40.0,1.25,0.0,0.429,6.49,44.4,8.7921,1.0,335.0,19.7,396.9,5.98
112 | 0.14052,0.0,10.59,0.0,0.489,6.375,32.3,3.9454,4.0,277.0,18.6,385.81,9.38
113 | 18.4982,0.0,18.1,0.0,0.668,4.138,100.0,1.137,24.0,666.0,20.2,396.9,37.97
114 | 18.0846,0.0,18.1,0.0,0.679,6.434,100.0,1.8347,24.0,666.0,20.2,27.25,29.05
115 | 0.0456,0.0,13.89,1.0,0.55,5.888,56.0,3.1121,5.0,276.0,16.4,392.8,13.51
116 | 0.29916,20.0,6.96,0.0,0.464,5.856,42.1,4.429,3.0,223.0,18.6,388.65,13.0
117 | 0.17899,0.0,9.69,0.0,0.585,5.67,28.8,2.7986,6.0,391.0,19.2,393.29,17.6
118 | 0.03578,20.0,3.33,0.0,0.4429,7.82,64.5,4.6947,5.0,216.0,14.9,387.31,3.76
119 | 73.5341,0.0,18.1,0.0,0.679,5.957,100.0,1.8026,24.0,666.0,20.2,16.45,20.62
120 | 0.03049,55.0,3.78,0.0,0.484,6.874,28.1,6.4654,5.0,370.0,17.6,387.97,4.61
121 | 0.13587,0.0,10.59,1.0,0.489,6.064,59.1,4.2392,4.0,277.0,18.6,381.32,14.66
122 | 2.24236,0.0,19.58,0.0,0.605,5.854,91.8,2.422,5.0,403.0,14.7,395.11,11.64
123 | 0.01951,17.5,1.38,0.0,0.4161,7.104,59.5,9.2229,3.0,216.0,18.6,393.24,8.05
124 | 14.4383,0.0,18.1,0.0,0.597,6.852,100.0,1.4655,24.0,666.0,20.2,179.36,19.78
125 | 0.44178,0.0,6.2,0.0,0.504,6.552,21.4,3.3751,8.0,307.0,17.4,380.34,3.76
126 | 0.06642,0.0,4.05,0.0,0.51,6.86,74.4,2.9153,5.0,296.0,16.6,391.27,6.92
127 | 3.69695,0.0,18.1,0.0,0.718,4.963,91.4,1.7523,24.0,666.0,20.2,316.03,14.0
128 | 0.08199,0.0,13.92,0.0,0.437,6.009,42.3,5.5027,4.0,289.0,16.0,396.9,10.4
129 | 11.1081,0.0,18.1,0.0,0.668,4.906,100.0,1.1742,24.0,666.0,20.2,396.9,34.77
130 | 0.08308,0.0,2.46,0.0,0.488,5.604,89.8,2.9879,3.0,193.0,17.8,391.0,13.98
131 | 4.89822,0.0,18.1,0.0,0.631,4.97,100.0,1.3325,24.0,666.0,20.2,375.52,3.26
132 | 0.06911,45.0,3.44,0.0,0.437,6.739,30.8,6.4798,5.0,398.0,15.2,389.71,4.69
133 | 2.14918,0.0,19.58,0.0,0.871,5.709,98.5,1.6232,5.0,403.0,14.7,261.95,15.79
134 | 0.06466,70.0,2.24,0.0,0.4,6.345,20.1,7.8278,5.0,358.0,14.8,368.24,4.97
135 | 0.01439,60.0,2.93,0.0,0.401,6.604,18.8,6.2196,1.0,265.0,15.6,376.7,4.38
136 | 0.66351,20.0,3.97,0.0,0.647,7.333,100.0,1.8946,5.0,264.0,13.0,383.29,7.79
137 | 0.08244,30.0,4.93,0.0,0.428,6.481,18.5,6.1899,6.0,300.0,16.6,379.41,6.36
138 | 0.24103,0.0,7.38,0.0,0.493,6.083,43.7,5.4159,5.0,287.0,19.6,396.9,12.79
139 | 0.41238,0.0,6.2,0.0,0.504,7.163,79.9,3.2157,8.0,307.0,17.4,372.08,6.36
140 | 0.08221,22.0,5.86,0.0,0.431,6.957,6.8,8.9067,7.0,330.0,19.1,386.09,3.53
141 | 0.52058,0.0,6.2,1.0,0.507,6.631,76.5,4.148,8.0,307.0,17.4,388.45,9.54
142 | 4.22239,0.0,18.1,1.0,0.77,5.803,89.0,1.9047,24.0,666.0,20.2,353.04,14.64
143 | 0.13158,0.0,10.01,0.0,0.547,6.176,72.5,2.7301,6.0,432.0,17.8,393.3,12.04
144 | 0.12757,30.0,4.93,0.0,0.428,6.393,7.8,7.0355,6.0,300.0,16.6,374.71,5.19
145 | 0.18337,0.0,27.74,0.0,0.609,5.414,98.3,1.7554,4.0,711.0,20.1,344.05,23.97
146 | 4.42228,0.0,18.1,0.0,0.584,6.003,94.5,2.5403,24.0,666.0,20.2,331.29,21.32
147 | 0.47547,0.0,9.9,0.0,0.544,6.113,58.8,4.0019,4.0,304.0,18.4,396.23,12.73
148 | 0.19539,0.0,10.81,0.0,0.413,6.245,6.2,5.2873,4.0,305.0,19.2,377.17,7.54
149 | 0.15876,0.0,10.81,0.0,0.413,5.961,17.5,5.2873,4.0,305.0,19.2,376.94,9.88
150 | 0.09299,0.0,25.65,0.0,0.581,5.961,92.9,2.0869,2.0,188.0,19.1,378.09,17.93
151 | 0.34109,0.0,7.38,0.0,0.493,6.415,40.1,4.7211,5.0,287.0,19.6,396.9,6.12
152 | 0.05302,0.0,3.41,0.0,0.489,7.079,63.1,3.4145,2.0,270.0,17.8,396.06,5.7
153 | 2.3139,0.0,19.58,0.0,0.605,5.88,97.3,2.3887,5.0,403.0,14.7,348.13,12.03
154 | 0.03427,0.0,5.19,0.0,0.515,5.869,46.3,5.2311,5.0,224.0,20.2,396.9,9.8
155 | 0.13914,0.0,4.05,0.0,0.51,5.572,88.5,2.5961,5.0,296.0,16.6,396.9,14.69
156 | 0.80271,0.0,8.14,0.0,0.538,5.456,36.6,3.7965,4.0,307.0,21.0,288.99,11.69
157 | 14.4208,0.0,18.1,0.0,0.74,6.461,93.3,2.0026,24.0,666.0,20.2,27.49,18.05
158 | 0.12579,45.0,3.44,0.0,0.437,6.556,29.1,4.5667,5.0,398.0,15.2,382.84,4.56
159 | 9.18702,0.0,18.1,0.0,0.7,5.536,100.0,1.5804,24.0,666.0,20.2,396.9,23.6
160 | 0.30347,0.0,7.38,0.0,0.493,6.312,28.9,5.4159,5.0,287.0,19.6,396.9,6.15
161 | 0.05602,0.0,2.46,0.0,0.488,7.831,53.6,3.1992,3.0,193.0,17.8,392.63,4.45
162 | 0.05561,70.0,2.24,0.0,0.4,7.041,10.0,7.8278,5.0,358.0,14.8,371.58,4.74
163 | 1.83377,0.0,19.58,1.0,0.605,7.802,98.2,2.0407,5.0,403.0,14.7,389.61,1.92
164 | 1.35472,0.0,8.14,0.0,0.538,6.072,100.0,4.175,4.0,307.0,21.0,376.73,13.04
165 | 0.21161,0.0,8.56,0.0,0.52,6.137,87.4,2.7147,5.0,384.0,20.9,394.47,13.44
166 | 0.0536,21.0,5.64,0.0,0.439,6.511,21.1,6.8147,4.0,243.0,16.8,396.9,5.28
167 | 0.01432,100.0,1.32,0.0,0.411,6.816,40.5,8.3248,5.0,256.0,15.1,392.9,3.95
168 | 


--------------------------------------------------------------------------------
/lesson2/demo_boston_data/train.csv:
--------------------------------------------------------------------------------
  1 | 16.7,4.87141,0.0,18.1,0.0,0.614,6.484,93.6,2.3053,24.0,666.0,20.2,396.21,18.68
  2 | 13.8,2.37934,0.0,19.58,0.0,0.871,6.13,100.0,1.4191,5.0,403.0,14.7,172.91,27.8
  3 | 20.1,0.11132,0.0,27.74,0.0,0.609,5.983,83.5,2.1099,4.0,711.0,20.1,396.9,13.35
  4 | 16.1,2.63548,0.0,9.9,0.0,0.544,4.973,37.8,2.5194,4.0,304.0,18.4,350.45,12.64
  5 | 21.7,3.8497,0.0,18.1,1.0,0.77,6.395,91.0,2.5052,24.0,666.0,20.2,391.34,13.27
  6 | 23.8,2.3004,0.0,19.58,0.0,0.605,6.319,96.1,2.1,5.0,403.0,14.7,297.09,11.1
  7 | 18.2,0.63796,0.0,8.14,0.0,0.538,6.096,84.5,4.4619,4.0,307.0,21.0,380.02,10.26
  8 | 17.8,8.98296,0.0,18.1,1.0,0.77,6.212,97.4,2.1222,24.0,666.0,20.2,377.73,17.6
  9 | 13.5,8.20058,0.0,18.1,0.0,0.713,5.936,80.3,2.7792,24.0,666.0,20.2,3.5,16.94
 10 | 22.2,0.1029,30.0,4.93,0.0,0.428,6.358,52.9,7.0355,6.0,300.0,16.6,372.75,11.22
 11 | 7.0,45.7461,0.0,18.1,0.0,0.693,4.519,100.0,1.6582,24.0,666.0,20.2,88.27,36.98
 12 | 16.7,11.0874,0.0,18.1,0.0,0.718,6.411,100.0,1.8589,24.0,666.0,20.2,318.75,15.02
 13 | 21.7,0.10793,0.0,8.56,0.0,0.52,6.195,54.4,2.7778,5.0,384.0,20.9,393.49,13.0
 14 | 24.4,0.1403,22.0,5.86,0.0,0.431,6.487,13.0,7.3967,7.0,330.0,19.1,396.28,5.9
 15 | 20.1,13.0751,0.0,18.1,0.0,0.58,5.713,56.7,2.8237,24.0,666.0,20.2,396.9,14.76
 16 | 24.7,0.15936,0.0,6.91,0.0,0.448,6.211,6.5,5.7209,3.0,233.0,17.9,394.46,7.44
 17 | 19.6,0.85204,0.0,8.14,0.0,0.538,5.965,89.2,4.0123,4.0,307.0,21.0,392.53,13.83
 18 | 36.2,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33
 19 | 24.7,0.17505,0.0,5.96,0.0,0.499,5.966,30.2,3.8473,5.0,279.0,19.2,393.43,10.13
 20 | 18.2,0.04301,80.0,1.91,0.0,0.413,5.663,21.9,10.5857,4.0,334.0,22.0,382.8,8.05
 21 | 12.0,15.0234,0.0,18.1,0.0,0.614,5.304,97.3,2.1007,24.0,666.0,20.2,349.48,24.91
 22 | 15.4,2.73397,0.0,19.58,0.0,0.871,5.597,94.9,1.5257,5.0,403.0,14.7,351.85,21.45
 23 | 13.2,1.38799,0.0,8.14,0.0,0.538,5.95,82.0,3.99,4.0,307.0,21.0,232.6,27.71
 24 | 18.4,0.32982,0.0,21.89,0.0,0.624,5.822,95.4,2.4699,4.0,437.0,21.2,388.69,15.03
 25 | 33.3,0.04011,80.0,1.52,0.0,0.404,7.287,34.1,7.309,2.0,329.0,12.6,396.9,4.08
 26 | 22.6,0.08447,0.0,4.05,0.0,0.51,5.859,68.7,2.7019,5.0,296.0,16.6,393.23,9.64
 27 | 14.1,4.75237,0.0,18.1,0.0,0.713,6.525,86.5,2.4358,24.0,666.0,20.2,50.92,18.13
 28 | 22.5,0.05188,0.0,4.49,0.0,0.449,6.015,45.1,4.4272,3.0,247.0,18.5,395.99,12.86
 29 | 50.0,0.52693,0.0,6.2,0.0,0.504,8.725,83.0,2.8944,8.0,307.0,17.4,382.0,4.63
 30 | 14.1,9.32909,0.0,18.1,0.0,0.713,6.185,98.7,2.2616,24.0,666.0,20.2,396.9,18.13
 31 | 33.8,0.54011,20.0,3.97,0.0,0.647,7.203,81.8,2.1121,5.0,264.0,13.0,392.8,9.59
 32 | 19.3,0.14476,0.0,10.01,0.0,0.547,5.731,65.2,2.7592,6.0,432.0,17.8,391.5,13.61
 33 | 19.3,0.37578,0.0,10.59,1.0,0.489,5.404,88.6,3.665,4.0,277.0,18.6,395.24,23.98
 34 | 23.0,0.59005,0.0,21.89,0.0,0.624,6.372,97.9,2.3274,4.0,437.0,21.2,385.76,11.12
 35 | 21.9,3.47428,0.0,18.1,1.0,0.718,8.78,82.9,1.9047,24.0,666.0,20.2,354.55,5.29
 36 | 20.4,0.13117,0.0,8.56,0.0,0.52,6.127,85.2,2.1224,5.0,384.0,20.9,387.69,14.09
 37 | 10.2,17.8667,0.0,18.1,0.0,0.671,6.223,100.0,1.3861,24.0,666.0,20.2,393.74,21.78
 38 | 21.4,14.3337,0.0,18.1,0.0,0.614,6.229,88.0,1.9512,24.0,666.0,20.2,383.32,13.11
 39 | 43.8,0.08187,0.0,2.89,0.0,0.445,7.82,36.9,3.4952,2.0,276.0,18.0,393.53,3.57
 40 | 26.7,0.35809,0.0,6.2,1.0,0.507,6.951,88.5,2.8617,8.0,307.0,17.4,391.7,9.71
 41 | 18.3,0.26838,0.0,9.69,0.0,0.585,5.794,70.6,2.8927,6.0,391.0,19.2,396.9,14.1
 42 | 19.9,3.1636,0.0,18.1,0.0,0.655,5.759,48.2,3.0665,24.0,666.0,20.2,334.4,14.13
 43 | 22.9,0.04203,28.0,15.04,0.0,0.464,6.442,53.6,3.6659,4.0,270.0,18.2,395.01,8.16
 44 | 13.9,0.84054,0.0,8.14,0.0,0.538,5.599,85.7,4.4546,4.0,307.0,21.0,303.42,16.51
 45 | 16.2,0.25356,0.0,9.9,0.0,0.544,5.705,77.7,3.945,4.0,304.0,18.4,396.42,11.5
 46 | 22.1,0.79041,0.0,9.9,0.0,0.544,6.122,52.8,2.6403,4.0,304.0,18.4,396.9,5.98
 47 | 13.4,7.05042,0.0,18.1,0.0,0.614,6.103,85.1,2.0218,24.0,666.0,20.2,2.52,23.29
 48 | 20.3,0.14103,0.0,13.92,0.0,0.437,5.79,58.0,6.32,4.0,289.0,16.0,396.9,15.84
 49 | 24.5,0.27957,0.0,9.69,0.0,0.585,5.926,42.6,2.3817,6.0,391.0,19.2,396.9,13.59
 50 | 33.4,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
 51 | 13.5,1.61282,0.0,8.14,0.0,0.538,6.096,96.9,3.7598,4.0,307.0,21.0,248.31,20.34
 52 | 36.1,0.05515,33.0,2.18,0.0,0.472,7.236,41.1,4.022,7.0,222.0,18.4,393.68,6.93
 53 | 17.1,0.05023,35.0,6.06,0.0,0.4379,5.706,28.4,6.6407,1.0,304.0,16.9,394.02,12.43
 54 | 21.1,0.03961,0.0,5.19,0.0,0.515,6.037,34.5,5.9853,5.0,224.0,20.2,396.9,8.01
 55 | 15.4,9.96654,0.0,18.1,0.0,0.74,6.485,100.0,1.9784,24.0,666.0,20.2,386.73,18.85
 56 | 17.8,0.31827,0.0,9.9,0.0,0.544,5.914,83.2,3.9986,4.0,304.0,18.4,390.7,18.33
 57 | 16.0,0.17171,25.0,5.13,0.0,0.453,5.966,93.4,6.8185,8.0,284.0,19.7,378.08,14.44
 58 | 28.2,0.04932,33.0,2.18,0.0,0.472,6.849,70.3,3.1827,7.0,222.0,18.4,396.9,7.53
 59 | 17.4,1.20742,0.0,19.58,0.0,0.605,5.875,94.6,2.4259,5.0,403.0,14.7,292.29,14.43
 60 | 19.9,4.34879,0.0,18.1,0.0,0.58,6.167,84.0,3.0334,24.0,666.0,20.2,396.9,16.29
 61 | 22.5,0.25199,0.0,10.59,0.0,0.489,5.783,72.7,4.3549,4.0,277.0,18.6,389.43,18.06
 62 | 22.8,0.10084,0.0,10.01,0.0,0.547,6.715,81.6,2.6775,6.0,432.0,17.8,395.59,10.16
 63 | 23.0,5.82401,0.0,18.1,0.0,0.532,6.242,64.7,3.4242,24.0,666.0,20.2,396.9,10.74
 64 | 15.7,0.38735,0.0,25.65,0.0,0.581,5.613,95.6,1.7572,2.0,188.0,19.1,359.29,27.26
 65 | 23.9,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.9,5.64
 66 | 18.0,0.32543,0.0,21.89,0.0,0.624,6.431,98.8,1.8125,4.0,437.0,21.2,396.9,15.39
 67 | 15.6,2.15505,0.0,19.58,0.0,0.871,5.628,100.0,1.5166,5.0,403.0,14.7,169.27,16.65
 68 | 41.3,1.22358,0.0,19.58,0.0,0.605,6.943,97.4,1.8773,5.0,403.0,14.7,363.43,4.59
 69 | 27.1,0.14455,12.5,7.87,0.0,0.524,6.172,96.1,5.9505,5.0,311.0,15.2,396.9,19.15
 70 | 28.4,0.05479,33.0,2.18,0.0,0.472,6.616,58.1,3.37,7.0,222.0,18.4,393.36,8.93
 71 | 16.5,0.02498,0.0,1.89,0.0,0.518,6.54,59.7,6.2669,1.0,422.0,15.9,389.96,8.65
 72 | 20.1,0.1396,0.0,8.56,0.0,0.52,6.167,90.0,2.421,5.0,384.0,20.9,392.69,12.33
 73 | 18.2,0.7258,0.0,8.14,0.0,0.538,5.727,69.5,3.7965,4.0,307.0,21.0,390.95,11.28
 74 | 23.2,0.03871,52.5,5.32,0.0,0.405,6.209,31.3,7.3172,6.0,293.0,16.6,396.9,7.14
 75 | 28.7,0.07013,0.0,13.89,0.0,0.55,6.642,85.1,3.4211,5.0,276.0,16.4,392.78,9.69
 76 | 19.0,3.77498,0.0,18.1,0.0,0.655,5.952,84.7,2.8715,24.0,666.0,20.2,22.01,17.15
 77 | 16.3,28.6558,0.0,18.1,0.0,0.597,5.155,100.0,1.5894,24.0,666.0,20.2,210.97,20.08
 78 | 17.5,0.7842,0.0,8.14,0.0,0.538,5.99,81.7,4.2579,4.0,307.0,21.0,386.75,14.67
 79 | 22.8,0.76162,20.0,3.97,0.0,0.647,5.56,62.8,1.9865,5.0,264.0,13.0,392.4,10.45
 80 | 23.7,5.70818,0.0,18.1,0.0,0.532,6.75,74.9,3.3317,24.0,666.0,20.2,393.07,7.74
 81 | 28.4,0.12204,0.0,2.89,0.0,0.445,6.625,57.8,3.4952,2.0,276.0,18.0,357.98,6.65
 82 | 22.7,5.20177,0.0,18.1,1.0,0.77,6.127,83.4,2.7227,24.0,666.0,20.2,395.43,11.48
 83 | 22.6,0.13642,0.0,10.59,0.0,0.489,5.891,22.3,3.9454,4.0,277.0,18.6,396.9,10.87
 84 | 23.2,5.29305,0.0,18.1,0.0,0.7,6.051,82.5,2.1678,24.0,666.0,20.2,378.38,18.76
 85 | 7.2,16.8118,0.0,18.1,0.0,0.7,5.277,98.1,1.4261,24.0,666.0,20.2,396.9,30.81
 86 | 15.3,1.12658,0.0,19.58,1.0,0.871,5.012,88.0,1.6102,5.0,403.0,14.7,343.28,12.12
 87 | 16.1,6.44405,0.0,18.1,0.0,0.584,6.425,74.8,2.2004,24.0,666.0,20.2,97.95,12.03
 88 | 50.0,8.26725,0.0,18.1,1.0,0.668,5.875,89.6,1.1296,24.0,666.0,20.2,347.88,8.88
 89 | 16.1,5.09017,0.0,18.1,0.0,0.713,6.297,91.8,2.3682,24.0,666.0,20.2,385.09,17.27
 90 | 18.6,0.07244,60.0,1.69,0.0,0.411,5.884,18.5,10.7103,4.0,411.0,18.3,392.33,7.79
 91 | 13.3,0.2498,0.0,21.89,0.0,0.624,5.857,98.2,1.6686,4.0,437.0,21.2,392.04,21.32
 92 | 14.6,10.233,0.0,18.1,0.0,0.614,6.185,96.7,2.1705,24.0,666.0,20.2,379.7,18.03
 93 | 19.6,0.10328,25.0,5.13,0.0,0.453,5.927,47.2,6.932,8.0,284.0,19.7,396.9,9.22
 94 | 21.2,0.23912,0.0,9.69,0.0,0.585,6.019,65.3,2.4091,6.0,391.0,19.2,396.9,12.92
 95 | 19.4,0.26169,0.0,9.9,0.0,0.544,6.023,90.4,2.834,4.0,304.0,18.4,396.3,11.72
 96 | 50.0,0.01501,90.0,1.21,1.0,0.401,7.923,24.8,5.885,1.0,198.0,13.6,395.52,3.16
 97 | 32.9,0.01778,95.0,1.47,0.0,0.403,7.135,13.9,7.6534,3.0,402.0,17.0,384.3,4.45
 98 | 20.6,0.04527,0.0,11.93,0.0,0.573,6.12,76.7,2.2875,1.0,273.0,21.0,396.9,9.08
 99 | 29.1,0.07978,40.0,6.41,0.0,0.447,6.482,32.1,4.1403,4.0,254.0,17.6,396.9,7.19
100 | 13.1,2.44668,0.0,19.58,0.0,0.871,5.272,94.0,1.7364,5.0,403.0,14.7,88.63,16.14
101 | 10.2,12.2472,0.0,18.1,0.0,0.584,5.837,59.7,1.9976,24.0,666.0,20.2,24.65,15.69
102 | 11.5,8.15174,0.0,18.1,0.0,0.7,5.39,98.9,1.7281,24.0,666.0,20.2,396.9,20.85
103 | 14.4,1.62864,0.0,21.89,0.0,0.624,5.019,100.0,1.4394,4.0,437.0,21.2,396.9,34.41
104 | 50.0,2.01019,0.0,19.58,0.0,0.605,7.929,96.2,2.0459,5.0,403.0,14.7,369.3,3.7
105 | 12.6,9.92485,0.0,18.1,0.0,0.74,6.251,96.6,2.198,24.0,666.0,20.2,388.52,16.44
106 | 22.2,0.11027,25.0,5.13,0.0,0.453,6.456,67.8,7.2255,8.0,284.0,19.7,396.9,6.73
107 | 32.5,0.10008,0.0,2.46,0.0,0.488,6.563,95.6,2.847,3.0,193.0,17.8,396.9,5.68
108 | 20.8,3.67822,0.0,18.1,0.0,0.77,5.362,96.2,2.1036,24.0,666.0,20.2,380.79,10.19
109 | 25.2,0.16211,20.0,6.96,0.0,0.464,6.24,16.3,4.429,3.0,223.0,18.6,396.9,6.59
110 | 14.5,0.98843,0.0,8.14,0.0,0.538,5.813,100.0,4.0952,4.0,307.0,21.0,394.54,19.88
111 | 15.0,19.6091,0.0,18.1,0.0,0.671,7.313,97.9,1.3163,24.0,666.0,20.2,396.9,13.44
112 | 29.6,0.06047,0.0,2.46,0.0,0.488,6.153,68.8,3.2797,3.0,193.0,17.8,387.11,13.15
113 | 44.0,0.01538,90.0,3.75,0.0,0.394,7.454,34.2,6.3361,3.0,244.0,15.9,386.34,3.11
114 | 7.2,14.2362,0.0,18.1,0.0,0.693,6.343,100.0,1.5741,24.0,666.0,20.2,396.9,20.32
115 | 32.2,0.00906,90.0,2.97,0.0,0.4,7.088,20.8,7.3073,1.0,285.0,15.3,394.72,7.85
116 | 36.4,0.08664,45.0,3.44,0.0,0.437,7.178,26.3,6.4798,5.0,398.0,15.2,390.49,2.87
117 | 19.1,15.5757,0.0,18.1,0.0,0.58,5.926,71.0,2.9084,24.0,666.0,20.2,368.74,18.13
118 | 23.2,0.07022,0.0,4.05,0.0,0.51,6.02,47.2,3.5549,5.0,296.0,16.6,393.23,10.11
119 | 15.0,0.22489,12.5,7.87,0.0,0.524,6.377,94.3,6.3467,5.0,311.0,15.2,392.52,20.45
120 | 50.0,0.01381,80.0,0.46,0.0,0.422,7.875,32.0,5.6484,4.0,255.0,14.4,394.23,2.97
121 | 25.0,0.1265,25.0,5.13,0.0,0.453,6.762,43.4,7.9809,8.0,284.0,19.7,395.58,9.5
122 | 20.0,0.18836,0.0,6.91,0.0,0.448,5.786,33.3,5.1004,3.0,233.0,17.9,396.9,14.15
123 | 8.4,11.8123,0.0,18.1,0.0,0.718,6.824,76.5,1.794,24.0,666.0,20.2,48.45,22.74
124 | 20.0,0.09744,0.0,5.96,0.0,0.499,5.841,61.4,3.3779,5.0,279.0,19.2,377.56,11.41
125 | 33.1,0.06127,40.0,6.41,1.0,0.447,6.826,27.6,4.8628,4.0,254.0,17.6,393.45,4.16
126 | 19.4,0.21977,0.0,6.91,0.0,0.448,5.602,62.0,6.0877,3.0,233.0,17.9,396.9,16.2
127 | 34.7,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
128 | 23.8,1.80028,0.0,19.58,0.0,0.605,5.877,79.2,2.4259,5.0,403.0,14.7,227.61,12.14
129 | 10.2,14.3337,0.0,18.1,0.0,0.7,4.88,100.0,1.5895,24.0,666.0,20.2,372.92,30.62
130 | 13.3,6.39312,0.0,18.1,0.0,0.584,6.162,97.4,2.206,24.0,666.0,20.2,302.76,24.1
131 | 18.5,0.28392,0.0,7.38,0.0,0.493,5.708,74.3,4.7211,5.0,287.0,19.6,391.13,11.74
132 | 12.7,4.66883,0.0,18.1,0.0,0.713,5.976,87.9,2.5806,24.0,666.0,20.2,10.48,19.01
133 | 19.8,0.04544,0.0,3.24,0.0,0.46,6.144,32.2,5.8736,4.0,430.0,16.9,368.57,9.09
134 | 20.4,0.62976,0.0,8.14,0.0,0.538,5.949,61.8,4.7075,4.0,307.0,21.0,396.9,8.26
135 | 19.5,0.1712,0.0,8.56,0.0,0.52,5.836,91.9,2.211,5.0,384.0,20.9,395.67,18.66
136 | 19.1,5.69175,0.0,18.1,0.0,0.583,6.114,79.8,3.5459,24.0,666.0,20.2,392.68,14.98
137 | 24.2,0.08826,0.0,10.81,0.0,0.413,6.417,6.6,5.2873,4.0,305.0,19.2,383.73,6.72
138 | 13.9,15.288,0.0,18.1,0.0,0.671,6.649,93.3,1.3449,24.0,666.0,20.2,363.02,23.24
139 | 50.0,0.02009,95.0,2.68,0.0,0.4161,8.034,31.9,5.118,4.0,224.0,14.7,390.55,2.88
140 | 19.9,3.83684,0.0,18.1,0.0,0.77,6.251,91.1,2.2955,24.0,666.0,20.2,350.65,14.19
141 | 23.3,1.42502,0.0,19.58,0.0,0.871,6.51,100.0,1.7659,5.0,403.0,14.7,364.31,7.39
142 | 20.6,2.37857,0.0,18.1,0.0,0.583,5.871,41.9,3.724,24.0,666.0,20.2,370.73,13.34
143 | 18.1,0.55778,0.0,21.89,0.0,0.624,6.335,98.2,2.1107,4.0,437.0,21.2,394.67,16.96
144 | 41.7,0.57529,0.0,6.2,0.0,0.507,8.337,73.3,3.8384,8.0,307.0,17.4,385.91,2.47
145 | 27.0,1.27346,0.0,19.58,1.0,0.605,6.25,92.6,1.7984,5.0,403.0,14.7,338.92,5.5
146 | 15.2,1.23247,0.0,8.14,0.0,0.538,6.142,91.7,3.9769,4.0,307.0,21.0,396.9,18.72
147 | 24.6,0.19186,0.0,7.38,0.0,0.493,6.431,14.7,5.4159,5.0,287.0,19.6,393.68,5.08
148 | 28.0,0.04113,25.0,4.86,0.0,0.426,6.727,33.5,5.4007,4.0,281.0,19.0,396.9,5.29
149 | 30.8,0.02763,75.0,2.95,0.0,0.428,6.595,21.8,5.4011,3.0,252.0,18.3,395.63,4.32
150 | 24.1,0.07896,0.0,12.83,0.0,0.437,6.273,6.0,4.2515,5.0,398.0,18.7,394.92,6.78
151 | 18.9,0.11747,12.5,7.87,0.0,0.524,6.009,82.9,6.2267,5.0,311.0,15.2,396.9,13.27
152 | 13.3,9.82349,0.0,18.1,0.0,0.671,6.794,98.8,1.358,24.0,666.0,20.2,396.9,21.24
153 | 14.9,9.51363,0.0,18.1,0.0,0.713,6.728,94.1,2.4961,24.0,666.0,20.2,6.68,18.71
154 | 8.3,15.8603,0.0,18.1,0.0,0.679,5.896,95.4,1.9096,24.0,666.0,20.2,7.68,24.39
155 | 34.9,0.0315,95.0,1.47,0.0,0.403,6.975,15.3,7.6534,3.0,402.0,17.0,396.9,4.56
156 | 21.0,0.08014,0.0,5.96,0.0,0.499,5.85,41.5,3.9342,5.0,279.0,19.2,396.9,8.77
157 | 11.7,13.9134,0.0,18.1,0.0,0.713,6.208,95.0,2.2222,24.0,666.0,20.2,100.63,15.17
158 | 50.0,9.2323,0.0,18.1,0.0,0.631,6.216,100.0,1.1691,24.0,666.0,20.2,366.15,9.53
159 | 23.1,0.0187,85.0,4.15,0.0,0.429,6.516,27.7,8.5353,4.0,351.0,17.9,392.43,6.36
160 | 21.2,3.67367,0.0,18.1,0.0,0.583,6.312,51.9,3.9917,24.0,666.0,20.2,388.62,10.58
161 | 50.0,1.46336,0.0,19.58,0.0,0.605,7.489,90.8,1.9709,5.0,403.0,14.7,374.43,1.73
162 | 15.6,3.53501,0.0,19.58,1.0,0.871,6.152,82.6,1.7455,5.0,403.0,14.7,88.01,15.02
163 | 37.2,0.0578,0.0,2.46,0.0,0.488,6.98,58.4,2.829,3.0,193.0,17.8,396.9,5.04
164 | 31.0,0.82526,20.0,3.97,0.0,0.647,7.327,94.5,2.0788,5.0,264.0,13.0,393.42,11.25
165 | 18.8,0.12329,0.0,10.01,0.0,0.547,5.913,92.9,2.3534,6.0,432.0,17.8,394.95,16.21
166 | 27.5,0.62356,0.0,6.2,1.0,0.507,6.879,77.7,3.2721,8.0,307.0,17.4,390.39,9.93
167 | 33.2,0.0686,0.0,2.89,0.0,0.445,7.416,62.5,3.4952,2.0,276.0,18.0,396.9,6.19
168 | 32.4,0.05644,40.0,6.41,1.0,0.447,6.758,32.9,4.0776,4.0,254.0,17.6,396.9,3.53
169 | 20.6,4.83567,0.0,18.1,0.0,0.583,5.905,53.2,3.1523,24.0,666.0,20.2,388.22,11.45
170 | 39.8,0.06588,0.0,2.46,0.0,0.488,7.765,83.3,2.741,3.0,193.0,17.8,395.56,7.56
171 | 19.7,0.08873,21.0,5.64,0.0,0.439,5.963,45.7,6.8147,4.0,243.0,16.8,395.56,13.45
172 | 23.9,0.04462,25.0,4.86,0.0,0.426,6.619,70.4,5.4007,4.0,281.0,19.0,395.63,7.22
173 | 23.9,0.02543,55.0,3.78,0.0,0.484,6.696,56.4,5.7321,5.0,370.0,17.6,396.9,7.18
174 | 17.1,0.35233,0.0,21.89,0.0,0.624,6.454,98.4,1.8498,4.0,437.0,21.2,394.08,14.59
175 | 17.5,0.03113,0.0,4.39,0.0,0.442,6.014,48.5,8.0136,3.0,352.0,18.8,385.64,10.53
176 | 22.3,0.0459,52.5,5.32,0.0,0.405,6.315,45.6,7.3172,6.0,293.0,16.6,396.9,7.6
177 | 22.0,0.05789,12.5,6.07,0.0,0.409,5.878,21.4,6.498,4.0,345.0,18.9,396.21,8.1
178 | 23.4,0.04981,21.0,5.64,0.0,0.439,5.998,21.4,6.8147,4.0,243.0,16.8,396.9,8.43
179 | 37.9,0.09103,0.0,2.46,0.0,0.488,7.155,92.2,2.7006,3.0,193.0,17.8,394.12,4.82
180 | 20.3,0.3494,0.0,9.9,0.0,0.544,5.972,76.7,3.1025,4.0,304.0,18.4,396.24,9.97
181 | 10.9,37.6619,0.0,18.1,0.0,0.679,6.202,78.7,1.8629,24.0,666.0,20.2,18.82,14.52
182 | 23.1,0.18159,0.0,7.38,0.0,0.493,6.376,54.3,4.5404,5.0,287.0,19.6,396.9,6.87
183 | 12.1,9.59571,0.0,18.1,0.0,0.693,6.404,100.0,1.639,24.0,666.0,20.2,376.11,20.31
184 | 23.6,0.0566,0.0,3.41,0.0,0.489,7.007,86.3,3.4217,2.0,270.0,17.8,396.9,5.5
185 | 19.4,0.04379,80.0,3.37,0.0,0.398,5.787,31.1,6.6115,4.0,337.0,16.1,396.9,10.24
186 | 22.4,0.21719,0.0,10.59,1.0,0.489,5.807,53.8,3.6526,4.0,277.0,18.6,390.94,16.03
187 | 24.5,0.16439,22.0,5.86,0.0,0.431,6.433,49.1,7.8265,7.0,330.0,19.1,374.71,9.52
188 | 13.4,6.71772,0.0,18.1,0.0,0.713,6.749,92.6,2.3236,24.0,666.0,20.2,0.32,17.44
189 | 19.9,0.62739,0.0,8.14,0.0,0.538,5.834,56.5,4.4986,4.0,307.0,21.0,395.62,8.47
190 | 25.0,5.73116,0.0,18.1,0.0,0.532,7.061,77.0,3.4106,24.0,666.0,20.2,395.28,7.01
191 | 23.7,0.28955,0.0,10.59,0.0,0.489,5.412,9.8,3.5875,4.0,277.0,18.6,348.93,29.55
192 | 18.7,0.22212,0.0,10.01,0.0,0.547,6.092,95.4,2.548,6.0,432.0,17.8,396.9,17.09
193 | 32.0,0.09604,40.0,6.41,0.0,0.447,6.854,42.8,4.2673,4.0,254.0,17.6,396.9,2.98
194 | 23.8,0.3692,0.0,9.9,0.0,0.544,6.567,87.3,3.6023,4.0,304.0,18.4,395.69,9.28
195 | 32.0,0.07875,45.0,3.44,0.0,0.437,6.782,41.1,3.7886,5.0,398.0,15.2,393.87,6.68
196 | 18.6,0.22876,0.0,8.56,0.0,0.52,6.405,85.4,2.7147,5.0,384.0,20.9,70.8,10.63
197 | 16.6,0.22927,0.0,6.91,0.0,0.448,6.03,85.5,5.6894,3.0,233.0,17.9,392.74,18.8
198 | 19.6,4.03841,0.0,18.1,0.0,0.532,6.229,90.7,3.0993,24.0,666.0,20.2,395.33,12.87
199 | 12.5,5.87205,0.0,18.1,0.0,0.693,6.405,96.0,1.6768,24.0,666.0,20.2,396.9,19.37
200 | 12.7,13.3598,0.0,18.1,0.0,0.693,5.887,94.7,1.7821,24.0,666.0,20.2,396.9,16.35
201 | 15.1,6.96215,0.0,18.1,0.0,0.7,5.713,97.0,1.9265,24.0,666.0,20.2,394.43,17.11
202 | 33.1,0.1,34.0,6.09,0.0,0.433,6.982,17.7,5.4917,7.0,329.0,16.1,390.43,4.86
203 | 7.5,10.8342,0.0,18.1,0.0,0.679,6.782,90.8,1.8195,24.0,666.0,20.2,21.57,25.79
204 | 20.4,0.13058,0.0,10.01,0.0,0.547,5.872,73.1,2.4775,6.0,432.0,17.8,338.63,15.37
205 | 18.4,5.66637,0.0,18.1,0.0,0.74,6.219,100.0,2.0048,24.0,666.0,20.2,395.69,16.59
206 | 14.0,0.2909,0.0,21.89,0.0,0.624,6.174,93.6,1.6119,4.0,437.0,21.2,388.08,24.16
207 | 17.2,7.40389,0.0,18.1,0.0,0.597,5.617,97.9,1.4547,24.0,666.0,20.2,314.64,26.4
208 | 13.4,11.1604,0.0,18.1,0.0,0.74,6.629,94.6,2.1247,24.0,666.0,20.2,109.85,23.27
209 | 8.3,24.8017,0.0,18.1,0.0,0.693,5.349,96.0,1.7028,24.0,666.0,20.2,396.9,19.77
210 | 6.3,9.91655,0.0,18.1,0.0,0.693,5.852,77.8,1.5004,24.0,666.0,20.2,338.16,29.97
211 | 13.6,0.10574,0.0,27.74,0.0,0.609,5.983,98.8,1.8681,4.0,711.0,20.1,390.11,18.07
212 | 34.6,0.03768,80.0,1.52,0.0,0.404,7.274,38.3,7.309,2.0,329.0,12.6,392.2,6.62
213 | 28.6,0.12932,0.0,13.92,0.0,0.437,6.678,31.1,5.9604,4.0,289.0,16.0,396.9,6.27
214 | 14.2,7.02259,0.0,18.1,0.0,0.718,6.006,95.3,1.8746,24.0,666.0,20.2,319.98,15.7
215 | 22.4,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67
216 | 50.0,6.53876,0.0,18.1,1.0,0.631,7.016,97.5,1.2024,24.0,666.0,20.2,392.05,2.96
217 | 18.5,0.19133,22.0,5.86,0.0,0.431,5.605,70.2,7.9549,7.0,330.0,19.1,389.13,18.46
218 | 37.0,0.09068,45.0,3.44,0.0,0.437,6.951,21.5,6.4798,5.0,398.0,15.2,377.68,5.1
219 | 20.5,0.19657,22.0,5.86,0.0,0.431,6.226,79.2,8.0555,7.0,330.0,19.1,376.14,10.15
220 | 24.8,0.04417,70.0,2.24,0.0,0.4,6.871,47.4,7.8278,5.0,358.0,14.8,390.86,6.07
221 | 18.8,0.09849,0.0,25.65,0.0,0.581,5.879,95.8,2.0063,2.0,188.0,19.1,379.38,17.58
222 | 18.4,0.77299,0.0,8.14,0.0,0.538,6.495,94.4,4.4547,4.0,307.0,21.0,387.94,12.8
223 | 24.4,0.1146,20.0,6.96,0.0,0.464,6.538,58.7,3.9175,3.0,223.0,18.6,394.96,7.73
224 | 26.6,0.02899,40.0,1.25,0.0,0.429,6.939,34.5,8.7921,1.0,335.0,19.7,389.85,5.89
225 | 20.3,0.08387,0.0,12.83,0.0,0.437,5.874,36.6,4.5026,5.0,398.0,18.7,396.06,9.1
226 | 35.4,0.03705,20.0,3.33,0.0,0.4429,6.968,37.2,5.2447,5.0,216.0,14.9,392.23,4.59
227 | 19.2,0.34006,0.0,21.89,0.0,0.624,6.458,98.9,2.1185,4.0,437.0,21.2,395.04,12.6
228 | 


--------------------------------------------------------------------------------
/lesson2/demo_boston_data/validation.csv:
--------------------------------------------------------------------------------
  1 | 22.6,0.04684,0.0,3.41,0.0,0.489,6.417,66.1,3.0923,2.0,270.0,17.8,392.18,8.81
  2 | 13.0,7.52601,0.0,18.1,0.0,0.713,6.417,98.3,2.185,24.0,666.0,20.2,304.21,19.31
  3 | 27.5,4.55587,0.0,18.1,0.0,0.718,3.561,87.9,1.6132,24.0,666.0,20.2,354.7,7.12
  4 | 18.7,0.06151,0.0,5.19,0.0,0.515,5.968,58.5,4.8122,5.0,224.0,20.2,396.9,9.29
  5 | 33.2,0.10469,40.0,6.41,1.0,0.447,7.267,49.0,4.7872,4.0,254.0,17.6,389.25,6.05
  6 | 8.5,7.67202,0.0,18.1,0.0,0.693,5.747,98.9,1.6334,24.0,666.0,20.2,393.1,19.92
  7 | 44.8,0.31533,0.0,6.2,0.0,0.504,8.266,78.3,2.8944,8.0,307.0,17.4,385.05,4.14
  8 | 25.0,0.02875,28.0,15.04,0.0,0.464,6.211,28.9,3.6659,4.0,270.0,18.2,396.33,6.21
  9 | 10.5,22.0511,0.0,18.1,0.0,0.74,5.818,92.4,1.8662,24.0,666.0,20.2,391.45,22.11
 10 | 35.4,0.01311,90.0,1.22,0.0,0.403,7.249,21.9,8.6966,5.0,226.0,17.9,395.93,4.81
 11 | 30.1,0.6147,0.0,6.2,0.0,0.507,6.618,80.8,3.2721,8.0,307.0,17.4,396.9,7.6
 12 | 8.8,20.0849,0.0,18.1,0.0,0.7,4.368,91.2,1.4395,24.0,666.0,20.2,285.83,30.63
 13 | 37.6,0.38214,0.0,6.2,0.0,0.504,8.04,86.5,3.2157,8.0,307.0,17.4,387.38,3.13
 14 | 19.4,0.03466,35.0,6.06,0.0,0.4379,6.031,23.3,6.6407,1.0,304.0,16.9,362.25,7.83
 15 | 24.0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
 16 | 32.7,0.01301,35.0,1.52,0.0,0.442,7.241,49.3,7.0379,1.0,284.0,15.5,394.74,5.49
 17 | 17.2,0.06162,0.0,4.39,0.0,0.442,5.898,52.3,8.0136,3.0,352.0,18.8,364.61,12.67
 18 | 29.8,4.64689,0.0,18.1,0.0,0.614,6.98,67.6,2.5329,24.0,666.0,20.2,374.68,11.66
 19 | 30.1,0.65665,20.0,3.97,0.0,0.647,6.842,100.0,2.0107,5.0,264.0,13.0,391.93,6.9
 20 | 24.1,0.0795,60.0,1.69,0.0,0.411,6.579,35.9,10.7103,4.0,411.0,18.3,370.78,5.49
 21 | 14.3,0.88125,0.0,21.89,0.0,0.624,5.637,94.7,1.9799,4.0,437.0,21.2,396.9,18.34
 22 | 8.5,41.5292,0.0,18.1,0.0,0.693,5.531,85.4,1.6074,24.0,666.0,20.2,329.46,27.38
 23 | 27.1,0.05372,0.0,13.92,0.0,0.437,6.549,51.0,5.9604,4.0,289.0,16.0,392.85,7.39
 24 | 12.8,9.39063,0.0,18.1,0.0,0.74,5.627,93.9,1.8172,24.0,666.0,20.2,396.9,22.88
 25 | 17.0,1.41385,0.0,19.58,1.0,0.871,6.129,96.0,1.7494,5.0,403.0,14.7,321.02,15.12
 26 | 24.8,0.04297,52.5,5.32,0.0,0.405,6.565,22.9,7.3172,6.0,293.0,16.6,371.72,9.51
 27 | 18.5,0.03041,0.0,5.19,0.0,0.515,5.895,59.6,5.615,5.0,224.0,20.2,394.81,10.56
 28 | 17.6,0.20608,22.0,5.86,0.0,0.431,5.593,76.5,7.9549,7.0,330.0,19.1,372.49,12.5
 29 | 10.5,24.3938,0.0,18.1,0.0,0.7,4.652,100.0,1.4672,24.0,666.0,20.2,396.9,28.28
 30 | 9.7,11.5779,0.0,18.1,0.0,0.7,5.036,97.0,1.77,24.0,666.0,20.2,396.9,25.68
 31 | 14.5,8.49213,0.0,18.1,0.0,0.584,6.348,86.1,2.0527,24.0,666.0,20.2,83.45,17.64
 32 | 23.3,0.09252,30.0,4.93,0.0,0.428,6.606,42.2,6.1899,6.0,300.0,16.6,383.78,7.37
 33 | 25.0,2.924,0.0,19.58,0.0,0.605,6.101,93.0,2.2834,5.0,403.0,14.7,240.16,9.81
 34 | 10.4,88.9762,0.0,18.1,0.0,0.671,6.968,91.9,1.4165,24.0,666.0,20.2,396.9,17.21
 35 | 20.5,0.04337,21.0,5.64,0.0,0.439,6.115,63.0,6.8147,4.0,243.0,16.8,393.97,9.43
 36 | 21.4,0.11504,0.0,2.89,0.0,0.445,6.163,69.6,3.4952,2.0,276.0,18.0,391.83,11.34
 37 | 34.9,0.03359,75.0,2.95,0.0,0.428,7.024,15.8,5.4011,3.0,252.0,18.3,395.62,1.98
 38 | 16.2,0.25915,0.0,21.89,0.0,0.624,5.693,96.0,1.7883,4.0,437.0,21.2,392.11,17.19
 39 | 21.9,0.04819,80.0,3.64,0.0,0.392,6.108,32.0,9.2203,1.0,315.0,16.4,392.89,6.57
 40 | 17.1,9.72418,0.0,18.1,0.0,0.74,6.406,97.2,2.0651,24.0,666.0,20.2,385.96,19.52
 41 | 17.2,14.0507,0.0,18.1,0.0,0.597,6.657,100.0,1.5275,24.0,666.0,20.2,35.05,21.22
 42 | 19.0,0.05497,0.0,5.19,0.0,0.515,5.985,45.4,4.8122,5.0,224.0,20.2,396.9,9.74
 43 | 42.3,0.02177,82.5,2.03,0.0,0.415,7.61,15.7,6.27,2.0,348.0,14.7,395.38,3.11
 44 | 27.5,0.14866,0.0,8.56,0.0,0.52,6.727,79.9,2.7778,5.0,384.0,20.9,394.76,9.42
 45 | 24.4,0.22969,0.0,10.59,0.0,0.489,6.326,52.5,4.3549,4.0,277.0,18.6,394.87,10.97
 46 | 11.7,8.79212,0.0,18.1,0.0,0.584,5.565,70.6,2.0635,24.0,666.0,20.2,3.65,17.16
 47 | 33.4,0.07503,33.0,2.18,0.0,0.472,7.42,71.9,3.0992,7.0,222.0,18.4,396.9,6.47
 48 | 21.0,1.00245,0.0,8.14,0.0,0.538,6.674,87.3,4.239,4.0,307.0,21.0,380.23,11.98
 49 | 15.2,5.44114,0.0,18.1,0.0,0.713,6.655,98.2,2.3552,24.0,666.0,20.2,355.29,17.73
 50 | 46.7,0.29819,0.0,6.2,0.0,0.504,7.686,17.0,3.3751,8.0,307.0,17.4,377.51,3.92
 51 | 13.4,3.32105,0.0,19.58,1.0,0.871,5.403,100.0,1.3216,5.0,403.0,14.7,396.9,26.82
 52 | 5.0,38.3518,0.0,18.1,0.0,0.693,5.453,100.0,1.4896,24.0,666.0,20.2,396.9,30.59
 53 | 22.2,0.07151,0.0,4.49,0.0,0.449,6.121,56.8,3.7476,3.0,247.0,18.5,395.15,8.44
 54 | 20.4,0.35114,0.0,7.38,0.0,0.493,6.041,49.9,4.7211,5.0,287.0,19.6,396.9,7.7
 55 | 13.8,8.05579,0.0,18.1,0.0,0.584,5.427,95.4,2.4298,24.0,666.0,20.2,352.58,18.14
 56 | 23.8,0.1676,0.0,7.38,0.0,0.493,6.426,52.3,4.5404,5.0,287.0,19.6,396.9,7.2
 57 | 28.7,0.02985,0.0,2.18,0.0,0.458,6.43,58.7,6.0622,3.0,222.0,18.7,394.12,5.21
 58 | 17.5,0.17783,0.0,9.69,0.0,0.585,5.569,73.5,2.3999,6.0,391.0,19.2,395.77,15.1
 59 | 22.8,0.49298,0.0,9.9,0.0,0.544,6.635,82.5,3.3175,4.0,304.0,18.4,396.9,4.54
 60 | 27.9,0.03615,80.0,4.95,0.0,0.411,6.63,23.4,5.1167,4.0,245.0,19.2,396.9,4.7
 61 | 24.8,0.21409,22.0,5.86,0.0,0.431,6.438,8.9,7.3967,7.0,330.0,19.1,377.07,3.59
 62 | 8.4,13.6781,0.0,18.1,0.0,0.74,5.935,87.9,1.8206,24.0,666.0,20.2,68.95,34.02
 63 | 23.6,0.09178,0.0,4.05,0.0,0.51,6.416,84.1,2.6463,5.0,296.0,16.6,395.5,9.04
 64 | 14.6,2.36862,0.0,19.58,0.0,0.871,4.926,95.7,1.4608,5.0,403.0,14.7,391.71,29.53
 65 | 20.0,0.43571,0.0,10.59,1.0,0.489,5.344,100.0,3.875,4.0,277.0,18.6,396.9,23.09
 66 | 24.1,0.03445,82.5,2.03,0.0,0.415,6.162,38.4,6.27,2.0,348.0,14.7,393.77,7.43
 67 | 24.8,0.03659,25.0,4.86,0.0,0.426,6.302,32.2,5.4007,4.0,281.0,19.0,396.9,6.72
 68 | 14.9,6.28807,0.0,18.1,0.0,0.74,6.341,96.4,2.072,24.0,666.0,20.2,318.01,17.79
 69 | 15.2,0.15086,0.0,27.74,0.0,0.609,5.454,92.7,1.8209,4.0,711.0,20.1,395.09,18.06
 70 | 48.3,0.33147,0.0,6.2,0.0,0.507,8.247,70.4,3.6519,8.0,307.0,17.4,378.95,3.95
 71 | 50.0,1.51902,0.0,19.58,1.0,0.605,8.375,93.9,2.162,5.0,403.0,14.7,388.45,3.32
 72 | 18.9,0.06417,0.0,5.96,0.0,0.499,5.933,68.2,3.3603,5.0,279.0,19.2,396.9,9.68
 73 | 19.8,0.12802,0.0,8.56,0.0,0.52,6.474,97.1,2.4329,5.0,384.0,20.9,395.24,12.27
 74 | 30.3,0.04666,80.0,1.52,0.0,0.404,7.107,36.6,7.309,2.0,329.0,12.6,354.31,8.61
 75 | 21.4,0.16902,0.0,25.65,0.0,0.581,5.986,88.4,1.9929,2.0,188.0,19.1,385.02,14.81
 76 | 21.7,0.40771,0.0,6.2,1.0,0.507,6.164,91.3,3.048,8.0,307.0,17.4,395.24,21.46
 77 | 28.5,0.03502,80.0,4.95,0.0,0.411,6.861,27.9,5.1167,4.0,245.0,19.2,396.9,3.33
 78 | 18.9,0.0136,75.0,4.0,0.0,0.41,5.888,47.6,7.3197,3.0,469.0,21.1,396.9,14.8
 79 | 24.6,0.05425,0.0,4.05,0.0,0.51,6.315,73.4,3.3175,5.0,296.0,16.6,395.6,6.29
 80 | 17.7,3.69311,0.0,18.1,0.0,0.713,6.376,88.4,2.5671,24.0,666.0,20.2,391.43,14.65
 81 | 22.8,0.09164,0.0,10.81,0.0,0.413,6.065,7.8,5.2873,4.0,305.0,19.2,390.91,5.52
 82 | 22.6,4.26131,0.0,18.1,0.0,0.77,6.112,81.3,2.5091,24.0,666.0,20.2,390.74,12.67
 83 | 16.4,4.81213,0.0,18.1,0.0,0.713,6.701,90.0,2.5975,24.0,666.0,20.2,255.23,16.42
 84 | 50.0,5.66998,0.0,18.1,1.0,0.631,6.683,96.8,1.3567,24.0,666.0,20.2,375.33,3.73
 85 | 42.8,0.36894,22.0,5.86,0.0,0.431,8.259,8.4,8.9067,7.0,330.0,19.1,396.9,3.54
 86 | 15.6,0.75026,0.0,8.14,0.0,0.538,5.924,94.1,4.3996,4.0,307.0,21.0,394.33,16.3
 87 | 22.3,2.44953,0.0,19.58,0.0,0.605,6.402,95.2,2.2625,5.0,403.0,14.7,330.04,11.32
 88 | 18.7,0.14932,25.0,5.13,0.0,0.453,5.741,66.2,7.2254,8.0,284.0,19.7,395.11,13.15
 89 | 17.8,2.33099,0.0,19.58,0.0,0.871,5.186,93.8,1.5296,5.0,403.0,14.7,356.99,28.32
 90 | 24.3,0.33983,22.0,5.86,0.0,0.431,6.108,34.9,8.0555,7.0,330.0,19.1,390.18,9.16
 91 | 22.0,0.11329,30.0,4.93,0.0,0.428,6.897,54.3,6.3361,6.0,300.0,16.6,391.25,11.38
 92 | 17.3,0.15038,0.0,25.65,0.0,0.581,5.856,97.0,1.9444,2.0,188.0,19.1,370.31,25.41
 93 | 36.5,0.55007,20.0,3.97,0.0,0.647,7.206,91.6,1.9301,5.0,264.0,13.0,387.89,8.1
 94 | 30.7,0.7857,20.0,3.97,0.0,0.647,7.014,84.6,2.1329,5.0,264.0,13.0,384.07,14.79
 95 | 11.9,20.7162,0.0,18.1,0.0,0.659,4.138,100.0,1.1781,24.0,666.0,20.2,370.22,23.34
 96 | 12.3,7.99248,0.0,18.1,0.0,0.7,5.52,100.0,1.5331,24.0,666.0,20.2,396.9,24.56
 97 | 23.5,0.03584,80.0,3.37,0.0,0.398,6.29,17.8,6.6115,4.0,337.0,16.1,396.9,4.67
 98 | 5.6,25.0461,0.0,18.1,0.0,0.693,5.987,100.0,1.5888,24.0,666.0,20.2,396.9,26.77
 99 | 13.6,1.25179,0.0,8.14,0.0,0.538,5.57,98.1,3.7979,4.0,307.0,21.0,376.57,21.02
100 | 21.5,1.6566,0.0,19.58,0.0,0.871,6.122,97.3,1.618,5.0,403.0,14.7,372.8,14.1
101 | 23.2,3.56868,0.0,18.1,0.0,0.58,6.437,75.0,2.8965,24.0,666.0,20.2,393.37,14.36
102 | 14.3,5.58107,0.0,18.1,0.0,0.713,6.436,87.9,2.3158,24.0,666.0,20.2,100.19,16.22
103 | 19.8,0.24522,0.0,9.9,0.0,0.544,5.782,71.7,4.0317,4.0,304.0,18.4,396.9,15.94
104 | 18.9,0.17004,12.5,7.87,0.0,0.524,6.004,85.9,6.5921,5.0,311.0,15.2,386.71,17.1
105 | 14.8,0.95577,0.0,8.14,0.0,0.538,6.047,88.8,4.4534,4.0,307.0,21.0,306.38,17.28
106 | 19.5,0.13262,0.0,8.56,0.0,0.52,5.851,96.7,2.1069,5.0,384.0,20.9,394.05,16.47
107 | 15.6,0.97617,0.0,21.89,0.0,0.624,5.757,98.4,2.346,4.0,437.0,21.2,262.76,17.31
108 | 50.0,0.57834,20.0,3.97,0.0,0.575,8.297,67.0,2.4216,5.0,264.0,13.0,384.54,7.44
109 | 14.1,10.0623,0.0,18.1,0.0,0.584,6.833,94.3,2.0882,24.0,666.0,20.2,81.33,19.69
110 | 22.0,0.03537,34.0,6.09,0.0,0.433,6.59,40.4,5.4917,7.0,329.0,16.1,395.75,9.5
111 | 17.8,8.24809,0.0,18.1,0.0,0.713,7.393,99.3,2.4527,24.0,666.0,20.2,375.87,16.74
112 | 26.6,0.05735,0.0,4.49,0.0,0.449,6.63,56.1,4.4377,3.0,247.0,18.5,392.3,6.53
113 | 


--------------------------------------------------------------------------------
/lesson2/reviews_Musical_Instruments_5.json.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/udacity/udacity-nd009t-C2-Developing-ML-Workflow/d1928db95b27ca4dec5b73f460a357d9cdcea9d7/lesson2/reviews_Musical_Instruments_5.json.zip


--------------------------------------------------------------------------------
/lesson3/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/udacity/udacity-nd009t-C2-Developing-ML-Workflow/d1928db95b27ca4dec5b73f460a357d9cdcea9d7/lesson3/.DS_Store


--------------------------------------------------------------------------------
/lesson3/HelloBlazePreprocess.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import json
 3 | import zipfile
 4 | 
 5 | # Function below unzips the archive to the local directory. 
 6 | 
 7 | def unzip_data(input_data_path):
 8 |     with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:
 9 |         input_data_zip.extractall('.')
10 |         return input_data_zip.namelist()[0]
11 | 
12 | # Input data is a file with a single JSON object per line with the following format: 
13 | # {
14 | #  "reviewerID": <string>,
15 | #  "asin": <string>,
16 | #  "reviewerName" <string>,
17 | #  "helpful": [
18 | #    <int>, (indicating number of "helpful votes")
19 | #    <int>  (indicating total number of votes)
20 | #  ],
21 | #  "reviewText": "<string>",
22 | #  "overall": <int>,
23 | #  "summary": "<string>",
24 | #  "unixReviewTime": <int>,
25 | #  "reviewTime": "<string>"
26 | # }
27 | # 
28 | # We are specifically interested in the fields "helpful" and "reviewText"
29 | #
30 | 
31 | def label_data(input_data):
32 |     labeled_data = []
33 |     HELPFUL_LABEL = "__label__1"
34 |     UNHELPFUL_LABEL = "__label__2"
35 |      
36 |     for l in open(input_data, 'r'):
37 |         l_object = json.loads(l)
38 |         helpful_votes = float(l_object['helpful'][0])
39 |         total_votes = l_object['helpful'][1]
40 |         reviewText = l_object['reviewText']
41 |         if total_votes != 0:
42 |             if helpful_votes / total_votes > .5:
43 |                 labeled_data.append(" ".join([HELPFUL_LABEL, reviewText]))
44 |             elif helpful_votes / total_votes < .5:
45 |                 labeled_data.append(" ".join([UNHELPFUL_LABEL, reviewText]))
46 |           
47 |     return labeled_data
48 | 
49 | # Labeled data is a list of sentences, starting with the label defined in label_data. 
50 | 
51 | def split_sentences(labeled_data):
52 |     new_split_sentences = []
53 |     for d in labeled_data:
54 |         label = d.split()[0]        
55 |         sentences = " ".join(d.split()[1:]).split(".") # Initially split to separate label, then separate sentences
56 |         for s in sentences:
57 |             if s: # Make sure sentences isn't empty. Common w/ "..."
58 |                 new_split_sentences.append(" ".join([label, s]))
59 |     return new_split_sentences
60 | 
61 | def write_data(data, train_path, test_path, proportion):
62 |     border_index = int(proportion * len(data))
63 |     train_f = open(train_path, 'w')
64 |     test_f = open(test_path, 'w')
65 |     index = 0
66 |     for d in data:
67 |         if index < border_index:
68 |             train_f.write(d + '\n')
69 |         else:
70 |             test_f.write(d + '\n')
71 |         index += 1
72 | 
73 | if __name__ == "__main__":
74 |     unzipped_path = unzip_data('/opt/ml/processing/input/reviews_Musical_Instruments_5.json.zip')
75 |     labeled_data = label_data(unzipped_path)
76 |     new_split_sentence_data = split_sentences(labeled_data)
77 |     write_data(new_split_sentence_data, '/opt/ml/processing/output/train/hello_blaze_train_scikit', '/opt/ml/processing/output/test/hello_blaze_test_scikit', .9)
78 | 


--------------------------------------------------------------------------------
/lesson3/HelloBlazePreprocessLambda.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import json
  3 | import zipfile
  4 | import os
  5 | import boto3
  6 | from botocore.exceptions import ClientError
  7 | 
  8 | BUCKET_NAME = 'FILL_THIS_IN'
  9 | PREFIX = 'FILL_THIS_IN'
 10 | 
 11 | # Function below unzips the archive to the local directory. 
 12 | 
 13 | def unzip_data(input_data_path):
 14 |     with zipfile.ZipFile(input_data_path, 'r') as input_data_zip:
 15 |         input_data_zip.extractall('/tmp/')
 16 |         return '/tmp/' + input_data_zip.namelist()[0]
 17 | 
 18 | # Input data is a file with a single JSON object per line with the following format: 
 19 | # {
 20 | #  "reviewerID": <string>,
 21 | #  "asin": <string>,
 22 | #  "reviewerName" <string>,
 23 | #  "helpful": [
 24 | #    <int>, (indicating number of "helpful votes")
 25 | #    <int>  (indicating total number of votes)
 26 | #  ],
 27 | #  "reviewText": "<string>",
 28 | #  "overall": <int>,
 29 | #  "summary": "<string>",
 30 | #  "unixReviewTime": <int>,
 31 | #  "reviewTime": "<string>"
 32 | # }
 33 | # 
 34 | # We are specifically interested in the fields "helpful" and "reviewText"
 35 | #
 36 | 
 37 | def label_data(input_data):
 38 |     labeled_data = []
 39 |     HELPFUL_LABEL = "__label__1"
 40 |     UNHELPFUL_LABEL = "__label__2"
 41 |      
 42 |     for l in open(input_data, 'r'):
 43 |         l_object = json.loads(l)
 44 |         helpful_votes = float(l_object['helpful'][0])
 45 |         total_votes = l_object['helpful'][1]
 46 |         reviewText = l_object['reviewText']
 47 |         if total_votes != 0:
 48 |             if helpful_votes / total_votes > .5:
 49 |                 labeled_data.append(" ".join([HELPFUL_LABEL, reviewText]))
 50 |             elif helpful_votes / total_votes < .5:
 51 |                 labeled_data.append(" ".join([UNHELPFUL_LABEL, reviewText]))
 52 |           
 53 |     return labeled_data
 54 | 
 55 | 
 56 | # Labeled data is a list of sentences, starting with the label defined in label_data. 
 57 | 
 58 | def split_sentences(labeled_data):
 59 |     new_split_sentences = []
 60 |     for d in labeled_data:
 61 |         label = d.split()[0]        
 62 |         sentences = " ".join(d.split()[1:]).split(".") # Initially split to separate label, then separate sentences
 63 |         for s in sentences:
 64 |             if s: # Make sure sentences isn't empty. Common w/ "..."
 65 |                 new_split_sentences.append(" ".join([label, s]))
 66 |     return new_split_sentences
 67 | 
 68 | def upload_data(file_name):
 69 |     object_name = os.path.join(PREFIX, os.path.basename(file_name))
 70 |     s3_client = boto3.client('s3')
 71 |     try:
 72 |         response = s3_client.upload_file(file_name, BUCKET_NAME, object_name)
 73 |     except ClientError as e:
 74 |         logging.error(e)
 75 |         return False
 76 | 
 77 | def write_data(data, b_name, proportion):
 78 |     train_path = '/tmp/' + b_name + '_train'
 79 |     test_path = '/tmp/' +  b_name + '_test'
 80 |     border_index = int(proportion * len(data))
 81 |     train_f = open(train_path, 'w')
 82 |     test_f = open(test_path, 'w')
 83 |     index = 0
 84 |     for d in data:
 85 |         if index < border_index:
 86 |             train_f.write(d + '\n')
 87 |         else:
 88 |             test_f.write(d + '\n')
 89 |         index += 1
 90 |     train_f.close()
 91 |     test_f.close()
 92 |     upload_data(train_path)
 93 |     upload_data(test_path)
 94 | 
 95 | def download_data(s3_input_uri):
 96 |     s3 = boto3.client('s3')
 97 |     input_bucket = s3_input_uri.split('/')[0]
 98 |     input_object = '/'.join(s3_input_uri.split('/')[1:])
 99 |     file_name = '/tmp/' + os.path.basename(input_object)
100 |     s3.download_file(input_bucket, input_object, file_name)
101 |     return file_name
102 |         
103 | def preprocess(s3_input_uri):
104 |     f_name = download_data(s3_input_uri)
105 |     unzipped_path = unzip_data(f_name)
106 |     labeled_data = label_data(unzipped_path)
107 |     new_split_sentence_data = split_sentences(labeled_data)
108 |     write_data(new_split_sentence_data, os.path.basename(s3_input_uri), .9)
109 | 
110 | 


--------------------------------------------------------------------------------
/lesson3/Lesson 3, Exercise 1 - Lambda Solution.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "9213d171",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "## Exercise Solution: Example Lambda Test Event"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": 1,
14 |    "id": "9efff689",
15 |    "metadata": {},
16 |    "outputs": [
17 |     {
18 |      "data": {
19 |       "text/plain": [
20 |        "{'s3-dataset-uri': 'udacity-sagemaker-solutiondata2021/l3e1/reviews_Musical_Instruments_5.json.zip'}"
21 |       ]
22 |      },
23 |      "execution_count": 1,
24 |      "metadata": {},
25 |      "output_type": "execute_result"
26 |     }
27 |    ],
28 |    "source": [
29 |     "{\n",
30 |     "  \"s3-dataset-uri\": \"udacity-sagemaker-solutiondata2021/l3e1/reviews_Musical_Instruments_5.json.zip\"\n",
31 |     "}"
32 |    ]
33 |   },
34 |   {
35 |    "cell_type": "markdown",
36 |    "id": "600085e2",
37 |    "metadata": {},
38 |    "source": [
39 |     "## Exercise Solution: Example Lambda Handler"
40 |    ]
41 |   },
42 |   {
43 |    "cell_type": "code",
44 |    "execution_count": 3,
45 |    "id": "010950e0",
46 |    "metadata": {},
47 |    "outputs": [],
48 |    "source": [
49 |     "from HelloBlazePreprocessLambda import preprocess\n",
50 |     "\n",
51 |     "def lambda_handler(event, context):\n",
52 |     "    preprocess(event['s3-dataset-uri'])\n",
53 |     "    return {\n",
54 |     "        'statusCode': 200,\n",
55 |     "        'body': \"Good to go!\"\n",
56 |     "    }"
57 |    ]
58 |   },
59 |   {
60 |    "cell_type": "code",
61 |    "execution_count": null,
62 |    "id": "6da06b33",
63 |    "metadata": {},
64 |    "outputs": [],
65 |    "source": []
66 |   }
67 |  ],
68 |  "metadata": {
69 |   "kernelspec": {
70 |    "display_name": "Python 3 (ipykernel)",
71 |    "language": "python",
72 |    "name": "python3"
73 |   },
74 |   "language_info": {
75 |    "codemirror_mode": {
76 |     "name": "ipython",
77 |     "version": 3
78 |    },
79 |    "file_extension": ".py",
80 |    "mimetype": "text/x-python",
81 |    "name": "python",
82 |    "nbconvert_exporter": "python",
83 |    "pygments_lexer": "ipython3",
84 |    "version": "3.8.0"
85 |   }
86 |  },
87 |  "nbformat": 4,
88 |  "nbformat_minor": 5
89 | }
90 | 


--------------------------------------------------------------------------------
/lesson3/Lesson 3, Exercise 2 - Invoking Lambda Functions Solution.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "5cc872eb",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "# UDACITY : Designing Your First Workflow - Invoking Lambda Functions Solution"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "markdown",
13 |    "id": "be21ec8c",
14 |    "metadata": {},
15 |    "source": [
16 |     "## Synchronous invocation Solution"
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "code",
21 |    "execution_count": null,
22 |    "id": "31159809",
23 |    "metadata": {},
24 |    "outputs": [],
25 |    "source": [
26 |     "%%bash \n",
27 |     "aws lambda invoke --function-name preprocess-helloblze --payload '{\"s3-dataset-uri\": \"udacity-sagemaker-solutiondata2021/l3e1/reviews_Musical_Instruments_5.json.zip\"}' response.json"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "markdown",
32 |    "id": "f98ff13e",
33 |    "metadata": {},
34 |    "source": [
35 |     "## Asynchronous invocation Solution"
36 |    ]
37 |   },
38 |   {
39 |    "cell_type": "code",
40 |    "execution_count": null,
41 |    "id": "0a0018ce",
42 |    "metadata": {},
43 |    "outputs": [],
44 |    "source": [
45 |     "import json\n",
46 |     "import urllib\n",
47 |     "from HelloBlazePreprocessLambda import preprocess\n",
48 |     "\n",
49 |     "def lambda_handler(event, context):\n",
50 |     "    for r in event['Records']:\n",
51 |     "        bucket = r['s3']['bucket']['name']\n",
52 |     "        key = urllib.parse.unquote_plus(r['s3']['object']['key'], encoding='utf-8')\n",
53 |     "        uri = \"/\".join([bucket, key])\n",
54 |     "        preprocess(uri)\n",
55 |     "    return {\n",
56 |     "        'statusCode': 200,\n",
57 |     "        'body': \"Good to go!\"\n",
58 |     "    }"
59 |    ]
60 |   },
61 |   {
62 |    "cell_type": "code",
63 |    "execution_count": null,
64 |    "id": "5dcdb0dd",
65 |    "metadata": {},
66 |    "outputs": [],
67 |    "source": []
68 |   }
69 |  ],
70 |  "metadata": {
71 |   "kernelspec": {
72 |    "display_name": "Python 3 (ipykernel)",
73 |    "language": "python",
74 |    "name": "python3"
75 |   },
76 |   "language_info": {
77 |    "codemirror_mode": {
78 |     "name": "ipython",
79 |     "version": 3
80 |    },
81 |    "file_extension": ".py",
82 |    "mimetype": "text/x-python",
83 |    "name": "python",
84 |    "nbconvert_exporter": "python",
85 |    "pygments_lexer": "ipython3",
86 |    "version": "3.8.0"
87 |   }
88 |  },
89 |  "nbformat": 4,
90 |  "nbformat_minor": 5
91 | }
92 | 


--------------------------------------------------------------------------------
/lesson3/Lesson 3, Exercise 2 - Invoking Lambda Functions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "98c45b44",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# UDACITY : Designing Your First Workflow - Invoking Lambda Functions"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "f1639b0d",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "In the last exercise, you created your own Lambda function. Without realizing it, you've already practiced invoking as well! Launching a test event is an example of synchronous invocation. In this exercise, you will continue working on the lambda function 'PreprocessLambda' from the previous exercise. However, you'll practice a different way to launch asynchronous invocation, and also practice the setup of an asynchronous invocation.\n",
 17 |     "These are only two examples. Lambda is one of the most flexible offerings in AWS and can be utilized in a variety of applications. The same Lambda function can be (and often is) both invoked synchronously and asynchronously."
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "markdown",
 22 |    "id": "7a766cf8",
 23 |    "metadata": {},
 24 |    "source": [
 25 |     "## Exercise: Synchronous invocation "
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "id": "fd13ac5d",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "Synchronous invocations occur when a call is made to a Lambda function and a response is waited for. The example we're asking you to implement is a CLI invocation, but Lambda functions can also be placed behind Amazon's API Gateway for potential users to directly invoke a Lambda function. This, in turn, could be the interface that you expose to users to allow them to interact with other AWS resources. These types of invocations are great for \"get\" and \"set\" methods."
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "id": "5737584f",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "Your task is to synchronously invoke the Lambda function you implemented in the last exercise using the CLI. The following documentation may be useful to you: https://docs.aws.amazon.com/lambda/latest/dg/invocation-sync.html \n",
 42 |     "\n",
 43 |     "You will need to attach the LambdaFullAccess policy to the SageMaker execution role used for your notebook. Once done, it will take a few minutes for the policy to register.  "
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "id": "335d9982",
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "%%bash \n",
 54 |     "echo \"Example CLI Command.\""
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "id": "3ec33b50",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "## Exercise: Asynchronous invocation "
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "id": "748515a8",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "Asynchronous invocations occur when a service invokes lambda but does not wait for a response. The two most popular services that utilize asynchronous invocations are S3 (the storage we've been using) and SNS (Simple Notification Service.) We'll be setting up asynchronous invocations on an S3 bucket for our preprocessing function."
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "id": "c0764490",
 76 |    "metadata": {},
 77 |    "source": [
 78 |     "Your task is to setup a trigger for the Lambda function we've been working  whenever a file is uploaded to a specific folder in S3. You will need to do the following:\n",
 79 |     "\n",
 80 |     "* Create a new s3 folder within an existing bucket. \n",
 81 |     "* Create a new lambda trigger for S3 by clicking '+Add trigger'. Specifying the bucket. Specify a prefix of the desired folder. Specify a suffix of \".zip\" to ensure that recursive calls don't occur. \n",
 82 |     "* Modify the lambda handler in the previous exercise using the starter code so that it properly parses the event that's sent to it. \n",
 83 |     "\n",
 84 |     "To test, upload reviews_Patio_Lawn_and_Garden_5.json.zip in this directory to your S3 bucket. \n",
 85 |     "To see if the lambda function is triggered, you can go to the Monitor tab. \n",
 86 |     "\n"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "id": "edbd4522",
 92 |    "metadata": {},
 93 |    "source": [
 94 |     "## Lambda Handler Starter Code: Parsing S3 Upload Event. "
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": null,
100 |    "id": "5c3d3277",
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "# Todo: write a lambda_handler function here.\n",
105 |     "# The code to parse S3 event has provided to you, you only need to call the `preprocess` from the HelloBlazePreprocessLambda.py and return the status.\n",
106 |     "import json\n",
107 |     "import urllib\n",
108 |     "\n",
109 |     "for r in event['Records']:\n",
110 |     "        bucket = r['s3']['bucket']['name']\n",
111 |     "        key = urllib.parse.unquote_plus(r['s3']['object']['key'], encoding='utf-8')\n",
112 |     "        uri = \"/\".join([bucket, key])"
113 |    ]
114 |   }
115 |  ],
116 |  "metadata": {
117 |   "kernelspec": {
118 |    "display_name": "Python 3 (ipykernel)",
119 |    "language": "python",
120 |    "name": "python3"
121 |   },
122 |   "language_info": {
123 |    "codemirror_mode": {
124 |     "name": "ipython",
125 |     "version": 3
126 |    },
127 |    "file_extension": ".py",
128 |    "mimetype": "text/x-python",
129 |    "name": "python",
130 |    "nbconvert_exporter": "python",
131 |    "pygments_lexer": "ipython3",
132 |    "version": "3.8.0"
133 |   }
134 |  },
135 |  "nbformat": 4,
136 |  "nbformat_minor": 5
137 | }
138 | 


--------------------------------------------------------------------------------
/lesson3/Lesson 3, Exercise 3 - Creating Workflows with Step Functions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "c5dc198f",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# UDACITY Designing Your First Workflow - Step Functions"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "a00c09c5",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "## Step Functions & SageMaker"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "id": "2c2a5b51",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "In the prior exercises, we've been working with many small services. This can be overwhelming for a data scientist that wants to establish a consistent methodology for handling data. Step Functions is an orchestration service that can allow us to utilize SageMaker in a methodical and consistent way. Step Functions also integrates with Lambda, which can allow us to potentially automate our entire machine learning pipeline end-to-end. Let's get a handle on what a 'step' in a step function looks like.\n",
 25 |     "\n",
 26 |     "In this exercise, you will create a preprocessing step and a training step. Then you will create a step function to chain the two steps."
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "markdown",
 31 |    "id": "3e5c1c6b",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "## Exercise: Grant Permissions and install packages."
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "id": "1d76a91b",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "Attach the IAMFullAccess and the StepFunctionsFullAccess polices to your SageMaker execution role."
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": null,
 48 |    "id": "981094fe",
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "%%bash\n",
 53 |     "pip install stepfunctions"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "id": "2980c12c",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "## Exercise: Fill out preprocessing step."
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "id": "752d3b79",
 67 |    "metadata": {},
 68 |    "source": [
 69 |     "The 'step' interface is designed to be quite similar to the Preprocessing Job in lesson 2. The main difference between these is the ability of a 'step' to interface with other steps. Given the successful outcome of a single step, the next step specified in a workflow will automatically continue. In our case, a training step will launch given the successful outcome of a preprocessing step. The preprocessing step has been encoded for you. Upload the preprocessing code 'HelloBlazePreprocess.py' and the zipped dataset 'reviews_Musical_Instruments_5.json.zip' to s3, and fill out the constants in the code below. "
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "id": "0df2f1ac",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "Code below is the preprocessing step. Fill in the constants in the code."
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 14,
 83 |    "id": "9d82815d",
 84 |    "metadata": {},
 85 |    "outputs": [
 86 |     {
 87 |      "name": "stdout",
 88 |      "output_type": "stream",
 89 |      "text": [
 90 |       "s3://sagemaker-us-west-2-565094796913/hello_blaze_train_scikit s3://sagemaker-us-west-2-565094796913/hello_blaze_train_scikit\n"
 91 |      ]
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "from sagemaker import get_execution_role\n",
 96 |     "from sagemaker.sklearn.processing import SKLearnProcessor\n",
 97 |     "from sagemaker.processing import ProcessingInput, ProcessingOutput\n",
 98 |     "from stepfunctions.steps.sagemaker import ProcessingStep\n",
 99 |     "import sagemaker\n",
100 |     "\n",
101 |     "role = get_execution_role()\n",
102 |     "\n",
103 |     "PREPROCESSING_JOB_NAME = \"CHANGE THIS\"\n",
104 |     "input_data = 'S3 LOCATION OF reviews_Musical_Instruments_5.json.zip'\n",
105 |     "input_preprocessing_code = 'S3 LOCATION OF HelloBlazePreprocess.py'\n",
106 |     "sess = sagemaker.Session()\n",
107 |     "\n",
108 |     "sklearn_processor = SKLearnProcessor(framework_version='0.20.0',\n",
109 |     "                                     role=role,\n",
110 |     "                                     instance_type='ml.m5.large',\n",
111 |     "                                     instance_count=1)\n",
112 |     "\n",
113 |     "\n",
114 |     "processed_data_train = \"{}{}/{}\".format(\"s3://\", sess.default_bucket(), 'hello_blaze_train_scikit')\n",
115 |     "processed_data_test = \"{}{}/{}\".format(\"s3://\", sess.default_bucket(), 'hello_blaze_test_scikit')\n",
116 |     "\n",
117 |     "inputs=[ProcessingInput(source=input_data, destination='/opt/ml/processing/input', input_name = 'input-1'),  ProcessingInput(source=input_preprocessing_code , destination='/opt/ml/processing/input/code', input_name = 'code')]\n",
118 |     "\n",
119 |     "\n",
120 |     "outputs=[ProcessingOutput(source='/opt/ml/processing/output/train', destination=processed_data_train, output_name = 'train_data'), ProcessingOutput(source='/opt/ml/processing/output/test', destination=processed_data_test, output_name = 'test_data')]\n",
121 |     "\n",
122 |     "\n",
123 |     "processing_step = ProcessingStep(\n",
124 |     "    \"SageMaker pre-processing step 4\",\n",
125 |     "    processor=sklearn_processor,\n",
126 |     "    job_name=PREPROCESSING_JOB_NAME,\n",
127 |     "    inputs=inputs,\n",
128 |     "    outputs=outputs,\n",
129 |     "    container_entrypoint=[\"python3\", \"/opt/ml/processing/input/code/HelloBlazePreprocess.py\"],\n",
130 |     ")\n"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "markdown",
135 |    "id": "5048e4f3",
136 |    "metadata": {},
137 |    "source": [
138 |     "## Exercise: Fill out Training Step"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "id": "a904f213",
144 |    "metadata": {},
145 |    "source": [
146 |     "Upon the success of the preprocessing step, we wish to execute a training step. A training step is defined below. Fill the constants in the code."
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 21,
152 |    "id": "a7b7118c",
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": [
156 |     "from stepfunctions.steps.sagemaker import TrainingStep\n",
157 |     "import boto3\n",
158 |     "\n",
159 |     "WORKFLOW_OUTPUT = \"CHANGE THIS\"\n",
160 |     "TRAINING_JOB_NAME = \"CHANGE THIS\"\n",
161 |     "\n",
162 |     "region_name = boto3.Session().region_name\n",
163 |     "container = sagemaker.image_uris.retrieve(\n",
164 |     "    region=region_name, framework=\"blazingtext\", version=\"latest\"\n",
165 |     ")\n",
166 |     "\n",
167 |     "helloBlazeEstimator = sagemaker.estimator.Estimator(\n",
168 |     "    container,\n",
169 |     "    role=role,\n",
170 |     "    instance_count=1,\n",
171 |     "    instance_type='ml.m5.large',\n",
172 |     "    volume_size=30,\n",
173 |     "    max_run=360000,\n",
174 |     "    input_mode=\"File\",\n",
175 |     "    output_path=WORKFLOW_OUTPUT,\n",
176 |     "    sagemaker_session=sess,\n",
177 |     ")\n",
178 |     "\n",
179 |     "helloBlazeEstimator.set_hyperparameters(mode='supervised')\n",
180 |     "\n",
181 |     "training_step = TrainingStep(\n",
182 |     "    \"SageMaker Training Step\",\n",
183 |     "    estimator=helloBlazeEstimator,\n",
184 |     "    data={\"train\": sagemaker.TrainingInput(processed_data_train, content_type=\"text/plain\"), \"validation\": sagemaker.TrainingInput(processed_data_test, content_type=\"text/plain\")},\n",
185 |     "    job_name=TRAINING_JOB_NAME,\n",
186 |     "    wait_for_completion=True,\n",
187 |     ")"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "id": "672f280c",
193 |    "metadata": {},
194 |    "source": [
195 |     "## Exercise: Create Workflow & Execute It. "
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "markdown",
200 |    "id": "4c549f3b",
201 |    "metadata": {},
202 |    "source": [
203 |     "To link the steps, you'll need to create a role that is capable of doing so. Go to IAM and create a Step Functions role, and attach the CloudWatchEventsFullAccess and SageMakerFullAccess policies. Once done, make use of the above steps to create a workflow. Quick debugging tip: jobs must have a unique name; you'll need to rename job names when debugging. Consider creating a method that will dynamically create unique job names! "
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": null,
209 |    "id": "c4c39201",
210 |    "metadata": {},
211 |    "outputs": [],
212 |    "source": [
213 |     "from stepfunctions.steps import Chain\n",
214 |     "from stepfunctions.workflow import Workflow\n",
215 |     "\n",
216 |     "workflow_role = \"CHANGE THIS\"\n",
217 |     "\n",
218 | 
219 |     "workflow_graph = Chain([processing_step, training_step])\n",
220 | 
221 |     "workflow = Workflow(\n",
222 |     "    name=\"CHANGE THIS\",\n",
223 |     "    definition=workflow_graph,\n",
224 |     "    role=workflow_role,\n",
225 |     ")\n",
226 |     "\n",
227 |     "workflow.create()\n",
228 |     "\n",
229 |     "execution = workflow.execute(\n",
230 |     "    inputs={\n",
231 |     "        \"PreprocessingJobName\": PREPROCESSING_JOB_NAME,  # Each pre processing job (SageMaker processing job) requires a unique name,\n",
232 |     "        \"TrainingJobName\": TRAINING_JOB_NAME  # Each Sagemaker Training job requires a unique name,       \n",
233 |     "    }\n",
234 |     ")\n",
235 |     "\n",
236 |     "execution_output = execution.get_output(wait=True)"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "markdown",
241 |    "id": "d49a3122",
242 |    "metadata": {},
243 |    "source": [
244 |     "You can track the outcome of this workflow through a custom UI that gets generated! Check it out!"
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "code",
249 |    "execution_count": null,
250 |    "id": "c81e08b9",
251 |    "metadata": {},
252 |    "outputs": [],
253 |    "source": [
254 |     "execution.render_progress()"
255 |    ]
256 |   }
257 |  ],
258 |  "metadata": {
259 |   "kernelspec": {
260 |    "display_name": "Python 3 (ipykernel)",
261 |    "language": "python",
262 |    "name": "python3"
263 |   },
264 |   "language_info": {
265 |    "codemirror_mode": {
266 |     "name": "ipython",
267 |     "version": 3
268 |    },
269 |    "file_extension": ".py",
270 |    "mimetype": "text/x-python",
271 |    "name": "python",
272 |    "nbconvert_exporter": "python",
273 |    "pygments_lexer": "ipython3",
274 |    "version": "3.8.0"
275 |   }
276 |  },
277 |  "nbformat": 4,
278 |  "nbformat_minor": 5
279 | }
280 | 


--------------------------------------------------------------------------------
/lesson3/Lesson 3, Exercise 4 - Tying it All Together Solution.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "170c2db8",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# UDACITY Designing Your First Workflow - Tying it All Together Solution"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "0149e226",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "## Exercise: Create the Lambda Function"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": null,
 22 |    "id": "bd081877",
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "import json\n",
 27 |     "import boto3\n",
 28 |     "import time\n",
 29 |     "\n",
 30 |     "client = boto3.client('stepfunctions')\n",
 31 |     "\n",
 32 |     "definition = \"\"\"{\n",
 33 |     "  \"StartAt\": \"SageMaker pre-processing step\",\n",
 34 |     "  \"States\": {\n",
 35 |     "    \"SageMaker pre-processing step\": {\n",
 36 |     "      \"Resource\": \"arn:aws:states:::sagemaker:createProcessingJob.sync\",\n",
 37 |     "      \"Parameters\": {\n",
 38 |     "        \"ProcessingJobName\": \"test-joblambda-preprocess\",\n",
 39 |     "        \"ProcessingInputs\": [\n",
 40 |     "          {\n",
 41 |     "            \"InputName\": \"input-1\",\n",
 42 |     "            \"AppManaged\": false,\n",
 43 |     "            \"S3Input\": {\n",
 44 |     "              \"S3Uri\": \"s3://udacity-sagemaker-solutiondata2021/l3e1/reviews_Musical_Instruments_5.json.zip\",\n",
 45 |     "              \"LocalPath\": \"/opt/ml/processing/input\",\n",
 46 |     "              \"S3DataType\": \"S3Prefix\",\n",
 47 |     "              \"S3InputMode\": \"File\",\n",
 48 |     "              \"S3DataDistributionType\": \"FullyReplicated\",\n",
 49 |     "              \"S3CompressionType\": \"None\"\n",
 50 |     "            }\n",
 51 |     "          },\n",
 52 |     "          {\n",
 53 |     "            \"InputName\": \"code\",\n",
 54 |     "            \"AppManaged\": false,\n",
 55 |     "            \"S3Input\": {\n",
 56 |     "              \"S3Uri\": \"s3://udacity-sagemaker-solutiondata2021/l3e3/HelloBlazePreprocess.py\",\n",
 57 |     "              \"LocalPath\": \"/opt/ml/processing/input/code\",\n",
 58 |     "              \"S3DataType\": \"S3Prefix\",\n",
 59 |     "              \"S3InputMode\": \"File\",\n",
 60 |     "              \"S3DataDistributionType\": \"FullyReplicated\",\n",
 61 |     "              \"S3CompressionType\": \"None\"\n",
 62 |     "            }\n",
 63 |     "          }\n",
 64 |     "        ],\n",
 65 |     "        \"ProcessingOutputConfig\": {\n",
 66 |     "          \"Outputs\": [\n",
 67 |     "            {\n",
 68 |     "              \"OutputName\": \"train_data\",\n",
 69 |     "              \"AppManaged\": false,\n",
 70 |     "              \"S3Output\": {\n",
 71 |     "                \"S3Uri\": \"s3://sagemaker-us-west-2-565094796913/hello_blaze_train_scikit\",\n",
 72 |     "                \"LocalPath\": \"/opt/ml/processing/output/train\",\n",
 73 |     "                \"S3UploadMode\": \"EndOfJob\"\n",
 74 |     "              }\n",
 75 |     "            },\n",
 76 |     "            {\n",
 77 |     "              \"OutputName\": \"test_data\",\n",
 78 |     "              \"AppManaged\": false,\n",
 79 |     "              \"S3Output\": {\n",
 80 |     "                \"S3Uri\": \"s3://sagemaker-us-west-2-565094796913/hello_blaze_test_scikit\",\n",
 81 |     "                \"LocalPath\": \"/opt/ml/processing/output/test\",\n",
 82 |     "                \"S3UploadMode\": \"EndOfJob\"\n",
 83 |     "              }\n",
 84 |     "            }\n",
 85 |     "          ]\n",
 86 |     "        },\n",
 87 |     "        \"AppSpecification\": {\n",
 88 |     "          \"ImageUri\": \"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3\",\n",
 89 |     "          \"ContainerEntrypoint\": [\n",
 90 |     "            \"python3\",\n",
 91 |     "            \"/opt/ml/processing/input/code/HelloBlazePreprocess.py\"\n",
 92 |     "          ]\n",
 93 |     "        },\n",
 94 |     "        \"RoleArn\": \"arn:aws:iam::565094796913:role/execution_role\",\n",
 95 |     "        \"ProcessingResources\": {\n",
 96 |     "          \"ClusterConfig\": {\n",
 97 |     "            \"InstanceCount\": 1,\n",
 98 |     "            \"InstanceType\": \"ml.m5.large\",\n",
 99 |     "            \"VolumeSizeInGB\": 30\n",
100 |     "          }\n",
101 |     "        }\n",
102 |     "      },\n",
103 |     "      \"Type\": \"Task\",\n",
104 |     "      \"Next\": \"SageMaker Training Step\"\n",
105 |     "    },\n",
106 |     "    \"SageMaker Training Step\": {\n",
107 |     "      \"Resource\": \"arn:aws:states:::sagemaker:createTrainingJob.sync\",\n",
108 |     "      \"Parameters\": {\n",
109 |     "        \"AlgorithmSpecification\": {\n",
110 |     "          \"TrainingImage\": \"433757028032.dkr.ecr.us-west-2.amazonaws.com/blazingtext:1\",\n",
111 |     "          \"TrainingInputMode\": \"File\"\n",
112 |     "        },\n",
113 |     "        \"OutputDataConfig\": {\n",
114 |     "          \"S3OutputPath\": \"s3://udacity-sagemaker-solutiondata2021/l3e3/workflow_output\"\n",
115 |     "        },\n",
116 |     "        \"StoppingCondition\": {\n",
117 |     "          \"MaxRuntimeInSeconds\": 360000\n",
118 |     "        },\n",
119 |     "        \"ResourceConfig\": {\n",
120 |     "          \"InstanceCount\": 1,\n",
121 |     "          \"InstanceType\": \"ml.m5.large\",\n",
122 |     "          \"VolumeSizeInGB\": 30\n",
123 |     "        },\n",
124 |     "        \"RoleArn\": \"arn:aws:iam::565094796913:role/execution_role\",\n",
125 |     "        \"InputDataConfig\": [\n",
126 |     "          {\n",
127 |     "            \"DataSource\": {\n",
128 |     "              \"S3DataSource\": {\n",
129 |     "                \"S3DataType\": \"S3Prefix\",\n",
130 |     "                \"S3Uri\": \"s3://sagemaker-us-west-2-565094796913/hello_blaze_train_scikit\",\n",
131 |     "                \"S3DataDistributionType\": \"FullyReplicated\"\n",
132 |     "              }\n",
133 |     "            },\n",
134 |     "            \"ContentType\": \"text/plain\",\n",
135 |     "            \"ChannelName\": \"train\"\n",
136 |     "          },\n",
137 |     "          {\n",
138 |     "            \"DataSource\": {\n",
139 |     "              \"S3DataSource\": {\n",
140 |     "                \"S3DataType\": \"S3Prefix\",\n",
141 |     "                \"S3Uri\": \"s3://sagemaker-us-west-2-565094796913/hello_blaze_test_scikit\",\n",
142 |     "                \"S3DataDistributionType\": \"FullyReplicated\"\n",
143 |     "              }\n",
144 |     "            },\n",
145 |     "            \"ContentType\": \"text/plain\",\n",
146 |     "            \"ChannelName\": \"validation\"\n",
147 |     "          }\n",
148 |     "        ],\n",
149 |     "        \"HyperParameters\": {\n",
150 |     "          \"mode\": \"supervised\"\n",
151 |     "        },\n",
152 |     "        \"TrainingJobName\": \"test-lambda-train\"\n",
153 |     "      },\n",
154 |     "      \"Type\": \"Task\",\n",
155 |     "      \"End\": true\n",
156 |     "    }\n",
157 |     "  }\n",
158 |     "}\"\"\"\n",
159 |     "\n",
160 |     "def lambda_handler(event, context):\n",
161 |     "    \n",
162 |     "    client.update_state_machine(definition=definition, stateMachineArn='arn:aws:states:us-west-2:565094796913:stateMachine:SageMakerProcessingWorkflow7')\n",
163 |     "    time.sleep(5)\n",
164 |     "    response = client.start_execution(input='{}', name='joblambda2', stateMachineArn='arn:aws:states:us-west-2:565094796913:stateMachine:SageMakerProcessingWorkflow7')\n",
165 |     "    \n",
166 |     "    return response\n"
167 |    ]
168 |   }
169 |  ],
170 |  "metadata": {
171 |   "kernelspec": {
172 |    "display_name": "Python 3 (ipykernel)",
173 |    "language": "python",
174 |    "name": "python3"
175 |   },
176 |   "language_info": {
177 |    "codemirror_mode": {
178 |     "name": "ipython",
179 |     "version": 3
180 |    },
181 |    "file_extension": ".py",
182 |    "mimetype": "text/x-python",
183 |    "name": "python",
184 |    "nbconvert_exporter": "python",
185 |    "pygments_lexer": "ipython3",
186 |    "version": "3.8.0"
187 |   }
188 |  },
189 |  "nbformat": 4,
190 |  "nbformat_minor": 5
191 | }
192 | 


--------------------------------------------------------------------------------
/lesson3/Lesson 3, Exercise 4 - Tying it All Together.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "134e2d03",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# UDACITY Designing Your First Workflow - Tying it All Together"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "d6e03226",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "AWS is comprised of many services, and one of the main skills you'll develop as an ML Engineer working in AWS is in chaining these services together to accomplish specific data engineering goals. With Lambda, you've learned how to launch serverless jobs, and with Step Functions, you've learned how to create a workflow that chains jobs together. Now, you'll learn how to launch a Step Function using a Lambda job. "
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "id": "306881a2",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "Before starting this, it's important to highlight that this is not the only way to accomplish something like this. Multiple services integrate with Step Functions, and so it follows that there are multiple ways to launch Step Functions. These services, among others, include API Gateway, EventBridge, and even other Step Functions. "
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "id": "fd72b5ed",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "Your task is to create a new lambda function that will launch the state machine you created in the **last exercise**. You'll then launch this lambda function from the command line. To find the definition of the step function you've made, click into the step function and look for the definition under the 'Definition' tab. "
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "id": "23232098",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "First, create a new Lambda role. Attach to this role the StepFunctionsFullAccess policy. Then create a new lambda function under the default template, and attach this new role to it. Use the starter code below to help you modify the lambda handler to accomplish your task. "
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "id": "25d13b61",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "As Step Function cannot execute more than once with the same name, you must update the definition with a new name. You can find the existing definition of a Step Function in the AWS Console under 'Step Functions'. In the lambda function code below, update the 'definition' with the step function definition from your last exercise, with the only difference being the, step fucntion name, processing-job name and the training-job name. "
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "id": "d84f26f3",
 54 |    "metadata": {},
 55 |    "source": [
 56 |     "## Exercise: Create the Lambda Function"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": null,
 62 |    "id": "70233ab1",
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "import json\n",
 67 |     "import boto3\n",
 68 |     "import time\n",
 69 |     "\n",
 70 |     "client = boto3.client('stepfunctions')\n",
 71 |     "\n",
 72 |     "# todo, copy the definition from the last exercise and paste it below. \n",
 73 |     "# Also change the names of step function, training job and processing job. \n",
 74 |     "definition = \"\"\"\n",
 75 |     "\n",
 76 |     "\"\"\"\n",
 77 |     "\n",
 78 |     "def lambda_handler(event, context):\n",
 79 |     "    #todo \n",
 80 |     "    client.update_state_machine(definition=definition, stateMachineArn='CHANGE THIS') \n",
 81 |     "    # Give AWS time to register the defintion\n",
 82 |     "    time.sleep(5)\n",
 83 |     "    #todo\n",
 84 |     "    client.start_execution(input='{}', name='CHANGE THIS', stateMachineArn='CHANGE THIS') \n",
 85 |     "    \n",
 86 |     "    return {\n",
 87 |     "        'statusCode': 200,\n",
 88 |     "        'body': 'The step function has successfully launched!'\n",
 89 |     "    }\n"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "markdown",
 94 |    "id": "966f5b08",
 95 |    "metadata": {},
 96 |    "source": [
 97 |     "## Exercise: Launch the Lambda Function\n"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "id": "1abd804d",
103 |    "metadata": {},
104 |    "source": [
105 |     "Launch the lambda function and confirm the step function is created successfully."
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "id": "d8b6b4d5",
111 |    "metadata": {},
112 |    "source": [
113 |     "## Conceptual Exercise: What are next steps? "
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "id": "fb29a82d",
119 |    "metadata": {},
120 |    "source": [
121 |     "Right now, the Step Function that we made in the prior exercise has a hard-coded location of the dataset we input, as well as all of the locations of the intermediary steps. What are ways that you could modify the Step Function to make it more generalizable? If you could input an S3 location, how could you integrate it with Lambda so that it could asychronously be called? "
122 |    ]
123 |   }
124 |  ],
125 |  "metadata": {
126 |   "kernelspec": {
127 |    "display_name": "Python 3 (ipykernel)",
128 |    "language": "python",
129 |    "name": "python3"
130 |   },
131 |   "language_info": {
132 |    "codemirror_mode": {
133 |     "name": "ipython",
134 |     "version": 3
135 |    },
136 |    "file_extension": ".py",
137 |    "mimetype": "text/x-python",
138 |    "name": "python",
139 |    "nbconvert_exporter": "python",
140 |    "pygments_lexer": "ipython3",
141 |    "version": "3.8.0"
142 |   }
143 |  },
144 |  "nbformat": 4,
145 |  "nbformat_minor": 5
146 | }
147 | 


--------------------------------------------------------------------------------
/lesson3/demo/Lesson 3, Lecture 2 Demo - Lambda.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "689781d4",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# UDACITY Demo - Lambda"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": null,
 14 |    "id": "2c4ffece",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "%%writefile lambda_function.py\n",
 19 |     "\n",
 20 |     "# This cell will write the function to your local machine. Note the name of the file and the name of the function. \n",
 21 |     "# Compare this to the 'Handler' parameter. \n",
 22 |     "\n",
 23 |     "import json\n",
 24 |     "\n",
 25 |     "def lambda_handler(event, context):\n",
 26 |     "    return {\n",
 27 |     "        'statusCode': 200,\n",
 28 |     "        'body': json.dumps('Hello from Lambda!')\n",
 29 |     "    }\n"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "id": "72902858",
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "import boto3\n",
 40 |     "from sagemaker import get_execution_role\n",
 41 |     "from zipfile import ZipFile\n",
 42 |     "\n",
 43 |     "# Your role will need full access to Lambda. The easiest way to add this is through the IAM console. \n",
 44 |     "\n",
 45 |     "role = get_execution_role()\n",
 46 |     "\n",
 47 |     "client = boto3.client('lambda')\n",
 48 |     "\n",
 49 |     "with ZipFile('code.zip', 'w') as f:\n",
 50 |     "    f.write('lambda_function.py')\n",
 51 |     "    \n",
 52 |     "# If submitting as a ZipFile, you need to insert raw data. \n",
 53 |     "    \n",
 54 |     "with open('code.zip', 'rb') as f:\n",
 55 |     "    b_code = f.read()\n",
 56 |     "\n",
 57 |     "    \n",
 58 |     "# You will need to submit an 'execution role' to Lambda. The easiest way to add this is through the IAM console. \n",
 59 |     "# You'll need the full ARN (not just the name. )\n",
 60 |     "# Lambda function names also need to be unique within your AWS account. \n",
 61 |     "\n",
 62 |     "response = client.create_function(\n",
 63 |     "    FunctionName='botoLambdaFunction1',\n",
 64 |     "    Runtime='python3.9',\n",
 65 |     "    Handler='lambda_function.lambda_handler',\n",
 66 |     "    Code={\n",
 67 |     "        'ZipFile': b_code,\n",
 68 |     "    },\n",
 69 |     "    Description='string',\n",
 70 |     "    Timeout=30,\n",
 71 |     "    MemorySize=1024,\n",
 72 |     "    Publish=True,\n",
 73 |     "    PackageType='Zip',\n",
 74 |     "    Role='arn:aws:iam::565094796913:role/lambda_full_access'\n",
 75 |     ")\n"
 76 |    ]
 77 |   }
 78 |  ],
 79 |  "metadata": {
 80 |   "kernelspec": {
 81 |    "display_name": "Python 3 (ipykernel)",
 82 |    "language": "python",
 83 |    "name": "python3"
 84 |   },
 85 |   "language_info": {
 86 |    "codemirror_mode": {
 87 |     "name": "ipython",
 88 |     "version": 3
 89 |    },
 90 |    "file_extension": ".py",
 91 |    "mimetype": "text/x-python",
 92 |    "name": "python",
 93 |    "nbconvert_exporter": "python",
 94 |    "pygments_lexer": "ipython3",
 95 |    "version": "3.8.0"
 96 |   }
 97 |  },
 98 |  "nbformat": 4,
 99 |  "nbformat_minor": 5
100 | }
101 | 


--------------------------------------------------------------------------------
/lesson3/demo/Lesson 3, Lecture 3 Demo - Triggering Lambda.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "02d4826a",
 6 |    "metadata": {},
 7 |    "source": [
 8 |     "# UDACITY Demo - Triggering Lambda"
 9 |    ]
10 |   },
11 |   {
12 |    "cell_type": "code",
13 |    "execution_count": null,
14 |    "id": "a88bf9cc",
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "import boto3\n",
19 |     "from sagemaker import get_execution_role ## The SageMaker role executing your notebook needs to have Lambda permissions. \n",
20 |     "import json\n",
21 |     "\n",
22 |     "client = boto3.client('lambda')\n",
23 |     "\n",
24 |     "payload = {'key': 'value'}\n",
25 |     "\n",
26 |     "# json.dumps turns a JSON-object-like python object into a string, and .encode('utf-8') encodes the \n",
27 |     "# the string so that it can be properly passed to the client. \n",
28 |     "\n",
29 |     "payload_bytes = json.dumps(payload).encode('utf-8')\n",
30 |     "\n",
31 |     "response = client.invoke(\n",
32 |     "    FunctionName='example123',\n",
33 |     "    InvocationType='Event',\n",
34 |     "    Payload=payload_bytes\n",
35 |     ")"
36 |    ]
37 |   }
38 |  ],
39 |  "metadata": {
40 |   "kernelspec": {
41 |    "display_name": "Python 3 (ipykernel)",
42 |    "language": "python",
43 |    "name": "python3"
44 |   },
45 |   "language_info": {
46 |    "codemirror_mode": {
47 |     "name": "ipython",
48 |     "version": 3
49 |    },
50 |    "file_extension": ".py",
51 |    "mimetype": "text/x-python",
52 |    "name": "python",
53 |    "nbconvert_exporter": "python",
54 |    "pygments_lexer": "ipython3",
55 |    "version": "3.8.0"
56 |   }
57 |  },
58 |  "nbformat": 4,
59 |  "nbformat_minor": 5
60 | }
61 | 


--------------------------------------------------------------------------------
/lesson3/demo/Lesson 3, Lecture 4 Demo - Step Functions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "ceb62ded",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# UDACITY Demo - Step Functions"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": null,
 14 |    "id": "4238a462",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import boto3\n",
 19 |     "from sagemaker import get_execution_role\n",
 20 |     "\n",
 21 |     "client = boto3.client('stepfunctions')\n",
 22 |     "\n",
 23 |     "# Definitions are unweildly and also not valid python objects. Consider also using the Step interface\n",
 24 |     "# for the services you wish to invoke, or just constructing a Step Function through the UI \n",
 25 |     "# and pasting the definition that is automatically generated. \n",
 26 |     "\n",
 27 |     "definition = \"\"\"{\n",
 28 |     "  \"Comment\": \"A Hello World example of the Amazon States Language.\",\n",
 29 |     "  \"StartAt\": \"Lambda Invoke (First)\", \n",
 30 |     "  \"States\": {\n",
 31 |     "    \"Lambda Invoke (First)\": {\n",
 32 |     "      \"Type\": \"Task\",\n",
 33 |     "      \"Resource\": \"arn:aws:states:::lambda:invoke\",\n",
 34 |     "      \"OutputPath\": \"$.Payload\",\n",
 35 |     "      \"Parameters\": {\n",
 36 |     "        \"Payload.$\": \"$\",\n",
 37 |     "        \"FunctionName\": \"arn:aws:lambda:us-west-2:565094796913:function:example123:$LATEST\"\n",
 38 |     "      },\n",
 39 |     "      \"Retry\": [\n",
 40 |     "        {\n",
 41 |     "          \"ErrorEquals\": [\n",
 42 |     "            \"Lambda.ServiceException\",\n",
 43 |     "            \"Lambda.AWSLambdaException\",\n",
 44 |     "            \"Lambda.SdkClientException\"\n",
 45 |     "          ],\n",
 46 |     "          \"IntervalSeconds\": 2,\n",
 47 |     "          \"MaxAttempts\": 6,\n",
 48 |     "          \"BackoffRate\": 2\n",
 49 |     "        }\n",
 50 |     "      ],\n",
 51 |     "      \"Next\": \"Lambda Invoke (Second)\"\n",
 52 |     "    },\n",
 53 |     "    \"Lambda Invoke (Second)\": {\n",
 54 |     "      \"Type\": \"Task\",\n",
 55 |     "      \"Resource\": \"arn:aws:states:::lambda:invoke\",\n",
 56 |     "      \"OutputPath\": \"$.Payload\",\n",
 57 |     "      \"Parameters\": {\n",
 58 |     "        \"Payload.$\": \"$\",\n",
 59 |     "        \"FunctionName\": \"arn:aws:lambda:us-west-2:565094796913:function:example123:$LATEST\"\n",
 60 |     "      },\n",
 61 |     "      \"Retry\": [\n",
 62 |     "        {\n",
 63 |     "          \"ErrorEquals\": [\n",
 64 |     "            \"Lambda.ServiceException\",\n",
 65 |     "            \"Lambda.AWSLambdaException\",\n",
 66 |     "            \"Lambda.SdkClientException\"\n",
 67 |     "          ],\n",
 68 |     "          \"IntervalSeconds\": 2,\n",
 69 |     "          \"MaxAttempts\": 6,\n",
 70 |     "          \"BackoffRate\": 2\n",
 71 |     "        }\n",
 72 |     "      ],\n",
 73 |     "      \"End\": true\n",
 74 |     "    }\n",
 75 |     "  }\n",
 76 |     "}\n",
 77 |     "\"\"\""
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": null,
 83 |    "id": "825bb38c",
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "# Your SageMaker execution role needs to have both StepFunctions permission and IAM permission. This\n",
 88 |     "# is seperate from the role passed in through 'roleArn' (You need IAM permissions specifically because\n",
 89 |     "# you are handling other roles.) The roleArn that's passed into the state machine needs to have permissions\n",
 90 |     "# to the services it's invoking. In this case, these are Lambda permissions. \n",
 91 |     "\n",
 92 |     "response = client.create_state_machine(\n",
 93 |     "    name='boto3StateMachine3', # Names need to be unique. \n",
 94 |     "    definition=definition,\n",
 95 |     "    roleArn='arn:aws:iam::565094796913:role/service-role/StepFunctions-firstStateMachine-role-0826984a',\n",
 96 |     "    type='STANDARD',\n",
 97 |     "    loggingConfiguration={\n",
 98 |     "        'level': 'OFF'\n",
 99 |     "    }\n",
100 |     ")\n",
101 |     "print(response)"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": null,
107 |    "id": "e9d45764",
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "response = client.start_execution(\n",
112 |     "    stateMachineArn='arn:aws:states:us-west-2:565094796913:stateMachine:boto3StateMachine3', # You can find this through the Console or through the 'response' object. \n",
113 |     "    name='example1', # Execution names need to be unique within state machines. \n",
114 |     "    input='{}' # Input needs to be at least empty brackets. \n",
115 |     ")"
116 |    ]
117 |   }
118 |  ],
119 |  "metadata": {
120 |   "kernelspec": {
121 |    "display_name": "Python 3 (ipykernel)",
122 |    "language": "python",
123 |    "name": "python3"
124 |   },
125 |   "language_info": {
126 |    "codemirror_mode": {
127 |     "name": "ipython",
128 |     "version": 3
129 |    },
130 |    "file_extension": ".py",
131 |    "mimetype": "text/x-python",
132 |    "name": "python",
133 |    "nbconvert_exporter": "python",
134 |    "pygments_lexer": "ipython3",
135 |    "version": "3.8.0"
136 |   }
137 |  },
138 |  "nbformat": 4,
139 |  "nbformat_minor": 5
140 | }
141 | 


--------------------------------------------------------------------------------
/lesson3/demo/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lesson3/reviews_Patio_Lawn_and_Garden_5.json.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/udacity/udacity-nd009t-C2-Developing-ML-Workflow/d1928db95b27ca4dec5b73f460a357d9cdcea9d7/lesson3/reviews_Patio_Lawn_and_Garden_5.json.zip


--------------------------------------------------------------------------------
/lesson4/exercises-starters.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Exercises\n",
  8 |     "\n",
  9 |     "This is the notebook containing the exercises for Feature Store, Model Monitor, and Clarify. Tested for these exercises was performed using __2 vCPU + 4 GiB notebook instance with Python 3 (TensorFlow 2.1 Python 3.6 CPU Optimized) kernel__.\n",
 10 |     "\n",
 11 |     "## Staging\n",
 12 |     "\n",
 13 |     "We'll begin by initializing some variables. These are often assumed to be present in code samples you'll find in the AWS documenation."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": null,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "import sagemaker\n",
 23 |     "from sagemaker.session import Session\n",
 24 |     "from sagemaker import get_execution_role\n",
 25 |     "\n",
 26 |     "role = get_execution_role()\n",
 27 |     "session = sagemaker.Session()\n",
 28 |     "region = session.boto_region_name\n",
 29 |     "bucket = session.default_bucket()"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "## Feature Store\n",
 37 |     "---"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "Feature store is a special database to give ML systems a consistent data flow across training and inference workloads. It can ingest data in batches (for training) as well as serve input features to models with very low latency for real-time prediction.\n",
 45 |     "\n",
 46 |     "For this exercise we'll work with a wine quality dataset: https://archive.ics.uci.edu/ml/datasets/wine+quality/\n",
 47 |     "\n",
 48 |     "```P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.\n",
 49 |     "Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.```"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "import pandas as pd\n",
 59 |     "from sklearn import datasets\n",
 60 |     "import time\n",
 61 |     "import uuid"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": null,
 67 |    "metadata": {},
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "data = datasets.load_wine()\n",
 71 |     "df = pd.DataFrame(data['data'])\n",
 72 |     "df.columns = data['feature_names']"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "metadata": {},
 78 |    "source": [
 79 |     "If we leave the column names as-is, Feature Store won't be able to handle the `/` in `od280/od315_of_diluted_wines` (`/` is a delimiter Feature Store uses to manage how features are organized.)"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "df.rename(columns = {'od280/od315_of_diluted_wines':'od280_od315_of_diluted_wines'}, inplace=True)"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "Once we have our data, we can create a feature group. Remember to attach event time and ID columns - Feature Store needs them."
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "# Add a feature group\n",
105 |     "df[\"EventTime\"] = time.time()\n",
106 |     "df[\"id\"] = range(len(df))\n",
107 |     "\n",
108 |     "# TODO: Create feature group\n",
109 |     "from sagemaker.feature_store.feature_group import FeatureGroup\n",
110 |     "\n",
111 |     "feature_group = \n",
112 |     "\n",
113 |     "# TODO: Load Feature definitions\n"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {},
119 |    "source": [
120 |     "The feature group is not created until we call the `create` method, let's do that now:"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": null,
126 |    "metadata": {},
127 |    "outputs": [],
128 |    "source": [
129 |     "# Create the feature store:"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "# TODO\n",
139 |     "feature_group.create(\n",
140 |     "# ...\n",
141 |     ")"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "markdown",
146 |    "metadata": {},
147 |    "source": [
148 |     "Lastly, ingest some data into your feature group:"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "metadata": {},
155 |    "outputs": [],
156 |    "source": [
157 |     "# TODO"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "markdown",
162 |    "metadata": {},
163 |    "source": [
164 |     "Great job! You've demonstrated your understanding of creating feature groups and ingesting data into them using Feature Store. Next up we'll cover Model Monitor!"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "## Model Monitor"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "markdown",
176 |    "metadata": {},
177 |    "source": [
178 |     "In this exercise we'll create a monitoring schedule for a deployed model. We're going to provide code to help you deploy a model and get started, so that you can focus on Model Monitor for this exercise. __Remember to clean up your model before you end a work session__. We'll provide some code at the end to help you clean up your model. We'll begin by reloading our data from the previous exercise.\n",
179 |     "\n"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": null,
185 |    "metadata": {},
186 |    "outputs": [],
187 |    "source": [
188 |     "data = datasets.load_wine()\n",
189 |     "df = pd.DataFrame(data['data'])\n",
190 |     "df.columns = data['feature_names']\n",
191 |     "df.rename(columns = {'od280/od315_of_diluted_wines':'od280_od315_of_diluted_wines'}, inplace=True)"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "markdown",
196 |    "metadata": {},
197 |    "source": [
198 |     "We also need to put the target variable in the first column per the docs for our chosen algorithm: https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": null,
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": [
207 |     "df[\"TARGET\"] = data['target']\n",
208 |     "df.set_index(df.pop('TARGET'), inplace=True)\n",
209 |     "df.reset_index(inplace=True)"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "markdown",
214 |    "metadata": {},
215 |    "source": [
216 |     "Now we'll upload the data to S3 as train and validation data:"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": null,
222 |    "metadata": {},
223 |    "outputs": [],
224 |    "source": [
225 |     "delimiter = int(len(df)/2)\n",
226 |     "train, test = df.iloc[delimiter:], df.iloc[:delimiter]\n",
227 |     "\n",
228 |     "train.to_csv(\"train.csv\", header=False, index=False)\n",
229 |     "test.to_csv(\"validation.csv\", header=False, index=False)\n",
230 |     "\n",
231 |     "val_location = session.upload_data('./validation.csv', key_prefix=\"data\")\n",
232 |     "train_location = session.upload_data('./train.csv', key_prefix=\"data\")\n",
233 |     "\n",
234 |     "s3_input_train = sagemaker.inputs.TrainingInput(s3_data=train_location, content_type='csv')\n",
235 |     "s3_input_validation = sagemaker.inputs.TrainingInput(s3_data=val_location, content_type='csv')"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": null,
241 |    "metadata": {},
242 |    "outputs": [],
243 |    "source": [
244 |     "algo_image = sagemaker.image_uris.retrieve(\"xgboost\", region, version='latest')\n",
245 |     "s3_output_location = f\"s3://{bucket}/models/wine_model\"\n",
246 |     "\n",
247 |     "model=sagemaker.estimator.Estimator(\n",
248 |     "    image_uri=algo_image,\n",
249 |     "    role=role,\n",
250 |     "    instance_count=1,\n",
251 |     "    instance_type='ml.m4.xlarge',\n",
252 |     "    volume_size=5,\n",
253 |     "    output_path=s3_output_location,\n",
254 |     "    sagemaker_session=sagemaker.Session()\n",
255 |     ")\n",
256 |     "\n",
257 |     "model.set_hyperparameters(max_depth=5,\n",
258 |     "                        eta=0.2,\n",
259 |     "                        gamma=4,\n",
260 |     "                        min_child_weight=6,\n",
261 |     "                        subsample=0.8,\n",
262 |     "                        objective='reg:linear',\n",
263 |     "                        early_stopping_rounds=10,\n",
264 |     "                        num_round=200)\n",
265 |     "\n",
266 |     "\n",
267 |     "model.fit({'train': s3_input_train, 'validation': s3_input_validation})"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "markdown",
272 |    "metadata": {},
273 |    "source": [
274 |     "Now that your training job has finished, you can perform the first task in this exercise: creating a data capture config. Configure your model to sample `34%` of inferences:"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "code",
279 |    "execution_count": null,
280 |    "metadata": {},
281 |    "outputs": [],
282 |    "source": [
283 |     "# TODO\n",
284 |     "from sagemaker.model_monitor import DataCaptureConfig\n",
285 |     "\n",
286 |     "capture_uri =\n",
287 |     "\n",
288 |     "data_capture_config = DataCaptureConfig(\n",
289 |     "# ...\n",
290 |     ")"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "metadata": {},
296 |    "source": [
297 |     "Great! We'll use your config to deploy a model below:"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "code",
302 |    "execution_count": null,
303 |    "metadata": {},
304 |    "outputs": [],
305 |    "source": [
306 |     "xgb_predictor = model.deploy(\n",
307 |     "    initial_instance_count=1, instance_type='ml.m4.xlarge',\n",
308 |     "    data_capture_config=data_capture_config\n",
309 |     ")"
310 |    ]
311 |   },
312 |   {
313 |    "cell_type": "markdown",
314 |    "metadata": {},
315 |    "source": [
316 |     "Great! You should see an indicator like this when the deployment finishes:\n",
317 |     "\n",
318 |     "```\n",
319 |     "-----------------!\n",
320 |     "```\n",
321 |     "We can test your deployment like so:"
322 |    ]
323 |   },
324 |   {
325 |    "cell_type": "code",
326 |    "execution_count": null,
327 |    "metadata": {},
328 |    "outputs": [],
329 |    "source": [
330 |     "xgb_predictor.serializer = sagemaker.serializers.CSVSerializer()\n",
331 |     "inputs = test.copy()\n",
332 |     "# Drop the target variable\n",
333 |     "inputs = inputs.drop(columns=inputs.columns[0])\n",
334 |     "x_pred = xgb_predictor.predict(inputs.sample(5).values).decode('utf-8')\n",
335 |     "x_pred"
336 |    ]
337 |   },
338 |   {
339 |    "cell_type": "markdown",
340 |    "metadata": {},
341 |    "source": [
342 |     "All systems go! To finish up the exercise, we're going to provide you with a DefaultModelMonitor and a suggested baseline. Combine the `xgb_predictor` and the provided `my_monitor` to configure the monitoring schedule for _hourly_ monitoring."
343 |    ]
344 |   },
345 |   {
346 |    "cell_type": "code",
347 |    "execution_count": null,
348 |    "metadata": {},
349 |    "outputs": [],
350 |    "source": [
351 |     "from sagemaker.model_monitor import DefaultModelMonitor\n",
352 |     "from sagemaker.model_monitor.dataset_format import DatasetFormat\n",
353 |     "\n",
354 |     "my_monitor = DefaultModelMonitor(\n",
355 |     "    role=role,\n",
356 |     "    instance_count=1,\n",
357 |     "    instance_type='ml.m5.xlarge',\n",
358 |     "    volume_size_in_gb=20,\n",
359 |     "    max_runtime_in_seconds=3600,\n",
360 |     ")"
361 |    ]
362 |   },
363 |   {
364 |    "cell_type": "code",
365 |    "execution_count": null,
366 |    "metadata": {},
367 |    "outputs": [],
368 |    "source": [
369 |     "my_monitor.suggest_baseline(\n",
370 |     "    baseline_dataset=f's3://{bucket}/data/train.csv',\n",
371 |     "    dataset_format=DatasetFormat.csv(header=False),\n",
372 |     ")"
373 |    ]
374 |   },
375 |   {
376 |    "cell_type": "markdown",
377 |    "metadata": {},
378 |    "source": [
379 |     "Below, provide the monitoring schedule:"
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "code",
384 |    "execution_count": null,
385 |    "metadata": {},
386 |    "outputs": [],
387 |    "source": [
388 |     "# TODO\n",
389 |     "from sagemaker.model_monitor import CronExpressionGenerator\n",
390 |     "\n",
391 |     "my_monitor.create_monitoring_schedule(\n",
392 |     "# ...\n",
393 |     ")"
394 |    ]
395 |   },
396 |   {
397 |    "cell_type": "markdown",
398 |    "metadata": {},
399 |    "source": [
400 |     "Great job! You can check that your schedule was created by selecting the `SageMaker components and registries` tab on the far left.\n",
401 |     "\n",
402 |     "In this exercise you configured Model Monitor to watch a simple model. Next, we'll monitor the same deployment for explainability."
403 |    ]
404 |   },
405 |   {
406 |    "cell_type": "markdown",
407 |    "metadata": {},
408 |    "source": [
409 |     "__REMINDER:__ Don't leave your model deployed overnight. If you aren't going to follow up with the Clarify exercise within a few hours, use the code below to remove your model:"
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "code",
414 |    "execution_count": null,
415 |    "metadata": {},
416 |    "outputs": [],
417 |    "source": [
418 |     "monitors = xgb_predictor.list_monitors()\n",
419 |     "for monitor in monitors:\n",
420 |     "    monitor.delete_monitoring_schedule()"
421 |    ]
422 |   },
423 |   {
424 |    "cell_type": "code",
425 |    "execution_count": null,
426 |    "metadata": {},
427 |    "outputs": [],
428 |    "source": [
429 |     "xgb_predictor.delete_endpoint()"
430 |    ]
431 |   },
432 |   {
433 |    "cell_type": "markdown",
434 |    "metadata": {},
435 |    "source": [
436 |     "## Clarify\n",
437 |     "\n",
438 |     "For the last exercise we'll deploy an explainability monitor using Clarify. We're going to use the model that you deployed in the last exercise, but if you cleaned up your deployments from the previous exercise, that's ok! You can rerun the deployment from the previous exercise up to the point where we deployed our model. It'll look like this:\n",
439 |     "\n",
440 |     "```python\n",
441 |     "xgb_predictor = model.deploy(\n",
442 |     "    initial_instance_count=1, instance_type='ml.m4.xlarge',\n",
443 |     "    data_capture_config=data_capture_config\n",
444 |     ")\n",
445 |     "```\n",
446 |     "\n",
447 |     "Once your model is deployed, you can come back here. _REMINDER_: you need to clean up your deployment, don't leave it running overnight. We'll provide some code at the end to delete your deployment.\n",
448 |     "\n",
449 |     "## Prep\n",
450 |     "\n",
451 |     "We'll begin by reloading our data from the previous exercise."
452 |    ]
453 |   },
454 |   {
455 |    "cell_type": "code",
456 |    "execution_count": 88,
457 |    "metadata": {},
458 |    "outputs": [],
459 |    "source": [
460 |     "data = datasets.load_wine()\n",
461 |     "df = pd.DataFrame(data['data'])\n",
462 |     "df.columns = data['feature_names']\n",
463 |     "df.rename(columns = {'od280/od315_of_diluted_wines':'od280_od315_of_diluted_wines'}, inplace=True)"
464 |    ]
465 |   },
466 |   {
467 |    "cell_type": "markdown",
468 |    "metadata": {},
469 |    "source": [
470 |     "We also need to put the target variable in the first column per the docs for our chosen algorithm: https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html"
471 |    ]
472 |   },
473 |   {
474 |    "cell_type": "code",
475 |    "execution_count": 89,
476 |    "metadata": {},
477 |    "outputs": [],
478 |    "source": [
479 |     "df[\"TARGET\"] = data['target']\n",
480 |     "df.set_index(df.pop('TARGET'), inplace=True)\n",
481 |     "df.reset_index(inplace=True)"
482 |    ]
483 |   },
484 |   {
485 |    "cell_type": "markdown",
486 |    "metadata": {},
487 |    "source": [
488 |     "Now we'll upload the data to S3 as train and validation data:"
489 |    ]
490 |   },
491 |   {
492 |    "cell_type": "code",
493 |    "execution_count": 90,
494 |    "metadata": {},
495 |    "outputs": [],
496 |    "source": [
497 |     "delimiter = int(len(df)/2)\n",
498 |     "train, test = df.iloc[delimiter:], df.iloc[:delimiter]\n",
499 |     "\n",
500 |     "train.to_csv(\"train.csv\", header=False, index=False)\n",
501 |     "test.to_csv(\"validation.csv\", header=False, index=False)\n",
502 |     "\n",
503 |     "val_location = session.upload_data('./validation.csv', key_prefix=\"data\")\n",
504 |     "train_location = session.upload_data('./train.csv', key_prefix=\"data\")\n",
505 |     "\n",
506 |     "s3_input_train = sagemaker.inputs.TrainingInput(s3_data=train_location, content_type='csv')\n",
507 |     "s3_input_validation = sagemaker.inputs.TrainingInput(s3_data=val_location, content_type='csv')"
508 |    ]
509 |   },
510 |   {
511 |    "cell_type": "markdown",
512 |    "metadata": {},
513 |    "source": [
514 |     "Great! Our data is staged and our model is deployed - let's monitor it for explainability. We need to define three config objects, the `SHAPConfig`, the `ModelConfig`, and the `ExplainabilityAnalysisConfig`. Below, we provide the `SHAPConfig`."
515 |    ]
516 |   },
517 |   {
518 |    "cell_type": "code",
519 |    "execution_count": 121,
520 |    "metadata": {},
521 |    "outputs": [],
522 |    "source": [
523 |     "shap_config = sagemaker.clarify.SHAPConfig(\n",
524 |     "    baseline=[train.mean().astype(int).to_list()[1:]],\n",
525 |     "    num_samples=int(train.size),\n",
526 |     "    agg_method=\"mean_abs\",\n",
527 |     "    save_local_shap_values=False,\n",
528 |     ")"
529 |    ]
530 |   },
531 |   {
532 |    "cell_type": "markdown",
533 |    "metadata": {},
534 |    "source": [
535 |     "Next up, fill in the blanks to define the `ModelConfig` and `ExplainabilityAnalysisConfig`."
536 |    ]
537 |   },
538 |   {
539 |    "cell_type": "code",
540 |    "execution_count": 122,
541 |    "metadata": {},
542 |    "outputs": [],
543 |    "source": [
544 |     "# TODO\n",
545 |     "model_config = sagemaker.clarify.ModelConfig(\n",
546 |     "# ...\n",
547 |     ")\n",
548 |     "\n",
549 |     "analysis_config = sagemaker.model_monitor.ExplainabilityAnalysisConfig(\n",
550 |     "# ...\n",
551 |     "    )"
552 |    ]
553 |   },
554 |   {
555 |    "cell_type": "markdown",
556 |    "metadata": {},
557 |    "source": [
558 |     "Before we apply our config, we need to create the monitor object. This is what we'll apply all our config to."
559 |    ]
560 |   },
561 |   {
562 |    "cell_type": "code",
563 |    "execution_count": 123,
564 |    "metadata": {},
565 |    "outputs": [],
566 |    "source": [
567 |     "model_explainability_monitor = sagemaker.model_monitor.ModelExplainabilityMonitor(\n",
568 |     "    role=role,\n",
569 |     "    sagemaker_session=session,\n",
570 |     "    max_runtime_in_seconds=1800,\n",
571 |     ")"
572 |    ]
573 |   },
574 |   {
575 |    "cell_type": "markdown",
576 |    "metadata": {},
577 |    "source": [
578 |     "Everything's ready! Below, create a monitoring schedule using the configs we created. Set the schedule to run _daily_."
579 |    ]
580 |   },
581 |   {
582 |    "cell_type": "code",
583 |    "execution_count": 124,
584 |    "metadata": {},
585 |    "outputs": [],
586 |    "source": [
587 |     "# TODO \n",
588 |     "from sagemaker.model_monitor import CronExpressionGenerator\n",
589 |     "\n",
590 |     "\n",
591 |     "explainability_uri =\n",
592 |     "model_explainability_monitor.create_monitoring_schedule(\n",
593 |     "# ...\n",
594 |     ")"
595 |    ]
596 |   },
597 |   {
598 |    "cell_type": "markdown",
599 |    "metadata": {},
600 |    "source": [
601 |     "Way to go! You can check that your schedule was created by selecting the `SageMaker components and registries` tab on the far left.\n",
602 |     "\n",
603 |     "In this exercise you deployed a monitor for explainability to your SageMaker endpoint. This is the last exercise - you'll apply these learnings again in your Project at the end of the course.\n",
604 |     "\n"
605 |    ]
606 |   },
607 |   {
608 |    "cell_type": "markdown",
609 |    "metadata": {},
610 |    "source": [
611 |     "__REMINDER:__ Don't leave your model deployed overnight. Use the code below to remove your model:"
612 |    ]
613 |   },
614 |   {
615 |    "cell_type": "code",
616 |    "execution_count": 126,
617 |    "metadata": {},
618 |    "outputs": [
619 |     {
620 |      "name": "stdout",
621 |      "output_type": "stream",
622 |      "text": [
623 |       "\n",
624 |       "Deleting Monitoring Schedule with name: monitoring-schedule-2021-09-13-17-25-08-560\n",
625 |       "\n",
626 |       "Deleting Monitoring Schedule with name: wine-monitoring-schedule\n"
627 |      ]
628 |     }
629 |    ],
630 |    "source": [
631 |     "monitors = xgb_predictor.list_monitors()\n",
632 |     "for monitor in monitors:\n",
633 |     "    monitor.delete_monitoring_schedule()"
634 |    ]
635 |   },
636 |   {
637 |    "cell_type": "code",
638 |    "execution_count": 127,
639 |    "metadata": {},
640 |    "outputs": [],
641 |    "source": [
642 |     "xgb_predictor.delete_endpoint()"
643 |    ]
644 |   },
645 |   {
646 |    "cell_type": "code",
647 |    "execution_count": null,
648 |    "metadata": {},
649 |    "outputs": [],
650 |    "source": []
651 |   }
652 |  ],
653 |  "metadata": {
654 |   "instance_type": "ml.t3.medium",
655 |   "kernelspec": {
656 |    "display_name": "Python 3 (TensorFlow 2.1 Python 3.6 CPU Optimized)",
657 |    "language": "python",
658 |    "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/tensorflow-2.1-cpu-py36"
659 |   },
660 |   "language_info": {
661 |    "codemirror_mode": {
662 |     "name": "ipython",
663 |     "version": 3
664 |    },
665 |    "file_extension": ".py",
666 |    "mimetype": "text/x-python",
667 |    "name": "python",
668 |    "nbconvert_exporter": "python",
669 |    "pygments_lexer": "ipython3",
670 |    "version": "3.6.13"
671 |   }
672 |  },
673 |  "nbformat": 4,
674 |  "nbformat_minor": 4
675 | }
676 | 


--------------------------------------------------------------------------------