├── AWS_SageMaker.ipynb
└── README.md


/AWS_SageMaker.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "cells": [
   3 |     {
   4 |       "cell_type": "markdown",
   5 |       "metadata": {
   6 |         "id": "view-in-github",
   7 |         "colab_type": "text"
   8 |       },
   9 |       "source": [
  10 |         "<a href=\"https://colab.research.google.com/github/Prasang-Biyani/aws-sagemaker/blob/main/AWS_SageMaker.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
  11 |       ]
  12 |     },
  13 |     {
  14 |       "cell_type": "code",
  15 |       "execution_count": null,
  16 |       "id": "8c436963-afa1-454c-9275-a1ca109a4cbe",
  17 |       "metadata": {
  18 |         "tags": [],
  19 |         "id": "8c436963-afa1-454c-9275-a1ca109a4cbe"
  20 |       },
  21 |       "outputs": [],
  22 |       "source": [
  23 |         "import pandas as pd\n",
  24 |         "import numpy as np\n",
  25 |         "import matplotlib.pyplot as plt\n",
  26 |         "import sagemaker\n",
  27 |         "import boto3"
  28 |       ]
  29 |     },
  30 |     {
  31 |       "cell_type": "code",
  32 |       "execution_count": null,
  33 |       "id": "7721d237-76df-47c2-a947-175a55c5f88f",
  34 |       "metadata": {
  35 |         "tags": [],
  36 |         "id": "7721d237-76df-47c2-a947-175a55c5f88f",
  37 |         "outputId": "0776a493-d9c4-496b-b42b-eff885a8c41b"
  38 |       },
  39 |       "outputs": [
  40 |         {
  41 |           "name": "stdout",
  42 |           "output_type": "stream",
  43 |           "text": [
  44 |             "us-east-1\n"
  45 |           ]
  46 |         }
  47 |       ],
  48 |       "source": [
  49 |         "# Region the default boto3 session resolves to (used below when creating the bucket)\n",
  50 |         "my_region = boto3.Session().region_name\n",
  51 |         "bucket_name = \"dsmlsagemaker-prasang\"\n",
  52 |         "print(my_region)"
  53 |       ]
  54 |     },
  55 |     {
  56 |       "cell_type": "code",
  57 |       "execution_count": null,
  58 |       "id": "e30ee6c1-9f90-4357-91b4-80e2be7a464d",
  59 |       "metadata": {
  60 |         "tags": [],
  61 |         "id": "e30ee6c1-9f90-4357-91b4-80e2be7a464d",
  62 |         "outputId": "b3790a2b-89e9-483c-962a-a69000a20e3d"
  63 |       },
  64 |       "outputs": [
  65 |         {
  66 |           "name": "stdout",
  67 |           "output_type": "stream",
  68 |           "text": [
  69 |             "S3 Bucket Created Successfully!\n"
  70 |           ]
  71 |         }
  72 |       ],
  73 |       "source": [
  74 |         "# Create a bucket. boto3 resolves credentials through its default provider chain,\n",
  75 |         "# so no access keys are hardcoded in the notebook.\n",
  76 |         "s3 = boto3.resource(\"s3\", region_name=my_region)\n",
  77 |         "\n",
  78 |         "bucket_kwargs = {\"Bucket\": bucket_name}\n",
  79 |         "if my_region != \"us-east-1\":\n",
  80 |         "    # outside us-east-1 the target region has to be passed as a LocationConstraint\n",
  81 |         "    bucket_kwargs[\"CreateBucketConfiguration\"] = {\"LocationConstraint\": my_region}\n",
  82 |         "try:\n",
  83 |         "    s3.create_bucket(**bucket_kwargs)\n",
  84 |         "    print(\"S3 Bucket Created Successfully!\")\n",
  85 |         "except Exception as e:\n",
  86 |         "    print(str(e))"
  87 |       ]
  88 |     },
  89 |     {
  90 |       "cell_type": "code",
  91 |       "execution_count": null,
  92 |       "id": "3eb7f357-4ae3-4d46-a13c-dad2d1ce5467",
  93 |       "metadata": {
  94 |         "tags": [],
  95 |         "id": "3eb7f357-4ae3-4d46-a13c-dad2d1ce5467",
  96 |         "outputId": "fdb144d3-89bb-48c8-a484-6abaf1fc9257"
  97 |       },
  98 |       "outputs": [
  99 |         {
 100 |           "name": "stdout",
 101 |           "output_type": "stream",
 102 |           "text": [
 103 |             "s3://dsmlsagemaker-prasang/xgboost-as-a-built-in-algo/output\n"
 104 |           ]
 105 |         }
 106 |       ],
 107 |       "source": [
 108 |         "## set an output path for model artifacts\n",
 109 |         "path = \"xgboost-as-a-built-in-algo\"\n",
 110 |         "output_path = f\"s3://{bucket_name}/{path}/output\"\n",
 111 |         "print(output_path)"
 112 |       ]
 113 |     },
 114 |     {
 115 |       "cell_type": "markdown",
 116 |       "id": "3771b3e3-d416-4555-b4e1-6c776b344508",
 117 |       "metadata": {
 118 |         "id": "3771b3e3-d416-4555-b4e1-6c776b344508"
 119 |       },
 120 |       "source": [
 121 |         "## Download the data and store in S3"
 122 |       ]
 123 |     },
 124 |     {
 125 |       "cell_type": "code",
 126 |       "execution_count": null,
 127 |       "id": "275ef361-0969-4898-b28f-6ff1d0266def",
 128 |       "metadata": {
 129 |         "tags": [],
 130 |         "id": "275ef361-0969-4898-b28f-6ff1d0266def",
 131 |         "outputId": "eef657c0-a005-4606-b4d9-85f83a9b7ef6"
 132 |       },
 133 |       "outputs": [
 134 |         {
 135 |           "name": "stdout",
 136 |           "output_type": "stream",
 137 |           "text": [
 138 |             "Success: downloaded bank_clean.csv.\n",
 139 |             "Success: Data loaded into dataframe.\n"
 140 |           ]
 141 |         }
 142 |       ],
 143 |       "source": [
 144 |         "import urllib.request  # plain `import urllib` does not load the request submodule\n",
 145 |         "try:\n",
 146 |         "    # the bank data is in one hot encoded format already\n",
 147 |         "    urllib.request.urlretrieve(\"https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv\", \"bank_clean.csv\")\n",
 148 |         "    print('Success: downloaded bank_clean.csv.')\n",
 149 |         "except Exception as e:\n",
 150 |         "    print('Data download error: ', e)\n",
 151 |         "try:\n",
 152 |         "    model_data = pd.read_csv('./bank_clean.csv', index_col=0)\n",
 153 |         "    print('Success: Data loaded into dataframe.')\n",
 154 |         "except Exception as e:\n",
 155 |         "    print('Data load error: ', e)"
 156 |       ]
 157 |     },
 158 |     {
 159 |       "cell_type": "code",
 160 |       "execution_count": null,
 161 |       "id": "8039da29-803d-44b4-ad41-23e957b05fd3",
 162 |       "metadata": {
 163 |         "tags": [],
 164 |         "id": "8039da29-803d-44b4-ad41-23e957b05fd3",
 165 |         "outputId": "666db70c-d53b-4a32-d1f0-a6e685b8ff73"
 166 |       },
 167 |       "outputs": [
 168 |         {
 169 |           "data": {
 170 |             "text/html": [
 171 |               "<div>\n",
 172 |               "<style scoped>\n",
 173 |               "    .dataframe tbody tr th:only-of-type {\n",
 174 |               "        vertical-align: middle;\n",
 175 |               "    }\n",
 176 |               "\n",
 177 |               "    .dataframe tbody tr th {\n",
 178 |               "        vertical-align: top;\n",
 179 |               "    }\n",
 180 |               "\n",
 181 |               "    .dataframe thead th {\n",
 182 |               "        text-align: right;\n",
 183 |               "    }\n",
 184 |               "</style>\n",
 185 |               "<table border=\"1\" class=\"dataframe\">\n",
 186 |               "  <thead>\n",
 187 |               "    <tr style=\"text-align: right;\">\n",
 188 |               "      <th></th>\n",
 189 |               "      <th>age</th>\n",
 190 |               "      <th>campaign</th>\n",
 191 |               "      <th>pdays</th>\n",
 192 |               "      <th>previous</th>\n",
 193 |               "      <th>no_previous_contact</th>\n",
 194 |               "      <th>not_working</th>\n",
 195 |               "      <th>job_admin.</th>\n",
 196 |               "      <th>job_blue-collar</th>\n",
 197 |               "      <th>job_entrepreneur</th>\n",
 198 |               "      <th>job_housemaid</th>\n",
 199 |               "      <th>...</th>\n",
 200 |               "      <th>day_of_week_fri</th>\n",
 201 |               "      <th>day_of_week_mon</th>\n",
 202 |               "      <th>day_of_week_thu</th>\n",
 203 |               "      <th>day_of_week_tue</th>\n",
 204 |               "      <th>day_of_week_wed</th>\n",
 205 |               "      <th>poutcome_failure</th>\n",
 206 |               "      <th>poutcome_nonexistent</th>\n",
 207 |               "      <th>poutcome_success</th>\n",
 208 |               "      <th>y_no</th>\n",
 209 |               "      <th>y_yes</th>\n",
 210 |               "    </tr>\n",
 211 |               "  </thead>\n",
 212 |               "  <tbody>\n",
 213 |               "    <tr>\n",
 214 |               "      <th>0</th>\n",
 215 |               "      <td>56</td>\n",
 216 |               "      <td>1</td>\n",
 217 |               "      <td>999</td>\n",
 218 |               "      <td>0</td>\n",
 219 |               "      <td>1</td>\n",
 220 |               "      <td>0</td>\n",
 221 |               "      <td>0</td>\n",
 222 |               "      <td>0</td>\n",
 223 |               "      <td>0</td>\n",
 224 |               "      <td>1</td>\n",
 225 |               "      <td>...</td>\n",
 226 |               "      <td>0</td>\n",
 227 |               "      <td>1</td>\n",
 228 |               "      <td>0</td>\n",
 229 |               "      <td>0</td>\n",
 230 |               "      <td>0</td>\n",
 231 |               "      <td>0</td>\n",
 232 |               "      <td>1</td>\n",
 233 |               "      <td>0</td>\n",
 234 |               "      <td>1</td>\n",
 235 |               "      <td>0</td>\n",
 236 |               "    </tr>\n",
 237 |               "    <tr>\n",
 238 |               "      <th>1</th>\n",
 239 |               "      <td>57</td>\n",
 240 |               "      <td>1</td>\n",
 241 |               "      <td>999</td>\n",
 242 |               "      <td>0</td>\n",
 243 |               "      <td>1</td>\n",
 244 |               "      <td>0</td>\n",
 245 |               "      <td>0</td>\n",
 246 |               "      <td>0</td>\n",
 247 |               "      <td>0</td>\n",
 248 |               "      <td>0</td>\n",
 249 |               "      <td>...</td>\n",
 250 |               "      <td>0</td>\n",
 251 |               "      <td>1</td>\n",
 252 |               "      <td>0</td>\n",
 253 |               "      <td>0</td>\n",
 254 |               "      <td>0</td>\n",
 255 |               "      <td>0</td>\n",
 256 |               "      <td>1</td>\n",
 257 |               "      <td>0</td>\n",
 258 |               "      <td>1</td>\n",
 259 |               "      <td>0</td>\n",
 260 |               "    </tr>\n",
 261 |               "    <tr>\n",
 262 |               "      <th>2</th>\n",
 263 |               "      <td>37</td>\n",
 264 |               "      <td>1</td>\n",
 265 |               "      <td>999</td>\n",
 266 |               "      <td>0</td>\n",
 267 |               "      <td>1</td>\n",
 268 |               "      <td>0</td>\n",
 269 |               "      <td>0</td>\n",
 270 |               "      <td>0</td>\n",
 271 |               "      <td>0</td>\n",
 272 |               "      <td>0</td>\n",
 273 |               "      <td>...</td>\n",
 274 |               "      <td>0</td>\n",
 275 |               "      <td>1</td>\n",
 276 |               "      <td>0</td>\n",
 277 |               "      <td>0</td>\n",
 278 |               "      <td>0</td>\n",
 279 |               "      <td>0</td>\n",
 280 |               "      <td>1</td>\n",
 281 |               "      <td>0</td>\n",
 282 |               "      <td>1</td>\n",
 283 |               "      <td>0</td>\n",
 284 |               "    </tr>\n",
 285 |               "    <tr>\n",
 286 |               "      <th>3</th>\n",
 287 |               "      <td>40</td>\n",
 288 |               "      <td>1</td>\n",
 289 |               "      <td>999</td>\n",
 290 |               "      <td>0</td>\n",
 291 |               "      <td>1</td>\n",
 292 |               "      <td>0</td>\n",
 293 |               "      <td>1</td>\n",
 294 |               "      <td>0</td>\n",
 295 |               "      <td>0</td>\n",
 296 |               "      <td>0</td>\n",
 297 |               "      <td>...</td>\n",
 298 |               "      <td>0</td>\n",
 299 |               "      <td>1</td>\n",
 300 |               "      <td>0</td>\n",
 301 |               "      <td>0</td>\n",
 302 |               "      <td>0</td>\n",
 303 |               "      <td>0</td>\n",
 304 |               "      <td>1</td>\n",
 305 |               "      <td>0</td>\n",
 306 |               "      <td>1</td>\n",
 307 |               "      <td>0</td>\n",
 308 |               "    </tr>\n",
 309 |               "    <tr>\n",
 310 |               "      <th>4</th>\n",
 311 |               "      <td>56</td>\n",
 312 |               "      <td>1</td>\n",
 313 |               "      <td>999</td>\n",
 314 |               "      <td>0</td>\n",
 315 |               "      <td>1</td>\n",
 316 |               "      <td>0</td>\n",
 317 |               "      <td>0</td>\n",
 318 |               "      <td>0</td>\n",
 319 |               "      <td>0</td>\n",
 320 |               "      <td>0</td>\n",
 321 |               "      <td>...</td>\n",
 322 |               "      <td>0</td>\n",
 323 |               "      <td>1</td>\n",
 324 |               "      <td>0</td>\n",
 325 |               "      <td>0</td>\n",
 326 |               "      <td>0</td>\n",
 327 |               "      <td>0</td>\n",
 328 |               "      <td>1</td>\n",
 329 |               "      <td>0</td>\n",
 330 |               "      <td>1</td>\n",
 331 |               "      <td>0</td>\n",
 332 |               "    </tr>\n",
 333 |               "  </tbody>\n",
 334 |               "</table>\n",
 335 |               "<p>5 rows × 61 columns</p>\n",
 336 |               "</div>"
 337 |             ],
 338 |             "text/plain": [
 339 |               "   age  campaign  pdays  previous  no_previous_contact  not_working  \\\n",
 340 |               "0   56         1    999         0                    1            0   \n",
 341 |               "1   57         1    999         0                    1            0   \n",
 342 |               "2   37         1    999         0                    1            0   \n",
 343 |               "3   40         1    999         0                    1            0   \n",
 344 |               "4   56         1    999         0                    1            0   \n",
 345 |               "\n",
 346 |               "   job_admin.  job_blue-collar  job_entrepreneur  job_housemaid  ...  \\\n",
 347 |               "0           0                0                 0              1  ...   \n",
 348 |               "1           0                0                 0              0  ...   \n",
 349 |               "2           0                0                 0              0  ...   \n",
 350 |               "3           1                0                 0              0  ...   \n",
 351 |               "4           0                0                 0              0  ...   \n",
 352 |               "\n",
 353 |               "   day_of_week_fri  day_of_week_mon  day_of_week_thu  day_of_week_tue  \\\n",
 354 |               "0                0                1                0                0   \n",
 355 |               "1                0                1                0                0   \n",
 356 |               "2                0                1                0                0   \n",
 357 |               "3                0                1                0                0   \n",
 358 |               "4                0                1                0                0   \n",
 359 |               "\n",
 360 |               "   day_of_week_wed  poutcome_failure  poutcome_nonexistent  poutcome_success  \\\n",
 361 |               "0                0                 0                     1                 0   \n",
 362 |               "1                0                 0                     1                 0   \n",
 363 |               "2                0                 0                     1                 0   \n",
 364 |               "3                0                 0                     1                 0   \n",
 365 |               "4                0                 0                     1                 0   \n",
 366 |               "\n",
 367 |               "   y_no  y_yes  \n",
 368 |               "0     1      0  \n",
 369 |               "1     1      0  \n",
 370 |               "2     1      0  \n",
 371 |               "3     1      0  \n",
 372 |               "4     1      0  \n",
 373 |               "\n",
 374 |               "[5 rows x 61 columns]"
 375 |             ]
 376 |           },
 377 |           "execution_count": 41,
 378 |           "metadata": {},
 379 |           "output_type": "execute_result"
 380 |         }
 381 |       ],
 382 |       "source": [
 383 |         "model_data.head()"
 384 |       ]
 385 |     },
 386 |     {
 387 |       "cell_type": "code",
 388 |       "execution_count": null,
 389 |       "id": "332a9f7c-556d-4ced-ac81-5c1b51162417",
 390 |       "metadata": {
 391 |         "tags": [],
 392 |         "id": "332a9f7c-556d-4ced-ac81-5c1b51162417",
 393 |         "outputId": "a28f7c82-70e3-4525-b131-c5e8922f9cf6"
 394 |       },
 395 |       "outputs": [
 396 |         {
 397 |           "name": "stdout",
 398 |           "output_type": "stream",
 399 |           "text": [
 400 |             "<class 'pandas.core.frame.DataFrame'>\n",
 401 |             "Int64Index: 41188 entries, 0 to 41187\n",
 402 |             "Data columns (total 61 columns):\n",
 403 |             " #   Column                         Non-Null Count  Dtype\n",
 404 |             "---  ------                         --------------  -----\n",
 405 |             " 0   age                            41188 non-null  int64\n",
 406 |             " 1   campaign                       41188 non-null  int64\n",
 407 |             " 2   pdays                          41188 non-null  int64\n",
 408 |             " 3   previous                       41188 non-null  int64\n",
 409 |             " 4   no_previous_contact            41188 non-null  int64\n",
 410 |             " 5   not_working                    41188 non-null  int64\n",
 411 |             " 6   job_admin.                     41188 non-null  int64\n",
 412 |             " 7   job_blue-collar                41188 non-null  int64\n",
 413 |             " 8   job_entrepreneur               41188 non-null  int64\n",
 414 |             " 9   job_housemaid                  41188 non-null  int64\n",
 415 |             " 10  job_management                 41188 non-null  int64\n",
 416 |             " 11  job_retired                    41188 non-null  int64\n",
 417 |             " 12  job_self-employed              41188 non-null  int64\n",
 418 |             " 13  job_services                   41188 non-null  int64\n",
 419 |             " 14  job_student                    41188 non-null  int64\n",
 420 |             " 15  job_technician                 41188 non-null  int64\n",
 421 |             " 16  job_unemployed                 41188 non-null  int64\n",
 422 |             " 17  job_unknown                    41188 non-null  int64\n",
 423 |             " 18  marital_divorced               41188 non-null  int64\n",
 424 |             " 19  marital_married                41188 non-null  int64\n",
 425 |             " 20  marital_single                 41188 non-null  int64\n",
 426 |             " 21  marital_unknown                41188 non-null  int64\n",
 427 |             " 22  education_basic.4y             41188 non-null  int64\n",
 428 |             " 23  education_basic.6y             41188 non-null  int64\n",
 429 |             " 24  education_basic.9y             41188 non-null  int64\n",
 430 |             " 25  education_high.school          41188 non-null  int64\n",
 431 |             " 26  education_illiterate           41188 non-null  int64\n",
 432 |             " 27  education_professional.course  41188 non-null  int64\n",
 433 |             " 28  education_university.degree    41188 non-null  int64\n",
 434 |             " 29  education_unknown              41188 non-null  int64\n",
 435 |             " 30  default_no                     41188 non-null  int64\n",
 436 |             " 31  default_unknown                41188 non-null  int64\n",
 437 |             " 32  default_yes                    41188 non-null  int64\n",
 438 |             " 33  housing_no                     41188 non-null  int64\n",
 439 |             " 34  housing_unknown                41188 non-null  int64\n",
 440 |             " 35  housing_yes                    41188 non-null  int64\n",
 441 |             " 36  loan_no                        41188 non-null  int64\n",
 442 |             " 37  loan_unknown                   41188 non-null  int64\n",
 443 |             " 38  loan_yes                       41188 non-null  int64\n",
 444 |             " 39  contact_cellular               41188 non-null  int64\n",
 445 |             " 40  contact_telephone              41188 non-null  int64\n",
 446 |             " 41  month_apr                      41188 non-null  int64\n",
 447 |             " 42  month_aug                      41188 non-null  int64\n",
 448 |             " 43  month_dec                      41188 non-null  int64\n",
 449 |             " 44  month_jul                      41188 non-null  int64\n",
 450 |             " 45  month_jun                      41188 non-null  int64\n",
 451 |             " 46  month_mar                      41188 non-null  int64\n",
 452 |             " 47  month_may                      41188 non-null  int64\n",
 453 |             " 48  month_nov                      41188 non-null  int64\n",
 454 |             " 49  month_oct                      41188 non-null  int64\n",
 455 |             " 50  month_sep                      41188 non-null  int64\n",
 456 |             " 51  day_of_week_fri                41188 non-null  int64\n",
 457 |             " 52  day_of_week_mon                41188 non-null  int64\n",
 458 |             " 53  day_of_week_thu                41188 non-null  int64\n",
 459 |             " 54  day_of_week_tue                41188 non-null  int64\n",
 460 |             " 55  day_of_week_wed                41188 non-null  int64\n",
 461 |             " 56  poutcome_failure               41188 non-null  int64\n",
 462 |             " 57  poutcome_nonexistent           41188 non-null  int64\n",
 463 |             " 58  poutcome_success               41188 non-null  int64\n",
 464 |             " 59  y_no                           41188 non-null  int64\n",
 465 |             " 60  y_yes                          41188 non-null  int64\n",
 466 |             "dtypes: int64(61)\n",
 467 |             "memory usage: 19.5 MB\n"
 468 |           ]
 469 |         }
 470 |       ],
 471 |       "source": [
 472 |         "model_data.info()"
 473 |       ]
 474 |     },
 475 |     {
 476 |       "cell_type": "markdown",
 477 |       "id": "b1420e12-0ab4-4e7b-8b8f-ca10411f3514",
 478 |       "metadata": {
 479 |         "id": "b1420e12-0ab4-4e7b-8b8f-ca10411f3514"
 480 |       },
 481 |       "source": [
 482 |         "## Train and Test Split"
 483 |       ]
 484 |     },
 485 |     {
 486 |       "cell_type": "code",
 487 |       "execution_count": null,
 488 |       "id": "d1cc3aba-b34f-442b-8822-462b31002fac",
 489 |       "metadata": {
 490 |         "tags": [],
 491 |         "id": "d1cc3aba-b34f-442b-8822-462b31002fac"
 492 |       },
 493 |       "outputs": [],
 494 |       "source": [
 495 |         "from sklearn.model_selection import train_test_split\n",
 496 |         "# Fixed seed so the 70/30 split is identical on every Restart & Run All.\n",
 497 |         "train_data, test_data = train_test_split(model_data, test_size=0.3, random_state=42)"
 498 |       ]
 499 |     },
 500 |     {
 501 |       "cell_type": "code",
 502 |       "execution_count": null,
 503 |       "id": "b680644d-3697-4cee-9710-38397f5b89de",
 504 |       "metadata": {
 505 |         "tags": [],
 506 |         "id": "b680644d-3697-4cee-9710-38397f5b89de"
 507 |       },
 508 |       "outputs": [],
 509 |       "source": [
 510 |         "## saving training and testing data in s3 buckets\n",
 511 |         "import os\n",
 512 |         "# label column (y_yes) first, then the features; y_no is dropped and no header/index is written\n",
 513 |         "train_data[['y_yes'] + [col for col in train_data.columns if col not in ('y_no', 'y_yes')]].to_csv(\n",
 514 |         "    \"train.csv\", index=False, header=False)"
 515 |       ]
 516 |     },
 517 |     {
 518 |       "cell_type": "code",
 519 |       "execution_count": null,
 520 |       "id": "509d33df-9a2c-4847-8a23-bb7889e72c33",
 521 |       "metadata": {
 522 |         "tags": [],
 523 |         "id": "509d33df-9a2c-4847-8a23-bb7889e72c33",
 524 |         "outputId": "3bdbd3a4-1d54-4965-e5bf-56f1a1060bcb"
 525 |       },
 526 |       "outputs": [
 527 |         {
 528 |           "name": "stderr",
 529 |           "output_type": "stream",
 530 |           "text": [
 531 |             "INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole\n"
 532 |           ]
 533 |         }
 534 |       ],
 535 |       "source": [
 536 |         "boto3.Session().resource(\"s3\").Bucket(bucket_name).Object(f\"{path}/train/train.csv\").upload_file(\"train.csv\")  # S3 keys always use '/', unlike os.path.join"
 537 |       ]
 538 |     },
 539 |     {
 540 |       "cell_type": "code",
 541 |       "execution_count": null,
 542 |       "id": "163bcf90-7aa4-41d7-b790-3f94a505465c",
 543 |       "metadata": {
 544 |         "tags": [],
 545 |         "id": "163bcf90-7aa4-41d7-b790-3f94a505465c"
 546 |       },
 547 |       "outputs": [],
 548 |       "source": [
 549 |         "## now storing the training csv into a variable\n",
 550 |         "s3_input_train = sagemaker.TrainingInput(s3_data=f\"s3://{bucket_name}/{path}/train/\", content_type=\"csv\")"
 551 |       ]
 552 |     },
 553 |     {
 554 |       "cell_type": "code",
 555 |       "execution_count": null,
 556 |       "id": "14b988c2-e807-4b82-ab86-064ac66d4b51",
 557 |       "metadata": {
 558 |         "tags": [],
 559 |         "id": "14b988c2-e807-4b82-ab86-064ac66d4b51"
 560 |       },
 561 |       "outputs": [],
 562 |       "source": [
 563 |         "# label column (y_yes) first, then the features; y_no is dropped and no header/index is written\n",
 564 |         "test_data[['y_yes'] + [col for col in test_data.columns if col not in ('y_no', 'y_yes')]].to_csv(\n",
 565 |         "    \"test.csv\", index=False, header=False)"
 566 |       ]
 567 |     },
 568 |     {
 569 |       "cell_type": "code",
 570 |       "execution_count": null,
 571 |       "id": "5508f1fa-07c2-41db-8728-1964d3c1feee",
 572 |       "metadata": {
 573 |         "tags": [],
 574 |         "id": "5508f1fa-07c2-41db-8728-1964d3c1feee",
 575 |         "outputId": "13c99d77-63fa-44f1-d67c-581192240834"
 576 |       },
 577 |       "outputs": [
 578 |         {
 579 |           "name": "stderr",
 580 |           "output_type": "stream",
 581 |           "text": [
 582 |             "INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole\n"
 583 |           ]
 584 |         }
 585 |       ],
 586 |       "source": [
 587 |         "boto3.Session().resource(\"s3\").Bucket(bucket_name).Object(f\"{path}/test/test.csv\").upload_file(\"test.csv\")  # S3 keys always use '/', unlike os.path.join\n",
 588 |         "## now storing the test csv location into a variable\n",
 589 |         "s3_input_test = sagemaker.TrainingInput(s3_data=f\"s3://{bucket_name}/{path}/test/\", content_type=\"csv\")"
 590 |       ]
 591 |     },
 592 |     {
 593 |       "cell_type": "markdown",
 594 |       "id": "37413d3a-eab0-43a4-a650-c1d6411cde6e",
 595 |       "metadata": {
 596 |         "id": "37413d3a-eab0-43a4-a650-c1d6411cde6e"
 597 |       },
 598 |       "source": [
 599 |         "## Building the built-in XGBoost algorithm"
 600 |       ]
 601 |     },
 602 |     {
 603 |       "cell_type": "code",
 604 |       "execution_count": null,
 605 |       "id": "945764ca-2025-4d23-9e73-457b0290308c",
 606 |       "metadata": {
 607 |         "tags": [],
 608 |         "id": "945764ca-2025-4d23-9e73-457b0290308c"
 609 |       },
 610 |       "outputs": [],
 611 |       "source": [
 612 |         "from sagemaker import image_uris  # import from its public module, not via amazon_estimator's namespace\n",
 613 |         "from sagemaker.session import s3_input, Session"
 614 |       ]
 615 |     },
 616 |     {
 617 |       "cell_type": "code",
 618 |       "execution_count": null,
 619 |       "id": "00ffeb83-0dd8-4cd0-9036-38ca23ea2d40",
 620 |       "metadata": {
 621 |         "tags": [],
 622 |         "id": "00ffeb83-0dd8-4cd0-9036-38ca23ea2d40",
 623 |         "outputId": "2fec7532-e1a6-4f97-d753-47d24ac94d95"
 624 |       },
 625 |       "outputs": [
 626 |         {
 627 |           "name": "stderr",
 628 |           "output_type": "stream",
 629 |           "text": [
 630 |             "INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.\n"
 631 |           ]
 632 |         }
 633 |       ],
 634 |       "source": [
 635 |         "container = image_uris.retrieve(framework=\"xgboost\", region=boto3.Session().region_name, version=\"1.5-1\")"
 636 |       ]
 637 |     },
 638 |     {
 639 |       "cell_type": "code",
 640 |       "execution_count": null,
 641 |       "id": "8cc12158-aa48-4306-a600-abb7a540f3dd",
 642 |       "metadata": {
 643 |         "tags": [],
 644 |         "id": "8cc12158-aa48-4306-a600-abb7a540f3dd"
 645 |       },
 646 |       "outputs": [],
 647 |       "source": [
 648 |         "# XGBoost training hyperparameters handed to the estimator\n",
 649 |         "hyperparameters = dict(\n",
 650 |         "    max_depth=\"5\",\n",
 651 |         "    eta=\"0.2\",\n",
 652 |         "    gamma=\"4\",\n",
 653 |         "    min_child_weight=\"6\",\n",
 654 |         "    subsample=\"0.7\",\n",
 655 |         "    objective=\"binary:logistic\",\n",
 656 |         "    num_round=50,  # the only value given as an int rather than a string\n",
 657 |         ")"
 658 |       ]
 659 |     },
 660 |     {
 661 |       "cell_type": "code",
 662 |       "execution_count": null,
 663 |       "id": "05b18a5b-9148-46bb-a502-7b2455c2ee3d",
 664 |       "metadata": {
 665 |         "tags": [],
 666 |         "id": "05b18a5b-9148-46bb-a502-7b2455c2ee3d"
 667 |       },
 668 |       "outputs": [],
 669 |       "source": [
 670 |         "# Estimator that runs the XGBoost container image retrieved above on a single training instance\n",
 671 |         "estimator = sagemaker.estimator.Estimator(\n",
 672 |         "    image_uri=container,\n",
 673 |         "    role=sagemaker.get_execution_role(),\n",
 674 |         "    instance_count=1, instance_type=\"ml.m5.2xlarge\", volume_size=5,\n",
 675 |         "    hyperparameters=hyperparameters,\n",
 676 |         "    output_path=output_path,  # model artifacts are written under this S3 prefix\n",
 677 |         ")"
 678 |       ]
 679 |     },
 680 |     {
 681 |       "cell_type": "code",
 682 |       "execution_count": null,
 683 |       "id": "6d3e01e8-1039-43e0-a26c-2a48e9e2b155",
 684 |       "metadata": {
 685 |         "tags": [],
 686 |         "id": "6d3e01e8-1039-43e0-a26c-2a48e9e2b155",
 687 |         "outputId": "66e9f423-4475-44ba-9bb3-dec828445bdd"
 688 |       },
 689 |       "outputs": [
 690 |         {
 691 |           "name": "stderr",
 692 |           "output_type": "stream",
 693 |           "text": [
 694 |             "INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-06-08-16-50-36-867\n"
 695 |           ]
 696 |         },
 697 |         {
 698 |           "name": "stdout",
 699 |           "output_type": "stream",
 700 |           "text": [
 701 |             "2023-06-08 16:50:40 Starting - Starting the training job...\n",
 702 |             "2023-06-08 16:50:55 Starting - Preparing the instances for training......\n",
 703 |             "2023-06-08 16:52:14 Downloading - Downloading input data\n",
 704 |             "2023-06-08 16:52:14 Training - Downloading the training image...\n",
 705 |             "2023-06-08 16:52:35 Training - Training image download completed. Training in progress...\u001b[34m[2023-06-08 16:52:54.214 ip-10-2-95-175.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None\u001b[0m\n",
 706 |             "\u001b[34m[2023-06-08 16:52:54.283 ip-10-2-95-175.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.\u001b[0m\n",
 707 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Imported framework sagemaker_xgboost_container.training\u001b[0m\n",
 708 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.\u001b[0m\n",
 709 |             "\u001b[34mReturning the value itself\u001b[0m\n",
 710 |             "\u001b[34m[2023-06-08:16:52:54:INFO] No GPUs detected (normal if no gpus installed)\u001b[0m\n",
 711 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Running XGBoost Sagemaker in algorithm mode\u001b[0m\n",
 712 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Determined 0 GPU(s) available on the instance.\u001b[0m\n",
 713 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
 714 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
 715 |             "\u001b[34m[2023-06-08:16:52:54:INFO] files path: /opt/ml/input/data/train\u001b[0m\n",
 716 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
 717 |             "\u001b[34m[2023-06-08:16:52:54:INFO] files path: /opt/ml/input/data/validation\u001b[0m\n",
 718 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
 719 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Single node training.\u001b[0m\n",
 720 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Train matrix has 28831 rows and 59 columns\u001b[0m\n",
 721 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Validation matrix has 12357 rows\u001b[0m\n",
 722 |             "\u001b[34m[2023-06-08 16:52:54.720 ip-10-2-95-175.ec2.internal:7 INFO json_config.py:92] Creating hook from json_config at /opt/ml/input/config/debughookconfig.json.\u001b[0m\n",
 723 |             "\u001b[34m[2023-06-08 16:52:54.721 ip-10-2-95-175.ec2.internal:7 INFO hook.py:206] tensorboard_dir has not been set for the hook. SMDebug will not be exporting tensorboard summaries.\u001b[0m\n",
 724 |             "\u001b[34m[2023-06-08 16:52:54.722 ip-10-2-95-175.ec2.internal:7 INFO hook.py:259] Saving to /opt/ml/output/tensors\u001b[0m\n",
 725 |             "\u001b[34m[2023-06-08 16:52:54.722 ip-10-2-95-175.ec2.internal:7 INFO state_store.py:77] The checkpoint config file /opt/ml/input/config/checkpointconfig.json does not exist.\u001b[0m\n",
 726 |             "\u001b[34m[2023-06-08:16:52:54:INFO] Debug hook created from config\u001b[0m\n",
 727 |             "\u001b[34m[16:52:54] WARNING: ../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\u001b[0m\n",
 728 |             "\u001b[34m[2023-06-08 16:52:54.799 ip-10-2-95-175.ec2.internal:7 INFO hook.py:427] Monitoring the collections: metrics\u001b[0m\n",
 729 |             "\u001b[34m[2023-06-08 16:52:54.802 ip-10-2-95-175.ec2.internal:7 INFO hook.py:491] Hook is writing from the hook with pid: 7\u001b[0m\n",
 730 |             "\u001b[34m[0]#011train-logloss:0.57428#011validation-logloss:0.57330\u001b[0m\n",
 731 |             "\u001b[34m[1]#011train-logloss:0.49542#011validation-logloss:0.49375\u001b[0m\n",
 732 |             "\u001b[34m[2]#011train-logloss:0.43939#011validation-logloss:0.43713\u001b[0m\n",
 733 |             "\u001b[34m[3]#011train-logloss:0.40015#011validation-logloss:0.39731\u001b[0m\n",
 734 |             "\u001b[34m[4]#011train-logloss:0.37156#011validation-logloss:0.36851\u001b[0m\n",
 735 |             "\u001b[34m[5]#011train-logloss:0.34992#011validation-logloss:0.34684\u001b[0m\n",
 736 |             "\u001b[34m[6]#011train-logloss:0.33411#011validation-logloss:0.33079\u001b[0m\n",
 737 |             "\u001b[34m[7]#011train-logloss:0.32230#011validation-logloss:0.31873\u001b[0m\n",
 738 |             "\u001b[34m[8]#011train-logloss:0.31328#011validation-logloss:0.30944\u001b[0m\n",
 739 |             "\u001b[34m[9]#011train-logloss:0.30687#011validation-logloss:0.30285\u001b[0m\n",
 740 |             "\u001b[34m[10]#011train-logloss:0.30208#011validation-logloss:0.29787\u001b[0m\n",
 741 |             "\u001b[34m[11]#011train-logloss:0.29824#011validation-logloss:0.29425\u001b[0m\n",
 742 |             "\u001b[34m[12]#011train-logloss:0.29549#011validation-logloss:0.29142\u001b[0m\n",
 743 |             "\u001b[34m[13]#011train-logloss:0.29334#011validation-logloss:0.28951\u001b[0m\n",
 744 |             "\u001b[34m[14]#011train-logloss:0.29156#011validation-logloss:0.28759\u001b[0m\n",
 745 |             "\u001b[34m[15]#011train-logloss:0.29002#011validation-logloss:0.28623\u001b[0m\n",
 746 |             "\u001b[34m[16]#011train-logloss:0.28896#011validation-logloss:0.28525\u001b[0m\n",
 747 |             "\u001b[34m[17]#011train-logloss:0.28785#011validation-logloss:0.28464\u001b[0m\n",
 748 |             "\u001b[34m[18]#011train-logloss:0.28711#011validation-logloss:0.28403\u001b[0m\n",
 749 |             "\u001b[34m[19]#011train-logloss:0.28606#011validation-logloss:0.28331\u001b[0m\n",
 750 |             "\u001b[34m[20]#011train-logloss:0.28546#011validation-logloss:0.28283\u001b[0m\n",
 751 |             "\u001b[34m[21]#011train-logloss:0.28482#011validation-logloss:0.28255\u001b[0m\n",
 752 |             "\u001b[34m[22]#011train-logloss:0.28445#011validation-logloss:0.28218\u001b[0m\n",
 753 |             "\u001b[34m[23]#011train-logloss:0.28360#011validation-logloss:0.28154\u001b[0m\n",
 754 |             "\u001b[34m[24]#011train-logloss:0.28319#011validation-logloss:0.28148\u001b[0m\n",
 755 |             "\u001b[34m[25]#011train-logloss:0.28266#011validation-logloss:0.28155\u001b[0m\n",
 756 |             "\u001b[34m[26]#011train-logloss:0.28223#011validation-logloss:0.28107\u001b[0m\n",
 757 |             "\u001b[34m[27]#011train-logloss:0.28208#011validation-logloss:0.28106\u001b[0m\n",
 758 |             "\u001b[34m[28]#011train-logloss:0.28179#011validation-logloss:0.28102\u001b[0m\n",
 759 |             "\u001b[34m[29]#011train-logloss:0.28158#011validation-logloss:0.28087\u001b[0m\n",
 760 |             "\u001b[34m[30]#011train-logloss:0.28133#011validation-logloss:0.28070\u001b[0m\n",
 761 |             "\u001b[34m[31]#011train-logloss:0.28119#011validation-logloss:0.28057\u001b[0m\n",
 762 |             "\u001b[34m[32]#011train-logloss:0.28093#011validation-logloss:0.28066\u001b[0m\n",
 763 |             "\u001b[34m[33]#011train-logloss:0.28046#011validation-logloss:0.28070\u001b[0m\n",
 764 |             "\u001b[34m[34]#011train-logloss:0.28036#011validation-logloss:0.28076\u001b[0m\n",
 765 |             "\u001b[34m[35]#011train-logloss:0.28011#011validation-logloss:0.28081\u001b[0m\n",
 766 |             "\u001b[34m[36]#011train-logloss:0.27984#011validation-logloss:0.28091\u001b[0m\n",
 767 |             "\u001b[34m[37]#011train-logloss:0.27935#011validation-logloss:0.28062\u001b[0m\n",
 768 |             "\u001b[34m[38]#011train-logloss:0.27918#011validation-logloss:0.28051\u001b[0m\n",
 769 |             "\u001b[34m[39]#011train-logloss:0.27897#011validation-logloss:0.28065\u001b[0m\n",
 770 |             "\u001b[34m[40]#011train-logloss:0.27889#011validation-logloss:0.28070\u001b[0m\n",
 771 |             "\u001b[34m[41]#011train-logloss:0.27871#011validation-logloss:0.28066\u001b[0m\n",
 772 |             "\u001b[34m[42]#011train-logloss:0.27860#011validation-logloss:0.28077\u001b[0m\n",
 773 |             "\u001b[34m[43]#011train-logloss:0.27860#011validation-logloss:0.28077\u001b[0m\n",
 774 |             "\u001b[34m[44]#011train-logloss:0.27829#011validation-logloss:0.28093\u001b[0m\n",
 775 |             "\u001b[34m[45]#011train-logloss:0.27808#011validation-logloss:0.28091\u001b[0m\n",
 776 |             "\u001b[34m[46]#011train-logloss:0.27801#011validation-logloss:0.28085\u001b[0m\n",
 777 |             "\u001b[34m[47]#011train-logloss:0.27786#011validation-logloss:0.28088\u001b[0m\n",
 778 |             "\u001b[34m[48]#011train-logloss:0.27779#011validation-logloss:0.28085\u001b[0m\n",
 779 |             "\u001b[34m[49]#011train-logloss:0.27756#011validation-logloss:0.28106\u001b[0m\n",
 780 |             "\n",
 781 |             "2023-06-08 16:53:16 Uploading - Uploading generated training model\n",
 782 |             "2023-06-08 16:53:16 Completed - Training job completed\n",
 783 |             "Training seconds: 82\n",
 784 |             "Billable seconds: 82\n"
 785 |           ]
 786 |         }
 787 |       ],
 788 |       "source": [
 789 |         "estimator.fit(inputs={\"train\": s3_input_train, \"validation\": s3_input_test})"
 790 |       ]
 791 |     },
 792 |     {
 793 |       "cell_type": "markdown",
 794 |       "id": "550b55e8-6f11-4643-b0e1-a0465e0f8d68",
 795 |       "metadata": {
 796 |         "id": "550b55e8-6f11-4643-b0e1-a0465e0f8d68"
 797 |       },
 798 |       "source": [
 799 |         "## Deploying the ML Model as an endpoint"
 800 |       ]
 801 |     },
 802 |     {
 803 |       "cell_type": "code",
 804 |       "execution_count": null,
 805 |       "id": "d5801e3e-a924-4422-84c9-de93827d25f8",
 806 |       "metadata": {
 807 |         "tags": [],
 808 |         "id": "d5801e3e-a924-4422-84c9-de93827d25f8",
 809 |         "outputId": "49613511-3aba-4db0-b92c-bbb010540176"
 810 |       },
 811 |       "outputs": [
 812 |         {
 813 |           "name": "stderr",
 814 |           "output_type": "stream",
 815 |           "text": [
 816 |             "INFO:sagemaker:Creating model with name: sagemaker-xgboost-2023-06-08-16-53-29-875\n",
 817 |             "INFO:sagemaker:Creating endpoint-config with name sagemaker-xgboost-2023-06-08-16-53-29-875\n",
 818 |             "INFO:sagemaker:Creating endpoint with name sagemaker-xgboost-2023-06-08-16-53-29-875\n"
 819 |           ]
 820 |         },
 821 |         {
 822 |           "name": "stdout",
 823 |           "output_type": "stream",
 824 |           "text": [
 825 |             "-------!"
 826 |           ]
 827 |         }
 828 |       ],
 829 |       "source": [
 830 |         "from sagemaker.serializers import CSVSerializer\n",
 831 |         "xgb_predictor = estimator.deploy(initial_instance_count=1, instance_type=\"ml.m4.xlarge\", serializer=CSVSerializer())"
 832 |       ]
 833 |     },
 834 |     {
 835 |       "cell_type": "markdown",
 836 |       "id": "fdbfd854-3489-49fd-a0a7-5016ae0079ae",
 837 |       "metadata": {
 838 |         "id": "fdbfd854-3489-49fd-a0a7-5016ae0079ae"
 839 |       },
 840 |       "source": [
 841 |         "## Prediction of test data"
 842 |       ]
 843 |     },
 844 |     {
 845 |       "cell_type": "code",
 846 |       "execution_count": null,
 847 |       "id": "82d7c25f-d9c5-4355-80df-6b83c19bb2f1",
 848 |       "metadata": {
 849 |         "tags": [],
 850 |         "id": "82d7c25f-d9c5-4355-80df-6b83c19bb2f1",
 851 |         "outputId": "8345be11-9d16-4c21-efaf-7e9c52f8a824"
 852 |       },
 853 |       "outputs": [
 854 |         {
 855 |           "data": {
 856 |             "text/plain": [
 857 |               "array([[ 34,   4, 999, ...,   0,   1,   0],\n",
 858 |               "       [ 31,   6, 999, ...,   0,   1,   0],\n",
 859 |               "       [ 32,   1, 999, ...,   0,   1,   0],\n",
 860 |               "       ...,\n",
 861 |               "       [ 36,   3, 999, ...,   0,   1,   0],\n",
 862 |               "       [ 47,   3, 999, ...,   0,   1,   0],\n",
 863 |               "       [ 39,   8, 999, ...,   0,   1,   0]])"
 864 |             ]
 865 |           },
 866 |           "execution_count": 55,
 867 |           "metadata": {},
 868 |           "output_type": "execute_result"
 869 |         }
 870 |       ],
 871 |       "source": [
 872 |         "test_data_array = test_data.drop(columns=[\"y_yes\", \"y_no\"]).to_numpy()\n",
 873 |         "test_data_array"
 874 |       ]
 875 |     },
 876 |     {
 877 |       "cell_type": "code",
 878 |       "execution_count": null,
 879 |       "id": "31843c08-a7b8-438c-8370-46c48626b11f",
 880 |       "metadata": {
 881 |         "tags": [],
 882 |         "id": "31843c08-a7b8-438c-8370-46c48626b11f"
 883 |       },
 884 |       "outputs": [],
 885 |       "source": [
 886 |         "predictions = xgb_predictor.predict(data=test_data_array).decode(\"utf-8\")"
 887 |       ]
 888 |     },
 889 |     {
 890 |       "cell_type": "code",
 891 |       "execution_count": null,
 892 |       "id": "46bcd959-2951-4329-b211-279a84b174f1",
 893 |       "metadata": {
 894 |         "tags": [],
 895 |         "id": "46bcd959-2951-4329-b211-279a84b174f1",
 896 |         "outputId": "fdf0fc9c-48c2-4c8b-d02e-5c159c140643"
 897 |       },
 898 |       "outputs": [
 899 |         {
 900 |           "data": {
 901 |             "text/plain": [
 902 |               "array([0.08596137, 0.08066913, 0.2411833 , ..., 0.10747377, 0.07555814,\n",
 903 |               "       0.03684008])"
 904 |             ]
 905 |           },
 906 |           "execution_count": 57,
 907 |           "metadata": {},
 908 |           "output_type": "execute_result"
 909 |         }
 910 |       ],
 911 |       "source": [
 912 |         "predictions_array = np.fromstring(predictions, sep=\"\\n\")\n",
 913 |         "predictions_array"
 914 |       ]
 915 |     },
 916 |     {
 917 |       "cell_type": "code",
 918 |       "execution_count": null,
 919 |       "id": "8f049870-18b8-472b-9ecd-e65be3e08922",
 920 |       "metadata": {
 921 |         "id": "8f049870-18b8-472b-9ecd-e65be3e08922",
 922 |         "outputId": "d8709a5b-c1d6-4dd7-b297-0555f58be0be"
 923 |       },
 924 |       "outputs": [
 925 |         {
 926 |           "name": "stdout",
 927 |           "output_type": "stream",
 928 |           "text": [
 929 |             "[[10828   163]\n",
 930 |             " [ 1076   290]]\n",
 931 |             "              precision    recall  f1-score   support\n",
 932 |             "\n",
 933 |             "           0       0.91      0.99      0.95     10991\n",
 934 |             "           1       0.64      0.21      0.32      1366\n",
 935 |             "\n",
 936 |             "    accuracy                           0.90     12357\n",
 937 |             "   macro avg       0.77      0.60      0.63     12357\n",
 938 |             "weighted avg       0.88      0.90      0.88     12357\n",
 939 |             "\n"
 940 |           ]
 941 |         }
 942 |       ],
 943 |       "source": [
 944 |         "import sklearn.metrics\n",
 945 |         "cutoff = 0.5  # scores strictly above this probability are labelled as the positive class (1)\n",
 946 |         "predicted_labels = (predictions_array > cutoff).astype(int)\n",
 947 |         "print(sklearn.metrics.confusion_matrix(test_data['y_yes'], predicted_labels), sklearn.metrics.classification_report(test_data['y_yes'], predicted_labels), sep=\"\\n\")"
 948 |       ]
 949 |     },
 950 |     {
 951 |       "cell_type": "code",
 952 |       "execution_count": null,
 953 |       "id": "f2efc2f5-b86f-4ff2-a429-3524f045f79f",
 954 |       "metadata": {
 955 |         "id": "f2efc2f5-b86f-4ff2-a429-3524f045f79f",
 956 |         "outputId": "1221010c-6e00-485a-c0aa-5a7831cbd74d"
 957 |       },
 958 |       "outputs": [
 959 |         {
 960 |           "data": {
 961 |             "text/plain": [
 962 |               "'sagemaker-xgboost-2023-06-08-16-53-29-875'"
 963 |             ]
 964 |           },
 965 |           "execution_count": 59,
 966 |           "metadata": {},
 967 |           "output_type": "execute_result"
 968 |         }
 969 |       ],
 970 |       "source": [
 971 |         "xgb_predictor.endpoint_name"
 972 |       ]
 973 |     },
 974 |     {
 975 |       "cell_type": "code",
 976 |       "execution_count": null,
 977 |       "id": "5963d98c-3fbd-475d-99ee-17886bf2543a",
 978 |       "metadata": {
 979 |         "id": "5963d98c-3fbd-475d-99ee-17886bf2543a",
 980 |         "outputId": "c54c3685-8101-4905-a862-51362df24d23"
 981 |       },
 982 |       "outputs": [
 983 |         {
 984 |           "name": "stderr",
 985 |           "output_type": "stream",
 986 |           "text": [
 987 |             "WARNING:sagemaker.deprecations:The endpoint attribute has been renamed in sagemaker>=2.\n",
 988 |             "See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.\n",
 989 |             "INFO:sagemaker:Deleting endpoint with name: sagemaker-xgboost-2023-06-08-16-53-29-875\n"
 990 |           ]
 991 |         }
 992 |       ],
 993 |       "source": [
 994 |         "sagemaker.Session().delete_endpoint(xgb_predictor.endpoint_name)\n",
 995 |         "bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)"
 996 |       ]
 997 |     },
 998 |     {
 999 |       "cell_type": "code",
1000 |       "execution_count": null,
1001 |       "id": "1cbb5a75-f0d7-4c6f-8a3e-e7737d3c0c4f",
1002 |       "metadata": {
1003 |         "tags": [],
1004 |         "id": "1cbb5a75-f0d7-4c6f-8a3e-e7737d3c0c4f",
1005 |         "outputId": "45df1f88-4d5f-4bbd-c9a6-1bb0daeff769"
1006 |       },
1007 |       "outputs": [
1008 |         {
1009 |           "data": {
1010 |             "text/plain": [
1011 |               "[{'ResponseMetadata': {'RequestId': '3ZFZK6BFTT1TP8MY',\n",
1012 |               "   'HostId': 'oGJmvH3ZJE/pzZ8b4ipnm20ms75AFSLBPaOYYV9qlTBwihvWdnULVLgtk+xQdk9hJrFeFaEBz5E=',\n",
1013 |               "   'HTTPStatusCode': 200,\n",
1014 |               "   'HTTPHeaders': {'x-amz-id-2': 'oGJmvH3ZJE/pzZ8b4ipnm20ms75AFSLBPaOYYV9qlTBwihvWdnULVLgtk+xQdk9hJrFeFaEBz5E=',\n",
1015 |               "    'x-amz-request-id': '3ZFZK6BFTT1TP8MY',\n",
1016 |               "    'date': 'Thu, 08 Jun 2023 16:57:35 GMT',\n",
1017 |               "    'content-type': 'application/xml',\n",
1018 |               "    'transfer-encoding': 'chunked',\n",
1019 |               "    'server': 'AmazonS3',\n",
1020 |               "    'connection': 'close'},\n",
1021 |               "   'RetryAttempts': 0},\n",
1022 |               "  'Deleted': [{'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000030_worker_0.json'},\n",
1023 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/claim.smd'},\n",
1024 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000010_worker_0.json'},\n",
1025 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000020/000000000020_worker_0.tfevents'},\n",
1026 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/incremental/2023060816/1686243120.algo-1.json'},\n",
1027 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000000/000000000000_worker_0.tfevents'},\n",
1028 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/training_job_end.ts'},\n",
1029 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/training_job_end.ts'},\n",
1030 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000000_worker_0.json'},\n",
1031 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000020_worker_0.json'},\n",
1032 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000040_worker_0.json'},\n",
1033 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/collections/000000000/worker_0_collections.json'},\n",
1034 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/output/model.tar.gz'},\n",
1035 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000040/000000000040_worker_0.tfevents'},\n",
1036 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/incremental/2023060816/1686243180.algo-1.json'},\n",
1037 |               "   {'Key': 'xgboost-as-a-built-in-algo/test/test.csv'},\n",
1038 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/framework/training_job_end.ts'},\n",
1039 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000010/000000000010_worker_0.tfevents'},\n",
1040 |               "   {'Key': 'xgboost-as-a-built-in-algo/train/train.csv'},\n",
1041 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/incremental/2023060816/1686243060.algo-1.json'},\n",
1042 |               "   {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000030/000000000030_worker_0.tfevents'}]}]"
1043 |             ]
1044 |           },
1045 |           "execution_count": 61,
1046 |           "metadata": {},
1047 |           "output_type": "execute_result"
1048 |         }
1049 |       ],
1050 |       "source": [
1051 |         "bucket_to_delete.objects.all().delete()"
1052 |       ]
1053 |     },
1054 |     {
1055 |       "cell_type": "code",
1056 |       "execution_count": null,
1057 |       "id": "43110ec1-844b-413f-b52e-42668eb1f436",
1058 |       "metadata": {
1059 |         "id": "43110ec1-844b-413f-b52e-42668eb1f436"
1060 |       },
1061 |       "outputs": [],
1062 |       "source": []
1063 |     }
1064 |   ],
1065 |   "metadata": {
1066 |     "kernelspec": {
1067 |       "display_name": "conda_python3",
1068 |       "language": "python",
1069 |       "name": "conda_python3"
1070 |     },
1071 |     "language_info": {
1072 |       "codemirror_mode": {
1073 |         "name": "ipython",
1074 |         "version": 3
1075 |       },
1076 |       "file_extension": ".py",
1077 |       "mimetype": "text/x-python",
1078 |       "name": "python",
1079 |       "nbconvert_exporter": "python",
1080 |       "pygments_lexer": "ipython3",
1081 |       "version": "3.10.8"
1082 |     },
1083 |     "colab": {
1084 |       "provenance": [],
1085 |       "include_colab_link": true
1086 |     }
1087 |   },
1088 |   "nbformat": 4,
1089 |   "nbformat_minor": 5
1090 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # aws-sagemaker
2 | Demonstration of training, deploying, and evaluating an XGBoost model on Amazon Web Services (AWS) SageMaker using the SageMaker Python SDK.
3 | 
4 | 1. [SageMaker Python API](https://sagemaker.readthedocs.io/en/stable/overview.html)
5 | 2. [Know more about AWS SageMaker](https://aws.amazon.com/sagemaker/)
6 | 3. [SageMaker Pricing](https://aws.amazon.com/sagemaker/pricing/?nc=sn&loc=3)
7 | 


--------------------------------------------------------------------------------
	age	campaign	pdays	no_previous_contact	job_admin.	job_housemaid	...	day_of_week_mon	poutcome_nonexistent	y_no
0	56	1	999	1	0	1	...	1	1	1
1	57	1	999	1	0	0	...	1	1	1
2	37	1	999	1	0	0	...	1	1	1
3	40	1	999	1	1	0	...	1	1	1
4	56	1	999	1	0	0	...	1	1	1