├── AWS_SageMaker.ipynb └── README.md /AWS_SageMaker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "8c436963-afa1-454c-9275-a1ca109a4cbe", 17 | "metadata": { 18 | "tags": [], 19 | "id": "8c436963-afa1-454c-9275-a1ca109a4cbe" 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import pandas as pd\n", 24 | "import numpy as np\n", 25 | "import matplotlib.pyplot as plt\n", 26 | "import sagemaker\n", 27 | "import boto3" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "id": "7721d237-76df-47c2-a947-175a55c5f88f", 34 | "metadata": { 35 | "tags": [], 36 | "id": "7721d237-76df-47c2-a947-175a55c5f88f", 37 | "outputId": "0776a493-d9c4-496b-b42b-eff885a8c41b" 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "us-east-1\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "bucket_name = \"dsmlsagemaker-prasang\"\n", 50 | "# Check the region of the instance\n", 51 | "my_region = boto3.session.Session().region_name\n", 52 | "print(my_region)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "e30ee6c1-9f90-4357-91b4-80e2be7a464d", 59 | "metadata": { 60 | "tags": [], 61 | "id": "e30ee6c1-9f90-4357-91b4-80e2be7a464d", 62 | "outputId": "b3790a2b-89e9-483c-962a-a69000a20e3d" 63 | }, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "S3 Bucket Created Successfully!\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "# Create a bucket\n", 75 | "s3 = boto3.resource(\"s3\", \n", 76 | " region_name=my_region,\n", 77 | " aws_access_key_id=\"\",\n", 78 | " aws_secret_access_key=\"\")\n", 79 | "\n", 80 | "try:\n", 81 | " if my_region == 
\"us-east-1\":\n", 82 | " s3.create_bucket(Bucket=bucket_name)\n", 83 | " print(\"S3 Bucket Created Successfully!\")\n", 84 | "except Exception as e:\n", 85 | " print(str(e))\n", 86 | " " 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "id": "3eb7f357-4ae3-4d46-a13c-dad2d1ce5467", 93 | "metadata": { 94 | "tags": [], 95 | "id": "3eb7f357-4ae3-4d46-a13c-dad2d1ce5467", 96 | "outputId": "fdb144d3-89bb-48c8-a484-6abaf1fc9257" 97 | }, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "s3://dsmlsagemaker-prasang/xgboost-as-a-built-in-algo/output\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "## set an output path for model artifacts\n", 109 | "path = \"xgboost-as-a-built-in-algo\"\n", 110 | "output_path = f\"s3://{bucket_name}/{path}/output\"\n", 111 | "print(output_path)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "id": "3771b3e3-d416-4555-b4e1-6c776b344508", 117 | "metadata": { 118 | "id": "3771b3e3-d416-4555-b4e1-6c776b344508" 119 | }, 120 | "source": [ 121 | "## Download the data and store in S3" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "id": "275ef361-0969-4898-b28f-6ff1d0266def", 128 | "metadata": { 129 | "tags": [], 130 | "id": "275ef361-0969-4898-b28f-6ff1d0266def", 131 | "outputId": "eef657c0-a005-4606-b4d9-85f83a9b7ef6" 132 | }, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "Success: downloaded bank_clean.csv.\n", 139 | "Success: Data loaded into dataframe.\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "import urllib\n", 145 | "try:\n", 146 | " #the bank data is in one hot encoded format already\n", 147 | " urllib.request.urlretrieve (\"https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv\", \"bank_clean.csv\")\n", 148 | " print('Success: downloaded 
bank_clean.csv.')\n", 149 | "except Exception as e:\n", 150 | " print('Data load error: ',e) \n", 151 | "try:\n", 152 | " model_data = pd.read_csv('./bank_clean.csv',index_col=0)\n", 153 | " print('Success: Data loaded into dataframe.')\n", 154 | "except Exception as e:\n", 155 | " print('Data load error: ',e)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "id": "8039da29-803d-44b4-ad41-23e957b05fd3", 162 | "metadata": { 163 | "tags": [], 164 | "id": "8039da29-803d-44b4-ad41-23e957b05fd3", 165 | "outputId": "666db70c-d53b-4a32-d1f0-a6e685b8ff73" 166 | }, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/html": [ 171 | "
\n", 172 | "\n", 185 | "\n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | "
agecampaignpdayspreviousno_previous_contactnot_workingjob_admin.job_blue-collarjob_entrepreneurjob_housemaid...day_of_week_friday_of_week_monday_of_week_thuday_of_week_tueday_of_week_wedpoutcome_failurepoutcome_nonexistentpoutcome_successy_noy_yes
05619990100001...0100001010
15719990100000...0100001010
23719990100000...0100001010
34019990101000...0100001010
45619990100000...0100001010
\n", 335 | "

5 rows × 61 columns

\n", 336 | "
" 337 | ], 338 | "text/plain": [ 339 | " age campaign pdays previous no_previous_contact not_working \\\n", 340 | "0 56 1 999 0 1 0 \n", 341 | "1 57 1 999 0 1 0 \n", 342 | "2 37 1 999 0 1 0 \n", 343 | "3 40 1 999 0 1 0 \n", 344 | "4 56 1 999 0 1 0 \n", 345 | "\n", 346 | " job_admin. job_blue-collar job_entrepreneur job_housemaid ... \\\n", 347 | "0 0 0 0 1 ... \n", 348 | "1 0 0 0 0 ... \n", 349 | "2 0 0 0 0 ... \n", 350 | "3 1 0 0 0 ... \n", 351 | "4 0 0 0 0 ... \n", 352 | "\n", 353 | " day_of_week_fri day_of_week_mon day_of_week_thu day_of_week_tue \\\n", 354 | "0 0 1 0 0 \n", 355 | "1 0 1 0 0 \n", 356 | "2 0 1 0 0 \n", 357 | "3 0 1 0 0 \n", 358 | "4 0 1 0 0 \n", 359 | "\n", 360 | " day_of_week_wed poutcome_failure poutcome_nonexistent poutcome_success \\\n", 361 | "0 0 0 1 0 \n", 362 | "1 0 0 1 0 \n", 363 | "2 0 0 1 0 \n", 364 | "3 0 0 1 0 \n", 365 | "4 0 0 1 0 \n", 366 | "\n", 367 | " y_no y_yes \n", 368 | "0 1 0 \n", 369 | "1 1 0 \n", 370 | "2 1 0 \n", 371 | "3 1 0 \n", 372 | "4 1 0 \n", 373 | "\n", 374 | "[5 rows x 61 columns]" 375 | ] 376 | }, 377 | "execution_count": 41, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "model_data.head()" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "id": "332a9f7c-556d-4ced-ac81-5c1b51162417", 390 | "metadata": { 391 | "tags": [], 392 | "id": "332a9f7c-556d-4ced-ac81-5c1b51162417", 393 | "outputId": "a28f7c82-70e3-4525-b131-c5e8922f9cf6" 394 | }, 395 | "outputs": [ 396 | { 397 | "name": "stdout", 398 | "output_type": "stream", 399 | "text": [ 400 | "\n", 401 | "Int64Index: 41188 entries, 0 to 41187\n", 402 | "Data columns (total 61 columns):\n", 403 | " # Column Non-Null Count Dtype\n", 404 | "--- ------ -------------- -----\n", 405 | " 0 age 41188 non-null int64\n", 406 | " 1 campaign 41188 non-null int64\n", 407 | " 2 pdays 41188 non-null int64\n", 408 | " 3 previous 41188 non-null int64\n", 409 | " 4 no_previous_contact 41188 
non-null int64\n", 410 | " 5 not_working 41188 non-null int64\n", 411 | " 6 job_admin. 41188 non-null int64\n", 412 | " 7 job_blue-collar 41188 non-null int64\n", 413 | " 8 job_entrepreneur 41188 non-null int64\n", 414 | " 9 job_housemaid 41188 non-null int64\n", 415 | " 10 job_management 41188 non-null int64\n", 416 | " 11 job_retired 41188 non-null int64\n", 417 | " 12 job_self-employed 41188 non-null int64\n", 418 | " 13 job_services 41188 non-null int64\n", 419 | " 14 job_student 41188 non-null int64\n", 420 | " 15 job_technician 41188 non-null int64\n", 421 | " 16 job_unemployed 41188 non-null int64\n", 422 | " 17 job_unknown 41188 non-null int64\n", 423 | " 18 marital_divorced 41188 non-null int64\n", 424 | " 19 marital_married 41188 non-null int64\n", 425 | " 20 marital_single 41188 non-null int64\n", 426 | " 21 marital_unknown 41188 non-null int64\n", 427 | " 22 education_basic.4y 41188 non-null int64\n", 428 | " 23 education_basic.6y 41188 non-null int64\n", 429 | " 24 education_basic.9y 41188 non-null int64\n", 430 | " 25 education_high.school 41188 non-null int64\n", 431 | " 26 education_illiterate 41188 non-null int64\n", 432 | " 27 education_professional.course 41188 non-null int64\n", 433 | " 28 education_university.degree 41188 non-null int64\n", 434 | " 29 education_unknown 41188 non-null int64\n", 435 | " 30 default_no 41188 non-null int64\n", 436 | " 31 default_unknown 41188 non-null int64\n", 437 | " 32 default_yes 41188 non-null int64\n", 438 | " 33 housing_no 41188 non-null int64\n", 439 | " 34 housing_unknown 41188 non-null int64\n", 440 | " 35 housing_yes 41188 non-null int64\n", 441 | " 36 loan_no 41188 non-null int64\n", 442 | " 37 loan_unknown 41188 non-null int64\n", 443 | " 38 loan_yes 41188 non-null int64\n", 444 | " 39 contact_cellular 41188 non-null int64\n", 445 | " 40 contact_telephone 41188 non-null int64\n", 446 | " 41 month_apr 41188 non-null int64\n", 447 | " 42 month_aug 41188 non-null int64\n", 448 | " 43 month_dec 41188 
non-null int64\n", 449 | " 44 month_jul 41188 non-null int64\n", 450 | " 45 month_jun 41188 non-null int64\n", 451 | " 46 month_mar 41188 non-null int64\n", 452 | " 47 month_may 41188 non-null int64\n", 453 | " 48 month_nov 41188 non-null int64\n", 454 | " 49 month_oct 41188 non-null int64\n", 455 | " 50 month_sep 41188 non-null int64\n", 456 | " 51 day_of_week_fri 41188 non-null int64\n", 457 | " 52 day_of_week_mon 41188 non-null int64\n", 458 | " 53 day_of_week_thu 41188 non-null int64\n", 459 | " 54 day_of_week_tue 41188 non-null int64\n", 460 | " 55 day_of_week_wed 41188 non-null int64\n", 461 | " 56 poutcome_failure 41188 non-null int64\n", 462 | " 57 poutcome_nonexistent 41188 non-null int64\n", 463 | " 58 poutcome_success 41188 non-null int64\n", 464 | " 59 y_no 41188 non-null int64\n", 465 | " 60 y_yes 41188 non-null int64\n", 466 | "dtypes: int64(61)\n", 467 | "memory usage: 19.5 MB\n" 468 | ] 469 | } 470 | ], 471 | "source": [ 472 | "model_data.info()" 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "id": "b1420e12-0ab4-4e7b-8b8f-ca10411f3514", 478 | "metadata": { 479 | "id": "b1420e12-0ab4-4e7b-8b8f-ca10411f3514" 480 | }, 481 | "source": [ 482 | "## Train and Test Split" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "id": "d1cc3aba-b34f-442b-8822-462b31002fac", 489 | "metadata": { 490 | "tags": [], 491 | "id": "d1cc3aba-b34f-442b-8822-462b31002fac" 492 | }, 493 | "outputs": [], 494 | "source": [ 495 | "from sklearn.model_selection import train_test_split\n", 496 | "\n", 497 | "train_data, test_data = train_test_split(model_data, test_size=0.3)" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "id": "b680644d-3697-4cee-9710-38397f5b89de", 504 | "metadata": { 505 | "tags": [], 506 | "id": "b680644d-3697-4cee-9710-38397f5b89de" 507 | }, 508 | "outputs": [], 509 | "source": [ 510 | "## saving training and testing data in s3 buckets\n", 511 | "import os\n", 512 | 
"pd.concat([train_data['y_yes'], train_data.drop(['y_no', 'y_yes'],\n", 513 | "axis = 1)],\n", 514 | "axis = 1).to_csv(\"train.csv\", index=False, header=False)" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": null, 520 | "id": "509d33df-9a2c-4847-8a23-bb7889e72c33", 521 | "metadata": { 522 | "tags": [], 523 | "id": "509d33df-9a2c-4847-8a23-bb7889e72c33", 524 | "outputId": "3bdbd3a4-1d54-4965-e5bf-56f1a1060bcb" 525 | }, 526 | "outputs": [ 527 | { 528 | "name": "stderr", 529 | "output_type": "stream", 530 | "text": [ 531 | "INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole\n" 532 | ] 533 | } 534 | ], 535 | "source": [ 536 | "boto3.Session().resource(\"s3\").Bucket(bucket_name).Object(os.path.join(path, \"train/train.csv\")).upload_file(\"train.csv\")" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "id": "163bcf90-7aa4-41d7-b790-3f94a505465c", 543 | "metadata": { 544 | "tags": [], 545 | "id": "163bcf90-7aa4-41d7-b790-3f94a505465c" 546 | }, 547 | "outputs": [], 548 | "source": [ 549 | "## now storing the training csv into a variable\n", 550 | "s3_input_train = sagemaker.TrainingInput(s3_data=f\"s3://{bucket_name}/{path}/train/\", content_type=\"csv\")" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": null, 556 | "id": "14b988c2-e807-4b82-ab86-064ac66d4b51", 557 | "metadata": { 558 | "tags": [], 559 | "id": "14b988c2-e807-4b82-ab86-064ac66d4b51" 560 | }, 561 | "outputs": [], 562 | "source": [ 563 | "pd.concat([test_data['y_yes'], test_data.drop(['y_no', 'y_yes'],\n", 564 | "axis = 1)],\n", 565 | "axis = 1).to_csv(\"test.csv\", index=False, header=False)" 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": null, 571 | "id": "5508f1fa-07c2-41db-8728-1964d3c1feee", 572 | "metadata": { 573 | "tags": [], 574 | "id": "5508f1fa-07c2-41db-8728-1964d3c1feee", 575 | "outputId": 
"13c99d77-63fa-44f1-d67c-581192240834" 576 | }, 577 | "outputs": [ 578 | { 579 | "name": "stderr", 580 | "output_type": "stream", 581 | "text": [ 582 | "INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole\n" 583 | ] 584 | } 585 | ], 586 | "source": [ 587 | "boto3.Session().resource(\"s3\").Bucket(bucket_name).Object(os.path.join(path, \"test/test.csv\")).upload_file(\"test.csv\")\n", 588 | "## now storing the training csv into a variable\n", 589 | "s3_input_test = sagemaker.TrainingInput(s3_data=f\"s3://{bucket_name}/{path}/test/\", content_type=\"csv\")" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "id": "37413d3a-eab0-43a4-a650-c1d6411cde6e", 595 | "metadata": { 596 | "id": "37413d3a-eab0-43a4-a650-c1d6411cde6e" 597 | }, 598 | "source": [ 599 | "## Building XGBoost-inbuild algo" 600 | ] 601 | }, 602 | { 603 | "cell_type": "code", 604 | "execution_count": null, 605 | "id": "945764ca-2025-4d23-9e73-457b0290308c", 606 | "metadata": { 607 | "tags": [], 608 | "id": "945764ca-2025-4d23-9e73-457b0290308c" 609 | }, 610 | "outputs": [], 611 | "source": [ 612 | "from sagemaker.amazon.amazon_estimator import image_uris\n", 613 | "from sagemaker.session import s3_input, Session" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": null, 619 | "id": "00ffeb83-0dd8-4cd0-9036-38ca23ea2d40", 620 | "metadata": { 621 | "tags": [], 622 | "id": "00ffeb83-0dd8-4cd0-9036-38ca23ea2d40", 623 | "outputId": "2fec7532-e1a6-4f97-d753-47d24ac94d95" 624 | }, 625 | "outputs": [ 626 | { 627 | "name": "stderr", 628 | "output_type": "stream", 629 | "text": [ 630 | "INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.\n" 631 | ] 632 | } 633 | ], 634 | "source": [ 635 | "container = image_uris.retrieve(\"xgboost\", boto3.Session().region_name, \"1.5-1\")" 636 | ] 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": null, 641 | "id": "8cc12158-aa48-4306-a600-abb7a540f3dd", 642 | 
"metadata": { 643 | "tags": [], 644 | "id": "8cc12158-aa48-4306-a600-abb7a540f3dd" 645 | }, 646 | "outputs": [], 647 | "source": [ 648 | "##initialise all the parameters \n", 649 | "hyperparameters = {\n", 650 | "\"max_depth\": \"5\",\n", 651 | "\"eta\": \"0.2\",\n", 652 | "\"gamma\":\"4\",\n", 653 | "\"min_child_weight\":\"6\",\n", 654 | "\"subsample\":\"0.7\",\n", 655 | "\"objective\":\"binary:logistic\",\n", 656 | "\"num_round\":50\n", 657 | "}" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "id": "05b18a5b-9148-46bb-a502-7b2455c2ee3d", 664 | "metadata": { 665 | "tags": [], 666 | "id": "05b18a5b-9148-46bb-a502-7b2455c2ee3d" 667 | }, 668 | "outputs": [], 669 | "source": [ 670 | "# create sagemaker estimator that calls the xgboost container\n", 671 | "estimator = sagemaker.estimator.Estimator(image_uri=container, \n", 672 | " hyperparameters=hyperparameters,\n", 673 | " role=sagemaker.get_execution_role(),\n", 674 | " instance_count=1,\n", 675 | " instance_type=\"ml.m5.2xlarge\",\n", 676 | " volume_size=5,\n", 677 | " output_path=output_path)" 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": null, 683 | "id": "6d3e01e8-1039-43e0-a26c-2a48e9e2b155", 684 | "metadata": { 685 | "tags": [], 686 | "id": "6d3e01e8-1039-43e0-a26c-2a48e9e2b155", 687 | "outputId": "66e9f423-4475-44ba-9bb3-dec828445bdd" 688 | }, 689 | "outputs": [ 690 | { 691 | "name": "stderr", 692 | "output_type": "stream", 693 | "text": [ 694 | "INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-06-08-16-50-36-867\n" 695 | ] 696 | }, 697 | { 698 | "name": "stdout", 699 | "output_type": "stream", 700 | "text": [ 701 | "2023-06-08 16:50:40 Starting - Starting the training job...\n", 702 | "2023-06-08 16:50:55 Starting - Preparing the instances for training......\n", 703 | "2023-06-08 16:52:14 Downloading - Downloading input data\n", 704 | "2023-06-08 16:52:14 Training - Downloading the training image...\n", 705 | 
"2023-06-08 16:52:35 Training - Training image download completed. Training in progress...\u001b[34m[2023-06-08 16:52:54.214 ip-10-2-95-175.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None\u001b[0m\n", 706 | "\u001b[34m[2023-06-08 16:52:54.283 ip-10-2-95-175.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.\u001b[0m\n", 707 | "\u001b[34m[2023-06-08:16:52:54:INFO] Imported framework sagemaker_xgboost_container.training\u001b[0m\n", 708 | "\u001b[34m[2023-06-08:16:52:54:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.\u001b[0m\n", 709 | "\u001b[34mReturning the value itself\u001b[0m\n", 710 | "\u001b[34m[2023-06-08:16:52:54:INFO] No GPUs detected (normal if no gpus installed)\u001b[0m\n", 711 | "\u001b[34m[2023-06-08:16:52:54:INFO] Running XGBoost Sagemaker in algorithm mode\u001b[0m\n", 712 | "\u001b[34m[2023-06-08:16:52:54:INFO] Determined 0 GPU(s) available on the instance.\u001b[0m\n", 713 | "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", 714 | "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", 715 | "\u001b[34m[2023-06-08:16:52:54:INFO] files path: /opt/ml/input/data/train\u001b[0m\n", 716 | "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", 717 | "\u001b[34m[2023-06-08:16:52:54:INFO] files path: /opt/ml/input/data/validation\u001b[0m\n", 718 | "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", 719 | "\u001b[34m[2023-06-08:16:52:54:INFO] Single node training.\u001b[0m\n", 720 | "\u001b[34m[2023-06-08:16:52:54:INFO] Train matrix has 28831 rows and 59 columns\u001b[0m\n", 721 | "\u001b[34m[2023-06-08:16:52:54:INFO] Validation matrix has 12357 rows\u001b[0m\n", 722 | "\u001b[34m[2023-06-08 16:52:54.720 ip-10-2-95-175.ec2.internal:7 INFO json_config.py:92] Creating hook from json_config at 
/opt/ml/input/config/debughookconfig.json.\u001b[0m\n", 723 | "\u001b[34m[2023-06-08 16:52:54.721 ip-10-2-95-175.ec2.internal:7 INFO hook.py:206] tensorboard_dir has not been set for the hook. SMDebug will not be exporting tensorboard summaries.\u001b[0m\n", 724 | "\u001b[34m[2023-06-08 16:52:54.722 ip-10-2-95-175.ec2.internal:7 INFO hook.py:259] Saving to /opt/ml/output/tensors\u001b[0m\n", 725 | "\u001b[34m[2023-06-08 16:52:54.722 ip-10-2-95-175.ec2.internal:7 INFO state_store.py:77] The checkpoint config file /opt/ml/input/config/checkpointconfig.json does not exist.\u001b[0m\n", 726 | "\u001b[34m[2023-06-08:16:52:54:INFO] Debug hook created from config\u001b[0m\n", 727 | "\u001b[34m[16:52:54] WARNING: ../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\u001b[0m\n", 728 | "\u001b[34m[2023-06-08 16:52:54.799 ip-10-2-95-175.ec2.internal:7 INFO hook.py:427] Monitoring the collections: metrics\u001b[0m\n", 729 | "\u001b[34m[2023-06-08 16:52:54.802 ip-10-2-95-175.ec2.internal:7 INFO hook.py:491] Hook is writing from the hook with pid: 7\u001b[0m\n", 730 | "\u001b[34m[0]#011train-logloss:0.57428#011validation-logloss:0.57330\u001b[0m\n", 731 | "\u001b[34m[1]#011train-logloss:0.49542#011validation-logloss:0.49375\u001b[0m\n", 732 | "\u001b[34m[2]#011train-logloss:0.43939#011validation-logloss:0.43713\u001b[0m\n", 733 | "\u001b[34m[3]#011train-logloss:0.40015#011validation-logloss:0.39731\u001b[0m\n", 734 | "\u001b[34m[4]#011train-logloss:0.37156#011validation-logloss:0.36851\u001b[0m\n", 735 | "\u001b[34m[5]#011train-logloss:0.34992#011validation-logloss:0.34684\u001b[0m\n", 736 | "\u001b[34m[6]#011train-logloss:0.33411#011validation-logloss:0.33079\u001b[0m\n", 737 | "\u001b[34m[7]#011train-logloss:0.32230#011validation-logloss:0.31873\u001b[0m\n", 738 | 
"\u001b[34m[8]#011train-logloss:0.31328#011validation-logloss:0.30944\u001b[0m\n", 739 | "\u001b[34m[9]#011train-logloss:0.30687#011validation-logloss:0.30285\u001b[0m\n", 740 | "\u001b[34m[10]#011train-logloss:0.30208#011validation-logloss:0.29787\u001b[0m\n", 741 | "\u001b[34m[11]#011train-logloss:0.29824#011validation-logloss:0.29425\u001b[0m\n", 742 | "\u001b[34m[12]#011train-logloss:0.29549#011validation-logloss:0.29142\u001b[0m\n", 743 | "\u001b[34m[13]#011train-logloss:0.29334#011validation-logloss:0.28951\u001b[0m\n", 744 | "\u001b[34m[14]#011train-logloss:0.29156#011validation-logloss:0.28759\u001b[0m\n", 745 | "\u001b[34m[15]#011train-logloss:0.29002#011validation-logloss:0.28623\u001b[0m\n", 746 | "\u001b[34m[16]#011train-logloss:0.28896#011validation-logloss:0.28525\u001b[0m\n", 747 | "\u001b[34m[17]#011train-logloss:0.28785#011validation-logloss:0.28464\u001b[0m\n", 748 | "\u001b[34m[18]#011train-logloss:0.28711#011validation-logloss:0.28403\u001b[0m\n", 749 | "\u001b[34m[19]#011train-logloss:0.28606#011validation-logloss:0.28331\u001b[0m\n", 750 | "\u001b[34m[20]#011train-logloss:0.28546#011validation-logloss:0.28283\u001b[0m\n", 751 | "\u001b[34m[21]#011train-logloss:0.28482#011validation-logloss:0.28255\u001b[0m\n", 752 | "\u001b[34m[22]#011train-logloss:0.28445#011validation-logloss:0.28218\u001b[0m\n", 753 | "\u001b[34m[23]#011train-logloss:0.28360#011validation-logloss:0.28154\u001b[0m\n", 754 | "\u001b[34m[24]#011train-logloss:0.28319#011validation-logloss:0.28148\u001b[0m\n", 755 | "\u001b[34m[25]#011train-logloss:0.28266#011validation-logloss:0.28155\u001b[0m\n", 756 | "\u001b[34m[26]#011train-logloss:0.28223#011validation-logloss:0.28107\u001b[0m\n", 757 | "\u001b[34m[27]#011train-logloss:0.28208#011validation-logloss:0.28106\u001b[0m\n", 758 | "\u001b[34m[28]#011train-logloss:0.28179#011validation-logloss:0.28102\u001b[0m\n", 759 | "\u001b[34m[29]#011train-logloss:0.28158#011validation-logloss:0.28087\u001b[0m\n", 760 | 
"\u001b[34m[30]#011train-logloss:0.28133#011validation-logloss:0.28070\u001b[0m\n", 761 | "\u001b[34m[31]#011train-logloss:0.28119#011validation-logloss:0.28057\u001b[0m\n", 762 | "\u001b[34m[32]#011train-logloss:0.28093#011validation-logloss:0.28066\u001b[0m\n", 763 | "\u001b[34m[33]#011train-logloss:0.28046#011validation-logloss:0.28070\u001b[0m\n", 764 | "\u001b[34m[34]#011train-logloss:0.28036#011validation-logloss:0.28076\u001b[0m\n", 765 | "\u001b[34m[35]#011train-logloss:0.28011#011validation-logloss:0.28081\u001b[0m\n", 766 | "\u001b[34m[36]#011train-logloss:0.27984#011validation-logloss:0.28091\u001b[0m\n", 767 | "\u001b[34m[37]#011train-logloss:0.27935#011validation-logloss:0.28062\u001b[0m\n", 768 | "\u001b[34m[38]#011train-logloss:0.27918#011validation-logloss:0.28051\u001b[0m\n", 769 | "\u001b[34m[39]#011train-logloss:0.27897#011validation-logloss:0.28065\u001b[0m\n", 770 | "\u001b[34m[40]#011train-logloss:0.27889#011validation-logloss:0.28070\u001b[0m\n", 771 | "\u001b[34m[41]#011train-logloss:0.27871#011validation-logloss:0.28066\u001b[0m\n", 772 | "\u001b[34m[42]#011train-logloss:0.27860#011validation-logloss:0.28077\u001b[0m\n", 773 | "\u001b[34m[43]#011train-logloss:0.27860#011validation-logloss:0.28077\u001b[0m\n", 774 | "\u001b[34m[44]#011train-logloss:0.27829#011validation-logloss:0.28093\u001b[0m\n", 775 | "\u001b[34m[45]#011train-logloss:0.27808#011validation-logloss:0.28091\u001b[0m\n", 776 | "\u001b[34m[46]#011train-logloss:0.27801#011validation-logloss:0.28085\u001b[0m\n", 777 | "\u001b[34m[47]#011train-logloss:0.27786#011validation-logloss:0.28088\u001b[0m\n", 778 | "\u001b[34m[48]#011train-logloss:0.27779#011validation-logloss:0.28085\u001b[0m\n", 779 | "\u001b[34m[49]#011train-logloss:0.27756#011validation-logloss:0.28106\u001b[0m\n", 780 | "\n", 781 | "2023-06-08 16:53:16 Uploading - Uploading generated training model\n", 782 | "2023-06-08 16:53:16 Completed - Training job completed\n", 783 | "Training seconds: 82\n", 784 | "Billable 
seconds: 82\n" 785 | ] 786 | } 787 | ], 788 | "source": [ 789 | "estimator.fit({\"train\":s3_input_train, \"validation\":s3_input_test})" 790 | ] 791 | }, 792 | { 793 | "cell_type": "markdown", 794 | "id": "550b55e8-6f11-4643-b0e1-a0465e0f8d68", 795 | "metadata": { 796 | "id": "550b55e8-6f11-4643-b0e1-a0465e0f8d68" 797 | }, 798 | "source": [ 799 | "## Deploying the ML Model as an endpoint" 800 | ] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "execution_count": null, 805 | "id": "d5801e3e-a924-4422-84c9-de93827d25f8", 806 | "metadata": { 807 | "tags": [], 808 | "id": "d5801e3e-a924-4422-84c9-de93827d25f8", 809 | "outputId": "49613511-3aba-4db0-b92c-bbb010540176" 810 | }, 811 | "outputs": [ 812 | { 813 | "name": "stderr", 814 | "output_type": "stream", 815 | "text": [ 816 | "INFO:sagemaker:Creating model with name: sagemaker-xgboost-2023-06-08-16-53-29-875\n", 817 | "INFO:sagemaker:Creating endpoint-config with name sagemaker-xgboost-2023-06-08-16-53-29-875\n", 818 | "INFO:sagemaker:Creating endpoint with name sagemaker-xgboost-2023-06-08-16-53-29-875\n" 819 | ] 820 | }, 821 | { 822 | "name": "stdout", 823 | "output_type": "stream", 824 | "text": [ 825 | "-------!" 
826 | ] 827 | } 828 | ], 829 | "source": [ 830 | "from sagemaker.serializers import CSVSerializer \n", 831 | "xgb_predictor = estimator.deploy(initial_instance_count = 1, instance_type=\"ml.m4.xlarge\", serializer=CSVSerializer())" 832 | ] 833 | }, 834 | { 835 | "cell_type": "markdown", 836 | "id": "fdbfd854-3489-49fd-a0a7-5016ae0079ae", 837 | "metadata": { 838 | "id": "fdbfd854-3489-49fd-a0a7-5016ae0079ae" 839 | }, 840 | "source": [ 841 | "## Prediction of test data" 842 | ] 843 | }, 844 | { 845 | "cell_type": "code", 846 | "execution_count": null, 847 | "id": "82d7c25f-d9c5-4355-80df-6b83c19bb2f1", 848 | "metadata": { 849 | "tags": [], 850 | "id": "82d7c25f-d9c5-4355-80df-6b83c19bb2f1", 851 | "outputId": "8345be11-9d16-4c21-efaf-7e9c52f8a824" 852 | }, 853 | "outputs": [ 854 | { 855 | "data": { 856 | "text/plain": [ 857 | "array([[ 34, 4, 999, ..., 0, 1, 0],\n", 858 | " [ 31, 6, 999, ..., 0, 1, 0],\n", 859 | " [ 32, 1, 999, ..., 0, 1, 0],\n", 860 | " ...,\n", 861 | " [ 36, 3, 999, ..., 0, 1, 0],\n", 862 | " [ 47, 3, 999, ..., 0, 1, 0],\n", 863 | " [ 39, 8, 999, ..., 0, 1, 0]])" 864 | ] 865 | }, 866 | "execution_count": 55, 867 | "metadata": {}, 868 | "output_type": "execute_result" 869 | } 870 | ], 871 | "source": [ 872 | "test_data_array = test_data.drop(['y_yes', 'y_no'], axis=1).values\n", 873 | "test_data_array" 874 | ] 875 | }, 876 | { 877 | "cell_type": "code", 878 | "execution_count": null, 879 | "id": "31843c08-a7b8-438c-8370-46c48626b11f", 880 | "metadata": { 881 | "tags": [], 882 | "id": "31843c08-a7b8-438c-8370-46c48626b11f" 883 | }, 884 | "outputs": [], 885 | "source": [ 886 | "predictions = xgb_predictor.predict(test_data_array).decode(\"utf-8\")" 887 | ] 888 | }, 889 | { 890 | "cell_type": "code", 891 | "execution_count": null, 892 | "id": "46bcd959-2951-4329-b211-279a84b174f1", 893 | "metadata": { 894 | "tags": [], 895 | "id": "46bcd959-2951-4329-b211-279a84b174f1", 896 | "outputId": "fdf0fc9c-48c2-4c8b-d02e-5c159c140643" 897 | }, 898 | "outputs": [ 
899 | { 900 | "data": { 901 | "text/plain": [ 902 | "array([0.08596137, 0.08066913, 0.2411833 , ..., 0.10747377, 0.07555814,\n", 903 | " 0.03684008])" 904 | ] 905 | }, 906 | "execution_count": 57, 907 | "metadata": {}, 908 | "output_type": "execute_result" 909 | } 910 | ], 911 | "source": [ 912 | "predictions_array = np.fromstring(predictions[:], sep=\"\\n\")\n", 913 | "predictions_array" 914 | ] 915 | }, 916 | { 917 | "cell_type": "code", 918 | "execution_count": null, 919 | "id": "8f049870-18b8-472b-9ecd-e65be3e08922", 920 | "metadata": { 921 | "id": "8f049870-18b8-472b-9ecd-e65be3e08922", 922 | "outputId": "d8709a5b-c1d6-4dd7-b297-0555f58be0be" 923 | }, 924 | "outputs": [ 925 | { 926 | "name": "stdout", 927 | "output_type": "stream", 928 | "text": [ 929 | "[[10828 163]\n", 930 | " [ 1076 290]]\n", 931 | " precision recall f1-score support\n", 932 | "\n", 933 | " 0 0.91 0.99 0.95 10991\n", 934 | " 1 0.64 0.21 0.32 1366\n", 935 | "\n", 936 | " accuracy 0.90 12357\n", 937 | " macro avg 0.77 0.60 0.63 12357\n", 938 | "weighted avg 0.88 0.90 0.88 12357\n", 939 | "\n" 940 | ] 941 | } 942 | ], 943 | "source": [ 944 | "import sklearn.metrics\n", 945 | "cutoff=0.5\n", 946 | "print(sklearn.metrics.confusion_matrix(test_data['y_yes'],np.round(predictions_array)))\n", 947 | "print(sklearn.metrics.classification_report(test_data['y_yes'],np.round(predictions_array)))" 948 | ] 949 | }, 950 | { 951 | "cell_type": "code", 952 | "execution_count": null, 953 | "id": "f2efc2f5-b86f-4ff2-a429-3524f045f79f", 954 | "metadata": { 955 | "id": "f2efc2f5-b86f-4ff2-a429-3524f045f79f", 956 | "outputId": "1221010c-6e00-485a-c0aa-5a7831cbd74d" 957 | }, 958 | "outputs": [ 959 | { 960 | "data": { 961 | "text/plain": [ 962 | "'sagemaker-xgboost-2023-06-08-16-53-29-875'" 963 | ] 964 | }, 965 | "execution_count": 59, 966 | "metadata": {}, 967 | "output_type": "execute_result" 968 | } 969 | ], 970 | "source": [ 971 | "xgb_predictor.endpoint_name" 972 | ] 973 | }, 974 | { 975 | "cell_type": "code", 
976 | "execution_count": null, 977 | "id": "5963d98c-3fbd-475d-99ee-17886bf2543a", 978 | "metadata": { 979 | "id": "5963d98c-3fbd-475d-99ee-17886bf2543a", 980 | "outputId": "c54c3685-8101-4905-a862-51362df24d23" 981 | }, 982 | "outputs": [ 983 | { 984 | "name": "stderr", 985 | "output_type": "stream", 986 | "text": [ 987 | "WARNING:sagemaker.deprecations:The endpoint attribute has been renamed in sagemaker>=2.\n", 988 | "See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.\n", 989 | "INFO:sagemaker:Deleting endpoint with name: sagemaker-xgboost-2023-06-08-16-53-29-875\n" 990 | ] 991 | } 992 | ], 993 | "source": [ 994 | "sagemaker.Session().delete_endpoint(xgb_predictor.endpoint_name)  # `endpoint` was renamed to `endpoint_name` in sagemaker>=2\n", 995 | "bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)" 996 | ] 997 | }, 998 | { 999 | "cell_type": "code", 1000 | "execution_count": null, 1001 | "id": "1cbb5a75-f0d7-4c6f-8a3e-e7737d3c0c4f", 1002 | "metadata": { 1003 | "tags": [], 1004 | "id": "1cbb5a75-f0d7-4c6f-8a3e-e7737d3c0c4f", 1005 | "outputId": "45df1f88-4d5f-4bbd-c9a6-1bb0daeff769" 1006 | }, 1007 | "outputs": [ 1008 | { 1009 | "data": { 1010 | "text/plain": [ 1011 | "[{'ResponseMetadata': {'RequestId': '3ZFZK6BFTT1TP8MY',\n", 1012 | " 'HostId': 'oGJmvH3ZJE/pzZ8b4ipnm20ms75AFSLBPaOYYV9qlTBwihvWdnULVLgtk+xQdk9hJrFeFaEBz5E=',\n", 1013 | " 'HTTPStatusCode': 200,\n", 1014 | " 'HTTPHeaders': {'x-amz-id-2': 'oGJmvH3ZJE/pzZ8b4ipnm20ms75AFSLBPaOYYV9qlTBwihvWdnULVLgtk+xQdk9hJrFeFaEBz5E=',\n", 1015 | " 'x-amz-request-id': '3ZFZK6BFTT1TP8MY',\n", 1016 | " 'date': 'Thu, 08 Jun 2023 16:57:35 GMT',\n", 1017 | " 'content-type': 'application/xml',\n", 1018 | " 'transfer-encoding': 'chunked',\n", 1019 | " 'server': 'AmazonS3',\n", 1020 | " 'connection': 'close'},\n", 1021 | " 'RetryAttempts': 0},\n", 1022 | " 'Deleted': [{'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000030_worker_0.json'},\n", 1023 | " {'Key': 
'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/claim.smd'},\n", 1024 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000010_worker_0.json'},\n", 1025 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000020/000000000020_worker_0.tfevents'},\n", 1026 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/incremental/2023060816/1686243120.algo-1.json'},\n", 1027 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000000/000000000000_worker_0.tfevents'},\n", 1028 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/training_job_end.ts'},\n", 1029 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/training_job_end.ts'},\n", 1030 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000000_worker_0.json'},\n", 1031 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000020_worker_0.json'},\n", 1032 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000040_worker_0.json'},\n", 1033 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/collections/000000000/worker_0_collections.json'},\n", 1034 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/output/model.tar.gz'},\n", 1035 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000040/000000000040_worker_0.tfevents'},\n", 1036 | " {'Key': 
'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/incremental/2023060816/1686243180.algo-1.json'},\n", 1037 | " {'Key': 'xgboost-as-a-built-in-algo/test/test.csv'},\n", 1038 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/framework/training_job_end.ts'},\n", 1039 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000010/000000000010_worker_0.tfevents'},\n", 1040 | " {'Key': 'xgboost-as-a-built-in-algo/train/train.csv'},\n", 1041 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/incremental/2023060816/1686243060.algo-1.json'},\n", 1042 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000030/000000000030_worker_0.tfevents'}]}]" 1043 | ] 1044 | }, 1045 | "execution_count": 61, 1046 | "metadata": {}, 1047 | "output_type": "execute_result" 1048 | } 1049 | ], 1050 | "source": [ 1051 | "bucket_to_delete.objects.all().delete()" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "code", 1056 | "execution_count": null, 1057 | "id": "43110ec1-844b-413f-b52e-42668eb1f436", 1058 | "metadata": { 1059 | "id": "43110ec1-844b-413f-b52e-42668eb1f436" 1060 | }, 1061 | "outputs": [], 1062 | "source": [] 1063 | } 1064 | ], 1065 | "metadata": { 1066 | "kernelspec": { 1067 | "display_name": "conda_python3", 1068 | "language": "python", 1069 | "name": "conda_python3" 1070 | }, 1071 | "language_info": { 1072 | "codemirror_mode": { 1073 | "name": "ipython", 1074 | "version": 3 1075 | }, 1076 | "file_extension": ".py", 1077 | "mimetype": "text/x-python", 1078 | "name": "python", 1079 | "nbconvert_exporter": "python", 1080 | "pygments_lexer": "ipython3", 1081 | "version": "3.10.8" 1082 | }, 1083 | "colab": { 1084 | "provenance": [], 1085 | "include_colab_link": true 1086 | } 1087 | }, 1088 | 
"nbformat": 4, 1089 | "nbformat_minor": 5 1090 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # aws-sagemaker 2 | A demonstration of training XGBoost models with Amazon Web Services (AWS) SageMaker using the SageMaker APIs. 3 | 4 | 1. [SageMaker Python API](https://sagemaker.readthedocs.io/en/stable/overview.html) 5 | 2. [Learn more about AWS SageMaker](https://aws.amazon.com/sagemaker/) 6 | 3. [SageMaker Pricing](https://aws.amazon.com/sagemaker/pricing/?nc=sn&loc=3) 7 | --------------------------------------------------------------------------------