├── README.md ├── Tutorial - 1 Sagemaker SKLearn Custom Script Mode ├── Data │ └── mob_price_classification_train.csv ├── README.md ├── Tutorial - 1 Sagemaker SKlearn Custom Script.ipynb └── yt_thumb.jpg ├── Tutorial - 2 Create Rest API for Sagemaker Endpoint ├── README.md ├── Tutorial - 2 Create Rest API.ipynb └── yt_thumb_2.jpg └── Tutorial - 3 Sagemaker Build Custom Algorithm ├── Algo_Container ├── Decision_Tree │ ├── nginx.conf │ ├── predictor.py │ ├── serve │ ├── train │ └── wsgi.py └── Dockerfile ├── README.md ├── bring_own_algo.ipynb ├── iris.csv └── yt_thumb.jpg /README.md: -------------------------------------------------------------------------------- 1 | ## Sagemaker Tutorial Series 2 | ### [Playlist Link ►](https://www.youtube.com/playlist?list=PLsT53VV2LIIEw5q8UadePwjrNcVGHVf1W) 3 | 4 | In this repository, I will upload the notebooks, data, and scripts required for the tutorials. 5 | 6 | - [Tutorial-1 ►](https://github.com/Spidy20/Sagemaker-Tutorials/tree/master/Tutorial%20-%201%20Sagemaker%20SKLearn%20Custom%20Script%20Mode) Build and Deploy Sklearn model with Custom Script in Sagemaker. 7 | - [Tutorial-2 ►](https://github.com/Spidy20/Sagemaker-Tutorials/tree/master/Tutorial%20-%202%20Create%20Rest%20API%20for%20Sagemaker%20Endpoint) In this tutorial, I have explained how we can create a REST API for a Sagemaker Endpoint using AWS Lambda + API Gateway. 8 | - [Tutorial-3 ►](https://github.com/Spidy20/Sagemaker-Tutorials/tree/master/Tutorial%20-%203%20Sagemaker%20Build%20Custom%20Algorithm) Building a Custom Algorithm in Amazon SageMaker 9 | 10 | **More tutorials are coming......** 11 | 12 | ### Give a Star⭐ to this repository, and fork it to support me. 
13 | 14 | ### [Buy me a Coffee☕](https://www.buymeacoffee.com/spidy20) 15 | ### [Donate to me on PayPal (it will inspire me to do more projects)](https://www.paypal.me/spidy1820) 16 | -------------------------------------------------------------------------------- /Tutorial - 1 Sagemaker SKLearn Custom Script Mode/README.md: -------------------------------------------------------------------------------- 1 | ## Tutorial-1 ► Build and Deploy Sklearn model with Custom Script in Sagemaker 2 | 3 | ### [Watch this tutorial►](https://youtu.be/YWmnD_QcZQU) 4 | 5 | 6 | 7 | - In this tutorial, I explain how to build and deploy an sklearn model with your own custom script in the Sagemaker environment. 8 | 9 | ### Steps that we followed! 10 | 11 | 1. Initialize Boto3 SDK and create S3 bucket. 12 | 2. Upload data in Sagemaker Local Storage. 13 | 3. Data Exploration and Understanding. 14 | 4. Split the data into Train/Test CSV File. 15 | 5. Upload data into the S3 Bucket. 16 | 6. Create a Training Script 17 | 7. Train the script inside a Sagemaker container. 18 | 8. Store Model Artifacts(model.tar.gz) into the S3 Bucket. 19 | 9. Deploy Sagemaker Endpoint(API) for the trained model, and test it. 20 | 21 | 22 | ### Give a Star⭐ to this repository, and fork it to support me. 
23 | 24 | ### [Buy me a Coffee☕](https://www.buymeacoffee.com/spidy20) 25 | ### [Donate to me on PayPal (it will inspire me to do more projects)](https://www.paypal.me/spidy1820) 26 | 27 | -------------------------------------------------------------------------------- /Tutorial - 1 Sagemaker SKLearn Custom Script Mode/Tutorial - 1 Sagemaker SKlearn Custom Script.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "tags": [] 7 | }, 8 | "source": [ 9 | "## Sagemaker Tutorial Series" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "tags": [] 16 | }, 17 | "source": [ 18 | "### Tutorial - 1 Mobile Price Classification using SKLearn Custom Script in Sagemaker" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "Data Source - https://www.kaggle.com/datasets/iabhishekofficial/mobile-price-classification?resource=download" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### Let's divide the workload\n", 33 | "1. Initialize Boto3 SDK and create S3 bucket. \n", 34 | "2. Upload data in Sagemaker Local Storage. \n", 35 | "3. Data Exploration and Understanding.\n", 36 | "4. Split the data into Train/Test CSV File. \n", 37 | "5. Upload data into the S3 Bucket.\n", 38 | "6. Create a Training Script\n", 39 | "7. Train the script inside a Sagemaker container. \n", 40 | "8. Store Model Artifacts(model.tar.gz) into the S3 Bucket. \n", 41 | "9. Deploy Sagemaker Endpoint(API) for the trained model, and test it. 
" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "'0.22.1'" 55 | ] 56 | }, 57 | "execution_count": 2, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "import sklearn # Check Sklearn version\n", 64 | "sklearn.__version__" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "tags": [] 71 | }, 72 | "source": [ 73 | "## 1. Initialize Boto3 SDK and create S3 bucket. " 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 3, 79 | "metadata": { 80 | "tags": [] 81 | }, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": [ 87 | "Using bucket sagemaker-tutorials-mlhub\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "import numpy as np\n", 93 | "from sagemaker import get_execution_role\n", 94 | "import sagemaker\n", 95 | "from sklearn.model_selection import train_test_split\n", 96 | "from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder\n", 97 | "import datetime\n", 98 | "import time\n", 99 | "import tarfile\n", 100 | "import boto3\n", 101 | "import pandas as pd\n", 102 | "\n", 103 | "sm_boto3 = boto3.client(\"sagemaker\")\n", 104 | "sess = sagemaker.Session()\n", 105 | "region = sess.boto_session.region_name\n", 106 | "bucket = 'sagemaker-tutorials-mlhub' # Mention the created S3 bucket name here\n", 107 | "print(\"Using bucket \" + bucket)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": { 113 | "tags": [] 114 | }, 115 | "source": [ 116 | "## 3. Data Exploration and Understanding." 
117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 4, 122 | "metadata": { 123 | "tags": [] 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "df = pd.read_csv(\"mob_price_classification_train.csv\")" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 5, 133 | "metadata": { 134 | "tags": [] 135 | }, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/html": [ 140 | "
\n", 141 | "\n", 154 | "\n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | "
battery_powerblueclock_speeddual_simfcfour_gint_memorym_depmobile_wtn_cores...px_heightpx_widthramsc_hsc_wtalk_timethree_gtouch_screenwifiprice_range
084202.201070.61882...20756254997190011
1102110.5101530.71363...9051988263117371102
256310.5121410.91455...12631716260311291102
361512.5000100.81316...121617862769168111002
4182111.20131440.61412...12081212141182151101
\n", 304 | "

5 rows × 21 columns

\n", 305 | "
" 306 | ], 307 | "text/plain": [ 308 | " battery_power blue clock_speed dual_sim fc four_g int_memory m_dep \\\n", 309 | "0 842 0 2.2 0 1 0 7 0.6 \n", 310 | "1 1021 1 0.5 1 0 1 53 0.7 \n", 311 | "2 563 1 0.5 1 2 1 41 0.9 \n", 312 | "3 615 1 2.5 0 0 0 10 0.8 \n", 313 | "4 1821 1 1.2 0 13 1 44 0.6 \n", 314 | "\n", 315 | " mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time \\\n", 316 | "0 188 2 ... 20 756 2549 9 7 19 \n", 317 | "1 136 3 ... 905 1988 2631 17 3 7 \n", 318 | "2 145 5 ... 1263 1716 2603 11 2 9 \n", 319 | "3 131 6 ... 1216 1786 2769 16 8 11 \n", 320 | "4 141 2 ... 1208 1212 1411 8 2 15 \n", 321 | "\n", 322 | " three_g touch_screen wifi price_range \n", 323 | "0 0 0 1 1 \n", 324 | "1 1 1 0 2 \n", 325 | "2 1 1 0 2 \n", 326 | "3 1 0 0 2 \n", 327 | "4 1 1 0 1 \n", 328 | "\n", 329 | "[5 rows x 21 columns]" 330 | ] 331 | }, 332 | "execution_count": 5, 333 | "metadata": {}, 334 | "output_type": "execute_result" 335 | } 336 | ], 337 | "source": [ 338 | "df.head()" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 6, 344 | "metadata": { 345 | "tags": [] 346 | }, 347 | "outputs": [ 348 | { 349 | "data": { 350 | "text/plain": [ 351 | "(2000, 21)" 352 | ] 353 | }, 354 | "execution_count": 6, 355 | "metadata": {}, 356 | "output_type": "execute_result" 357 | } 358 | ], 359 | "source": [ 360 | "df.shape" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 7, 366 | "metadata": { 367 | "tags": [] 368 | }, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/plain": [ 373 | "1 0.25\n", 374 | "2 0.25\n", 375 | "3 0.25\n", 376 | "0 0.25\n", 377 | "Name: price_range, dtype: float64" 378 | ] 379 | }, 380 | "execution_count": 7, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "# price_range takes four classes (0, 1, 2, 3); check that they are balanced\n", 387 | "df['price_range'].value_counts(normalize=True)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 8, 393 | 
"metadata": { 394 | "tags": [] 395 | }, 396 | "outputs": [ 397 | { 398 | "data": { 399 | "text/plain": [ 400 | "Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',\n", 401 | " 'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',\n", 402 | " 'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',\n", 403 | " 'touch_screen', 'wifi', 'price_range'],\n", 404 | " dtype='object')" 405 | ] 406 | }, 407 | "execution_count": 8, 408 | "metadata": {}, 409 | "output_type": "execute_result" 410 | } 411 | ], 412 | "source": [ 413 | "df.columns" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 9, 419 | "metadata": { 420 | "tags": [] 421 | }, 422 | "outputs": [ 423 | { 424 | "data": { 425 | "text/plain": [ 426 | "(2000, 21)" 427 | ] 428 | }, 429 | "execution_count": 9, 430 | "metadata": {}, 431 | "output_type": "execute_result" 432 | } 433 | ], 434 | "source": [ 435 | "df.shape" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 10, 441 | "metadata": { 442 | "tags": [] 443 | }, 444 | "outputs": [ 445 | { 446 | "data": { 447 | "text/plain": [ 448 | "battery_power 0.0\n", 449 | "blue 0.0\n", 450 | "clock_speed 0.0\n", 451 | "dual_sim 0.0\n", 452 | "fc 0.0\n", 453 | "four_g 0.0\n", 454 | "int_memory 0.0\n", 455 | "m_dep 0.0\n", 456 | "mobile_wt 0.0\n", 457 | "n_cores 0.0\n", 458 | "pc 0.0\n", 459 | "px_height 0.0\n", 460 | "px_width 0.0\n", 461 | "ram 0.0\n", 462 | "sc_h 0.0\n", 463 | "sc_w 0.0\n", 464 | "talk_time 0.0\n", 465 | "three_g 0.0\n", 466 | "touch_screen 0.0\n", 467 | "wifi 0.0\n", 468 | "price_range 0.0\n", 469 | "dtype: float64" 470 | ] 471 | }, 472 | "execution_count": 10, 473 | "metadata": {}, 474 | "output_type": "execute_result" 475 | } 476 | ], 477 | "source": [ 478 | "# Find the Percentage of Values are missing\n", 479 | "df.isnull().mean() * 100" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 11, 485 | "metadata": { 486 | "tags": [] 487 | }, 
488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/plain": [ 492 | "['battery_power',\n", 493 | " 'blue',\n", 494 | " 'clock_speed',\n", 495 | " 'dual_sim',\n", 496 | " 'fc',\n", 497 | " 'four_g',\n", 498 | " 'int_memory',\n", 499 | " 'm_dep',\n", 500 | " 'mobile_wt',\n", 501 | " 'n_cores',\n", 502 | " 'pc',\n", 503 | " 'px_height',\n", 504 | " 'px_width',\n", 505 | " 'ram',\n", 506 | " 'sc_h',\n", 507 | " 'sc_w',\n", 508 | " 'talk_time',\n", 509 | " 'three_g',\n", 510 | " 'touch_screen',\n", 511 | " 'wifi',\n", 512 | " 'price_range']" 513 | ] 514 | }, 515 | "execution_count": 11, 516 | "metadata": {}, 517 | "output_type": "execute_result" 518 | } 519 | ], 520 | "source": [ 521 | "features = list(df.columns)\n", 522 | "features" 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": 12, 528 | "metadata": { 529 | "tags": [] 530 | }, 531 | "outputs": [ 532 | { 533 | "data": { 534 | "text/plain": [ 535 | "'price_range'" 536 | ] 537 | }, 538 | "execution_count": 12, 539 | "metadata": {}, 540 | "output_type": "execute_result" 541 | } 542 | ], 543 | "source": [ 544 | "label = features.pop(-1)\n", 545 | "label" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 13, 551 | "metadata": { 552 | "tags": [] 553 | }, 554 | "outputs": [], 555 | "source": [ 556 | "x = df[features]\n", 557 | "y = df[label]" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 14, 563 | "metadata": { 564 | "tags": [] 565 | }, 566 | "outputs": [ 567 | { 568 | "data": { 569 | "text/html": [ 570 | "
\n", 571 | "\n", 584 | "\n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | "
battery_powerblueclock_speeddual_simfcfour_gint_memorym_depmobile_wtn_corespcpx_heightpx_widthramsc_hsc_wtalk_timethree_gtouch_screenwifi
084202.201070.6188222075625499719001
1102110.5101530.713636905198826311737110
256310.5121410.9145561263171626031129110
361512.5000100.81316912161786276916811100
4182111.20131440.61412141208121214118215110
\n", 728 | "
" 729 | ], 730 | "text/plain": [ 731 | " battery_power blue clock_speed dual_sim fc four_g int_memory m_dep \\\n", 732 | "0 842 0 2.2 0 1 0 7 0.6 \n", 733 | "1 1021 1 0.5 1 0 1 53 0.7 \n", 734 | "2 563 1 0.5 1 2 1 41 0.9 \n", 735 | "3 615 1 2.5 0 0 0 10 0.8 \n", 736 | "4 1821 1 1.2 0 13 1 44 0.6 \n", 737 | "\n", 738 | " mobile_wt n_cores pc px_height px_width ram sc_h sc_w talk_time \\\n", 739 | "0 188 2 2 20 756 2549 9 7 19 \n", 740 | "1 136 3 6 905 1988 2631 17 3 7 \n", 741 | "2 145 5 6 1263 1716 2603 11 2 9 \n", 742 | "3 131 6 9 1216 1786 2769 16 8 11 \n", 743 | "4 141 2 14 1208 1212 1411 8 2 15 \n", 744 | "\n", 745 | " three_g touch_screen wifi \n", 746 | "0 0 0 1 \n", 747 | "1 1 1 0 \n", 748 | "2 1 1 0 \n", 749 | "3 1 0 0 \n", 750 | "4 1 1 0 " 751 | ] 752 | }, 753 | "execution_count": 14, 754 | "metadata": {}, 755 | "output_type": "execute_result" 756 | } 757 | ], 758 | "source": [ 759 | "x.head()" 760 | ] 761 | }, 762 | { 763 | "cell_type": "code", 764 | "execution_count": 15, 765 | "metadata": { 766 | "tags": [] 767 | }, 768 | "outputs": [ 769 | { 770 | "data": { 771 | "text/plain": [ 772 | "0 1\n", 773 | "1 2\n", 774 | "2 2\n", 775 | "3 2\n", 776 | "4 1\n", 777 | "Name: price_range, dtype: int64" 778 | ] 779 | }, 780 | "execution_count": 15, 781 | "metadata": {}, 782 | "output_type": "execute_result" 783 | } 784 | ], 785 | "source": [ 786 | "# Target labels: price_range class ids 0-3\n", 787 | "y.head()" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": 16, 793 | "metadata": { 794 | "tags": [] 795 | }, 796 | "outputs": [ 797 | { 798 | "data": { 799 | "text/plain": [ 800 | "(2000, 20)" 801 | ] 802 | }, 803 | "execution_count": 16, 804 | "metadata": {}, 805 | "output_type": "execute_result" 806 | } 807 | ], 808 | "source": [ 809 | "x.shape" 810 | ] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "execution_count": 17, 815 | "metadata": { 816 | "tags": [] 817 | }, 818 | "outputs": [ 819 | { 820 | "data": { 821 | "text/plain": [ 822 | "1 
500\n", 823 | "2 500\n", 824 | "3 500\n", 825 | "0 500\n", 826 | "Name: price_range, dtype: int64" 827 | ] 828 | }, 829 | "execution_count": 17, 830 | "metadata": {}, 831 | "output_type": "execute_result" 832 | } 833 | ], 834 | "source": [ 835 | "y.value_counts()" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": 18, 841 | "metadata": { 842 | "tags": [] 843 | }, 844 | "outputs": [], 845 | "source": [ 846 | "X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.15, random_state=0)" 847 | ] 848 | }, 849 | { 850 | "cell_type": "code", 851 | "execution_count": 19, 852 | "metadata": { 853 | "tags": [] 854 | }, 855 | "outputs": [ 856 | { 857 | "name": "stdout", 858 | "output_type": "stream", 859 | "text": [ 860 | "(1700, 20)\n", 861 | "(300, 20)\n", 862 | "(1700,)\n", 863 | "(300,)\n" 864 | ] 865 | } 866 | ], 867 | "source": [ 868 | "print(X_train.shape)\n", 869 | "print(X_test.shape)\n", 870 | "print(y_train.shape)\n", 871 | "print(y_test.shape)" 872 | ] 873 | }, 874 | { 875 | "cell_type": "markdown", 876 | "metadata": { 877 | "tags": [] 878 | }, 879 | "source": [ 880 | "## 4. Split the data into Train/Test CSV File. 
" 881 | ] 882 | }, 883 | { 884 | "cell_type": "code", 885 | "execution_count": 20, 886 | "metadata": { 887 | "tags": [] 888 | }, 889 | "outputs": [], 890 | "source": [ 891 | "trainX = pd.DataFrame(X_train)\n", 892 | "trainX[label] = y_train\n", 893 | "\n", 894 | "testX = pd.DataFrame(X_test)\n", 895 | "testX[label] = y_test" 896 | ] 897 | }, 898 | { 899 | "cell_type": "code", 900 | "execution_count": 21, 901 | "metadata": { 902 | "tags": [] 903 | }, 904 | "outputs": [ 905 | { 906 | "name": "stdout", 907 | "output_type": "stream", 908 | "text": [ 909 | "(1700, 21)\n", 910 | "(300, 21)\n" 911 | ] 912 | } 913 | ], 914 | "source": [ 915 | "print(trainX.shape)\n", 916 | "print(testX.shape)" 917 | ] 918 | }, 919 | { 920 | "cell_type": "code", 921 | "execution_count": 22, 922 | "metadata": { 923 | "tags": [] 924 | }, 925 | "outputs": [ 926 | { 927 | "data": { 928 | "text/html": [ 929 | "
\n", 930 | "\n", 943 | "\n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " 
\n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | "
battery_powerblueclock_speeddual_simfcfour_gint_memorym_depmobile_wtn_cores...px_heightpx_widthramsc_hsc_wtalk_timethree_gtouch_screenwifiprice_range
1452145002.1010310.61145...1573163979411590111
1044121812.8130390.81507...112217461667100120001
1279160200.60120580.41701...125917463622172170113
674103402.6121450.31903...182129396915171000
120053002.4010320.3886...48101295917760100
\n", 1093 | "

5 rows × 21 columns

\n", 1094 | "
" 1095 | ], 1096 | "text/plain": [ 1097 | " battery_power blue clock_speed dual_sim fc four_g int_memory \\\n", 1098 | "1452 1450 0 2.1 0 1 0 31 \n", 1099 | "1044 1218 1 2.8 1 3 0 39 \n", 1100 | "1279 1602 0 0.6 0 12 0 58 \n", 1101 | "674 1034 0 2.6 1 2 1 45 \n", 1102 | "1200 530 0 2.4 0 1 0 32 \n", 1103 | "\n", 1104 | " m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w \\\n", 1105 | "1452 0.6 114 5 ... 1573 1639 794 11 5 \n", 1106 | "1044 0.8 150 7 ... 1122 1746 1667 10 0 \n", 1107 | "1279 0.4 170 1 ... 1259 1746 3622 17 2 \n", 1108 | "674 0.3 190 3 ... 182 1293 969 15 1 \n", 1109 | "1200 0.3 88 6 ... 48 1012 959 17 7 \n", 1110 | "\n", 1111 | " talk_time three_g touch_screen wifi price_range \n", 1112 | "1452 9 0 1 1 1 \n", 1113 | "1044 12 0 0 0 1 \n", 1114 | "1279 17 0 1 1 3 \n", 1115 | "674 7 1 0 0 0 \n", 1116 | "1200 6 0 1 0 0 \n", 1117 | "\n", 1118 | "[5 rows x 21 columns]" 1119 | ] 1120 | }, 1121 | "execution_count": 22, 1122 | "metadata": {}, 1123 | "output_type": "execute_result" 1124 | } 1125 | ], 1126 | "source": [ 1127 | "trainX.head()" 1128 | ] 1129 | }, 1130 | { 1131 | "cell_type": "code", 1132 | "execution_count": 23, 1133 | "metadata": { 1134 | "tags": [] 1135 | }, 1136 | "outputs": [ 1137 | { 1138 | "data": { 1139 | "text/plain": [ 1140 | "battery_power 0\n", 1141 | "blue 0\n", 1142 | "clock_speed 0\n", 1143 | "dual_sim 0\n", 1144 | "fc 0\n", 1145 | "four_g 0\n", 1146 | "int_memory 0\n", 1147 | "m_dep 0\n", 1148 | "mobile_wt 0\n", 1149 | "n_cores 0\n", 1150 | "pc 0\n", 1151 | "px_height 0\n", 1152 | "px_width 0\n", 1153 | "ram 0\n", 1154 | "sc_h 0\n", 1155 | "sc_w 0\n", 1156 | "talk_time 0\n", 1157 | "three_g 0\n", 1158 | "touch_screen 0\n", 1159 | "wifi 0\n", 1160 | "price_range 0\n", 1161 | "dtype: int64" 1162 | ] 1163 | }, 1164 | "execution_count": 23, 1165 | "metadata": {}, 1166 | "output_type": "execute_result" 1167 | } 1168 | ], 1169 | "source": [ 1170 | "trainX.isnull().sum()" 1171 | ] 1172 | }, 1173 | { 1174 | "cell_type": "code", 
1175 | "execution_count": 24, 1176 | "metadata": { 1177 | "tags": [] 1178 | }, 1179 | "outputs": [ 1180 | { 1181 | "data": { 1182 | "text/plain": [ 1183 | "battery_power 0\n", 1184 | "blue 0\n", 1185 | "clock_speed 0\n", 1186 | "dual_sim 0\n", 1187 | "fc 0\n", 1188 | "four_g 0\n", 1189 | "int_memory 0\n", 1190 | "m_dep 0\n", 1191 | "mobile_wt 0\n", 1192 | "n_cores 0\n", 1193 | "pc 0\n", 1194 | "px_height 0\n", 1195 | "px_width 0\n", 1196 | "ram 0\n", 1197 | "sc_h 0\n", 1198 | "sc_w 0\n", 1199 | "talk_time 0\n", 1200 | "three_g 0\n", 1201 | "touch_screen 0\n", 1202 | "wifi 0\n", 1203 | "price_range 0\n", 1204 | "dtype: int64" 1205 | ] 1206 | }, 1207 | "execution_count": 24, 1208 | "metadata": {}, 1209 | "output_type": "execute_result" 1210 | } 1211 | ], 1212 | "source": [ 1213 | "testX.isnull().sum()" 1214 | ] 1215 | }, 1216 | { 1217 | "cell_type": "markdown", 1218 | "metadata": { 1219 | "tags": [] 1220 | }, 1221 | "source": [ 1222 | "## 5. Upload data into the S3 Bucket." 1223 | ] 1224 | }, 1225 | { 1226 | "cell_type": "code", 1227 | "execution_count": 25, 1228 | "metadata": { 1229 | "tags": [] 1230 | }, 1231 | "outputs": [], 1232 | "source": [ 1233 | "trainX.to_csv(\"train-V-1.csv\",index = False)\n", 1234 | "testX.to_csv(\"test-V-1.csv\", index = False)" 1235 | ] 1236 | }, 1237 | { 1238 | "cell_type": "code", 1239 | "execution_count": 26, 1240 | "metadata": { 1241 | "tags": [] 1242 | }, 1243 | "outputs": [], 1244 | "source": [ 1245 | "# send data to S3. 
SageMaker will take training data from s3\n", 1246 | "sk_prefix = \"sagemaker/mobile_price_classification/sklearncontainer\"\n", 1247 | "trainpath = sess.upload_data(\n", 1248 | " path=\"train-V-1.csv\", bucket=bucket, key_prefix=sk_prefix\n", 1249 | ")\n", 1250 | "\n", 1251 | "testpath = sess.upload_data(\n", 1252 | " path=\"test-V-1.csv\", bucket=bucket, key_prefix=sk_prefix\n", 1253 | ")" 1254 | ] 1255 | }, 1256 | { 1257 | "cell_type": "code", 1258 | "execution_count": 27, 1259 | "metadata": { 1260 | "tags": [] 1261 | }, 1262 | "outputs": [ 1263 | { 1264 | "data": { 1265 | "text/plain": [ 1266 | "'s3://sagemaker-tutorials-mlhub/sagemaker/mobile_price_classification/sklearncontainer/test-V-1.csv'" 1267 | ] 1268 | }, 1269 | "execution_count": 27, 1270 | "metadata": {}, 1271 | "output_type": "execute_result" 1272 | } 1273 | ], 1274 | "source": [ 1275 | "testpath" 1276 | ] 1277 | }, 1278 | { 1279 | "cell_type": "code", 1280 | "execution_count": 28, 1281 | "metadata": { 1282 | "tags": [] 1283 | }, 1284 | "outputs": [ 1285 | { 1286 | "data": { 1287 | "text/plain": [ 1288 | "'s3://sagemaker-tutorials-mlhub/sagemaker/mobile_price_classification/sklearncontainer/train-V-1.csv'" 1289 | ] 1290 | }, 1291 | "execution_count": 28, 1292 | "metadata": {}, 1293 | "output_type": "execute_result" 1294 | } 1295 | ], 1296 | "source": [ 1297 | "trainpath" 1298 | ] 1299 | }, 1300 | { 1301 | "cell_type": "markdown", 1302 | "metadata": { 1303 | "tags": [] 1304 | }, 1305 | "source": [ 1306 | "## 6. 
Create Training Script" 1307 | ] 1308 | }, 1309 | { 1310 | "cell_type": "code", 1311 | "execution_count": 29, 1312 | "metadata": { 1313 | "tags": [] 1314 | }, 1315 | "outputs": [ 1316 | { 1317 | "name": "stdout", 1318 | "output_type": "stream", 1319 | "text": [ 1320 | "Writing script.py\n" 1321 | ] 1322 | } 1323 | ], 1324 | "source": [ 1325 | "%%writefile script.py\n", 1326 | "\n", 1327 | "\n", 1328 | "from sklearn.ensemble import RandomForestClassifier\n", 1329 | "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score, roc_curve, auc\n", 1330 | "import sklearn\n", 1331 | "import joblib\n", 1332 | "import boto3\n", 1333 | "import pathlib\n", 1334 | "from io import StringIO \n", 1335 | "import argparse\n", 1336 | "import joblib\n", 1337 | "import os\n", 1338 | "import numpy as np\n", 1339 | "import pandas as pd\n", 1340 | "\n", 1341 | "# inference functions ---------------\n", 1342 | "\n", 1343 | "# def input_fn(request_body, request_content_type):\n", 1344 | "# print(request_body)\n", 1345 | "# print(request_content_type)\n", 1346 | "# if request_content_type == \"text/csv\":\n", 1347 | "# request_body = request_body.strip()\n", 1348 | "# try:\n", 1349 | "# df = pd.read_csv(StringIO(request_body), header=None)\n", 1350 | "# return df\n", 1351 | " \n", 1352 | "# except Exception as e:\n", 1353 | "# print(e)\n", 1354 | "# else:\n", 1355 | "# return \"\"\"Please use Content-Type = 'text/csv' and, send the request!!\"\"\" \n", 1356 | " \n", 1357 | " \n", 1358 | "def model_fn(model_dir):\n", 1359 | " clf = joblib.load(os.path.join(model_dir, \"model.joblib\"))\n", 1360 | " return clf\n", 1361 | "\n", 1362 | "# def predict_fn(input_data, model):\n", 1363 | "# if type(input_data) != str:\n", 1364 | "# prediction = model.predict(input_data)\n", 1365 | "# print(prediction)\n", 1366 | "# return prediction\n", 1367 | "# else:\n", 1368 | "# return input_data\n", 1369 | " \n", 1370 | " \n", 1371 | "if 
__name__ == \"__main__\":\n", 1372 | "\n", 1373 | " print(\"[INFO] Extracting arguments\")\n", 1374 | " parser = argparse.ArgumentParser()\n", 1375 | "\n", 1376 | " # hyperparameters sent by the client are passed as command-line arguments to the script.\n", 1377 | " parser.add_argument(\"--n_estimators\", type=int, default=100)\n", 1378 | " parser.add_argument(\"--random_state\", type=int, default=0)\n", 1379 | "\n", 1380 | " # Data, model, and output directories\n", 1381 | " parser.add_argument(\"--model-dir\", type=str, default=os.environ.get(\"SM_MODEL_DIR\"))\n", 1382 | " parser.add_argument(\"--train\", type=str, default=os.environ.get(\"SM_CHANNEL_TRAIN\"))\n", 1383 | " parser.add_argument(\"--test\", type=str, default=os.environ.get(\"SM_CHANNEL_TEST\"))\n", 1384 | " parser.add_argument(\"--train-file\", type=str, default=\"train-V-1.csv\")\n", 1385 | " parser.add_argument(\"--test-file\", type=str, default=\"test-V-1.csv\")\n", 1386 | "\n", 1387 | " args, _ = parser.parse_known_args()\n", 1388 | " \n", 1389 | " print(\"SKLearn Version: \", sklearn.__version__)\n", 1390 | " print(\"Joblib Version: \", joblib.__version__)\n", 1391 | "\n", 1392 | " print(\"[INFO] Reading data\")\n", 1393 | " print()\n", 1394 | " train_df = pd.read_csv(os.path.join(args.train, args.train_file))\n", 1395 | " test_df = pd.read_csv(os.path.join(args.test, args.test_file))\n", 1396 | " \n", 1397 | " features = list(train_df.columns)\n", 1398 | " label = features.pop(-1)\n", 1399 | " \n", 1400 | " print(\"Building training and testing datasets\")\n", 1401 | " print()\n", 1402 | " X_train = train_df[features]\n", 1403 | " X_test = test_df[features]\n", 1404 | " y_train = train_df[label]\n", 1405 | " y_test = test_df[label]\n", 1406 | "\n", 1407 | " print('Column order: ')\n", 1408 | " print(features)\n", 1409 | " print()\n", 1410 | " \n", 1411 | " print(\"Label column is: \",label)\n", 1412 | " print()\n", 1413 | " \n", 1414 | " print(\"Data Shape: \")\n", 1415 | " print()\n", 1416 | 
" print(\"---- SHAPE OF TRAINING DATA (85%) ----\")\n", 1417 | " print(X_train.shape)\n", 1418 | " print(y_train.shape)\n", 1419 | " print()\n", 1420 | " print(\"---- SHAPE OF TESTING DATA (15%) ----\")\n", 1421 | " print(X_test.shape)\n", 1422 | " print(y_test.shape)\n", 1423 | " print()\n", 1424 | " \n", 1425 | " \n", 1426 | " print(\"Training RandomForest Model.....\")\n", 1427 | " print()\n", 1428 | " model = RandomForestClassifier(n_estimators=args.n_estimators, random_state=args.random_state, verbose = 3,n_jobs=-1)\n", 1429 | " model.fit(X_train, y_train)\n", 1430 | " print()\n", 1431 | " \n", 1432 | "\n", 1433 | " model_path = os.path.join(args.model_dir, \"model.joblib\")\n", 1434 | " joblib.dump(model,model_path)\n", 1435 | " print(\"Model persisted at \" + model_path)\n", 1436 | " print()\n", 1437 | "\n", 1438 | " \n", 1439 | " y_pred_test = model.predict(X_test)\n", 1440 | " test_acc = accuracy_score(y_test,y_pred_test)\n", 1441 | " test_rep = classification_report(y_test,y_pred_test)\n", 1442 | "\n", 1443 | " print()\n", 1444 | " print(\"---- METRICS RESULTS FOR TESTING DATA ----\")\n", 1445 | " print()\n", 1446 | " print(\"Total Rows are: \", X_test.shape[0])\n", 1447 | " print('[TESTING] Model Accuracy is: ', test_acc)\n", 1448 | " print('[TESTING] Testing Report: ')\n", 1449 | " print(test_rep)" 1450 | ] 1451 | }, 1452 | { 1453 | "cell_type": "code", 1454 | "execution_count": 30, 1455 | "metadata": { 1456 | "scrolled": true, 1457 | "tags": [] 1458 | }, 1459 | "outputs": [ 1460 | { 1461 | "name": "stdout", 1462 | "output_type": "stream", 1463 | "text": [ 1464 | "[INFO] Extracting arguments\n", 1465 | "SKLearn Version: 0.22.1\n", 1466 | "Joblib Version: 0.14.1\n", 1467 | "[INFO] Reading data\n", 1468 | "\n", 1469 | "Building training and testing datasets\n", 1470 | "\n", 1471 | "Column order: \n", 1472 | "['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g', 'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height', 'px_width', 
'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g', 'touch_screen', 'wifi']\n", 1473 | "\n", 1474 | "Label column is: price_range\n", 1475 | "\n", 1476 | "Data Shape: \n", 1477 | "\n", 1478 | "---- SHAPE OF TRAINING DATA (85%) ----\n", 1479 | "(1700, 20)\n", 1480 | "(1700,)\n", 1481 | "\n", 1482 | "---- SHAPE OF TESTING DATA (15%) ----\n", 1483 | "(300, 20)\n", 1484 | "(300,)\n", 1485 | "\n", 1486 | "Training RandomForest Model.....\n", 1487 | "\n", 1488 | "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.\n", 1489 | "building tree 1 of 100\n", 1490 | "building tree 2 of 100\n", 1491 | "building tree 3 of 100\n", 1492 | "building tree 4 of 100\n", 1493 | "building tree 5 of 100\n", 1494 | "building tree 6 of 100\n", 1495 | "building tree 7 of 100\n", 1496 | "building tree 8 of 100\n", 1497 | "building tree 9 of 100\n", 1498 | "building tree 10 of 100\n", 1499 | "building tree 11 of 100\n", 1500 | "building tree 12 of 100\n", 1501 | "building tree 13 of 100\n", 1502 | "building tree 14 of 100\n", 1503 | "building tree 15 of 100\n", 1504 | "building tree 16 of 100\n", 1505 | "building tree 17 of 100\n", 1506 | "building tree 18 of 100\n", 1507 | "building tree 19 of 100\n", 1508 | "building tree 20 of 100\n", 1509 | "building tree 21 of 100\n", 1510 | "building tree 22 of 100\n", 1511 | "building tree 23 of 100\n", 1512 | "building tree 24 of 100\n", 1513 | "building tree 25 of 100\n", 1514 | "building tree 26 of 100\n", 1515 | "building tree 27 of 100\n", 1516 | "building tree 28 of 100\n", 1517 | "building tree 29 of 100\n", 1518 | "building tree 30 of 100\n", 1519 | "[Parallel(n_jobs=-1)]: Done 28 tasks | elapsed: 0.1s\n", 1520 | "building tree 31 of 100\n", 1521 | "building tree 32 of 100\n", 1522 | "building tree 33 of 100\n", 1523 | "building tree 34 of 100\n", 1524 | "building tree 35 of 100\n", 1525 | "building tree 36 of 100\n", 1526 | "building tree 37 of 100\n", 1527 | "building tree 38 of 100\n", 1528 | "building tree 39 of 
100\n", 1529 | "building tree 40 of 100\n", 1530 | "building tree 41 of 100\n", 1531 | "building tree 42 of 100\n", 1532 | "building tree 43 of 100\n", 1533 | "building tree 44 of 100\n", 1534 | "building tree 45 of 100\n", 1535 | "building tree 46 of 100\n", 1536 | "building tree 47 of 100\n", 1537 | "building tree 48 of 100\n", 1538 | "building tree 49 of 100\n", 1539 | "building tree 50 of 100\n", 1540 | "building tree 51 of 100\n", 1541 | "building tree 52 of 100\n", 1542 | "building tree 53 of 100\n", 1543 | "building tree 54 of 100\n", 1544 | "building tree 55 of 100\n", 1545 | "building tree 56 of 100\n", 1546 | "building tree 57 of 100\n", 1547 | "building tree 58 of 100\n", 1548 | "building tree 59 of 100\n", 1549 | "building tree 60 of 100\n", 1550 | "building tree 61 of 100\n", 1551 | "building tree 62 of 100\n", 1552 | "building tree 63 of 100\n", 1553 | "building tree 64 of 100\n", 1554 | "building tree 65 of 100\n", 1555 | "building tree 66 of 100\n", 1556 | "building tree 67 of 100\n", 1557 | "building tree 68 of 100\n", 1558 | "building tree 69 of 100\n", 1559 | "building tree 70 of 100\n", 1560 | "building tree 71 of 100\n", 1561 | "building tree 72 of 100\n", 1562 | "building tree 73 of 100\n", 1563 | "building tree 74 of 100\n", 1564 | "building tree 75 of 100\n", 1565 | "building tree 76 of 100\n", 1566 | "building tree 77 of 100\n", 1567 | "building tree 78 of 100\n", 1568 | "building tree 79 of 100\n", 1569 | "building tree 80 of 100\n", 1570 | "building tree 81 of 100\n", 1571 | "building tree 82 of 100\n", 1572 | "building tree 83 of 100\n", 1573 | "building tree 84 of 100\n", 1574 | "building tree 85 of 100\n", 1575 | "building tree 86 of 100\n", 1576 | "building tree 87 of 100\n", 1577 | "building tree 88 of 100\n", 1578 | "building tree 89 of 100\n", 1579 | "building tree 90 of 100\n", 1580 | "building tree 91 of 100\n", 1581 | "building tree 92 of 100\n", 1582 | "building tree 93 of 100\n", 1583 | "building tree 94 of 100\n", 1584 | 
"building tree 95 of 100\n", 1585 | "building tree 96 of 100\n", 1586 | "building tree 97 of 100\n", 1587 | "building tree 98 of 100\n", 1588 | "building tree 99 of 100\n", 1589 | "building tree 100 of 100\n", 1590 | "[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 0.3s finished\n", 1591 | "\n", 1592 | "Model persisted at ./model.joblib\n", 1593 | "\n", 1594 | "[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.\n", 1595 | "[Parallel(n_jobs=2)]: Done 28 tasks | elapsed: 0.0s\n", 1596 | "[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed: 0.0s finished\n", 1597 | "\n", 1598 | "---- METRICS RESULTS FOR TESTING DATA ----\n", 1599 | "\n", 1600 | "Total Rows are: 300\n", 1601 | "[TESTING] Model Accuracy is: 0.8833333333333333\n", 1602 | "[TESTING] Testing Report: \n", 1603 | " precision recall f1-score support\n", 1604 | "\n", 1605 | " 0 0.95 1.00 0.97 69\n", 1606 | " 1 0.85 0.80 0.83 66\n", 1607 | " 2 0.80 0.77 0.79 74\n", 1608 | " 3 0.91 0.95 0.93 91\n", 1609 | "\n", 1610 | " accuracy 0.88 300\n", 1611 | " macro avg 0.88 0.88 0.88 300\n", 1612 | "weighted avg 0.88 0.88 0.88 300\n", 1613 | "\n" 1614 | ] 1615 | } 1616 | ], 1617 | "source": [ 1618 | "! python script.py --n_estimators 100 \\\n", 1619 | " --random_state 0 \\\n", 1620 | " --model-dir ./ \\\n", 1621 | " --train ./ \\\n", 1622 | " --test ./ \\" 1623 | ] 1624 | }, 1625 | { 1626 | "cell_type": "markdown", 1627 | "metadata": { 1628 | "tags": [] 1629 | }, 1630 | "source": [ 1631 | "## 7. Train script in-side Sagemaker container." 
1632 | ] 1633 | }, 1634 | { 1635 | "cell_type": "code", 1636 | "execution_count": 37, 1637 | "metadata": { 1638 | "tags": [] 1639 | }, 1640 | "outputs": [], 1641 | "source": [ 1642 | "from sagemaker.sklearn.estimator import SKLearn\n", 1643 | "\n", 1644 | "FRAMEWORK_VERSION = \"0.23-1\"\n", 1645 | "\n", 1646 | "sklearn_estimator = SKLearn(\n", 1647 | " entry_point=\"script.py\",\n", 1648 | " role=get_execution_role(),\n", 1649 | " instance_count=1,\n", 1650 | " instance_type=\"ml.m5.large\",\n", 1651 | " framework_version=FRAMEWORK_VERSION,\n", 1652 | " base_job_name=\"RF-custom-sklearn\",\n", 1653 | " hyperparameters={\n", 1654 | " \"n_estimators\": 100,\n", 1655 | " \"random_state\": 0,\n", 1656 | " },\n", 1657 | " use_spot_instances = True,\n", 1658 | " max_wait = 7200,\n", 1659 | " max_run = 3600\n", 1660 | ")" 1661 | ] 1662 | }, 1663 | { 1664 | "cell_type": "code", 1665 | "execution_count": 38, 1666 | "metadata": { 1667 | "scrolled": true, 1668 | "tags": [] 1669 | }, 1670 | "outputs": [ 1671 | { 1672 | "name": "stdout", 1673 | "output_type": "stream", 1674 | "text": [ 1675 | "2023-02-11 06:10:30 Starting - Starting the training job...\n", 1676 | "2023-02-11 06:10:53 Starting - Preparing the instances for trainingProfilerReport-1676095830: InProgress\n", 1677 | "......\n", 1678 | "2023-02-11 06:11:59 Downloading - Downloading input data...\n", 1679 | "2023-02-11 06:12:24 Training - Downloading the training image...\n", 1680 | "2023-02-11 06:13:00 Training - Training image download completed. 
Training in progress...\u001b[34m2023-02-11 06:13:05,662 sagemaker-containers INFO Imported framework sagemaker_sklearn_container.training\u001b[0m\n", 1681 | "\u001b[34m2023-02-11 06:13:05,665 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", 1682 | "\u001b[34m2023-02-11 06:13:05,705 sagemaker_sklearn_container.training INFO Invoking user training script.\u001b[0m\n", 1683 | "\u001b[34m2023-02-11 06:13:05,885 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", 1684 | "\u001b[34m2023-02-11 06:13:05,897 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", 1685 | "\u001b[34m2023-02-11 06:13:05,908 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", 1686 | "\u001b[34m2023-02-11 06:13:05,919 sagemaker-training-toolkit INFO Invoking user script\u001b[0m\n", 1687 | "\u001b[34mTraining Env:\u001b[0m\n", 1688 | "\u001b[34m{\n", 1689 | " \"additional_framework_parameters\": {},\n", 1690 | " \"channel_input_dirs\": {\n", 1691 | " \"test\": \"/opt/ml/input/data/test\",\n", 1692 | " \"train\": \"/opt/ml/input/data/train\"\n", 1693 | " },\n", 1694 | " \"current_host\": \"algo-1\",\n", 1695 | " \"framework_module\": \"sagemaker_sklearn_container.training:main\",\n", 1696 | " \"hosts\": [\n", 1697 | " \"algo-1\"\n", 1698 | " ],\n", 1699 | " \"hyperparameters\": {\n", 1700 | " \"n_estimators\": 100,\n", 1701 | " \"random_state\": 0\n", 1702 | " },\n", 1703 | " \"input_config_dir\": \"/opt/ml/input/config\",\n", 1704 | " \"input_data_config\": {\n", 1705 | " \"test\": {\n", 1706 | " \"TrainingInputMode\": \"File\",\n", 1707 | " \"S3DistributionType\": \"FullyReplicated\",\n", 1708 | " \"RecordWrapperType\": \"None\"\n", 1709 | " },\n", 1710 | " \"train\": {\n", 1711 | " \"TrainingInputMode\": \"File\",\n", 1712 | " \"S3DistributionType\": \"FullyReplicated\",\n", 1713 | " \"RecordWrapperType\": \"None\"\n", 1714 | " }\n", 
1715 | " },\n", 1716 | " \"input_dir\": \"/opt/ml/input\",\n", 1717 | " \"is_master\": true,\n", 1718 | " \"job_name\": \"RF-custom-sklearn-2023-02-11-06-10-29-900\",\n", 1719 | " \"log_level\": 20,\n", 1720 | " \"master_hostname\": \"algo-1\",\n", 1721 | " \"model_dir\": \"/opt/ml/model\",\n", 1722 | " \"module_dir\": \"s3://sagemaker-ap-south-1-179822996285/RF-custom-sklearn-2023-02-11-06-10-29-900/source/sourcedir.tar.gz\",\n", 1723 | " \"module_name\": \"script\",\n", 1724 | " \"network_interface_name\": \"eth0\",\n", 1725 | " \"num_cpus\": 2,\n", 1726 | " \"num_gpus\": 0,\n", 1727 | " \"output_data_dir\": \"/opt/ml/output/data\",\n", 1728 | " \"output_dir\": \"/opt/ml/output\",\n", 1729 | " \"output_intermediate_dir\": \"/opt/ml/output/intermediate\",\n", 1730 | " \"resource_config\": {\n", 1731 | " \"current_host\": \"algo-1\",\n", 1732 | " \"current_instance_type\": \"ml.m5.large\",\n", 1733 | " \"current_group_name\": \"homogeneousCluster\",\n", 1734 | " \"hosts\": [\n", 1735 | " \"algo-1\"\n", 1736 | " ],\n", 1737 | " \"instance_groups\": [\n", 1738 | " {\n", 1739 | " \"instance_group_name\": \"homogeneousCluster\",\n", 1740 | " \"instance_type\": \"ml.m5.large\",\n", 1741 | " \"hosts\": [\n", 1742 | " \"algo-1\"\n", 1743 | " ]\n", 1744 | " }\n", 1745 | " ],\n", 1746 | " \"network_interface_name\": \"eth0\"\n", 1747 | " },\n", 1748 | " \"user_entry_point\": \"script.py\"\u001b[0m\n", 1749 | "\u001b[34m}\u001b[0m\n", 1750 | "\u001b[34mEnvironment variables:\u001b[0m\n", 1751 | "\u001b[34mSM_HOSTS=[\"algo-1\"]\u001b[0m\n", 1752 | "\u001b[34mSM_NETWORK_INTERFACE_NAME=eth0\u001b[0m\n", 1753 | "\u001b[34mSM_HPS={\"n_estimators\":100,\"random_state\":0}\u001b[0m\n", 1754 | "\u001b[34mSM_USER_ENTRY_POINT=script.py\u001b[0m\n", 1755 | "\u001b[34mSM_FRAMEWORK_PARAMS={}\u001b[0m\n", 1756 | 
"\u001b[34mSM_RESOURCE_CONFIG={\"current_group_name\":\"homogeneousCluster\",\"current_host\":\"algo-1\",\"current_instance_type\":\"ml.m5.large\",\"hosts\":[\"algo-1\"],\"instance_groups\":[{\"hosts\":[\"algo-1\"],\"instance_group_name\":\"homogeneousCluster\",\"instance_type\":\"ml.m5.large\"}],\"network_interface_name\":\"eth0\"}\u001b[0m\n", 1757 | "\u001b[34mSM_INPUT_DATA_CONFIG={\"test\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"},\"train\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}}\u001b[0m\n", 1758 | "\u001b[34mSM_OUTPUT_DATA_DIR=/opt/ml/output/data\u001b[0m\n", 1759 | "\u001b[34mSM_CHANNELS=[\"test\",\"train\"]\u001b[0m\n", 1760 | "\u001b[34mSM_CURRENT_HOST=algo-1\u001b[0m\n", 1761 | "\u001b[34mSM_MODULE_NAME=script\u001b[0m\n", 1762 | "\u001b[34mSM_LOG_LEVEL=20\u001b[0m\n", 1763 | "\u001b[34mSM_FRAMEWORK_MODULE=sagemaker_sklearn_container.training:main\u001b[0m\n", 1764 | "\u001b[34mSM_INPUT_DIR=/opt/ml/input\u001b[0m\n", 1765 | "\u001b[34mSM_INPUT_CONFIG_DIR=/opt/ml/input/config\u001b[0m\n", 1766 | "\u001b[34mSM_OUTPUT_DIR=/opt/ml/output\u001b[0m\n", 1767 | "\u001b[34mSM_NUM_CPUS=2\u001b[0m\n", 1768 | "\u001b[34mSM_NUM_GPUS=0\u001b[0m\n", 1769 | "\u001b[34mSM_MODEL_DIR=/opt/ml/model\u001b[0m\n", 1770 | "\u001b[34mSM_MODULE_DIR=s3://sagemaker-ap-south-1-179822996285/RF-custom-sklearn-2023-02-11-06-10-29-900/source/sourcedir.tar.gz\u001b[0m\n", 1771 | 
"\u001b[34mSM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"test\":\"/opt/ml/input/data/test\",\"train\":\"/opt/ml/input/data/train\"},\"current_host\":\"algo-1\",\"framework_module\":\"sagemaker_sklearn_container.training:main\",\"hosts\":[\"algo-1\"],\"hyperparameters\":{\"n_estimators\":100,\"random_state\":0},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"test\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"},\"train\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"RF-custom-sklearn-2023-02-11-06-10-29-900\",\"log_level\":20,\"master_hostname\":\"algo-1\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://sagemaker-ap-south-1-179822996285/RF-custom-sklearn-2023-02-11-06-10-29-900/source/sourcedir.tar.gz\",\"module_name\":\"script\",\"network_interface_name\":\"eth0\",\"num_cpus\":2,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_group_name\":\"homogeneousCluster\",\"current_host\":\"algo-1\",\"current_instance_type\":\"ml.m5.large\",\"hosts\":[\"algo-1\"],\"instance_groups\":[{\"hosts\":[\"algo-1\"],\"instance_group_name\":\"homogeneousCluster\",\"instance_type\":\"ml.m5.large\"}],\"network_interface_name\":\"eth0\"},\"user_entry_point\":\"script.py\"}\u001b[0m\n", 1772 | "\u001b[34mSM_USER_ARGS=[\"--n_estimators\",\"100\",\"--random_state\",\"0\"]\u001b[0m\n", 1773 | "\u001b[34mSM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\u001b[0m\n", 1774 | "\u001b[34mSM_CHANNEL_TEST=/opt/ml/input/data/test\u001b[0m\n", 1775 | "\u001b[34mSM_CHANNEL_TRAIN=/opt/ml/input/data/train\u001b[0m\n", 1776 | "\u001b[34mSM_HP_N_ESTIMATORS=100\u001b[0m\n", 1777 | 
"\u001b[34mSM_HP_RANDOM_STATE=0\u001b[0m\n", 1778 | "\u001b[34mPYTHONPATH=/opt/ml/code:/miniconda3/bin:/miniconda3/lib/python37.zip:/miniconda3/lib/python3.7:/miniconda3/lib/python3.7/lib-dynload:/miniconda3/lib/python3.7/site-packages\u001b[0m\n", 1779 | "\u001b[34mInvoking script with the following command:\u001b[0m\n", 1780 | "\u001b[34m/miniconda3/bin/python script.py --n_estimators 100 --random_state 0\u001b[0m\n", 1781 | "\u001b[34m[INFO] Extracting arguments\u001b[0m\n", 1782 | "\u001b[34mSKLearn Version: 0.23.2\u001b[0m\n", 1783 | "\u001b[34mJoblib Version: 1.2.0\u001b[0m\n", 1784 | "\u001b[34m[INFO] Reading data\u001b[0m\n", 1785 | "\u001b[34mBuilding training and testing datasets\u001b[0m\n", 1786 | "\u001b[34mColumn order: \u001b[0m\n", 1787 | "\u001b[34m['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g', 'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height', 'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g', 'touch_screen', 'wifi']\u001b[0m\n", 1788 | "\u001b[34mLabel column is: price_range\u001b[0m\n", 1789 | "\u001b[34mData Shape: \u001b[0m\n", 1790 | "\u001b[34m---- SHAPE OF TRAINING DATA (85%) ----\u001b[0m\n", 1791 | "\u001b[34m(1700, 20)\u001b[0m\n", 1792 | "\u001b[34m(1700,)\u001b[0m\n", 1793 | "\u001b[34m---- SHAPE OF TESTING DATA (15%) ----\u001b[0m\n", 1794 | "\u001b[34m(300, 20)\u001b[0m\n", 1795 | "\u001b[34m(300,)\u001b[0m\n", 1796 | "\u001b[34mTraining RandomForest Model.....\u001b[0m\n", 1797 | "\u001b[34m[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.\u001b[0m\n", 1798 | "\u001b[34mbuilding tree 1 of 100building tree 2 of 100\u001b[0m\n", 1799 | "\u001b[34mbuilding tree 3 of 100\u001b[0m\n", 1800 | "\u001b[34mbuilding tree 4 of 100\u001b[0m\n", 1801 | "\u001b[34mbuilding tree 5 of 100building tree 6 of 100\u001b[0m\n", 1802 | "\u001b[34mbuilding tree 7 of 100building tree 8 of 100\u001b[0m\n", 1803 | "\u001b[34mbuilding tree 9 of 100building tree 10 of 100\u001b[0m\n", 
1804 | "\u001b[34mbuilding tree 11 of 100\u001b[0m\n", 1805 | "\u001b[34mbuilding tree 12 of 100\u001b[0m\n", 1806 | "\u001b[34mbuilding tree 13 of 100\u001b[0m\n", 1807 | "\u001b[34mbuilding tree 14 of 100\u001b[0m\n", 1808 | "\u001b[34mbuilding tree 15 of 100\u001b[0m\n", 1809 | "\u001b[34mbuilding tree 16 of 100\u001b[0m\n", 1810 | "\u001b[34mbuilding tree 17 of 100\u001b[0m\n", 1811 | "\u001b[34mbuilding tree 18 of 100\u001b[0m\n", 1812 | "\u001b[34mbuilding tree 19 of 100\u001b[0m\n", 1813 | "\u001b[34mbuilding tree 20 of 100\u001b[0m\n", 1814 | "\u001b[34mbuilding tree 21 of 100\u001b[0m\n", 1815 | "\u001b[34mbuilding tree 22 of 100\u001b[0m\n", 1816 | "\u001b[34mbuilding tree 23 of 100\u001b[0m\n", 1817 | "\u001b[34mbuilding tree 24 of 100\u001b[0m\n", 1818 | "\u001b[34mbuilding tree 25 of 100\u001b[0m\n", 1819 | "\u001b[34mbuilding tree 26 of 100\u001b[0m\n", 1820 | "\u001b[34mbuilding tree 27 of 100\u001b[0m\n", 1821 | "\u001b[34mbuilding tree 28 of 100\u001b[0m\n", 1822 | "\u001b[34mbuilding tree 29 of 100\u001b[0m\n", 1823 | "\u001b[34mbuilding tree 30 of 100\u001b[0m\n", 1824 | "\u001b[34m[Parallel(n_jobs=-1)]: Done 28 tasks | elapsed: 0.1s\u001b[0m\n", 1825 | "\u001b[34mbuilding tree 31 of 100\u001b[0m\n", 1826 | "\u001b[34mbuilding tree 32 of 100\u001b[0m\n", 1827 | "\u001b[34mbuilding tree 33 of 100\u001b[0m\n", 1828 | "\u001b[34mbuilding tree 34 of 100\u001b[0m\n", 1829 | "\u001b[34mbuilding tree 35 of 100\u001b[0m\n", 1830 | "\u001b[34mbuilding tree 36 of 100\u001b[0m\n", 1831 | "\u001b[34mbuilding tree 37 of 100\u001b[0m\n", 1832 | "\u001b[34mbuilding tree 38 of 100\u001b[0m\n", 1833 | "\u001b[34mbuilding tree 39 of 100\u001b[0m\n", 1834 | "\u001b[34mbuilding tree 40 of 100\u001b[0m\n", 1835 | "\u001b[34mbuilding tree 41 of 100\u001b[0m\n", 1836 | "\u001b[34mbuilding tree 42 of 100\u001b[0m\n", 1837 | "\u001b[34mbuilding tree 43 of 100\u001b[0m\n", 1838 | "\u001b[34mbuilding tree 44 of 100\u001b[0m\n", 1839 | "\u001b[34mbuilding tree 45 of 
100\u001b[0m\n", 1840 | "\u001b[34mbuilding tree 46 of 100\u001b[0m\n", 1841 | "\u001b[34mbuilding tree 47 of 100\u001b[0m\n", 1842 | "\u001b[34mbuilding tree 48 of 100\u001b[0m\n", 1843 | "\u001b[34mbuilding tree 49 of 100\u001b[0m\n", 1844 | "\u001b[34mbuilding tree 50 of 100\u001b[0m\n", 1845 | "\u001b[34mbuilding tree 51 of 100\u001b[0m\n", 1846 | "\u001b[34mbuilding tree 52 of 100\u001b[0m\n", 1847 | "\u001b[34mbuilding tree 53 of 100\u001b[0m\n", 1848 | "\u001b[34mbuilding tree 54 of 100\u001b[0m\n", 1849 | "\u001b[34mbuilding tree 55 of 100\u001b[0m\n", 1850 | "\u001b[34mbuilding tree 56 of 100\u001b[0m\n", 1851 | "\u001b[34mbuilding tree 57 of 100\u001b[0m\n", 1852 | "\u001b[34mbuilding tree 58 of 100\u001b[0m\n", 1853 | "\u001b[34mbuilding tree 59 of 100building tree 60 of 100\u001b[0m\n", 1854 | "\u001b[34mbuilding tree 61 of 100\u001b[0m\n", 1855 | "\u001b[34mbuilding tree 62 of 100\u001b[0m\n", 1856 | "\u001b[34mbuilding tree 63 of 100\u001b[0m\n", 1857 | "\u001b[34mbuilding tree 64 of 100\u001b[0m\n", 1858 | "\u001b[34mbuilding tree 65 of 100\u001b[0m\n", 1859 | "\u001b[34mbuilding tree 66 of 100\u001b[0m\n", 1860 | "\u001b[34mbuilding tree 67 of 100\u001b[0m\n", 1861 | "\u001b[34mbuilding tree 68 of 100\u001b[0m\n", 1862 | "\u001b[34mbuilding tree 69 of 100\u001b[0m\n", 1863 | "\u001b[34mbuilding tree 70 of 100\u001b[0m\n", 1864 | "\u001b[34mbuilding tree 71 of 100building tree 72 of 100\u001b[0m\n", 1865 | "\u001b[34mbuilding tree 73 of 100\u001b[0m\n", 1866 | "\u001b[34mbuilding tree 74 of 100\u001b[0m\n", 1867 | "\u001b[34mbuilding tree 75 of 100\u001b[0m\n", 1868 | "\u001b[34mbuilding tree 76 of 100\u001b[0m\n", 1869 | "\u001b[34mbuilding tree 77 of 100building tree 78 of 100\u001b[0m\n", 1870 | "\u001b[34mbuilding tree 79 of 100\u001b[0m\n", 1871 | "\u001b[34mbuilding tree 80 of 100\u001b[0m\n", 1872 | "\u001b[34mbuilding tree 81 of 100\u001b[0m\n", 1873 | "\u001b[34mbuilding tree 82 of 100\u001b[0m\n", 1874 | "\u001b[34mbuilding tree 83 of 
100\u001b[0m\n", 1875 | "\u001b[34mbuilding tree 84 of 100\u001b[0m\n", 1876 | "\u001b[34mbuilding tree 85 of 100building tree 86 of 100\u001b[0m\n", 1877 | "\u001b[34mbuilding tree 87 of 100\u001b[0m\n", 1878 | "\u001b[34mbuilding tree 88 of 100\u001b[0m\n", 1879 | "\u001b[34mbuilding tree 89 of 100\u001b[0m\n", 1880 | "\u001b[34mbuilding tree 90 of 100\u001b[0m\n", 1881 | "\u001b[34mbuilding tree 91 of 100\u001b[0m\n", 1882 | "\u001b[34mbuilding tree 92 of 100\u001b[0m\n", 1883 | "\u001b[34mbuilding tree 93 of 100building tree 94 of 100\u001b[0m\n", 1884 | "\u001b[34mbuilding tree 95 of 100building tree 96 of 100\u001b[0m\n", 1885 | "\u001b[34mbuilding tree 97 of 100\u001b[0m\n", 1886 | "\u001b[34mbuilding tree 98 of 100\u001b[0m\n", 1887 | "\u001b[34mbuilding tree 99 of 100\u001b[0m\n", 1888 | "\u001b[34mbuilding tree 100 of 100\u001b[0m\n", 1889 | "\u001b[34m[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 0.3s finished\u001b[0m\n", 1890 | "\u001b[34mModel persisted at /opt/ml/model/model.joblib\u001b[0m\n", 1891 | "\u001b[34m[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.\u001b[0m\n", 1892 | "\u001b[34m[Parallel(n_jobs=2)]: Done 28 tasks | elapsed: 0.0s\u001b[0m\n", 1893 | "\u001b[34m[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed: 0.0s finished\u001b[0m\n", 1894 | "\u001b[34m---- METRICS RESULTS FOR TESTING DATA ----\u001b[0m\n", 1895 | "\u001b[34mTotal Rows are: 300\u001b[0m\n", 1896 | "\u001b[34m[TESTING] Model Accuracy is: 0.8833333333333333\u001b[0m\n", 1897 | "\u001b[34m[TESTING] Testing Report: \n", 1898 | " precision recall f1-score support\n", 1899 | " 0 0.95 1.00 0.97 69\n", 1900 | " 1 0.85 0.80 0.83 66\n", 1901 | " 2 0.80 0.77 0.79 74\n", 1902 | " 3 0.91 0.95 0.93 91\n", 1903 | " accuracy 0.88 300\n", 1904 | " macro avg 0.88 0.88 0.88 300\u001b[0m\n", 1905 | "\u001b[34mweighted avg 0.88 0.88 0.88 300\u001b[0m\n", 1906 | "\u001b[34m2023-02-11 06:13:07,454 sagemaker-containers INFO Reporting training 
SUCCESS\u001b[0m\n", 1907 | "\n", 1908 | "2023-02-11 06:13:26 Uploading - Uploading generated training model\n", 1909 | "2023-02-11 06:13:26 Completed - Training job completed\n", 1910 | "Training seconds: 87\n", 1911 | "Billable seconds: 30\n", 1912 | "Managed Spot Training savings: 65.5%\n" 1913 | ] 1914 | } 1915 | ], 1916 | "source": [ 1917 | "# launch the training job and block until it finishes (wait=True streams the job logs)\n", 1918 | "sklearn_estimator.fit({\"train\": trainpath, \"test\": testpath}, wait=True)\n", 1919 | "# sklearn_estimator.fit({\"train\": datapath}, wait=True)" 1920 | ] 1921 | }, 1922 | { 1923 | "cell_type": "markdown", 1924 | "metadata": { 1925 | "tags": [] 1926 | }, 1927 | "source": [ 1928 | "## 8. Store Model Artifacts(model.tar.gz) into the S3 Bucket. " 1929 | ] 1930 | }, 1931 | { 1932 | "cell_type": "code", 1933 | "execution_count": 39, 1934 | "metadata": { 1935 | "tags": [] 1936 | }, 1937 | "outputs": [ 1938 | { 1939 | "name": "stdout", 1940 | "output_type": "stream", 1941 | "text": [ 1942 | "\n", 1943 | "2023-02-11 06:14:34 Starting - Preparing the instances for training\n", 1944 | "2023-02-11 06:14:34 Downloading - Downloading input data\n", 1945 | "2023-02-11 06:14:34 Training - Training image download completed. 
Training in progress.\n", 1946 | "2023-02-11 06:14:34 Uploading - Uploading generated training model\n", 1947 | "2023-02-11 06:14:34 Completed - Training job completed\n", 1948 | "Model artifact persisted at s3://sagemaker-ap-south-1-179822996285/RF-custom-sklearn-2023-02-11-06-10-29-900/output/model.tar.gz\n" 1949 | ] 1950 | } 1951 | ], 1952 | "source": [ 1953 | "sklearn_estimator.latest_training_job.wait(logs=\"None\")\n", 1954 | "artifact = sm_boto3.describe_training_job(\n", 1955 | " TrainingJobName=sklearn_estimator.latest_training_job.name\n", 1956 | ")[\"ModelArtifacts\"][\"S3ModelArtifacts\"]\n", 1957 | "\n", 1958 | "print(\"Model artifact persisted at \" + artifact)" 1959 | ] 1960 | }, 1961 | { 1962 | "cell_type": "markdown", 1963 | "metadata": { 1964 | "tags": [] 1965 | }, 1966 | "source": [ 1967 | "## 9. Deploy Sagemaker Endpoint(API) for trained model, and test it. " 1968 | ] 1969 | }, 1970 | { 1971 | "cell_type": "code", 1972 | "execution_count": 40, 1973 | "metadata": { 1974 | "tags": [] 1975 | }, 1976 | "outputs": [], 1977 | "source": [ 1978 | "from sagemaker.sklearn.model import SKLearnModel\n", 1979 | "from time import gmtime, strftime\n", 1980 | "\n", 1981 | "model_name = \"Custom-sklearn-model-\" + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", 1982 | "model = SKLearnModel(\n", 1983 | " name = model_name,\n", 1984 | " model_data=artifact,\n", 1985 | " role=get_execution_role(),\n", 1986 | " entry_point=\"script.py\",\n", 1987 | " framework_version=FRAMEWORK_VERSION,\n", 1988 | ")" 1989 | ] 1990 | }, 1991 | { 1992 | "cell_type": "code", 1993 | "execution_count": 41, 1994 | "metadata": { 1995 | "tags": [] 1996 | }, 1997 | "outputs": [ 1998 | { 1999 | "name": "stdout", 2000 | "output_type": "stream", 2001 | "text": [ 2002 | "EndpointName=RF-custom-sklearn-model-2023-02-11-06-14-52\n", 2003 | "------!" 
2004 | ] 2005 | } 2006 | ], 2007 | "source": [ 2008 | "endpoint_name = \"Custom-sklearn-model-\" + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n", 2009 | "print(\"EndpointName={}\".format(endpoint_name))\n", 2010 | "\n", 2011 | "predictor = model.deploy(\n", 2012 | " initial_instance_count=1,\n", 2013 | " instance_type=\"ml.m4.xlarge\",\n", 2014 | " endpoint_name=endpoint_name,\n", 2015 | ")" 2016 | ] 2017 | }, 2018 | { 2019 | "cell_type": "code", 2020 | "execution_count": 42, 2021 | "metadata": { 2022 | "tags": [] 2023 | }, 2024 | "outputs": [ 2025 | { 2026 | "data": { 2027 | "text/plain": [ 2028 | "[[1454.0,\n", 2029 | " 1.0,\n", 2030 | " 0.5,\n", 2031 | " 1.0,\n", 2032 | " 1.0,\n", 2033 | " 0.0,\n", 2034 | " 34.0,\n", 2035 | " 0.7,\n", 2036 | " 83.0,\n", 2037 | " 4.0,\n", 2038 | " 3.0,\n", 2039 | " 250.0,\n", 2040 | " 1033.0,\n", 2041 | " 3419.0,\n", 2042 | " 7.0,\n", 2043 | " 5.0,\n", 2044 | " 5.0,\n", 2045 | " 1.0,\n", 2046 | " 1.0,\n", 2047 | " 0.0],\n", 2048 | " [1092.0,\n", 2049 | " 1.0,\n", 2050 | " 0.5,\n", 2051 | " 1.0,\n", 2052 | " 10.0,\n", 2053 | " 0.0,\n", 2054 | " 11.0,\n", 2055 | " 0.5,\n", 2056 | " 167.0,\n", 2057 | " 3.0,\n", 2058 | " 14.0,\n", 2059 | " 468.0,\n", 2060 | " 571.0,\n", 2061 | " 737.0,\n", 2062 | " 14.0,\n", 2063 | " 4.0,\n", 2064 | " 11.0,\n", 2065 | " 0.0,\n", 2066 | " 1.0,\n", 2067 | " 0.0]]" 2068 | ] 2069 | }, 2070 | "execution_count": 42, 2071 | "metadata": {}, 2072 | "output_type": "execute_result" 2073 | } 2074 | ], 2075 | "source": [ 2076 | "testX[features][0:2].values.tolist()" 2077 | ] 2078 | }, 2079 | { 2080 | "cell_type": "code", 2081 | "execution_count": 43, 2082 | "metadata": { 2083 | "tags": [] 2084 | }, 2085 | "outputs": [ 2086 | { 2087 | "name": "stdout", 2088 | "output_type": "stream", 2089 | "text": [ 2090 | "[3 0]\n" 2091 | ] 2092 | } 2093 | ], 2094 | "source": [ 2095 | "print(predictor.predict(testX[features][0:2].values.tolist()))" 2096 | ] 2097 | }, 2098 | { 2099 | "cell_type": "markdown", 2100 | "metadata": { 
2101 | "tags": [] 2102 | }, 2103 | "source": [ 2104 | "## Don't forget to delete the endpoint!" 2105 | ] 2106 | }, 2107 | { 2108 | "cell_type": "code", 2109 | "execution_count": 44, 2110 | "metadata": { 2111 | "tags": [] 2112 | }, 2113 | "outputs": [ 2114 | { 2115 | "data": { 2116 | "text/plain": [ 2117 | "{'ResponseMetadata': {'RequestId': '30c986e4-c6da-4f1f-90f2-66a41149fcad',\n", 2118 | " 'HTTPStatusCode': 200,\n", 2119 | " 'HTTPHeaders': {'x-amzn-requestid': '30c986e4-c6da-4f1f-90f2-66a41149fcad',\n", 2120 | " 'content-type': 'application/x-amz-json-1.1',\n", 2121 | " 'content-length': '0',\n", 2122 | " 'date': 'Sat, 11 Feb 2023 06:27:32 GMT'},\n", 2123 | " 'RetryAttempts': 0}}" 2124 | ] 2125 | }, 2126 | "execution_count": 44, 2127 | "metadata": {}, 2128 | "output_type": "execute_result" 2129 | } 2130 | ], 2131 | "source": [ 2132 | "sm_boto3.delete_endpoint(EndpointName=endpoint_name)" 2133 | ] 2134 | }, 2135 | { 2136 | "cell_type": "markdown", 2137 | "metadata": { 2138 | "tags": [] 2139 | }, 2140 | "source": [ 2141 | "### Don't forget to subscribe to the Machine Learning Hub YouTube channel. 
" 2142 | ] 2143 | }, 2144 | { 2145 | "cell_type": "code", 2146 | "execution_count": null, 2147 | "metadata": {}, 2148 | "outputs": [], 2149 | "source": [] 2150 | } 2151 | ], 2152 | "metadata": { 2153 | "availableInstances": [ 2154 | { 2155 | "_defaultOrder": 0, 2156 | "_isFastLaunch": true, 2157 | "category": "General purpose", 2158 | "gpuNum": 0, 2159 | "memoryGiB": 4, 2160 | "name": "ml.t3.medium", 2161 | "vcpuNum": 2 2162 | }, 2163 | { 2164 | "_defaultOrder": 1, 2165 | "_isFastLaunch": false, 2166 | "category": "General purpose", 2167 | "gpuNum": 0, 2168 | "memoryGiB": 8, 2169 | "name": "ml.t3.large", 2170 | "vcpuNum": 2 2171 | }, 2172 | { 2173 | "_defaultOrder": 2, 2174 | "_isFastLaunch": false, 2175 | "category": "General purpose", 2176 | "gpuNum": 0, 2177 | "memoryGiB": 16, 2178 | "name": "ml.t3.xlarge", 2179 | "vcpuNum": 4 2180 | }, 2181 | { 2182 | "_defaultOrder": 3, 2183 | "_isFastLaunch": false, 2184 | "category": "General purpose", 2185 | "gpuNum": 0, 2186 | "memoryGiB": 32, 2187 | "name": "ml.t3.2xlarge", 2188 | "vcpuNum": 8 2189 | }, 2190 | { 2191 | "_defaultOrder": 4, 2192 | "_isFastLaunch": true, 2193 | "category": "General purpose", 2194 | "gpuNum": 0, 2195 | "memoryGiB": 8, 2196 | "name": "ml.m5.large", 2197 | "vcpuNum": 2 2198 | }, 2199 | { 2200 | "_defaultOrder": 5, 2201 | "_isFastLaunch": false, 2202 | "category": "General purpose", 2203 | "gpuNum": 0, 2204 | "memoryGiB": 16, 2205 | "name": "ml.m5.xlarge", 2206 | "vcpuNum": 4 2207 | }, 2208 | { 2209 | "_defaultOrder": 6, 2210 | "_isFastLaunch": false, 2211 | "category": "General purpose", 2212 | "gpuNum": 0, 2213 | "memoryGiB": 32, 2214 | "name": "ml.m5.2xlarge", 2215 | "vcpuNum": 8 2216 | }, 2217 | { 2218 | "_defaultOrder": 7, 2219 | "_isFastLaunch": false, 2220 | "category": "General purpose", 2221 | "gpuNum": 0, 2222 | "memoryGiB": 64, 2223 | "name": "ml.m5.4xlarge", 2224 | "vcpuNum": 16 2225 | }, 2226 | { 2227 | "_defaultOrder": 8, 2228 | "_isFastLaunch": false, 2229 | "category": "General 
purpose", 2230 | "gpuNum": 0, 2231 | "memoryGiB": 128, 2232 | "name": "ml.m5.8xlarge", 2233 | "vcpuNum": 32 2234 | }, 2235 | { 2236 | "_defaultOrder": 9, 2237 | "_isFastLaunch": false, 2238 | "category": "General purpose", 2239 | "gpuNum": 0, 2240 | "memoryGiB": 192, 2241 | "name": "ml.m5.12xlarge", 2242 | "vcpuNum": 48 2243 | }, 2244 | { 2245 | "_defaultOrder": 10, 2246 | "_isFastLaunch": false, 2247 | "category": "General purpose", 2248 | "gpuNum": 0, 2249 | "memoryGiB": 256, 2250 | "name": "ml.m5.16xlarge", 2251 | "vcpuNum": 64 2252 | }, 2253 | { 2254 | "_defaultOrder": 11, 2255 | "_isFastLaunch": false, 2256 | "category": "General purpose", 2257 | "gpuNum": 0, 2258 | "memoryGiB": 384, 2259 | "name": "ml.m5.24xlarge", 2260 | "vcpuNum": 96 2261 | }, 2262 | { 2263 | "_defaultOrder": 12, 2264 | "_isFastLaunch": false, 2265 | "category": "General purpose", 2266 | "gpuNum": 0, 2267 | "memoryGiB": 8, 2268 | "name": "ml.m5d.large", 2269 | "vcpuNum": 2 2270 | }, 2271 | { 2272 | "_defaultOrder": 13, 2273 | "_isFastLaunch": false, 2274 | "category": "General purpose", 2275 | "gpuNum": 0, 2276 | "memoryGiB": 16, 2277 | "name": "ml.m5d.xlarge", 2278 | "vcpuNum": 4 2279 | }, 2280 | { 2281 | "_defaultOrder": 14, 2282 | "_isFastLaunch": false, 2283 | "category": "General purpose", 2284 | "gpuNum": 0, 2285 | "memoryGiB": 32, 2286 | "name": "ml.m5d.2xlarge", 2287 | "vcpuNum": 8 2288 | }, 2289 | { 2290 | "_defaultOrder": 15, 2291 | "_isFastLaunch": false, 2292 | "category": "General purpose", 2293 | "gpuNum": 0, 2294 | "memoryGiB": 64, 2295 | "name": "ml.m5d.4xlarge", 2296 | "vcpuNum": 16 2297 | }, 2298 | { 2299 | "_defaultOrder": 16, 2300 | "_isFastLaunch": false, 2301 | "category": "General purpose", 2302 | "gpuNum": 0, 2303 | "memoryGiB": 128, 2304 | "name": "ml.m5d.8xlarge", 2305 | "vcpuNum": 32 2306 | }, 2307 | { 2308 | "_defaultOrder": 17, 2309 | "_isFastLaunch": false, 2310 | "category": "General purpose", 2311 | "gpuNum": 0, 2312 | "memoryGiB": 192, 2313 | "name": 
"ml.m5d.12xlarge", 2314 | "vcpuNum": 48 2315 | }, 2316 | { 2317 | "_defaultOrder": 18, 2318 | "_isFastLaunch": false, 2319 | "category": "General purpose", 2320 | "gpuNum": 0, 2321 | "memoryGiB": 256, 2322 | "name": "ml.m5d.16xlarge", 2323 | "vcpuNum": 64 2324 | }, 2325 | { 2326 | "_defaultOrder": 19, 2327 | "_isFastLaunch": false, 2328 | "category": "General purpose", 2329 | "gpuNum": 0, 2330 | "memoryGiB": 384, 2331 | "name": "ml.m5d.24xlarge", 2332 | "vcpuNum": 96 2333 | }, 2334 | { 2335 | "_defaultOrder": 20, 2336 | "_isFastLaunch": true, 2337 | "category": "Compute optimized", 2338 | "gpuNum": 0, 2339 | "memoryGiB": 4, 2340 | "name": "ml.c5.large", 2341 | "vcpuNum": 2 2342 | }, 2343 | { 2344 | "_defaultOrder": 21, 2345 | "_isFastLaunch": false, 2346 | "category": "Compute optimized", 2347 | "gpuNum": 0, 2348 | "memoryGiB": 8, 2349 | "name": "ml.c5.xlarge", 2350 | "vcpuNum": 4 2351 | }, 2352 | { 2353 | "_defaultOrder": 22, 2354 | "_isFastLaunch": false, 2355 | "category": "Compute optimized", 2356 | "gpuNum": 0, 2357 | "memoryGiB": 16, 2358 | "name": "ml.c5.2xlarge", 2359 | "vcpuNum": 8 2360 | }, 2361 | { 2362 | "_defaultOrder": 23, 2363 | "_isFastLaunch": false, 2364 | "category": "Compute optimized", 2365 | "gpuNum": 0, 2366 | "memoryGiB": 32, 2367 | "name": "ml.c5.4xlarge", 2368 | "vcpuNum": 16 2369 | }, 2370 | { 2371 | "_defaultOrder": 24, 2372 | "_isFastLaunch": false, 2373 | "category": "Compute optimized", 2374 | "gpuNum": 0, 2375 | "memoryGiB": 72, 2376 | "name": "ml.c5.9xlarge", 2377 | "vcpuNum": 36 2378 | }, 2379 | { 2380 | "_defaultOrder": 25, 2381 | "_isFastLaunch": false, 2382 | "category": "Compute optimized", 2383 | "gpuNum": 0, 2384 | "memoryGiB": 96, 2385 | "name": "ml.c5.12xlarge", 2386 | "vcpuNum": 48 2387 | }, 2388 | { 2389 | "_defaultOrder": 26, 2390 | "_isFastLaunch": false, 2391 | "category": "Compute optimized", 2392 | "gpuNum": 0, 2393 | "memoryGiB": 144, 2394 | "name": "ml.c5.18xlarge", 2395 | "vcpuNum": 72 2396 | }, 2397 | { 2398 | 
"_defaultOrder": 27, 2399 | "_isFastLaunch": false, 2400 | "category": "Compute optimized", 2401 | "gpuNum": 0, 2402 | "memoryGiB": 192, 2403 | "name": "ml.c5.24xlarge", 2404 | "vcpuNum": 96 2405 | }, 2406 | { 2407 | "_defaultOrder": 28, 2408 | "_isFastLaunch": true, 2409 | "category": "Accelerated computing", 2410 | "gpuNum": 1, 2411 | "memoryGiB": 16, 2412 | "name": "ml.g4dn.xlarge", 2413 | "vcpuNum": 4 2414 | }, 2415 | { 2416 | "_defaultOrder": 29, 2417 | "_isFastLaunch": false, 2418 | "category": "Accelerated computing", 2419 | "gpuNum": 1, 2420 | "memoryGiB": 32, 2421 | "name": "ml.g4dn.2xlarge", 2422 | "vcpuNum": 8 2423 | }, 2424 | { 2425 | "_defaultOrder": 30, 2426 | "_isFastLaunch": false, 2427 | "category": "Accelerated computing", 2428 | "gpuNum": 1, 2429 | "memoryGiB": 64, 2430 | "name": "ml.g4dn.4xlarge", 2431 | "vcpuNum": 16 2432 | }, 2433 | { 2434 | "_defaultOrder": 31, 2435 | "_isFastLaunch": false, 2436 | "category": "Accelerated computing", 2437 | "gpuNum": 1, 2438 | "memoryGiB": 128, 2439 | "name": "ml.g4dn.8xlarge", 2440 | "vcpuNum": 32 2441 | }, 2442 | { 2443 | "_defaultOrder": 32, 2444 | "_isFastLaunch": false, 2445 | "category": "Accelerated computing", 2446 | "gpuNum": 4, 2447 | "memoryGiB": 192, 2448 | "name": "ml.g4dn.12xlarge", 2449 | "vcpuNum": 48 2450 | }, 2451 | { 2452 | "_defaultOrder": 33, 2453 | "_isFastLaunch": false, 2454 | "category": "Accelerated computing", 2455 | "gpuNum": 1, 2456 | "memoryGiB": 256, 2457 | "name": "ml.g4dn.16xlarge", 2458 | "vcpuNum": 64 2459 | }, 2460 | { 2461 | "_defaultOrder": 34, 2462 | "_isFastLaunch": false, 2463 | "category": "Accelerated computing", 2464 | "gpuNum": 1, 2465 | "memoryGiB": 61, 2466 | "name": "ml.p3.2xlarge", 2467 | "vcpuNum": 8 2468 | }, 2469 | { 2470 | "_defaultOrder": 35, 2471 | "_isFastLaunch": false, 2472 | "category": "Accelerated computing", 2473 | "gpuNum": 4, 2474 | "memoryGiB": 244, 2475 | "name": "ml.p3.8xlarge", 2476 | "vcpuNum": 32 2477 | }, 2478 | { 2479 | "_defaultOrder": 
36, 2480 | "_isFastLaunch": false, 2481 | "category": "Accelerated computing", 2482 | "gpuNum": 8, 2483 | "memoryGiB": 488, 2484 | "name": "ml.p3.16xlarge", 2485 | "vcpuNum": 64 2486 | }, 2487 | { 2488 | "_defaultOrder": 37, 2489 | "_isFastLaunch": false, 2490 | "category": "Accelerated computing", 2491 | "gpuNum": 8, 2492 | "memoryGiB": 768, 2493 | "name": "ml.p3dn.24xlarge", 2494 | "vcpuNum": 96 2495 | }, 2496 | { 2497 | "_defaultOrder": 38, 2498 | "_isFastLaunch": false, 2499 | "category": "Memory Optimized", 2500 | "gpuNum": 0, 2501 | "memoryGiB": 16, 2502 | "name": "ml.r5.large", 2503 | "vcpuNum": 2 2504 | }, 2505 | { 2506 | "_defaultOrder": 39, 2507 | "_isFastLaunch": false, 2508 | "category": "Memory Optimized", 2509 | "gpuNum": 0, 2510 | "memoryGiB": 32, 2511 | "name": "ml.r5.xlarge", 2512 | "vcpuNum": 4 2513 | }, 2514 | { 2515 | "_defaultOrder": 40, 2516 | "_isFastLaunch": false, 2517 | "category": "Memory Optimized", 2518 | "gpuNum": 0, 2519 | "memoryGiB": 64, 2520 | "name": "ml.r5.2xlarge", 2521 | "vcpuNum": 8 2522 | }, 2523 | { 2524 | "_defaultOrder": 41, 2525 | "_isFastLaunch": false, 2526 | "category": "Memory Optimized", 2527 | "gpuNum": 0, 2528 | "memoryGiB": 128, 2529 | "name": "ml.r5.4xlarge", 2530 | "vcpuNum": 16 2531 | }, 2532 | { 2533 | "_defaultOrder": 42, 2534 | "_isFastLaunch": false, 2535 | "category": "Memory Optimized", 2536 | "gpuNum": 0, 2537 | "memoryGiB": 256, 2538 | "name": "ml.r5.8xlarge", 2539 | "vcpuNum": 32 2540 | }, 2541 | { 2542 | "_defaultOrder": 43, 2543 | "_isFastLaunch": false, 2544 | "category": "Memory Optimized", 2545 | "gpuNum": 0, 2546 | "memoryGiB": 384, 2547 | "name": "ml.r5.12xlarge", 2548 | "vcpuNum": 48 2549 | }, 2550 | { 2551 | "_defaultOrder": 44, 2552 | "_isFastLaunch": false, 2553 | "category": "Memory Optimized", 2554 | "gpuNum": 0, 2555 | "memoryGiB": 512, 2556 | "name": "ml.r5.16xlarge", 2557 | "vcpuNum": 64 2558 | }, 2559 | { 2560 | "_defaultOrder": 45, 2561 | "_isFastLaunch": false, 2562 | "category": 
"Memory Optimized", 2563 | "gpuNum": 0, 2564 | "memoryGiB": 768, 2565 | "name": "ml.r5.24xlarge", 2566 | "vcpuNum": 96 2567 | }, 2568 | { 2569 | "_defaultOrder": 46, 2570 | "_isFastLaunch": false, 2571 | "category": "Accelerated computing", 2572 | "gpuNum": 1, 2573 | "memoryGiB": 16, 2574 | "name": "ml.g5.xlarge", 2575 | "vcpuNum": 4 2576 | }, 2577 | { 2578 | "_defaultOrder": 47, 2579 | "_isFastLaunch": false, 2580 | "category": "Accelerated computing", 2581 | "gpuNum": 1, 2582 | "memoryGiB": 32, 2583 | "name": "ml.g5.2xlarge", 2584 | "vcpuNum": 8 2585 | }, 2586 | { 2587 | "_defaultOrder": 48, 2588 | "_isFastLaunch": false, 2589 | "category": "Accelerated computing", 2590 | "gpuNum": 1, 2591 | "memoryGiB": 64, 2592 | "name": "ml.g5.4xlarge", 2593 | "vcpuNum": 16 2594 | }, 2595 | { 2596 | "_defaultOrder": 49, 2597 | "_isFastLaunch": false, 2598 | "category": "Accelerated computing", 2599 | "gpuNum": 1, 2600 | "memoryGiB": 128, 2601 | "name": "ml.g5.8xlarge", 2602 | "vcpuNum": 32 2603 | }, 2604 | { 2605 | "_defaultOrder": 50, 2606 | "_isFastLaunch": false, 2607 | "category": "Accelerated computing", 2608 | "gpuNum": 1, 2609 | "memoryGiB": 256, 2610 | "name": "ml.g5.16xlarge", 2611 | "vcpuNum": 64 2612 | }, 2613 | { 2614 | "_defaultOrder": 51, 2615 | "_isFastLaunch": false, 2616 | "category": "Accelerated computing", 2617 | "gpuNum": 4, 2618 | "memoryGiB": 192, 2619 | "name": "ml.g5.12xlarge", 2620 | "vcpuNum": 48 2621 | }, 2622 | { 2623 | "_defaultOrder": 52, 2624 | "_isFastLaunch": false, 2625 | "category": "Accelerated computing", 2626 | "gpuNum": 4, 2627 | "memoryGiB": 384, 2628 | "name": "ml.g5.24xlarge", 2629 | "vcpuNum": 96 2630 | }, 2631 | { 2632 | "_defaultOrder": 53, 2633 | "_isFastLaunch": false, 2634 | "category": "Accelerated computing", 2635 | "gpuNum": 8, 2636 | "memoryGiB": 768, 2637 | "name": "ml.g5.48xlarge", 2638 | "vcpuNum": 192 2639 | } 2640 | ], 2641 | "instance_type": "ml.t3.medium", 2642 | "kernelspec": { 2643 | "display_name": "Python 3 (Data 
Science)", 2644 | "language": "python", 2645 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-south-1:394103062818:image/datascience-1.0" 2646 | }, 2647 | "language_info": { 2648 | "codemirror_mode": { 2649 | "name": "ipython", 2650 | "version": 3 2651 | }, 2652 | "file_extension": ".py", 2653 | "mimetype": "text/x-python", 2654 | "name": "python", 2655 | "nbconvert_exporter": "python", 2656 | "pygments_lexer": "ipython3", 2657 | "version": "3.7.10" 2658 | } 2659 | }, 2660 | "nbformat": 4, 2661 | "nbformat_minor": 4 2662 | } 2663 | -------------------------------------------------------------------------------- /Tutorial - 1 Sagemaker SKLearn Custom Script Mode/yt_thumb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spidy20/Sagemaker-Tutorials/0c838658262679f81fce8d2ea95b4b5df20b1a18/Tutorial - 1 Sagemaker SKLearn Custom Script Mode/yt_thumb.jpg -------------------------------------------------------------------------------- /Tutorial - 2 Create Rest API for Sagemaker Endpoint/README.md: -------------------------------------------------------------------------------- 1 | ## Tutorial - 2 Create Rest API for Sagemaker Endpoint 2 | 3 | ### [Watch this tutorial►](https://youtu.be/hLzEHsUSHq4) 4 | 5 | 6 | 7 | - In this tutorial, I explain how to create a Rest API for a Sagemaker Endpoint using AWS Lambda and API Gateway. 8 | 9 | ### Steps that we followed! 10 | 11 | 1. Deploy the Sagemaker Endpoint. 12 | 2. Create Lambda Function 13 | 3. Create Rest API in API Gateway 14 | 4. Deploy API 15 | 5. Test API using Postman and Python Script 16 | 17 | ### AWS Services that we used! 18 | - Sagemaker Endpoint (for the machine learning model). 19 | - AWS Lambda (for processing requests from the API and getting the prediction from the Sagemaker Endpoint). 20 | - API Gateway (for creating the Rest API). 21 | 22 | ### Give a Star⭐ to this repository and fork it to support me. 
23 | 24 | ### [Buy me a Coffee☕](https://www.buymeacoffee.com/spidy20) 25 | ### [Donate to me on PayPal (it will inspire me to do more projects)](https://www.paypal.me/spidy1820) 26 | 27 | -------------------------------------------------------------------------------- /Tutorial - 2 Create Rest API for Sagemaker Endpoint/Tutorial - 2 Create Rest API.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a11909f4-105c-446d-9c6d-424b9469c324", 6 | "metadata": {}, 7 | "source": [ 8 | "# Sagemaker Tutorial Series" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "ccecae11-a29c-41ed-9560-181e75cff408", 14 | "metadata": {}, 15 | "source": [ 16 | "## Tutorial - 2 Create Rest API for Sagemaker Endpoint using Lambda & API Gateway" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "92bbb33f-36d5-4b18-b7cd-aa954d6a146c", 22 | "metadata": {}, 23 | "source": [ 24 | "### Let's divide the workload:\n", 25 | "\n", 26 | "1. Deploy the Sagemaker Endpoint. \n", 27 | "2. Create Lambda Function\n", 28 | "3. Create Rest API in API Gateway\n", 29 | "4. Deploy API\n", 30 | "5. Test API using Postman and Python Script" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "70b8d54e-49f1-459f-aff5-d228f2d7cd27", 36 | "metadata": {}, 37 | "source": [ 38 | "### 1. Deploy the Sagemaker Endpoint. \n", 39 | "- Refer to the Tutorial-1 notebook and deploy the endpoint from there. Please use the training script below as a reference. It contains custom inference functions; in the previous tutorial we used the default inference functions. 
" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "d242fbec-e8ce-4458-b09d-bf3adadeda6b", 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "%%writefile script.py\n", 50 | "\n", 51 | "\n", 52 | "from sklearn.ensemble import RandomForestClassifier\n", 53 | "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score, roc_curve, auc\n", 54 | "import sklearn\n", 55 | "import joblib\n", 56 | "import boto3\n", 57 | "import pathlib\n", 58 | "from io import StringIO \n", 59 | "import argparse\n", 60 | "import joblib\n", 61 | "import os\n", 62 | "import numpy as np\n", 63 | "import pandas as pd\n", 64 | "\n", 65 | "# inference functions ---------------\n", 66 | "\n", 67 | "def input_fn(request_body, request_content_type):\n", 68 | " print(request_body)\n", 69 | " print(request_content_type)\n", 70 | " if request_content_type == \"text/csv\":\n", 71 | " request_body = request_body.strip()\n", 72 | " try:\n", 73 | " df = pd.read_csv(StringIO(request_body), header=None)\n", 74 | " return df\n", 75 | " \n", 76 | " except Exception as e:\n", 77 | " print(e)\n", 78 | " else:\n", 79 | " return \"\"\"Please use Content-Type = 'text/csv' and, send the request!!\"\"\" \n", 80 | " \n", 81 | " \n", 82 | "def model_fn(model_dir):\n", 83 | " clf = joblib.load(os.path.join(model_dir, \"model.joblib\"))\n", 84 | " return clf\n", 85 | "\n", 86 | "def predict_fn(input_data, model):\n", 87 | " if type(input_data) != str:\n", 88 | " prediction = model.predict(input_data)\n", 89 | " print(prediction)\n", 90 | " return prediction\n", 91 | " else:\n", 92 | " return input_data\n", 93 | " \n", 94 | " \n", 95 | "if __name__ == \"__main__\":\n", 96 | "\n", 97 | " print(\"[INFO] Extracting arguments\")\n", 98 | " parser = argparse.ArgumentParser()\n", 99 | "\n", 100 | " # hyperparameters sent by the client are passed as command-line arguments to the script.\n", 101 | " 
parser.add_argument(\"--n_estimators\", type=int, default=100)\n", 102 | " parser.add_argument(\"--random_state\", type=int, default=0)\n", 103 | "\n", 104 | " # Data, model, and output directories\n", 105 | " parser.add_argument(\"--model-dir\", type=str, default=os.environ.get(\"SM_MODEL_DIR\"))\n", 106 | " parser.add_argument(\"--train\", type=str, default=os.environ.get(\"SM_CHANNEL_TRAIN\"))\n", 107 | " parser.add_argument(\"--test\", type=str, default=os.environ.get(\"SM_CHANNEL_TEST\"))\n", 108 | " parser.add_argument(\"--train-file\", type=str, default=\"train-V-1.csv\")\n", 109 | " parser.add_argument(\"--test-file\", type=str, default=\"test-V-1.csv\")\n", 110 | "\n", 111 | " args, _ = parser.parse_known_args()\n", 112 | " \n", 113 | " print(\"SKLearn Version: \", sklearn.__version__)\n", 114 | " print(\"Joblib Version: \", joblib.__version__)\n", 115 | "\n", 116 | " print(\"[INFO] Reading data\")\n", 117 | " print()\n", 118 | " train_df = pd.read_csv(os.path.join(args.train, args.train_file))\n", 119 | " test_df = pd.read_csv(os.path.join(args.test, args.test_file))\n", 120 | " \n", 121 | " features = list(train_df.columns)\n", 122 | " label = features.pop(-1)\n", 123 | " \n", 124 | " print(\"Building training and testing datasets\")\n", 125 | " print()\n", 126 | " X_train = train_df[features]\n", 127 | " X_test = test_df[features]\n", 128 | " y_train = train_df[label]\n", 129 | " y_test = test_df[label]\n", 130 | "\n", 131 | " print('Column order: ')\n", 132 | " print(features)\n", 133 | " print()\n", 134 | " \n", 135 | " print(\"Label column is: \",label)\n", 136 | " print()\n", 137 | " \n", 138 | " print(\"Data Shape: \")\n", 139 | " print()\n", 140 | " print(\"---- SHAPE OF TRAINING DATA (85%) ----\")\n", 141 | " print(X_train.shape)\n", 142 | " print(y_train.shape)\n", 143 | " print()\n", 144 | " print(\"---- SHAPE OF TESTING DATA (15%) ----\")\n", 145 | " print(X_test.shape)\n", 146 | " print(y_test.shape)\n", 147 | " print()\n", 148 | " \n", 149 | 
" \n", 150 | " print(\"Training RandomForest Model.....\")\n", 151 | " print()\n", 152 | " model = RandomForestClassifier(n_estimators=args.n_estimators, random_state=args.random_state, verbose = 3,n_jobs=-1)\n", 153 | " model.fit(X_train, y_train)\n", 154 | " print()\n", 155 | " \n", 156 | "\n", 157 | " model_path = os.path.join(args.model_dir, \"model.joblib\")\n", 158 | " joblib.dump(model,model_path)\n", 159 | " print(\"Model persisted at \" + model_path)\n", 160 | " print()\n", 161 | "\n", 162 | " \n", 163 | " y_pred_test = model.predict(X_test)\n", 164 | " test_acc = accuracy_score(y_test,y_pred_test)\n", 165 | " test_rep = classification_report(y_test,y_pred_test)\n", 166 | "\n", 167 | " print()\n", 168 | " print(\"---- METRICS RESULTS FOR TESTING DATA ----\")\n", 169 | " print()\n", 170 | " print(\"Total Rows are: \", X_test.shape[0])\n", 171 | " print('[TESTING] Model Accuracy is: ', test_acc)\n", 172 | " print('[TESTING] Testing Report: ')\n", 173 | " print(test_rep)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "id": "d36bb40a-38d7-46b2-84af-ef9892904c7e", 179 | "metadata": { 180 | "tags": [] 181 | }, 182 | "source": [ 183 | "### 2. Create Lambda Function (Example Code)\n", 184 | "- Follow Tutorial video for more clarity." 
185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "id": "26126b33-f53f-4bc7-8505-8e403a686b37", 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "import os\n", 195 | "import io\n", 196 | "import boto3\n", 197 | "import json\n", 198 | "import csv\n", 199 | "\n", 200 | "# grab environment variables\n", 201 | "ENDPOINT_NAME = os.environ['ENDPOINT_NAME']\n", 202 | "runtime= boto3.client('runtime.sagemaker')\n", 203 | "\n", 204 | "\n", 205 | "def lambda_handler(event, context):\n", 206 | " # TODO implement\n", 207 | " payload = json.loads(json.dumps(event))\n", 208 | " payload_data = str(payload['body'])\n", 209 | " print(payload_data)\n", 210 | " response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,\n", 211 | " ContentType='text/csv',\n", 212 | " Body=payload_data)\n", 213 | " result = json.loads(response['Body'].read().decode())\n", 214 | " preds = {\"Prediction\": result}\n", 215 | " response_dict = {\n", 216 | " \"statusCode\": 200,\n", 217 | " \"body\": json.dumps(preds)\n", 218 | " }\n", 219 | " return response_dict" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "id": "e8fe7eef-af66-446c-bf76-ece04883496e", 225 | "metadata": {}, 226 | "source": [ 227 | "- Attach this policy to Lambda Execution Role" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "id": "9a79657f-233a-4875-a2a3-35e41f302d06", 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "{\n", 238 | " \"Version\": \"2012-10-17\",\n", 239 | " \"Statement\": [\n", 240 | " {\n", 241 | " \"Sid\": \"VisualEditor0\",\n", 242 | " \"Effect\": \"Allow\",\n", 243 | " \"Action\": \"sagemaker:InvokeEndpoint\",\n", 244 | " \"Resource\": \"*\"\n", 245 | " }\n", 246 | " ]\n", 247 | "}" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "id": "e427ae4f-1f14-4bfe-8cf8-536fbbbd984a", 253 | "metadata": {}, 254 | "source": [ 255 | "### 3. 
Create Rest API in API Gateway\n", 256 | "- Follow the tutorial video for more clarity." 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "id": "571fc630-45d8-4952-898b-35218ec4aff9", 262 | "metadata": {}, 263 | "source": [ 264 | "### 4. Deploy API\n", 265 | "- In this step we deploy our Rest API to a stage, which gives us an API URL. " 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "id": "3daa7a84-fafa-4934-927c-6169d812c51d", 271 | "metadata": {}, 272 | "source": [ 273 | "### 5. Test API using Postman and Python Script\n", 274 | "- Our API is now running in the stage, so we need to test it.\n", 275 | "- We can use both Postman and a Python script to check the API results. " 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 5, 281 | "id": "bfb14b39-cac6-4942-8ead-52b6589c0daf", 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "{\"Prediction\": [3]}\n" 289 | ] 290 | } 291 | ], 292 | "source": [ 293 | "import requests\n", 294 | "\n", 295 | "url = \"------YOUR API URL--------\"\n", 296 | "\n", 297 | "payload = \"1454.0,1.0,0.5,1.0,1.0,0.0,34.0,0.7,83.0,4.0,3.0,250.0,1033.0,3419.0,7.0,5.0,5.0,1.0,1.0,0.0\"\n", 298 | "\n", 299 | "headers = {\n", 300 | " 'Content-Type': 'text/csv'\n", 301 | "}\n", 302 | "\n", 303 | "response = requests.request(\"POST\", url, headers=headers, data=payload)\n", 304 | "\n", 305 | "print(response.text)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "id": "f3260657-1333-48ca-a4bb-9a06845b0d0a", 311 | "metadata": {}, 312 | "source": [ 313 | "### Don't forget to subscribe to the Machine Learning Hub YouTube channel. 
" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "id": "d226eca4-68f0-47e4-a356-564d0e94f4a0", 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [] 323 | } 324 | ], 325 | "metadata": { 326 | "availableInstances": [ 327 | { 328 | "_defaultOrder": 0, 329 | "_isFastLaunch": true, 330 | "category": "General purpose", 331 | "gpuNum": 0, 332 | "memoryGiB": 4, 333 | "name": "ml.t3.medium", 334 | "vcpuNum": 2 335 | }, 336 | { 337 | "_defaultOrder": 1, 338 | "_isFastLaunch": false, 339 | "category": "General purpose", 340 | "gpuNum": 0, 341 | "memoryGiB": 8, 342 | "name": "ml.t3.large", 343 | "vcpuNum": 2 344 | }, 345 | { 346 | "_defaultOrder": 2, 347 | "_isFastLaunch": false, 348 | "category": "General purpose", 349 | "gpuNum": 0, 350 | "memoryGiB": 16, 351 | "name": "ml.t3.xlarge", 352 | "vcpuNum": 4 353 | }, 354 | { 355 | "_defaultOrder": 3, 356 | "_isFastLaunch": false, 357 | "category": "General purpose", 358 | "gpuNum": 0, 359 | "memoryGiB": 32, 360 | "name": "ml.t3.2xlarge", 361 | "vcpuNum": 8 362 | }, 363 | { 364 | "_defaultOrder": 4, 365 | "_isFastLaunch": true, 366 | "category": "General purpose", 367 | "gpuNum": 0, 368 | "memoryGiB": 8, 369 | "name": "ml.m5.large", 370 | "vcpuNum": 2 371 | }, 372 | { 373 | "_defaultOrder": 5, 374 | "_isFastLaunch": false, 375 | "category": "General purpose", 376 | "gpuNum": 0, 377 | "memoryGiB": 16, 378 | "name": "ml.m5.xlarge", 379 | "vcpuNum": 4 380 | }, 381 | { 382 | "_defaultOrder": 6, 383 | "_isFastLaunch": false, 384 | "category": "General purpose", 385 | "gpuNum": 0, 386 | "memoryGiB": 32, 387 | "name": "ml.m5.2xlarge", 388 | "vcpuNum": 8 389 | }, 390 | { 391 | "_defaultOrder": 7, 392 | "_isFastLaunch": false, 393 | "category": "General purpose", 394 | "gpuNum": 0, 395 | "memoryGiB": 64, 396 | "name": "ml.m5.4xlarge", 397 | "vcpuNum": 16 398 | }, 399 | { 400 | "_defaultOrder": 8, 401 | "_isFastLaunch": false, 402 | "category": "General purpose", 403 | "gpuNum": 0, 404 | 
"memoryGiB": 128, 405 | "name": "ml.m5.8xlarge", 406 | "vcpuNum": 32 407 | }, 408 | { 409 | "_defaultOrder": 9, 410 | "_isFastLaunch": false, 411 | "category": "General purpose", 412 | "gpuNum": 0, 413 | "memoryGiB": 192, 414 | "name": "ml.m5.12xlarge", 415 | "vcpuNum": 48 416 | }, 417 | { 418 | "_defaultOrder": 10, 419 | "_isFastLaunch": false, 420 | "category": "General purpose", 421 | "gpuNum": 0, 422 | "memoryGiB": 256, 423 | "name": "ml.m5.16xlarge", 424 | "vcpuNum": 64 425 | }, 426 | { 427 | "_defaultOrder": 11, 428 | "_isFastLaunch": false, 429 | "category": "General purpose", 430 | "gpuNum": 0, 431 | "memoryGiB": 384, 432 | "name": "ml.m5.24xlarge", 433 | "vcpuNum": 96 434 | }, 435 | { 436 | "_defaultOrder": 12, 437 | "_isFastLaunch": false, 438 | "category": "General purpose", 439 | "gpuNum": 0, 440 | "memoryGiB": 8, 441 | "name": "ml.m5d.large", 442 | "vcpuNum": 2 443 | }, 444 | { 445 | "_defaultOrder": 13, 446 | "_isFastLaunch": false, 447 | "category": "General purpose", 448 | "gpuNum": 0, 449 | "memoryGiB": 16, 450 | "name": "ml.m5d.xlarge", 451 | "vcpuNum": 4 452 | }, 453 | { 454 | "_defaultOrder": 14, 455 | "_isFastLaunch": false, 456 | "category": "General purpose", 457 | "gpuNum": 0, 458 | "memoryGiB": 32, 459 | "name": "ml.m5d.2xlarge", 460 | "vcpuNum": 8 461 | }, 462 | { 463 | "_defaultOrder": 15, 464 | "_isFastLaunch": false, 465 | "category": "General purpose", 466 | "gpuNum": 0, 467 | "memoryGiB": 64, 468 | "name": "ml.m5d.4xlarge", 469 | "vcpuNum": 16 470 | }, 471 | { 472 | "_defaultOrder": 16, 473 | "_isFastLaunch": false, 474 | "category": "General purpose", 475 | "gpuNum": 0, 476 | "memoryGiB": 128, 477 | "name": "ml.m5d.8xlarge", 478 | "vcpuNum": 32 479 | }, 480 | { 481 | "_defaultOrder": 17, 482 | "_isFastLaunch": false, 483 | "category": "General purpose", 484 | "gpuNum": 0, 485 | "memoryGiB": 192, 486 | "name": "ml.m5d.12xlarge", 487 | "vcpuNum": 48 488 | }, 489 | { 490 | "_defaultOrder": 18, 491 | "_isFastLaunch": false, 492 | 
"category": "General purpose", 493 | "gpuNum": 0, 494 | "memoryGiB": 256, 495 | "name": "ml.m5d.16xlarge", 496 | "vcpuNum": 64 497 | }, 498 | { 499 | "_defaultOrder": 19, 500 | "_isFastLaunch": false, 501 | "category": "General purpose", 502 | "gpuNum": 0, 503 | "memoryGiB": 384, 504 | "name": "ml.m5d.24xlarge", 505 | "vcpuNum": 96 506 | }, 507 | { 508 | "_defaultOrder": 20, 509 | "_isFastLaunch": true, 510 | "category": "Compute optimized", 511 | "gpuNum": 0, 512 | "memoryGiB": 4, 513 | "name": "ml.c5.large", 514 | "vcpuNum": 2 515 | }, 516 | { 517 | "_defaultOrder": 21, 518 | "_isFastLaunch": false, 519 | "category": "Compute optimized", 520 | "gpuNum": 0, 521 | "memoryGiB": 8, 522 | "name": "ml.c5.xlarge", 523 | "vcpuNum": 4 524 | }, 525 | { 526 | "_defaultOrder": 22, 527 | "_isFastLaunch": false, 528 | "category": "Compute optimized", 529 | "gpuNum": 0, 530 | "memoryGiB": 16, 531 | "name": "ml.c5.2xlarge", 532 | "vcpuNum": 8 533 | }, 534 | { 535 | "_defaultOrder": 23, 536 | "_isFastLaunch": false, 537 | "category": "Compute optimized", 538 | "gpuNum": 0, 539 | "memoryGiB": 32, 540 | "name": "ml.c5.4xlarge", 541 | "vcpuNum": 16 542 | }, 543 | { 544 | "_defaultOrder": 24, 545 | "_isFastLaunch": false, 546 | "category": "Compute optimized", 547 | "gpuNum": 0, 548 | "memoryGiB": 72, 549 | "name": "ml.c5.9xlarge", 550 | "vcpuNum": 36 551 | }, 552 | { 553 | "_defaultOrder": 25, 554 | "_isFastLaunch": false, 555 | "category": "Compute optimized", 556 | "gpuNum": 0, 557 | "memoryGiB": 96, 558 | "name": "ml.c5.12xlarge", 559 | "vcpuNum": 48 560 | }, 561 | { 562 | "_defaultOrder": 26, 563 | "_isFastLaunch": false, 564 | "category": "Compute optimized", 565 | "gpuNum": 0, 566 | "memoryGiB": 144, 567 | "name": "ml.c5.18xlarge", 568 | "vcpuNum": 72 569 | }, 570 | { 571 | "_defaultOrder": 27, 572 | "_isFastLaunch": false, 573 | "category": "Compute optimized", 574 | "gpuNum": 0, 575 | "memoryGiB": 192, 576 | "name": "ml.c5.24xlarge", 577 | "vcpuNum": 96 578 | }, 579 | { 580 
| "_defaultOrder": 28, 581 | "_isFastLaunch": true, 582 | "category": "Accelerated computing", 583 | "gpuNum": 1, 584 | "memoryGiB": 16, 585 | "name": "ml.g4dn.xlarge", 586 | "vcpuNum": 4 587 | }, 588 | { 589 | "_defaultOrder": 29, 590 | "_isFastLaunch": false, 591 | "category": "Accelerated computing", 592 | "gpuNum": 1, 593 | "memoryGiB": 32, 594 | "name": "ml.g4dn.2xlarge", 595 | "vcpuNum": 8 596 | }, 597 | { 598 | "_defaultOrder": 30, 599 | "_isFastLaunch": false, 600 | "category": "Accelerated computing", 601 | "gpuNum": 1, 602 | "memoryGiB": 64, 603 | "name": "ml.g4dn.4xlarge", 604 | "vcpuNum": 16 605 | }, 606 | { 607 | "_defaultOrder": 31, 608 | "_isFastLaunch": false, 609 | "category": "Accelerated computing", 610 | "gpuNum": 1, 611 | "memoryGiB": 128, 612 | "name": "ml.g4dn.8xlarge", 613 | "vcpuNum": 32 614 | }, 615 | { 616 | "_defaultOrder": 32, 617 | "_isFastLaunch": false, 618 | "category": "Accelerated computing", 619 | "gpuNum": 4, 620 | "memoryGiB": 192, 621 | "name": "ml.g4dn.12xlarge", 622 | "vcpuNum": 48 623 | }, 624 | { 625 | "_defaultOrder": 33, 626 | "_isFastLaunch": false, 627 | "category": "Accelerated computing", 628 | "gpuNum": 1, 629 | "memoryGiB": 256, 630 | "name": "ml.g4dn.16xlarge", 631 | "vcpuNum": 64 632 | }, 633 | { 634 | "_defaultOrder": 34, 635 | "_isFastLaunch": false, 636 | "category": "Accelerated computing", 637 | "gpuNum": 1, 638 | "memoryGiB": 61, 639 | "name": "ml.p3.2xlarge", 640 | "vcpuNum": 8 641 | }, 642 | { 643 | "_defaultOrder": 35, 644 | "_isFastLaunch": false, 645 | "category": "Accelerated computing", 646 | "gpuNum": 4, 647 | "memoryGiB": 244, 648 | "name": "ml.p3.8xlarge", 649 | "vcpuNum": 32 650 | }, 651 | { 652 | "_defaultOrder": 36, 653 | "_isFastLaunch": false, 654 | "category": "Accelerated computing", 655 | "gpuNum": 8, 656 | "memoryGiB": 488, 657 | "name": "ml.p3.16xlarge", 658 | "vcpuNum": 64 659 | }, 660 | { 661 | "_defaultOrder": 37, 662 | "_isFastLaunch": false, 663 | "category": "Accelerated 
computing", 664 | "gpuNum": 8, 665 | "memoryGiB": 768, 666 | "name": "ml.p3dn.24xlarge", 667 | "vcpuNum": 96 668 | }, 669 | { 670 | "_defaultOrder": 38, 671 | "_isFastLaunch": false, 672 | "category": "Memory Optimized", 673 | "gpuNum": 0, 674 | "memoryGiB": 16, 675 | "name": "ml.r5.large", 676 | "vcpuNum": 2 677 | }, 678 | { 679 | "_defaultOrder": 39, 680 | "_isFastLaunch": false, 681 | "category": "Memory Optimized", 682 | "gpuNum": 0, 683 | "memoryGiB": 32, 684 | "name": "ml.r5.xlarge", 685 | "vcpuNum": 4 686 | }, 687 | { 688 | "_defaultOrder": 40, 689 | "_isFastLaunch": false, 690 | "category": "Memory Optimized", 691 | "gpuNum": 0, 692 | "memoryGiB": 64, 693 | "name": "ml.r5.2xlarge", 694 | "vcpuNum": 8 695 | }, 696 | { 697 | "_defaultOrder": 41, 698 | "_isFastLaunch": false, 699 | "category": "Memory Optimized", 700 | "gpuNum": 0, 701 | "memoryGiB": 128, 702 | "name": "ml.r5.4xlarge", 703 | "vcpuNum": 16 704 | }, 705 | { 706 | "_defaultOrder": 42, 707 | "_isFastLaunch": false, 708 | "category": "Memory Optimized", 709 | "gpuNum": 0, 710 | "memoryGiB": 256, 711 | "name": "ml.r5.8xlarge", 712 | "vcpuNum": 32 713 | }, 714 | { 715 | "_defaultOrder": 43, 716 | "_isFastLaunch": false, 717 | "category": "Memory Optimized", 718 | "gpuNum": 0, 719 | "memoryGiB": 384, 720 | "name": "ml.r5.12xlarge", 721 | "vcpuNum": 48 722 | }, 723 | { 724 | "_defaultOrder": 44, 725 | "_isFastLaunch": false, 726 | "category": "Memory Optimized", 727 | "gpuNum": 0, 728 | "memoryGiB": 512, 729 | "name": "ml.r5.16xlarge", 730 | "vcpuNum": 64 731 | }, 732 | { 733 | "_defaultOrder": 45, 734 | "_isFastLaunch": false, 735 | "category": "Memory Optimized", 736 | "gpuNum": 0, 737 | "memoryGiB": 768, 738 | "name": "ml.r5.24xlarge", 739 | "vcpuNum": 96 740 | }, 741 | { 742 | "_defaultOrder": 46, 743 | "_isFastLaunch": false, 744 | "category": "Accelerated computing", 745 | "gpuNum": 1, 746 | "memoryGiB": 16, 747 | "name": "ml.g5.xlarge", 748 | "vcpuNum": 4 749 | }, 750 | { 751 | "_defaultOrder": 
47, 752 | "_isFastLaunch": false, 753 | "category": "Accelerated computing", 754 | "gpuNum": 1, 755 | "memoryGiB": 32, 756 | "name": "ml.g5.2xlarge", 757 | "vcpuNum": 8 758 | }, 759 | { 760 | "_defaultOrder": 48, 761 | "_isFastLaunch": false, 762 | "category": "Accelerated computing", 763 | "gpuNum": 1, 764 | "memoryGiB": 64, 765 | "name": "ml.g5.4xlarge", 766 | "vcpuNum": 16 767 | }, 768 | { 769 | "_defaultOrder": 49, 770 | "_isFastLaunch": false, 771 | "category": "Accelerated computing", 772 | "gpuNum": 1, 773 | "memoryGiB": 128, 774 | "name": "ml.g5.8xlarge", 775 | "vcpuNum": 32 776 | }, 777 | { 778 | "_defaultOrder": 50, 779 | "_isFastLaunch": false, 780 | "category": "Accelerated computing", 781 | "gpuNum": 1, 782 | "memoryGiB": 256, 783 | "name": "ml.g5.16xlarge", 784 | "vcpuNum": 64 785 | }, 786 | { 787 | "_defaultOrder": 51, 788 | "_isFastLaunch": false, 789 | "category": "Accelerated computing", 790 | "gpuNum": 4, 791 | "memoryGiB": 192, 792 | "name": "ml.g5.12xlarge", 793 | "vcpuNum": 48 794 | }, 795 | { 796 | "_defaultOrder": 52, 797 | "_isFastLaunch": false, 798 | "category": "Accelerated computing", 799 | "gpuNum": 4, 800 | "memoryGiB": 384, 801 | "name": "ml.g5.24xlarge", 802 | "vcpuNum": 96 803 | }, 804 | { 805 | "_defaultOrder": 53, 806 | "_isFastLaunch": false, 807 | "category": "Accelerated computing", 808 | "gpuNum": 8, 809 | "memoryGiB": 768, 810 | "name": "ml.g5.48xlarge", 811 | "vcpuNum": 192 812 | } 813 | ], 814 | "instance_type": "ml.t3.medium", 815 | "kernelspec": { 816 | "display_name": "Python 3 (Data Science)", 817 | "language": "python", 818 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:ap-south-1:394103062818:image/datascience-1.0" 819 | }, 820 | "language_info": { 821 | "codemirror_mode": { 822 | "name": "ipython", 823 | "version": 3 824 | }, 825 | "file_extension": ".py", 826 | "mimetype": "text/x-python", 827 | "name": "python", 828 | "nbconvert_exporter": "python", 829 | "pygments_lexer": "ipython3", 830 | "version": 
"3.7.10" 831 | } 832 | }, 833 | "nbformat": 4, 834 | "nbformat_minor": 5 835 | } 836 | -------------------------------------------------------------------------------- /Tutorial - 2 Create Rest API for Sagemaker Endpoint/yt_thumb_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spidy20/Sagemaker-Tutorials/0c838658262679f81fce8d2ea95b4b5df20b1a18/Tutorial - 2 Create Rest API for Sagemaker Endpoint/yt_thumb_2.jpg -------------------------------------------------------------------------------- /Tutorial - 3 Sagemaker Build Custom Algorithm/Algo_Container/Decision_Tree/nginx.conf: -------------------------------------------------------------------------------- 1 | worker_processes 1; 2 | daemon off; # Prevent forking 3 | 4 | 5 | pid /tmp/nginx.pid; 6 | error_log /var/log/nginx/error.log; 7 | 8 | events { 9 | # defaults 10 | } 11 | 12 | http { 13 | include /etc/nginx/mime.types; 14 | default_type application/octet-stream; 15 | access_log /var/log/nginx/access.log combined; 16 | 17 | upstream gunicorn { 18 | server unix:/tmp/gunicorn.sock; 19 | } 20 | 21 | server { 22 | listen 8080 deferred; 23 | client_max_body_size 5m; 24 | 25 | keepalive_timeout 5; 26 | proxy_read_timeout 1200s; 27 | 28 | location ~ ^/(ping|invocations) { 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | proxy_set_header Host $http_host; 31 | proxy_redirect off; 32 | proxy_pass http://gunicorn; 33 | } 34 | 35 | location / { 36 | return 404 "{}"; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /Tutorial - 3 Sagemaker Build Custom Algorithm/Algo_Container/Decision_Tree/predictor.py: -------------------------------------------------------------------------------- 1 | # This is the file that implements a flask server to do inferences. It's the file that you will modify to 2 | # implement the scoring for your own algorithm. 
3 | 4 | from __future__ import print_function 5 | 6 | import io 7 | import json 8 | import os 9 | import pickle 10 | import signal 11 | import sys 12 | import traceback 13 | 14 | import flask 15 | import pandas as pd 16 | 17 | prefix = "/opt/ml/" 18 | model_path = os.path.join(prefix, "model") 19 | 20 | # A singleton for holding the model. This simply loads the model and holds it. 21 | # It has a predict function that does a prediction based on the model and the input data. 22 | 23 | 24 | class ScoringService(object): 25 | model = None # Where we keep the model when it's loaded 26 | 27 | @classmethod 28 | def get_model(cls): 29 | """Get the model object for this instance, loading it if it's not already loaded.""" 30 | if cls.model == None: 31 | with open(os.path.join(model_path, "decision-tree-model.pkl"), "rb") as inp: 32 | cls.model = pickle.load(inp) 33 | return cls.model 34 | 35 | @classmethod 36 | def predict(cls, input): 37 | """For the input, do the predictions and return them. 38 | 39 | Args: 40 | input (a pandas dataframe): The data on which to do the predictions. There will be 41 | one prediction per row in the dataframe""" 42 | clf = cls.get_model() 43 | return clf.predict(input) 44 | 45 | 46 | # The flask app for serving predictions 47 | app = flask.Flask(__name__) 48 | 49 | 50 | @app.route("/ping", methods=["GET"]) 51 | def ping(): 52 | """Determine if the container is working and healthy. In this sample container, we declare 53 | it healthy if we can load the model successfully.""" 54 | health = ScoringService.get_model() is not None # You can insert a health check here 55 | 56 | status = 200 if health else 404 57 | return flask.Response(response="\n", status=status, mimetype="application/json") 58 | 59 | 60 | @app.route("/invocations", methods=["POST"]) 61 | def transformation(): 62 | """Do an inference on a single batch of data. 
In this sample server, we take data as CSV, convert 63 | it to a pandas data frame for internal use and then convert the predictions back to CSV (which really 64 | just means one prediction per line, since there's a single column. 65 | """ 66 | data = None 67 | 68 | # Convert from CSV to pandas 69 | if flask.request.content_type == "text/csv": 70 | data = flask.request.data.decode("utf-8") 71 | s = io.StringIO(data) 72 | data = pd.read_csv(s, header=None) 73 | else: 74 | return flask.Response( 75 | response="This predictor only supports CSV data", status=415, mimetype="text/plain" 76 | ) 77 | 78 | print("Invoked with {} records".format(data.shape[0])) 79 | 80 | # Do the prediction 81 | predictions = ScoringService.predict(data) 82 | 83 | # Convert from numpy back to CSV 84 | out = io.StringIO() 85 | pd.DataFrame({"results": predictions}).to_csv(out, header=False, index=False) 86 | result = out.getvalue() 87 | 88 | return flask.Response(response=result, status=200, mimetype="text/csv") 89 | -------------------------------------------------------------------------------- /Tutorial - 3 Sagemaker Build Custom Algorithm/Algo_Container/Decision_Tree/serve: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file implements the scoring service shell. You don't necessarily need to modify it for various 4 | # algorithms. It starts nginx and gunicorn with the correct configurations and then simply waits until 5 | # gunicorn exits. 
#
# The flask server is specified to be the app object in wsgi.py
#
# We set the following parameters:
#
# Parameter                Environment Variable              Default Value
# ---------                --------------------              -------------
# number of workers        MODEL_SERVER_WORKERS              the number of CPU cores
# timeout                  MODEL_SERVER_TIMEOUT              60 seconds

import multiprocessing
import os
import signal
import subprocess
import sys

cpu_count = multiprocessing.cpu_count()

model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60)
model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count))


def sigterm_handler(nginx_pid, gunicorn_pid):
    """Signal both child processes to stop, then exit this process with status 0.

    Args:
        nginx_pid: PID that receives SIGQUIT.
        gunicorn_pid: PID that receives SIGTERM.

    Raises:
        SystemExit: always, with exit code 0.
    """
    for pid, sig in ((nginx_pid, signal.SIGQUIT), (gunicorn_pid, signal.SIGTERM)):
        try:
            os.kill(pid, sig)
        except OSError:
            # Best effort: the process may already have exited.
            pass

    sys.exit(0)


def start_server():
    """Launch nginx and gunicorn and wait until either of them exits.

    The nginx log files are symlinked to stdout/stderr first. Once one of the two
    children has exited, the other is signalled and this process exits through
    ``sigterm_handler``.
    """
    print('Starting the inference server with {} workers.'.format(model_server_workers))

    # link the log streams to stdout/err so they will be logged to the container logs
    subprocess.check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log'])
    subprocess.check_call(['ln', '-sf', '/dev/stderr', '/var/log/nginx/error.log'])

    nginx = subprocess.Popen(['nginx', '-c', '/opt/program/nginx.conf'])
    gunicorn = subprocess.Popen(['gunicorn',
                                 '--timeout', str(model_server_timeout),
                                 '-k', 'sync',
                                 '-b', 'unix:/tmp/gunicorn.sock',
                                 '-w', str(model_server_workers),
                                 'wsgi:app'])

    signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid))

    # If either subprocess exits, so do we.
    pids = {nginx.pid, gunicorn.pid}
    while True:
        pid, _ = os.wait()
        if pid in pids:
            break

    # sigterm_handler never returns (it ends in sys.exit), so the farewell message
    # has to be printed before calling it; printed afterwards it was unreachable.
    print('Inference server exiting')
    sigterm_handler(nginx.pid, gunicorn.pid)

# The main routine just invokes the start function.
68 | 69 | if __name__ == '__main__': 70 | start_server() 71 | -------------------------------------------------------------------------------- /Tutorial - 3 Sagemaker Build Custom Algorithm/Algo_Container/Decision_Tree/train: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | 5 | import json 6 | import os 7 | import pickle 8 | import sys 9 | import traceback 10 | 11 | import pandas as pd 12 | from sklearn import tree 13 | 14 | # Important path for sagemaker 15 | prefix = '/opt/ml/' 16 | 17 | input_path = prefix + 'input/data' 18 | output_path = os.path.join(prefix, 'output') 19 | model_path = os.path.join(prefix, 'model') 20 | param_path = os.path.join(prefix, 'input/config/hyperparameters.json') 21 | 22 | # Defining training channel 23 | channel_name = 'training' 24 | training_path = os.path.join(input_path, channel_name) 25 | 26 | 27 | # The function to execute the training. 28 | def train(): 29 | print('[INFO] Starting the training.....') 30 | try: 31 | # Read in any hyperparameters that the user passed with the training job 32 | with open(param_path, 'r') as tc: 33 | trainingParams = json.load(tc) 34 | print("[INFO] Hyperparameters: ", trainingParams) 35 | # Take the set of files and read them all into a single pandas dataframe 36 | input_files = [os.path.join(training_path, file) for file in os.listdir(training_path)] 37 | if len(input_files) == 0: 38 | raise ValueError(('There are no files in {}.\n' + 39 | 'This usually indicates that the channel ({}) was incorrectly specified,\n' + 40 | 'the data specification in S3 was incorrectly specified or the role specified\n' + 41 | 'does not have permission to access the data.').format(training_path, channel_name)) 42 | print("[INFO] Found following training files: ", input_files) 43 | raw_data = [pd.read_csv(file, header=None) for file in input_files if file.endswith(".csv")] 44 | train_data = 
pd.concat(raw_data) 45 | 46 | # labels are in the first column 47 | train_y = train_data.iloc[:, 0] 48 | train_X = train_data.iloc[:, 1:] 49 | 50 | max_leaf_nodes = int(trainingParams.get('max_leaf_nodes',2)) 51 | random_state = int(trainingParams.get('random_state', 42)) 52 | criterion = trainingParams.get('criterion', "gini") 53 | 54 | # Now use scikit-learn's decision tree classifier to train the model. 55 | clf = tree.DecisionTreeClassifier(criterion=criterion, max_leaf_nodes=max_leaf_nodes, random_state=random_state) 56 | clf = clf.fit(train_X, train_y) 57 | 58 | # save the model 59 | with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'wb') as out: 60 | pickle.dump(clf, out) 61 | print('[INFO] Training complete.') 62 | print(f'[INFO] Model saved at: {model_path}.') 63 | except Exception as e: 64 | 65 | trc = traceback.format_exc() 66 | with open(os.path.join(output_path, 'failure'), 'w') as s: 67 | s.write('[INFO] Exception during training: ' + str(e) + '\n' + trc) 68 | # Printing this causes the exception to be in the training job logs, as well. 69 | print('[INFO] Exception during training: ' + str(e) + '\n' + trc, file=sys.stderr) 70 | # A non-zero exit code causes the training job to be marked as Failed. 71 | sys.exit(255) 72 | 73 | 74 | if __name__ == '__main__': 75 | train() 76 | 77 | # A zero exit code causes the job to be marked a Succeeded. 78 | sys.exit(0) 79 | -------------------------------------------------------------------------------- /Tutorial - 3 Sagemaker Build Custom Algorithm/Algo_Container/Decision_Tree/wsgi.py: -------------------------------------------------------------------------------- 1 | import predictor as myapp 2 | 3 | # This is just a simple wrapper for gunicorn to find your app. 4 | # If you want to change the algorithm file, simply change "predictor" above to the 5 | # new file. 
# Build an image that can do training and inference in SageMaker
# This is a Python 3 image that uses the nginx, gunicorn, flask stack
# for serving inferences in a stable way.

FROM ubuntu:18.04

# MAINTAINER is deprecated; a LABEL carries the same information.
LABEL maintainer="Amazon AI"


RUN apt-get -y update && apt-get install -y --no-install-recommends \
         wget \
         python3-pip \
         python3-setuptools \
         nginx \
         ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Expose the Python 3 interpreter and pip under their unversioned names
# (a single layer instead of two).
RUN ln -s /usr/bin/python3 /usr/bin/python \
    && ln -s /usr/bin/pip3 /usr/bin/pip

# Here we get all python packages.
# pip would otherwise leave its install caches populated, which uses a significant
# amount of space; --no-cache-dir keeps them out of the image, which reduces
# start up time.
RUN pip --no-cache-dir install numpy==1.16.2 scipy==1.2.1 scikit-learn==0.20.2 pandas flask gunicorn

# Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard
# output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE
# keeps Python from writing the .pyc files which are unnecessary in this case. We also update
# PATH so that the train and serve programs are found when the container is invoked.
32 | 33 | ENV PYTHONUNBUFFERED=TRUE 34 | ENV PYTHONDONTWRITEBYTECODE=TRUE 35 | ENV PATH="/opt/program:${PATH}" 36 | 37 | # Set up the program in the image 38 | COPY Decision_Tree /opt/program 39 | WORKDIR /opt/program 40 | -------------------------------------------------------------------------------- /Tutorial - 3 Sagemaker Build Custom Algorithm/README.md: -------------------------------------------------------------------------------- 1 | ## Tutorial - 3 Building a Custom Algorithm in Amazon SageMaker 2 | 3 | ![Tutorial Thumbnail](https://github.com/Spidy20/Sagemaker-Tutorials/blob/master/Tutorial%20-%203%20Sagemaker%20Build%20Custom%20Algorithm/yt_thumb.jpg) 4 | [Watch the tutorial here ►](https://youtu.be/_OjFubgXcWQ) 5 | 6 | ### Overview 7 | 8 | This tutorial guides you through the process of building a custom algorithm in Amazon SageMaker using the SM-Docker template. You will learn how to create and deploy your own machine learning algorithm. 9 | 10 | ### Tutorial Content 11 | 12 | In this tutorial, we cover the following steps: 13 | 14 | 1. **Download Official SageMaker Structure**: Start by obtaining the official SageMaker project structure. 15 | 16 | 2. **Edit `train` & `predictor.py`**: Customize the `train` and `predictor.py` files to match your specific algorithm requirements. 17 | 18 | 3. **Add Required Package Dependencies in Dockerfile**: Modify the Dockerfile to include any necessary package dependencies for your custom algorithm. 19 | 20 | 4. **Organize Files in SageMaker Studio File-System (EFS)**: Arrange all files and directories with the proper structure within the SageMaker Studio file-system, specifically in Amazon Elastic File System (EFS). 21 | 22 | 5. **Execute Notebook and Create Algorithm Container with `sm-docker`**: Use a Jupyter notebook and the `sm-docker` command to create an algorithm container. 23 | 24 | 6. 
**Train Model with Custom Algorithm**: Utilize the custom algorithm container(we have created in step-5) to train a machine learning model. 25 | 26 | 7. **Create SageMaker Endpoint and Test the Model**: Set up a SageMaker Endpoint and evaluate the model's performance using test data. 27 | 28 | ### AWS Services Used 29 | 30 | The tutorial utilizes the following AWS services: 31 | 32 | - **SageMaker Studio (Jupyter Notebook)**: We use SageMaker Studio for notebook execution and development. 33 | 34 | - **ECR (Elastic Container Registry)**: To store the custom algorithm image. 35 | 36 | - **CodeBuild**: Used for SM-Docker building activities and execution. 37 | 38 | ### Support 39 | 40 | If you find this tutorial helpful, please consider giving it a star⭐ or forking the repository to support our work. 41 | 42 | - [Buy me a Coffee☕](https://www.buymeacoffee.com/spidy20) 43 | - [Donate via PayPal (Your support inspires us to create more projects)](https://www.paypal.me/spidy1820) 44 | 45 | -------------------------------------------------------------------------------- /Tutorial - 3 Sagemaker Build Custom Algorithm/iris.csv: -------------------------------------------------------------------------------- 1 | setosa,5.1,3.5,1.4,0.2 2 | setosa,4.9,3,1.4,0.2 3 | setosa,4.7,3.2,1.3,0.2 4 | setosa,4.6,3.1,1.5,0.2 5 | setosa,5,3.6,1.4,0.2 6 | setosa,5.4,3.9,1.7,0.4 7 | setosa,4.6,3.4,1.4,0.3 8 | setosa,5,3.4,1.5,0.2 9 | setosa,4.4,2.9,1.4,0.2 10 | setosa,4.9,3.1,1.5,0.1 11 | setosa,5.4,3.7,1.5,0.2 12 | setosa,4.8,3.4,1.6,0.2 13 | setosa,4.8,3,1.4,0.1 14 | setosa,4.3,3,1.1,0.1 15 | setosa,5.8,4,1.2,0.2 16 | setosa,5.7,4.4,1.5,0.4 17 | setosa,5.4,3.9,1.3,0.4 18 | setosa,5.1,3.5,1.4,0.3 19 | setosa,5.7,3.8,1.7,0.3 20 | setosa,5.1,3.8,1.5,0.3 21 | setosa,5.4,3.4,1.7,0.2 22 | setosa,5.1,3.7,1.5,0.4 23 | setosa,4.6,3.6,1,0.2 24 | setosa,5.1,3.3,1.7,0.5 25 | setosa,4.8,3.4,1.9,0.2 26 | setosa,5,3,1.6,0.2 27 | setosa,5,3.4,1.6,0.4 28 | setosa,5.2,3.5,1.5,0.2 29 | 
setosa,5.2,3.4,1.4,0.2 30 | setosa,4.7,3.2,1.6,0.2 31 | setosa,4.8,3.1,1.6,0.2 32 | setosa,5.4,3.4,1.5,0.4 33 | setosa,5.2,4.1,1.5,0.1 34 | setosa,5.5,4.2,1.4,0.2 35 | setosa,4.9,3.1,1.5,0.2 36 | setosa,5,3.2,1.2,0.2 37 | setosa,5.5,3.5,1.3,0.2 38 | setosa,4.9,3.6,1.4,0.1 39 | setosa,4.4,3,1.3,0.2 40 | setosa,5.1,3.4,1.5,0.2 41 | setosa,5,3.5,1.3,0.3 42 | setosa,4.5,2.3,1.3,0.3 43 | setosa,4.4,3.2,1.3,0.2 44 | setosa,5,3.5,1.6,0.6 45 | setosa,5.1,3.8,1.9,0.4 46 | setosa,4.8,3,1.4,0.3 47 | setosa,5.1,3.8,1.6,0.2 48 | setosa,4.6,3.2,1.4,0.2 49 | setosa,5.3,3.7,1.5,0.2 50 | setosa,5,3.3,1.4,0.2 51 | versicolor,7,3.2,4.7,1.4 52 | versicolor,6.4,3.2,4.5,1.5 53 | versicolor,6.9,3.1,4.9,1.5 54 | versicolor,5.5,2.3,4,1.3 55 | versicolor,6.5,2.8,4.6,1.5 56 | versicolor,5.7,2.8,4.5,1.3 57 | versicolor,6.3,3.3,4.7,1.6 58 | versicolor,4.9,2.4,3.3,1 59 | versicolor,6.6,2.9,4.6,1.3 60 | versicolor,5.2,2.7,3.9,1.4 61 | versicolor,5,2,3.5,1 62 | versicolor,5.9,3,4.2,1.5 63 | versicolor,6,2.2,4,1 64 | versicolor,6.1,2.9,4.7,1.4 65 | versicolor,5.6,2.9,3.6,1.3 66 | versicolor,6.7,3.1,4.4,1.4 67 | versicolor,5.6,3,4.5,1.5 68 | versicolor,5.8,2.7,4.1,1 69 | versicolor,6.2,2.2,4.5,1.5 70 | versicolor,5.6,2.5,3.9,1.1 71 | versicolor,5.9,3.2,4.8,1.8 72 | versicolor,6.1,2.8,4,1.3 73 | versicolor,6.3,2.5,4.9,1.5 74 | versicolor,6.1,2.8,4.7,1.2 75 | versicolor,6.4,2.9,4.3,1.3 76 | versicolor,6.6,3,4.4,1.4 77 | versicolor,6.8,2.8,4.8,1.4 78 | versicolor,6.7,3,5,1.7 79 | versicolor,6,2.9,4.5,1.5 80 | versicolor,5.7,2.6,3.5,1 81 | versicolor,5.5,2.4,3.8,1.1 82 | versicolor,5.5,2.4,3.7,1 83 | versicolor,5.8,2.7,3.9,1.2 84 | versicolor,6,2.7,5.1,1.6 85 | versicolor,5.4,3,4.5,1.5 86 | versicolor,6,3.4,4.5,1.6 87 | versicolor,6.7,3.1,4.7,1.5 88 | versicolor,6.3,2.3,4.4,1.3 89 | versicolor,5.6,3,4.1,1.3 90 | versicolor,5.5,2.5,4,1.3 91 | versicolor,5.5,2.6,4.4,1.2 92 | versicolor,6.1,3,4.6,1.4 93 | versicolor,5.8,2.6,4,1.2 94 | versicolor,5,2.3,3.3,1 95 | versicolor,5.6,2.7,4.2,1.3 96 | 
versicolor,5.7,3,4.2,1.2 97 | versicolor,5.7,2.9,4.2,1.3 98 | versicolor,6.2,2.9,4.3,1.3 99 | versicolor,5.1,2.5,3,1.1 100 | versicolor,5.7,2.8,4.1,1.3 101 | virginica,6.3,3.3,6,2.5 102 | virginica,5.8,2.7,5.1,1.9 103 | virginica,7.1,3,5.9,2.1 104 | virginica,6.3,2.9,5.6,1.8 105 | virginica,6.5,3,5.8,2.2 106 | virginica,7.6,3,6.6,2.1 107 | virginica,4.9,2.5,4.5,1.7 108 | virginica,7.3,2.9,6.3,1.8 109 | virginica,6.7,2.5,5.8,1.8 110 | virginica,7.2,3.6,6.1,2.5 111 | virginica,6.5,3.2,5.1,2 112 | virginica,6.4,2.7,5.3,1.9 113 | virginica,6.8,3,5.5,2.1 114 | virginica,5.7,2.5,5,2 115 | virginica,5.8,2.8,5.1,2.4 116 | virginica,6.4,3.2,5.3,2.3 117 | virginica,6.5,3,5.5,1.8 118 | virginica,7.7,3.8,6.7,2.2 119 | virginica,7.7,2.6,6.9,2.3 120 | virginica,6,2.2,5,1.5 121 | virginica,6.9,3.2,5.7,2.3 122 | virginica,5.6,2.8,4.9,2 123 | virginica,7.7,2.8,6.7,2 124 | virginica,6.3,2.7,4.9,1.8 125 | virginica,6.7,3.3,5.7,2.1 126 | virginica,7.2,3.2,6,1.8 127 | virginica,6.2,2.8,4.8,1.8 128 | virginica,6.1,3,4.9,1.8 129 | virginica,6.4,2.8,5.6,2.1 130 | virginica,7.2,3,5.8,1.6 131 | virginica,7.4,2.8,6.1,1.9 132 | virginica,7.9,3.8,6.4,2 133 | virginica,6.4,2.8,5.6,2.2 134 | virginica,6.3,2.8,5.1,1.5 135 | virginica,6.1,2.6,5.6,1.4 136 | virginica,7.7,3,6.1,2.3 137 | virginica,6.3,3.4,5.6,2.4 138 | virginica,6.4,3.1,5.5,1.8 139 | virginica,6,3,4.8,1.8 140 | virginica,6.9,3.1,5.4,2.1 141 | virginica,6.7,3.1,5.6,2.4 142 | virginica,6.9,3.1,5.1,2.3 143 | virginica,5.8,2.7,5.1,1.9 144 | virginica,6.8,3.2,5.9,2.3 145 | virginica,6.7,3.3,5.7,2.5 146 | virginica,6.7,3,5.2,2.3 147 | virginica,6.3,2.5,5,1.9 148 | virginica,6.5,3,5.2,2 149 | virginica,6.2,3.4,5.4,2.3 150 | virginica,5.9,3,5.1,1.8 151 | -------------------------------------------------------------------------------- /Tutorial - 3 Sagemaker Build Custom Algorithm/yt_thumb.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Spidy20/Sagemaker-Tutorials/0c838658262679f81fce8d2ea95b4b5df20b1a18/Tutorial - 3 Sagemaker Build Custom Algorithm/yt_thumb.jpg --------------------------------------------------------------------------------