├── README.md ├── TASK 1.ipynb └── TASK-2.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Spark-Foundation 2 | The Sparks Foundation internship 3 | Hello everyone, I am doing an internship at The Sparks Foundation, and this repository contains the tasks given by them. I hope you like it, and if you do, please follow me on LinkedIn. 4 | ## Mandatory Task 5 | Improve your LinkedIn account: www.linkedin.com/in/shruthi-jain-81b4571ab 6 | ## PROJECTS(TASKS) 7 | Many tasks were given, a few of which are beginner level and some intermediate or advanced level. I have not completed all of them, but I want to mention 8 | all the tasks so that you can find the corresponding file. 9 | * Task-1: Prediction Using Supervised Learning, predicting the percentage of a student based on the number of hours studied. 10 | * Task-2: Prediction Using Unsupervised Learning. (iris dataset) 11 | * Task-3: Exploratory Data Analysis - Retail (As a manager, figure out which area of the business needs more attention) 12 | * Task-4: Perform ‘Exploratory Data Analysis’ on dataset ‘Global Terrorism’ 13 | * Task-5: Perform ‘Exploratory Data Analysis’ on dataset ‘Indian Premier League’ 14 | * Task-6: Prediction using Decision Tree Algorithm 15 | * Task-7: Stock Market Prediction using Numerical and Textual Analysis 16 | * Task-8: Timeline Analysis : Covid-19 17 | # THANK YOU!!!! 18 | -------------------------------------------------------------------------------- /TASK 1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ***The Sparks Foundation - Task-1*** " 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## ***Prediction using supervised learning, Predict the percentage of a student based on the no. 
of study hours.***" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### ***Import The Required Libraries***" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 67, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import numpy as np\n", 31 | "import pandas as pd\n", 32 | "import matplotlib.pyplot as plt\n", 33 | "%matplotlib inline\n", 34 | "from sklearn.model_selection import train_test_split\n", 35 | "from sklearn.linear_model import LinearRegression\n", 36 | "from sklearn.metrics import mean_absolute_error" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### ***Reading The CSV file***" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/html": [ 54 | "
\n", 55 | "\n", 68 | "\n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | "
HoursScores
02.521
15.147
23.227
38.575
43.530
51.520
69.288
75.560
88.381
92.725
107.785
115.962
124.541
133.342
141.117
158.995
162.530
171.924
186.167
197.469
202.730
214.854
223.835
236.976
247.886
\n", 204 | "
" 205 | ], 206 | "text/plain": [ 207 | " Hours Scores\n", 208 | "0 2.5 21\n", 209 | "1 5.1 47\n", 210 | "2 3.2 27\n", 211 | "3 8.5 75\n", 212 | "4 3.5 30\n", 213 | "5 1.5 20\n", 214 | "6 9.2 88\n", 215 | "7 5.5 60\n", 216 | "8 8.3 81\n", 217 | "9 2.7 25\n", 218 | "10 7.7 85\n", 219 | "11 5.9 62\n", 220 | "12 4.5 41\n", 221 | "13 3.3 42\n", 222 | "14 1.1 17\n", 223 | "15 8.9 95\n", 224 | "16 2.5 30\n", 225 | "17 1.9 24\n", 226 | "18 6.1 67\n", 227 | "19 7.4 69\n", 228 | "20 2.7 30\n", 229 | "21 4.8 54\n", 230 | "22 3.8 35\n", 231 | "23 6.9 76\n", 232 | "24 7.8 86" 233 | ] 234 | }, 235 | "execution_count": 2, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "url=\"https://raw.githubusercontent.com/AdiPersonalWorks/Random/master/student_scores%20-%20student_scores.csv\"\n", 242 | "df=pd.read_csv(url)\n", 243 | "df" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "### ***Checking how many null values are there in dataset***" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 3, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "data": { 260 | "text/plain": [ 261 | "Hours 0\n", 262 | "Scores 0\n", 263 | "dtype: int64" 264 | ] 265 | }, 266 | "execution_count": 3, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "df.isna().sum()" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### ***Descriptive analysis of the dataset using describe function***" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 4, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/html": [ 290 | "
\n", 291 | "\n", 304 | "\n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | "
HoursScores
count25.00000025.000000
mean5.01200051.480000
std2.52509425.286887
min1.10000017.000000
25%2.70000030.000000
50%4.80000047.000000
75%7.40000075.000000
max9.20000095.000000
\n", 355 | "
" 356 | ], 357 | "text/plain": [ 358 | " Hours Scores\n", 359 | "count 25.000000 25.000000\n", 360 | "mean 5.012000 51.480000\n", 361 | "std 2.525094 25.286887\n", 362 | "min 1.100000 17.000000\n", 363 | "25% 2.700000 30.000000\n", 364 | "50% 4.800000 47.000000\n", 365 | "75% 7.400000 75.000000\n", 366 | "max 9.200000 95.000000" 367 | ] 368 | }, 369 | "execution_count": 4, 370 | "metadata": {}, 371 | "output_type": "execute_result" 372 | } 373 | ], 374 | "source": [ 375 | "df.describe()" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "### ***information about the dataset***" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 5, 388 | "metadata": {}, 389 | "outputs": [ 390 | { 391 | "name": "stdout", 392 | "output_type": "stream", 393 | "text": [ 394 | "\n", 395 | "RangeIndex: 25 entries, 0 to 24\n", 396 | "Data columns (total 2 columns):\n", 397 | " # Column Non-Null Count Dtype \n", 398 | "--- ------ -------------- ----- \n", 399 | " 0 Hours 25 non-null float64\n", 400 | " 1 Scores 25 non-null int64 \n", 401 | "dtypes: float64(1), int64(1)\n", 402 | "memory usage: 528.0 bytes\n" 403 | ] 404 | } 405 | ], 406 | "source": [ 407 | "df.info()" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "### ***Plotting a scatter plot showing relationship between No of Hours vs scores*** " 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 6, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "image/png": "\n", 425 | "text/plain": [ 426 | "
" 427 | ] 428 | }, 429 | "metadata": { 430 | "needs_background": "light" 431 | }, 432 | "output_type": "display_data" 433 | } 434 | ], 435 | "source": [ 436 | "plt.xlabel('Hours')\n", 437 | "plt.ylabel('Scores')\n", 438 | "plt.title('Hours VS Scores')\n", 439 | "plt.scatter(df.Hours,df.Scores,color='green')\n", 440 | "plt.show()" 441 | ] 442 | }, 443 | { 444 | "cell_type": "markdown", 445 | "metadata": {}, 446 | "source": [ 447 | "### ***Two variables for the regression***" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 10, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "x=np.array(df.Hours)\n", 457 | "y=np.array(df.Scores)" 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "### ***Reshaping the numpy array for vertical output***" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 14, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "x=x.reshape(-1,1)\n", 474 | "y=y.reshape(-1,1)" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": {}, 480 | "source": [ 481 | "### ***Splitting the data into test data and train data***" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 16, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": {}, 496 | "source": [ 497 | "### ***Calling the linear function and reshaping all the data and fitting it to the model***" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 39, 503 | "metadata": {}, 504 | "outputs": [ 505 | { 506 | "data": { 507 | "text/plain": [ 508 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" 509 | ] 510 | }, 511 | "execution_count": 39, 512 | "metadata": {}, 513 | "output_type": "execute_result" 514 | 
} 515 | ], 516 | "source": [ 517 | "reg=LinearRegression()\n", 518 | "X_test=X_test.reshape(-1,1)\n", 519 | "y_test=y_test.reshape(-1,1)\n", 520 | "X_train=X_train.reshape(-1,1)\n", 521 | "y_train=y_train.reshape(-1,1)\n", 522 | "reg.fit(X_train,y_train)" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "### ***Formula of simple linear regression***" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": 40, 535 | "metadata": {}, 536 | "outputs": [], 537 | "source": [ 538 | "line1=reg.coef_*X_train+reg.intercept_" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "### ***Plotting a scatter plot for training dataset***" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 41, 551 | "metadata": {}, 552 | "outputs": [ 553 | { 554 | "data": { 555 | "text/plain": [ 556 | "Text(0.5, 1.0, 'TRAINING DATA')" 557 | ] 558 | }, 559 | "execution_count": 41, 560 | "metadata": {}, 561 | "output_type": "execute_result" 562 | }, 563 | { 564 | "data": { 565 | "image/png": "\n", 566 | "text/plain": [ 567 | "
" 568 | ] 569 | }, 570 | "metadata": { 571 | "needs_background": "light" 572 | }, 573 | "output_type": "display_data" 574 | } 575 | ], 576 | "source": [ 577 | "plt.scatter(X_train,y_train,color='green')\n", 578 | "plt.plot(X_train,line1,color='red',linewidth=2)\n", 579 | "plt.title('TRAINING DATA')" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 44, 585 | "metadata": {}, 586 | "outputs": [], 587 | "source": [ 588 | "line2=reg.coef_*X_test+reg.intercept_" 589 | ] 590 | }, 591 | { 592 | "cell_type": "markdown", 593 | "metadata": {}, 594 | "source": [ 595 | "### ***Plotting a scatter plot for testing dataset***" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": 45, 601 | "metadata": {}, 602 | "outputs": [ 603 | { 604 | "data": { 605 | "text/plain": [ 606 | "Text(0.5, 1.0, 'TESTING DATA')" 607 | ] 608 | }, 609 | "execution_count": 45, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | }, 613 | { 614 | "data": { 615 | "image/png": "\n", 616 | "text/plain": [ 617 | "
" 618 | ] 619 | }, 620 | "metadata": { 621 | "needs_background": "light" 622 | }, 623 | "output_type": "display_data" 624 | } 625 | ], 626 | "source": [ 627 | "plt.scatter(X_test,y_test,color='red')\n", 628 | "plt.plot(X_test,line2,color='green',linewidth=2)\n", 629 | "plt.title('TESTING DATA')" 630 | ] 631 | }, 632 | { 633 | "cell_type": "markdown", 634 | "metadata": {}, 635 | "source": [ 636 | "### ***predicting the test value***" 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": 49, 642 | "metadata": {}, 643 | "outputs": [ 644 | { 645 | "data": { 646 | "text/plain": [ 647 | "array([[16.88414476],\n", 648 | " [33.73226078],\n", 649 | " [75.357018 ],\n", 650 | " [26.79480124],\n", 651 | " [60.49103328]])" 652 | ] 653 | }, 654 | "execution_count": 49, 655 | "metadata": {}, 656 | "output_type": "execute_result" 657 | } 658 | ], 659 | "source": [ 660 | "y_predict=reg.predict(X_test)\n", 661 | "y_predict" 662 | ] 663 | }, 664 | { 665 | "cell_type": "markdown", 666 | "metadata": {}, 667 | "source": [ 668 | "### ***Comparing the actuals with the prediction to see the difference***" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": 56, 674 | "metadata": {}, 675 | "outputs": [ 676 | { 677 | "name": "stdout", 678 | "output_type": "stream", 679 | "text": [ 680 | "Comparing the actuals with prediction: \n" 681 | ] 682 | }, 683 | { 684 | "data": { 685 | "text/html": [ 686 | "
\n", 687 | "\n", 700 | "\n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | "
ACTUALprediction
02016.884145
12733.732261
26975.357018
33026.794801
46260.491033
\n", 736 | "
" 737 | ], 738 | "text/plain": [ 739 | " ACTUAL prediction\n", 740 | "0 20 16.884145\n", 741 | "1 27 33.732261\n", 742 | "2 69 75.357018\n", 743 | "3 30 26.794801\n", 744 | "4 62 60.491033" 745 | ] 746 | }, 747 | "execution_count": 56, 748 | "metadata": {}, 749 | "output_type": "execute_result" 750 | } 751 | ], 752 | "source": [ 753 | "a=y_test.flatten()\n", 754 | "b=y_predict.flatten()\n", 755 | "compare={\"ACTUAL\":a,\"prediction\":b}\n", 756 | "label={0,1,2,3,4}\n", 757 | "print(\"Comparing the actuals with prediction: \")\n", 758 | "df=pd.DataFrame(compare,index=label)\n", 759 | "df\n" 760 | ] 761 | }, 762 | { 763 | "cell_type": "markdown", 764 | "metadata": {}, 765 | "source": [ 766 | "### ***Evaluating the model***\n", 767 | "#### ***Evaluation is a very impotant step for knowing the accuracy of the model by using MAE***" 768 | ] 769 | }, 770 | { 771 | "cell_type": "code", 772 | "execution_count": 69, 773 | "metadata": {}, 774 | "outputs": [ 775 | { 776 | "data": { 777 | "text/plain": [ 778 | "4.183859899002975" 779 | ] 780 | }, 781 | "execution_count": 69, 782 | "metadata": {}, 783 | "output_type": "execute_result" 784 | } 785 | ], 786 | "source": [ 787 | "mae=mean_absolute_error(y_test,y_predict)\n", 788 | "mae" 789 | ] 790 | }, 791 | { 792 | "cell_type": "markdown", 793 | "metadata": {}, 794 | "source": [ 795 | "## ***prediction***\n", 796 | "### ***What will be predicted score if a student studies for 9.25 hrs/ day?***" 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "execution_count": 71, 802 | "metadata": {}, 803 | "outputs": [ 804 | { 805 | "data": { 806 | "text/plain": [ 807 | "array([[93.69173249]])" 808 | ] 809 | }, 810 | "execution_count": 71, 811 | "metadata": {}, 812 | "output_type": "execute_result" 813 | } 814 | ], 815 | "source": [ 816 | "data_predict=reg.predict([[9.25]])\n", 817 | "data_predict" 818 | ] 819 | } 820 | ], 821 | "metadata": { 822 | "kernelspec": { 823 | "display_name": "Python 3", 824 | "language": "python", 825 | 
"name": "python3" 826 | }, 827 | "language_info": { 828 | "codemirror_mode": { 829 | "name": "ipython", 830 | "version": 3 831 | }, 832 | "file_extension": ".py", 833 | "mimetype": "text/x-python", 834 | "name": "python", 835 | "nbconvert_exporter": "python", 836 | "pygments_lexer": "ipython3", 837 | "version": "3.7.6" 838 | } 839 | }, 840 | "nbformat": 4, 841 | "nbformat_minor": 4 842 | } 843 | -------------------------------------------------------------------------------- /TASK-2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ***The Sparks Foundation Task-2***" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## ***From the given ‘Iris’ dataset, predict the optimum number of clusters and represent it visually.***\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### ***Import all the required libraries***" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 14, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import numpy as np\n", 32 | "from sklearn.cluster import KMeans\n", 33 | "from sklearn.preprocessing import MinMaxScaler\n", 34 | "import matplotlib.pyplot as plt\n", 35 | "%matplotlib inline" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### ***Read the CSV file***" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 9, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/html": [ 53 | "
\n", 54 | "\n", 67 | "\n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | "
IdSepalLengthCmSepalWidthCmPetalLengthCmPetalWidthCmSpecies
015.13.51.40.2Iris-setosa
124.93.01.40.2Iris-setosa
234.73.21.30.2Iris-setosa
344.63.11.50.2Iris-setosa
455.03.61.40.2Iris-setosa
.....................
1451466.73.05.22.3Iris-virginica
1461476.32.55.01.9Iris-virginica
1471486.53.05.22.0Iris-virginica
1481496.23.45.42.3Iris-virginica
1491505.93.05.11.8Iris-virginica
\n", 181 | "

150 rows × 6 columns

\n", 182 | "
" 183 | ], 184 | "text/plain": [ 185 | " Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm \\\n", 186 | "0 1 5.1 3.5 1.4 0.2 \n", 187 | "1 2 4.9 3.0 1.4 0.2 \n", 188 | "2 3 4.7 3.2 1.3 0.2 \n", 189 | "3 4 4.6 3.1 1.5 0.2 \n", 190 | "4 5 5.0 3.6 1.4 0.2 \n", 191 | ".. ... ... ... ... ... \n", 192 | "145 146 6.7 3.0 5.2 2.3 \n", 193 | "146 147 6.3 2.5 5.0 1.9 \n", 194 | "147 148 6.5 3.0 5.2 2.0 \n", 195 | "148 149 6.2 3.4 5.4 2.3 \n", 196 | "149 150 5.9 3.0 5.1 1.8 \n", 197 | "\n", 198 | " Species \n", 199 | "0 Iris-setosa \n", 200 | "1 Iris-setosa \n", 201 | "2 Iris-setosa \n", 202 | "3 Iris-setosa \n", 203 | "4 Iris-setosa \n", 204 | ".. ... \n", 205 | "145 Iris-virginica \n", 206 | "146 Iris-virginica \n", 207 | "147 Iris-virginica \n", 208 | "148 Iris-virginica \n", 209 | "149 Iris-virginica \n", 210 | "\n", 211 | "[150 rows x 6 columns]" 212 | ] 213 | }, 214 | "execution_count": 9, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "df=pd.read_csv(\"iris.csv\")\n", 221 | "df" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "### ***Dropping the unnecessary columns***" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 16, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "df.drop(['Id', 'Species'],axis='columns',inplace=True) " 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 44, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "data": { 247 | "text/html": [ 248 | "
\n", 249 | "\n", 262 | "\n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | "
SepalLengthCmSepalWidthCmPetalLengthCmPetalWidthCmcluster
05.13.51.40.21
14.93.01.40.21
24.73.21.30.21
34.63.11.50.21
45.03.61.40.21
..................
1456.73.05.22.30
1466.32.55.01.92
1476.53.05.22.00
1486.23.45.42.30
1495.93.05.11.82
\n", 364 | "

150 rows × 5 columns

\n", 365 | "
" 366 | ], 367 | "text/plain": [ 368 | " SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm cluster\n", 369 | "0 5.1 3.5 1.4 0.2 1\n", 370 | "1 4.9 3.0 1.4 0.2 1\n", 371 | "2 4.7 3.2 1.3 0.2 1\n", 372 | "3 4.6 3.1 1.5 0.2 1\n", 373 | "4 5.0 3.6 1.4 0.2 1\n", 374 | ".. ... ... ... ... ...\n", 375 | "145 6.7 3.0 5.2 2.3 0\n", 376 | "146 6.3 2.5 5.0 1.9 2\n", 377 | "147 6.5 3.0 5.2 2.0 0\n", 378 | "148 6.2 3.4 5.4 2.3 0\n", 379 | "149 5.9 3.0 5.1 1.8 2\n", 380 | "\n", 381 | "[150 rows x 5 columns]" 382 | ] 383 | }, 384 | "execution_count": 44, 385 | "metadata": {}, 386 | "output_type": "execute_result" 387 | } 388 | ], 389 | "source": [ 390 | "df" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "### ***K-Means***" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 37, 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [ 406 | "x=df.iloc[:,[0,1,2,3]].values\n", 407 | "sse = []\n", 408 | "k_rng = range(1,10)\n", 409 | "for k in k_rng:\n", 410 | " km = KMeans(n_clusters=k)\n", 411 | " km.fit(x)\n", 412 | " sse.append(km.inertia_)" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "### ***Plotting an ELBOW GRAPH to find the correct number of cluster***" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 31, 425 | "metadata": {}, 426 | "outputs": [ 427 | { 428 | "data": { 429 | "text/plain": [ 430 | "[]" 431 | ] 432 | }, 433 | "execution_count": 31, 434 | "metadata": {}, 435 | "output_type": "execute_result" 436 | }, 437 | { 438 | "data": { 439 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3deZQddZ338fenO3tI0p2kE0ISSG6HRQRkaRBF2WIiiAgoyDYSEGUeHx9HYdwfnZE5yuB5Rh0cZ5hhUUAJiywTUHTCviiLCSCLAclGVkgn6SxkT/r7/FHVzaW5fbs7ye263ffzOqdOV/3qV3W/Nwf62/WrX31LEYGZmRlAVdYBmJlZ+XBSMDOzVk4KZmbWyknBzMxaOSmYmVkrJwUzM2vlpGBmZq2cFGy3kvRW3tIsaVPe9vmSvifpVwWOC0mT0vXvSdrW5lxr2vm8Cemxb7VZzk733yDp++0cG5I2pP1XSrpFUk2bPh+X9Ezab5WkmyWNS/f1SY89Kq//+el527a90sG/2/faHpe2X5i2f61N+xJJx7fT99Nt2o+XtKTAZ35B0kuS+uW11UlqlPQRJb4jaWH6PZdIujmv7xOSLkzXi/a1nsNJwXariNijZQEWAafmtXXll8Rt+eeKiJoO+te06X9bJz/nfWmsOaAW+F7LDklnAtOBq4CRwHuBLcATkmojYjvwJHBc3vmOBV4p0PZYewFIEvAZYDUwrUCX1cA3JA3t4LtMK3KOQv4TaAT+b17bT4EZEfEA8FngHODE9N/oSOCRds7Vlb5WxpwUzICIWAfcAxwIrb+ofwR8PyJujohNEfEG8DngLeDS9NDHSH7pt/gw8MMCbe0mhXT/XsCXgXPy/3JPzSFJPpe2PbCFpH1IEtElwEcljS7yeQBEUs7gc8DfSTpY0sfSWP4+7XIk8PuImJ/2Xx4R17Zzuq70tTLmpGAGSKoFTgeeSpv2B/YGfp3fLyKagTuBKWnTY8AxkqokjQQGA7cDR+W1HUDxpDANuBdoubr5eIE+3wUulTS8nXNcAMyKiDtJksj5RT4v//vMA/4J+AVwNfC/ImJtuvsp4CJJX5V0hKTqIqfqSl8rY04KloVPS1qTv3Siz8MdnHNlm/7v6WQsz6afv5IkCfxX2j4y/bm8wDHL8/Y/DQwCDib5K/uJiNgILMhrez0iFhX6cEmDgLOA6RGxDbiDAsM/EfE8MBP4Rjvf4wKSoS7Sn50dQoJkeAzgmYj4Td5n3gB8BTiZJKmtkPTVQifoSl8rb04KloXbI6Imf+lEnxM6OOfINv3ndDKWw9PPH0Dyl/LjkgaQJAmAMQWOGdOyPyI2A8+QDBcdCzye9nkir63YVcIZwHbgvnT7ZuBkSXUF+v4D8AVJe+Y3SjoGmAjcmjZNBw6WdGiRz22VXv28ArxcYN8vI2IyUAN8EfhnSZPbOU+n+1r5clIwA9K/0q8j+eV6EPAqsITkr/hWkqqATwEP5jW33Ff4MG8nhcfz2joaOtoDWCTpDZLhqr7AuQVifAW4C/h2gXMIeD49x9Np+wVFPrdLImJbRNxKkjgO2l19rfz0yToAs25Qnf7136I5Irbmd0jHwC8CNgHzIyLS4Y9r0+mcdwPDgCuAocBP8g5/DPhfwA7gL2nbEyRJppZ2koKkscBkkiGXF/J2fYXkF/1PCxx2edpX6TkGAJ8mucH827x+nwL+QdLX8z4v/98AYEsUqZ0v6bPAGyQJbkMa5/4kV0Y73dfKm68UrFydXeDZg1FF+q9p0/eyvH3fJPll37I8lLfvz5LeAppIfhGfERGrAdJprZ8hmfWzkuQX/kDgmIhYlXeOP5IkjKdbfsmm+xuBFRHxWjsxfwZ4PiJmRsQbLQtJMjhE0rv+yo6IBcAvSW5oQ3JzfBNwU5tzXA9UAyel/ca2+TfYBNS3E1eLdcB3gMXpv88VwCUR8eQu9rUyJr9kx8zMWvhKwczMWjkpmJlZKycFMzN
r5aRgZmatevSU1JEjR8aECROyDsPMrEeZPXv2yogo9IBkaZOCpEtJCm4F8CLJPPAxJE9eDgeeBT4TEVsl9QduAo4AVgFnR8TCYuefMGECs2bNKt0XMDPrhSS93t6+kg0fpQ/m/B3QEBEHkcyZPoekguRPImJfkvnMF6eHXAw0RcQkkgeDfliq2MzMrLBS31PoAwyU1IekaNhy4ESSol8AN5I8fANwWrpNun9yWr7YzMy6ScmSQkQsBf6F5EUry4G1wGxgTfpyEkhqy4xN18eSPA1Jun8tMKLteSVdImmWpFmNjY2lCt/MrCKVcvioluSv/4kkLxAZTFIPpa2WR6oLXRW863HriLgmIhoioqGuruB9EjMz20mlHD76CLAgIhrTCpR3AR8EatLhJIBxwLJ0fQkwHpJ335LUklldwvjMzKyNUiaFRcDRkgal9wYmkxQUexg4M+0zDZiRrt/D2y8GORN4qFgFRzMz2/1KeU/haZIbxs+STEetAq4heXPUZZLmktwzuD495HpgRNp+GUllSzMz60Y9ukpqQ0ND7MxzCov/uJhX732VyVdMxhOczKzSSJodEQ2F9lVkmYvlzy3nD1f+gaZ5TVmHYmZWVioyKdRPTd4tMu/+eRlHYmZWXioyKQyfNJxh+wxj/sz5WYdiZlZWKjIpSKJ+aj0LHlpA8/bmrMMxMysbFZkUIBlC2rJuC0ufWZp1KGZmZaNik8LEEyeCYN5M31cwM2tRsUlh4PCBjD1yLPPv930FM7MWFZsUAHJTcix5egmb12zOOhQzs7JQ0Umhfmo9sSNY8PCCrEMxMysLFZ0Uxh09jn579PMQkplZqqKTQnW/aiYcP8E3m83MUhWdFAByU3M0zWuiab5LXpiZVXxScMkLM7O3VXxSGLHfCIaOH+qSF2ZmOCm45IWZWZ6KTwqQPK+wec1mls1a1nFnM7NezEkByE3OueSFmRklTAqS9pf0fN6yTtJXJA2XdL+k19KftWl/SfqppLmSXpB0eKlia2vQyEHsdcRefl7BzCpeKd/R/GpEHBoRhwJHABuBu0nevfxgROwLPMjb72I+Gdg3XS4Bri5VbIXkpuRY/ORitqzb0p0fa2ZWVrpr+GgyMC8iXgdOA25M228ETk/XTwNuisRTQI2kMd0Un0temJnRfUnhHOCWdH10RCwHSH+OStvHAovzjlmStnWLcR8YR9/BfT2EZGYVreRJQVI/4BPArzvqWqAtCpzvEkmzJM1qbGzcHSEC0Kd/HyYc55IXZlbZuuNK4WTg2Yh4M91+s2VYKP25Im1fAozPO24c8K45ohFxTUQ0RERDXV3dbg00NzXH6tdWs2bhmt16XjOznqI7ksK5vD10BHAPMC1dnwbMyGu/IJ2FdDSwtmWYqbu45IWZVbqSJgVJg4ApwF15zVcCUyS9lu67Mm2/D5gPzAWuBf53KWMrZOQBIxkydohLXphZxepTypNHxEZgRJu2VSSzkdr2DeCLpYynIy0lL17571do3tFMVbWf7TOzyuLfem3kpuTY3LSZ5bO7deTKzKwsOCm0kftIDnDJCzOrTE4KbQyuG8yYw8f4eQUzq0hOCgXkpuRY/MfFbFnvkhdmVlmcFAqon1pP8/ZmFj6yMOtQzMy6lZNCAeOPGU+fgX08hGRmFcdJoQCXvDCzSuWk0I7c1ByrXl3F2kVrsw7FzKzbOCm0wyUvzKwSOSm0o+7AOobs5ZIXZlZZupwUJFVLOr8UwZQTSeSm5Jj/wHyadzRnHY6ZWbdoNylIGirpW5J+JmlqWr30SyRF6z7dfSFmJzclx6bVm3jjuTeyDsXMrFsUu1L4JbA/8CLwOWAmcCZwWkSc1g2xZc4lL8ys0hSrkpqLiIMBJF0HrAT2joj13RJZGdhj9B7seeiezL9/Ph/+9oezDsfMrOSKXSlsa1mJiB3AgkpKCC1yU3Is+sMitr61NetQzMxKrlhSeJ+kdZLWS1oPHJK3va67Asxa/dR6mrc1s/DRhVm
HYmZWcu0mhYiojoihETEkXfrkbQ/tziCztPeH9qbPAJe8MLPK0OGb1ySdALwXCODliHik1EGVkz4D+rDPsfv4ZrOZVYRiU1LHSnoa+B6QAyYBl0t6RtLYzpxcUo2kOyS9ImmOpA9IGi7pfkmvpT9r076S9FNJcyW9IOnw3fD9dovc1Bwr56xk3ZKKGTUzswpV7J7Cz4CrI+K4iLgsIi6NiOPS9v/o5PmvAn4fEQcA7wPmAN8EHoyIfYEH022Ak4F90+US4Oouf5sSqZ/ikhdmVhmKJYUDI+KGto0RcRNwQEcnljQUOBa4Pj1ua0SsAU4Dbky73Qicnq6fBtwUiaeAGkljOvtFSmnUwaMYPHqwS16YWa9XLClUF2qUVNXevjZyQCPwC0nPSbpO0mBgdEQsB0h/jkr7jwUW5x2/JG1r+/mXSJolaVZjY2Mnwth1kqifWs/8B+YTzdEtn2lmloViSeFeSdemv8gBSNf/E7ivE+fuAxxOMgR1GLCBt4eKClGBtnf9Bo6IayKiISIa6urqOhHG7pGbkmPjyo288bxLXphZ71UsKXwdWAu8Lmm2pFnAQmAd8NVOnHsJsCQink637yBJEm+2DAulP1fk9R+fd/w4YFknv0fJueSFmVWCYs8pbIuIr5L8or4Q+CywT0R8NSI6fLw3It4AFkvaP22aDPwFuAeYlrZNA2ak6/cAF6SzkI4G1rYMM5WDIWOGMPqQ0X5ewcx6tQ6fU4iITSRF8QCQNAX4ekRM6cT5vwTcLKkfSXXVi0gS0e2SLgYWAWelfe8DPgbMBTamfctKbkqOZ/7tGbZu2Eq/wf2yDsfMbLdrNylIOpHk/sFewH8DVwA3kYz9/6AzJ4+I54GGArsmF+gbwBc7c96s1E+t58kfPcnrj73Ovifvm3U4Zma7XbF7Cj8ieV5gBMn9gKeAX0bEERFxV3cEV272/vDeVPev9hCSmfVaxYaPIq+kxX9LaoyIq7ohprLVd2Bf9vmwS16YWe9VLCnUSPpk3rbytyv1aiE3NccDX3+AdUvXMXRsxdQFNLMKUWz46FHg1Lwlf/vjpQ+tPLWUvJj/gIeQzKz3afdKISLKbvZPORh9yGgGj0pKXhw67dCswzEz262KXSkgqVrSyLztfmmZiTmlD608qUrkpuSYd/88l7wws16nWOnsc4DVwAuSHk3fqzCfpJrp+d0UX1nKTcmxsXEjb77wZtahmJntVsVuNH8HOCIi5qbvNngSOCci7u6e0MpXayntmfPY89A9M47GzGz3KTZ8tDUi5gJExLPAAieExJC9hjDqoFF+XsHMep1iVwqjJF2Wt71H/nZE/Lh0YZW/3JQcf/qPP7Ft4zb6DuqbdThmZrtFsSuFa4EheUvb7YpWP7WeHVt28Prjr2cdipnZblNsSurl3RlIT7PPsftQ3S8peTHpo5OyDsfMbLcoNvvo9rz1H7bZN7OUQfUEfQf1Ze8P7e2SF2bWqxQbPsovA9q2THb3vfKsjOWm5ljx4grWL1+fdShmZrtFsaRQ7MksP7WFS16YWe9TLCkMknSYpCOAgen64S3b3RRfWdvz0D0ZNHIQ82c6KZhZ71BsSupyoGXa6Rt56y3bFe8dJS8ikJR1SGZmu6TY7KMTdvXkkhYC64EdwPaIaJA0HLgNmAAsBD4dEU1KfqNeRfJKzo3AhelDc2UtNyXHS7e8xIoXVzD6kNFZh2NmtkuKFsTbTU6IiEMjouW1nN8EHoyIfYEH021Iairtmy6XAFd3Q2y7LL/khZlZT9cdSaGt04Ab0/UbgdPz2m+KxFMkL/kZk0F8XTJ03FDqDqxzUjCzXqHUSSGAmZJmS7okbRsdEcsB0p+j0vaxwOK8Y5ekbe+Qlu6eJWlWY2NjCUPvvNyUHIseX8S2TduyDsXMbJd09D6FPpJOlfS1dPm4pGI3p9s6JiIOJxka+qKkY4t9XIG2d019jYhrIqIhIhrq6srjcYn6qfVs37ydRU8syjoUM7NdUuyJ5r2
Al4G/B/Yi+av9a8DL6b4ORcSy9OcK4G7gKODNlmGh9OeKtPsSYHze4eOAZV35MlnZ57h9qOpb5aqpZtbjFbtSuAK4OiKOj4hLI+IrEXEc8O/AP3d0YkmDJQ1pWQemAi8B9wDT0m7TgBnp+j3ABUocDaxtGWYqd/0G92PvY1zywsx6vmJDQUdHxIVtGyPip5Je7cS5RwN3p3P3+wDTI+L3kv4E3C7pYmARcFba/z6S6ahzSaak9qh3ROem5njo2w/x1ptvscfoPbIOx8xspxRLCpuK7NvY0YkjYj7wvgLtq4DJBdoD+GJH5y1X9VPqeejbDzH/gfkccv4hWYdjZrZTiiWFYZI+WaBdwNASxdNj7XnYngwcMZD5M50UzKznKpYUHgNOLbLP8lRVV5H7iEtemFnPVqzMxYXdGEevkJuS4+XbXqbx5UZGHTSq4wPMzMpMsSmp/5q3/uU2+24oYUw9lktemFlPV2xKav6DZtPa7POgeQHD9h7GyANGOimYWY9VLCmonXUrIjclx+uPvc72zduzDsXMrMuKJYUqSbWSRuStD09LX1d3U3w9Tv3UerZv2s6iP7jkhZn1PEWnpAKzefsqoezfbVAO9jluH6r6JCUvcpNzWYdjZtYl7V4pRMSEiMhFxMRCS3cG2ZP0H9Kf8R8c7/sKZtYjdbl0tqT9JV1bimB6i9zUHG889wYbVmzIOhQzsy4pNiX1EEkzJb0k6fuSRku6k+RtaX/pvhB7npapqfMfdNVUM+tZil0pXAtMBz4FNJLcU5gPTIqIn3RDbD3WmCPGMKB2APNnOimYWc9S7EZz/4i4IV1/VdJXgW9GxI7Sh9WztZa8mOmSF2bWsxS7Uhgg6TBJh0s6HHgLOCRv24rITcmxftl6Vs5ZmXUoZmadVuxKYTnw47ztN/K2AzixVEH1BvklL+oOLI/XhpqZdaRYQbwT2tsnqW9pwuk9aibUMGK/EcybOY+jv3J01uGYmXVKp6ekpq/JPFHSdSTvU7YO5KbkeP3R19m+xSUvzKxn6DApSHq/pKuA10neo/w4cECpA+sN6qfWs23jNhb/cXHWoZiZdUqx5xR+IOk14ArgReAwoDEiboyIps5+gKRqSc9J+k26PVHS05Jek3SbpH5pe/90e266f8KufLFyMOH4CahazL/fU1PNrGcodqVwCfAmcDXwq/TdyrETn/FlYE7e9g+Bn0TEvkATcHHafjHQFBGTgJ+k/Xq0/kP7M/4DLnlhZj1HsaSwJ/AD4BPAXEm/BAZKKjZj6R0kjQNOAa5Lt0Uya+mOtMuNwOnp+mnpNun+yeoFE/xzU3Msf3Y5G1duzDoUM7MOFSuItyMifhcRFwCTgBnAH4GlkqZ38vz/CnwdaE63RwBrIqLlzusSYGy6PhZYnH72dmBt2v8dJF0iaZakWY2NjZ0MIzv1U+ohXPLCzHqGTs0+iojNEXFHRHwK2Bf4n46OkfRxYEVEzM5vLnT6TuzLj+WaiGiIiIa6uvKf/79Xw14MqBngISQz6xHaHQqSdFmbpgBWAk9ExI0FDmnrGOATkj4GDACGklw51Ejqk14NjAOWpf2XAOOBJekQ1TBgdVe+TDmq6lPFxMkTmT9zvktemFnZK3alMKTNMhRoAH4n6ZyOThwR34qIcRExATgHeCgizgceBs5Mu00jGZaCZLpry7ugz0z778yN7bKTm5Jj3ZJ1rHp1VdahmJkVVeyJ5ssLtaev43wAuHUnP/MbwK2Svg88B1yftl8P/FLSXJIrhA4TT09RP/XtkhcjDxiZcTRmZu3r9EyiFhGxuquzgiLiEeCRdH0+cFSBPpuBs7oaT09QO7GW2vpa5s2cx/v/7v1Zh2Nm1q6defPaiSTPF1gX1E+tZ+EjC9mx1ZXHzax8FXui+UVJL7RZlgBXAv+7+0LsHeqn1rNtwzYWP+mSF2ZWvooNH328zXYAqyLCLx7eCRNOeLvkxYTjJmQdjplZQcUeXns9Il4neS7hNJInj9/XXYH
1NgOGDWDc+8f5eQUzK2vFho/GS3oW+C4wAZgI/LOk36fF6z7XTTH2GrmpOZbNWsbGVS55YWblqdiN5n8HfhoRx0XEZRFxaUQcB0wHnsT3FbqspeTFgocWZB2KmVlBxZLCARFxQ9vGiLgJGAWcVKqgequxR42l/9D+HkIys7JVLCkU3CepCtgUEStKE1Lv1bbkhZlZuSmWFO6VdK2kwS0N6fp/AveVPLJeKjclx9pFa1n9Wo8v62RmvVCxpPB1kvLVr0uaLWk2sBBYB3ytG2LrlfJLXpiZlZtiU1K3RcRXSSqXXghcBOwTEV+NiK3dFF+vM7x+ODUTa5wUzKwsFZuSeqSkPSNiU0S8CBwK3CLpp2lRPNtJ9VPrWfjwQnZsc8kLMysvxYaP/gvYCiDpWJLyFjeRDCldU/rQeq/6qfVsfWsrS55aknUoZmbvUCwpVEdEy93Qs4FrIuLOiPguyes5bSdNPHEiqpKHkMys7BRNCukb0AAmAw/l7etyyW1724CaAYw9aizz7/d7m82svBRLCrcAj0qaAWwCHgeQNIlkCMl2QW5qjmV/Wsampk1Zh2Jm1qrY7KMfAH8P3AB8KO/VmFXAl0ofWu9WP6WeaA6XvDCzslL0JTsR8VRE3J1fLjsi/hoRz3Z0YkkDJD0j6c+SXpZ0edo+UdLTkl6TdJukfml7/3R7brp/wq59tfI29v1j6Tekn+8rmFlZ6fKb17pgC3BiRLyPZDrrSZKOBn4I/CQi9iV5g9vFaf+LgaaImAT8JO3Xa1X3rWbiiS55YWblpWRJIRJvpZt90yWAE4E70vYbSd7TAMk7G25M1+8AJnf1XdA9TW5KjjUL19A0z283NbPyUOzhtaN39eSSqiU9D6wA7gfmAWsiYnvaZQkwNl0fCywGSPevBUYUOOclkmZJmtXY2LirIWbKJS/MrNwUu1L4j5YVSU/uzMkjYkdEHAqMA44C3lOoW8vHFNmXf85rIqIhIhrq6up2JqyyMXzScIbtM8xJwczKRrGkkP9LesCufEhErAEeAY4GavKefxgHLEvXl5DUWSLdPwzo1aVEJbnkhZmVlaLvU5BUK2lE3vrwlqWjE0uqk1STrg8EPgLMAR4Gzky7TQNmpOv3pNuk+x+KCrgDWz+1ni3rtrD0maVZh2JmVvTJ5GHAbN6+YsifhhpAroNzjwFulFRNknxuj4jfSPoLcKuk7wPPAden/a8HfilpLskVwjld+iY91MQTJ4KS+wp7H7N31uGYWYVrNylExIRdOXFEvAAcVqB9Psn9hbbtm4GzduUze6KBwwcy9sik5MUJl5+QdThmVuGK1jBKHyw7H3gvydXBX4DpEbGlG2KrGLmpOZ644gk2r9nMgJpdun1jZrZLik1JPZAkCRwPLCK5EXw88LKk93ZHcJWiteTFwy55YWbZKnal8G/AFyLi/vxGSR8BfgZ4rGM3GXf0OPrtkZS8eM8ZhWbtmpl1j2Kzj8a2TQgAEfEAsGfpQqo81f2qmXD8BObPdCltM8tWR1NS+7dtlDQAv09ht8tNzdE0v4nV83r1oxlmVuaKJYWbgDvzq5Wm67cDvyxlUJWopeSFX7xjZlkq9j6F7wO/Bx6TtFLSSuBR4P6I+KfuCrBSjNhvBEPHD3XJCzPLVNFhoIj4GfAzSUPS7fXdElUFail58Zc7/sK2TdvoO7Bv1iGZWQXqVOnsiFjvhFB6B59/MFvWbmHGRTOI5l5f4cPMylApX7JjXTTxhIlMvnIyL9/2Mg//48NZh2NmFciziMrMMV8/htWvrebx7z/O8EnDOXTaoVmHZGYVpN2kIOmTxQ6MiLt2fzgmiVOuPoU1C9Zw7+fvpWZCDROOm5B1WGZWIdRedWpJzcDz6QLvfL9CRMRnSxxbhxoaGmLWrFlZh1ESm5o28fMP/py33nyLzz31OUbs966X0JmZ7RRJsyOiodC+YvcUPgX8FTgEWAD8ICIuSpfME0JvN7B2IOf99jyqqquYfsp0Nq7amHV
IZlYBij2ncHdEnAMcR/Ju5R9JekLScd0WXYWrzdVyzoxzWLt4LbedcRvbt2zv+CAzs13QmdlHm4G1wDpgMLv4ak7rmvEfHM9pvziNRY8v4t7P30sFvIzOzDJU7EbzCcC5JC/EeQC4KiJ65wB+mTv43INZPXc1j/zDI4zYbwTHfufYrEMys16q2JTUB4EXgCeA/sAFki5o2RkRf1fi2CzPsd85ltWvrebh7z5MbX0tB597cNYhmVkvVCwpfJbkbWs7RdJ4kqJ6ewLNwDURcZWk4cBtwARgIfDpiGiSJOAq4GPARuDCiHi20LkrkSROvfZU1ixcw4yLZlCzTw3jPzg+67DMrJdpd0rqLp9YGgOMiYhn09pJs4HTgQuB1RFxpaRvArUR8Q1JHwO+RJIU3k8yXPX+Yp/Rm6ektmfjqo1cf/T1bF6zmc89/Tlqc7VZh2RmPcxOTUmVdK+ke/KWGZKul/Q3nfnQiFje8pd+WjdpDjAWOA24Me12I0miIG2/KRJPATVpYrE8g0YM4rzfnkfzjmamnzKdzWs2Zx2SmfUixWYf/Qvwo7zlx8BvgDMkXdmVD0nfw3AY8DQwOiKWQ5I4gFFpt7HA4rzDlqRtbc91iaRZkmY1NjZ2JYxeY8R+Izj77rNZPW81t595Ozu27cg6JDPrJYo9p/BogeVu4NPASZ39AEl7AHcCX4mIdcW6FgqjQFzXRERDRDTU1dV1NoxeZ8JxEzj12lNZ8OACfvuF33qqqpntFl0uiBcRO5J7wh2T1JckIdycVyvpTUljImJ5Ojy0Im1fAuTfOR0HLOtqfJXk0GmHJsXzfvA4I/YbwTFfPybrkMyshyt2T2F4gaVe0uXAyx2dOJ1NdD0wJyJ+nLfrHmBauj4NmJHXfoESRwNrW4aZrH0n/NMJvPfs9/LANx5gzl1zsg7HzHq4YlcKs0mGb1ouCwJYBTwCfKET5z4G+AzwoqSWonrfBq4Ebpd0MbAIOCvddx/JzKO5JFNSL+r0t6hgqhKn/eI01r6+lrv+5i4ufPRCxh75rlsxZmadUrIpqd2hEqektmfDig1c9/7r2LZpG59/5vMM23tY1iGZWZna2SmpR0raM2/7gnRa6k/TB9CsjNtwAhAAAA8QSURBVAweNZjzfnse2zdtZ/op09mybkvWIZlZD1RsSup/AVsBJB1LMuxzE0lxvGtKH5p1Vd2BdZx1x1k0zmnkjrPvoHl7c9YhmVkPUywpVEfE6nT9bJIyFXdGxHeBSaUPzXZG/ZR6Trn6FOb+fi6/+/LvPFXVzLqk2I3makl9ImI7MBm4pJPHWcaO+PwRrPrrKp78lycZsd8Ijv7y0VmHZGY9RLFf7rcAj0paCWwCHgeQNIlkCMnK2JQfTqFpbhP/c+n/UJurZf9T9886JDPrAYo90fwD4O+BG4APxdvjEFUkheusjKlKnPGrMxhz+BjuPPdOlj/nRz7MrGNF37wWEU+lr+XckNf2V5e07hn6De7Hufeey8Dagdxy6i2sW1qsyoiZWedex2k92JAxQzjvt+exZe0Wbjn1Fra+tTXrkMysjDkpVIDRh4zmzNvO5M0/v8ld599F8w5PVTWzwpwUKsS+H9uXk646iVfveZX7v3Z/1uGYWZny1NIKctT/OYpVr63iqZ88xfB9h3PkF47MOiQzKzNOChXmoz/+KE3zmvjdl35Hba6WSR/1c4hm9jYPH1WYquoqPnXLpxh10Ch+fdavWfHSio4PMrOK4aRQgfoP6c+5955Lvz36Mf2U6bz1xltZh2RmZcJJoUINGz+Mc+89l40rN3LrabeybeO2rEMyszLgpFDB9jpiLz558ydZ+qel3H3B3USzi+eZVTonhQp3wOkHMOX/TWHOnXN48P8+mHU4ZpaxkiUFST+XtELSS3ltwyXdL+m19Gdt2q705T1zJb0g6fBSxWXv9oHLPsARf3sEf7jyDzz38+eyDsfMMlTKK4UbgJPatH0TeDAi9gUeTLcBTgb2TZdLgKt
LGJe1IYmT/+1kclNy/OZvf8OChxZkHZKZZaRkSSEiHgNWt2k+DbgxXb8ROD2v/aZIPAXUSBpTqtjs3ar7VnPWr89ixH4juP1Tt7PylZVZh2RmGejuewqjI2I5QPpzVNo+Flic129J2vYuki6RNEvSrMbGxpIGW2kGDBvAeb89j+p+1Uw/ZTobGjd0fJCZ9SrlcqNZBdoKToWJiGsioiEiGurq6kocVuWpmVDDOTPOYf2y9dx2xm1s37w965DMrBt1d1J4s2VYKP3Z8jjtEmB8Xr9xwLJujs1S444ex+k3nc7iPyzmnovv8XuezSpIdyeFe4Bp6fo0YEZe+wXpLKSjgbUtw0yWjfee9V5OvOJEXpz+Io9e/mjW4ZhZNylZQTxJtwDHAyMlLQH+EbgSuF3SxcAi4Ky0+33Ax4C5wEbgolLFZZ33oW9+iNV/Xc2jlz/K8EnDOeRvDsk6JDMrMfXkoYGGhoaYNWtW1mH0aju27uBXH/0VCx9ZyKCRg6jN1VJbny65WobXD6e2vpYhY4agqkK3hsys3EiaHRENhfa5dLYVVd2vmrP/+2yeu/45Vr66kqZ5TSx5agkv3/byO8pi9BnQh5qJNQyvH05NrqY1WdTmaqmdWEufAf5Pzawn8P+p1qEBwwbwgcs+8I62Hdt2sPb1tTTNb2L1vNU0zWuiaX4TTfOaWPDwArZtyCuwJxg6dujbSaI+vcJI1wcOH4jkqwyzcuCkYDulum81wycNZ/ik4dRT/459EcHGxo2tyWL1vNWsmb+G1fNWM/d3c99Vqrv/sP7vGIrKTxxDxw2lqk+5zJw26/2cFGy3k8TgUYMZPGow4z8w/l37t27YypoFSZJoubpomtfEmy+8ySszXqF5W3Nr36o+VdRMqCl8lZGrpd8e/brzq5n1ek4K1u36De7HqINGMeqgUe/a17yjmXVL1rUmi/yrjKVPL2Xzms3v6D+obhB77LlHaxIqtvQd3NfDVGYdcFKwslJVXUXNPjXU7FPDxBMmvmv/pqZNrcmiaX4TaxasYcOKDWxYsYGlzyxlw4oNbF2/teC5+wzs06nkMahuEIPrBlPdr7rUX9es7DgpWI8ysHYgAxsGslfDXu322bZpGxsbN7Ymi0LL+mXreeP5N9iwYsM7hqvyDagZ8M5kMWpQu4lkYO1AT8m1XsFJwXqdvgP7MmzvYQzbe1iHfSOCLWu3tJs8WpLLyldWsuGxDWxctbFgVS5Vi8F177za6D+0P/2H9qffkH7J+pAi20P6+4a6lQUnBatokhhQM4ABNQMYsd+IDvs3b29m46r2r0I2rkj2NS1oYsu6LWxZt4UdW3Z0KpY+A/u0mzz6De1XPKm02a6qdoKxneOkYNYFVX2q2GP0Huwxeo9OH7Nj6w62rN/C1vVbk0SxPkkW7W1vXb+1tW3d4nXv6NPZBNN3UN92r0iqB1TTZ0CfZOmf/KzuX110u6M+VX2rfBO/l3BSMCux6n7VDBoxiEEjBu3yuXYqwaTtLQlm+5btbN+8nR1bdrB983aatxe+p9IlotMJ5B3bbfYV7FfsHG32VferdnLaRU4KZj3I7kwwLZp3NLcmiLYJo73tjvrs2LzjXX23rt/KxpUb2z1f7Ng9ddiq+1V3Ltm02dde/6q+VVT1abNUF2grsqhaHferriqLyQpOCmYVrqq6iqpBVfQd1DfTONpNToWSTmf7pcmpbf9CV0z5/bKiKnU6kRz3j8dx0NkH7fYYnBTMrCyUS3KKCJq3NbcmiubtzUWX2BEd9nnHsqMLfYucf+DwgSX5/k4KZmZ5JFHdr5rqftX0p3/W4XQ7z1szM7NWTgpmZtaqrJKCpJMkvSpprqRvZh2PmVmlKZukIKka+HfgZOBA4FxJB2YblZlZZSmbpAAcBcyNiPkRsRW4FTgt45jMzCpKOSWFscDivO0laZuZmXWTckoKhR7le9cjjpIukTRL0qzGxsZuCMvMrHKUU1JYAuS/u3EcsKxtp4i4JiIaIqK
hrq6u24IzM6sEitg99UZ2laQ+wF+BycBS4E/AeRHxcpFjGoHXd/IjRwIrd/LYUnJcXeO4uq5cY3NcXbMrce0TEQX/qi6bJ5ojYruk/wP8D1AN/LxYQkiP2elLBUmzIqJhZ48vFcfVNY6r68o1NsfVNaWKq2ySAkBE3Afcl3UcZmaVqpzuKZiZWcYqOSlck3UA7XBcXeO4uq5cY3NcXVOSuMrmRrOZmWWvkq8UzMysDScFMzNrVXFJQdLPJa2Q9FLWseSTNF7Sw5LmSHpZ0pezjglA0gBJz0j6cxrX5VnHlE9StaTnJP0m61haSFoo6UVJz0ualXU8LSTVSLpD0ivpf2cfKIOY9k//nVqWdZK+knVcAJIuTf+bf0nSLZIGZB0TgKQvpzG9XIp/q4q7pyDpWOAt4KaI2P0vON1JksYAYyLiWUlDgNnA6RHxl4zjEjA4It6S1Bd4AvhyRDyVZVwtJF0GNABDI+LjWccDSVIAGiKirB54knQj8HhEXCepHzAoItZkHVeLtFLyUuD9EbGzD6XurljGkvy3fmBEbJJ0O3BfRNyQcVwHkRQLPQrYCvwe+EJEvLa7PqPirhQi4jFgddZxtBURyyPi2XR9PTCHMigIGIm30s2+6VIWf0lIGgecAlyXdSzlTtJQ4FjgeoCI2FpOCSE1GZiXdULI0wcYmFZbGESBsjsZeA/wVERsjIjtwKPAGbvzAyouKfQEkiYAhwFPZxtJIh2ieR5YAdwfEWURF/CvwNeB5qwDaSOAmZJmS7ok62BSOaAR+EU63HadpMFZB9XGOcAtWQcBEBFLgX8BFgHLgbURMTPbqAB4CThW0ghJg4CP8c6acbvMSaHMSNoDuBP4SkSsyzoegIjYERGHkhQpPCq9hM2UpI8DKyJidtaxFHBMRBxO8sKoL6ZDllnrAxwOXB0RhwEbgLJ5u2E6nPUJ4NdZxwIgqZbkfS4Tgb2AwZL+JtuoICLmAD8E7icZOvozsH13foaTQhlJx+zvBG6OiLuyjqetdLjhEeCkjEMBOAb4RDp+fytwoqRfZRtSIiKWpT9XAHeTjP9mbQmwJO8q7w6SJFEuTgaejYg3sw4k9RFgQUQ0RsQ24C7ggxnHBEBEXB8Rh0fEsSRD4bvtfgI4KZSN9Ibu9cCciPhx1vG0kFQnqSZdH0jyP8sr2UYFEfGtiBgXERNIhh0eiojM/5KTNDidKEA6PDOV5JI/UxHxBrBY0v5p02Qg00kMbZxLmQwdpRYBR0salP6/OZnkPl/mJI1Kf+4NfJLd/O9WVgXxuoOkW4DjgZGSlgD/GBHXZxsVkPzl+xngxXT8HuDbaZHALI0BbkxnhlQBt0dE2Uz/LEOjgbuT3yP0AaZHxO+zDanVl4Cb06Ga+cBFGccDQDo2PgX426xjaRERT0u6A3iWZHjmOcqn3MWdkkYA24AvRkTT7jx5xU1JNTOz9nn4yMzMWjkpmJlZKycFMzNr5aRgZmatnBTMzKyVk4JVPElv5a1/TNJr6Rzw/D4XSgpJk/Pazkjbzky3H5H0al7FzzvanOPP6ZTo/LYbJC2V1D/dHpk+kIekKkk/TStivijpT5Im7vZ/ALM8Ffecgll70l/4/wZMjYhFBbq8SPKQ1YPp9jkkZQbynR8R7yqXLek9JH+EHStpcERsyNu9A/gscHWbw84mKbFwSEQ0pwUAN2BWQr5SMAMkfRi4FjglIua10+1xktpPfdMaVZOA59vp29Z5wC+BmSQ1fvL9K3BpWo0z3xhgeUQ0A0TEkt39oJJZW04KZtAfmEHy/opiJTwCeAD4KEmxtHsK9Lk5b/jo/+W1nw3cRlKS4Nw2xywiqd3/mTbttwOnpuf6kaTDOv2NzHaSk4JZUi7gj8DFneh7K8mwUXtlns+PiEPT5WsAko4EGtP3BDwIHJ5W4cx3BfA18v6fjIglwP7At0jKgz+Yf0/DrBScFMySX7ifBo6U9O1iHSPiGeAgYGRE/LWT5z8XOCC9gTwPGAp8qs1
555IMRX26TfuWiPhdmmCuAE7v5Gea7RTfaDYDImJj+o6GxyW92UGRxG8BmztzXklVwFkkN4uXpm0nAN/h3W+M+wHw27xjDwfeiIhl6XkOAV7o7Hcy2xlOCmapiFgt6STgMUkrI2JGO/1+V+Q0N0valK6vBL4PLG1JCKnHgAPT93Lnn/dlSc/y9nsORgHXtkxXBZ4Bfta1b2XWNa6SamZmrXxPwczMWjkpmJlZKycFMzNr5aRgZmatnBTMzKyVk4KZmbVyUjAzs1b/H8GsBl3Dx8XJAAAAAElFTkSuQmCC\n", 440 | "text/plain": [ 441 | "
" 442 | ] 443 | }, 444 | "metadata": { 445 | "needs_background": "light" 446 | }, 447 | "output_type": "display_data" 448 | } 449 | ], 450 | "source": [ 451 | "plt.xlabel('K MEANS')\n", 452 | "plt.ylabel('SUM OF SQUARED ERROR')\n", 453 | "plt.title('THE ELBOW ANALYSIS')\n", 454 | "plt.plot(k_rng,sse,color='purple')" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": {}, 460 | "source": [ 461 | "#### ***This shows that 3 is the optimum number of clusters to form in the iris dataset***" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 38, 467 | "metadata": {}, 468 | "outputs": [ 469 | { 470 | "data": { 471 | "text/plain": [ 472 | "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 473 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 474 | " 1, 1, 1, 1, 1, 1, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", 475 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", 476 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0,\n", 477 | " 0, 0, 0, 2, 2, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0,\n", 478 | " 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2])" 479 | ] 480 | }, 481 | "execution_count": 38, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [ 487 | "km = KMeans(n_clusters=3)\n", 488 | "y_predict = km.fit_predict(x)\n", 489 | "y_predict" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "metadata": {}, 495 | "source": [ 496 | "## ***Adding a cluster column to show which cluster each sample belongs to***" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 43, 502 | "metadata": {}, 503 | "outputs": [ 504 | { 505 | "data": { 506 | "text/html": [ 507 | "
\n", 508 | "\n", 521 | "\n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | "
SepalLengthCmSepalWidthCmPetalLengthCmPetalWidthCmcluster
05.13.51.40.21
14.93.01.40.21
24.73.21.30.21
34.63.11.50.21
45.03.61.40.21
..................
1456.73.05.22.30
1466.32.55.01.92
1476.53.05.22.00
1486.23.45.42.30
1495.93.05.11.82
\n", 623 | "

150 rows × 5 columns

\n", 624 | "
" 625 | ], 626 | "text/plain": [ 627 | " SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm cluster\n", 628 | "0 5.1 3.5 1.4 0.2 1\n", 629 | "1 4.9 3.0 1.4 0.2 1\n", 630 | "2 4.7 3.2 1.3 0.2 1\n", 631 | "3 4.6 3.1 1.5 0.2 1\n", 632 | "4 5.0 3.6 1.4 0.2 1\n", 633 | ".. ... ... ... ... ...\n", 634 | "145 6.7 3.0 5.2 2.3 0\n", 635 | "146 6.3 2.5 5.0 1.9 2\n", 636 | "147 6.5 3.0 5.2 2.0 0\n", 637 | "148 6.2 3.4 5.4 2.3 0\n", 638 | "149 5.9 3.0 5.1 1.8 2\n", 639 | "\n", 640 | "[150 rows x 5 columns]" 641 | ] 642 | }, 643 | "execution_count": 43, 644 | "metadata": {}, 645 | "output_type": "execute_result" 646 | } 647 | ], 648 | "source": [ 649 | "df['cluster']=y_predict\n", 650 | "df" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": 23, 656 | "metadata": {}, 657 | "outputs": [ 658 | { 659 | "data": { 660 | "text/plain": [ 661 | "array([1, 0, 2])" 662 | ] 663 | }, 664 | "execution_count": 23, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "df.cluster.unique()" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 32, 676 | "metadata": {}, 677 | "outputs": [], 678 | "source": [ 679 | "df1 = df[df.cluster==0]\n", 680 | "df2 = df[df.cluster==1]\n", 681 | "df3 = df[df.cluster==2]" 682 | ] 683 | }, 684 | { 685 | "cell_type": "markdown", 686 | "metadata": {}, 687 | "source": [ 688 | "## ***Plotting a scatter plot showing the cluster***" 689 | ] 690 | }, 691 | { 692 | "cell_type": "code", 693 | "execution_count": 55, 694 | "metadata": {}, 695 | "outputs": [ 696 | { 697 | "data": { 698 | "text/plain": [ 699 | "" 700 | ] 701 | }, 702 | "execution_count": 55, 703 | "metadata": {}, 704 | "output_type": "execute_result" 705 | }, 706 | { 707 | "data": { 708 | "image/png": "\n", 709 | "text/plain": [ 710 | "
" 711 | ] 712 | }, 713 | "metadata": { 714 | "needs_background": "light" 715 | }, 716 | "output_type": "display_data" 717 | } 718 | ], 719 | "source": [ 720 | "plt.title('K Means Clustering')\n", 721 | "plt.scatter(x[y_predict==0,0],x[y_predict==0,1],c='red',label='Iris-virginica')\n", 722 | "plt.scatter(x[y_predict==1,0],x[y_predict==1,1],c='green',label='Iris-setosa')\n", 723 | "plt.scatter(x[y_predict==2,0],x[y_predict==2,1],c='yellow',label='Iris-versicolor')\n", 724 | "plt.legend(loc='best')" 725 | ] 726 | }, 727 | { 728 | "cell_type": "markdown", 729 | "metadata": {}, 730 | "source": [ 731 | "# ***THANK YOU FOR WATCHING***" 732 | ] 733 | } 734 | ], 735 | "metadata": { 736 | "kernelspec": { 737 | "display_name": "Python 3", 738 | "language": "python", 739 | "name": "python3" 740 | }, 741 | "language_info": { 742 | "codemirror_mode": { 743 | "name": "ipython", 744 | "version": 3 745 | }, 746 | "file_extension": ".py", 747 | "mimetype": "text/x-python", 748 | "name": "python", 749 | "nbconvert_exporter": "python", 750 | "pygments_lexer": "ipython3", 751 | "version": "3.7.6" 752 | } 753 | }, 754 | "nbformat": 4, 755 | "nbformat_minor": 4 756 | } 757 | --------------------------------------------------------------------------------