├── Data Preprocessing
│   ├── Untitled.ipynb
│   └── sf.pdf
├── Decision Tree
│   ├── Untitled.ipynb
│   └── utf-8''iris(1).csv
├── K-NN
│   ├── Untitled.ipynb
│   └── utf-8''iris(1).csv
├── K-means
│   ├── CC GENERAL.csv
│   ├── K0means.ipynb
│   ├── creditcard.ipynb
│   └── utf-8''iris.csv
├── LICENSE
├── Linear Regression
│   ├── LinearRegressionMultipleVariables.ipynb
│   ├── LinearRegressionSingle Variables.ipynb
│   ├── ex1data1.txt
│   └── ex1data2.txt
├── Logistic Regression
│   └── Logistic
│       ├── Untitled.ipynb
│       ├── ex2data1.txt
│       └── ex2data2.txt
├── README.md
├── RandomForest
│   ├── RandomForest.ipynb
│   ├── Social_Network_Ads.csv
│   └── Untitled.ipynb
├── SVM
│   ├── Social_Network_Ads.csv
│   └── Untitled.ipynb
├── Sentiment Analysis
│   ├── Restaurant_Reviews.tsv
│   ├── moviereview.ipynb
│   └── restaurentreview.ipynb
├── TextAnalytics
│   └── textAnalytics.ipynb
├── TextClassification
│   ├── 20news-bydate_py3.pkz
│   └── Textclassification.ipynb
└── _config.yml
/Data Preprocessing/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "DATA PREPROCESSING" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import seaborn as sb" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 6, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "<div>
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | "
NameGenderSizeColor
0AlexFSmallBlue
1BenMLargeYellow
2CamMMediumRed
3DaveMSmallRed
4EliFMediumYellow
5FrankMLargeRed
6GraceFLargeBlue
7HenryMLargeYellow
8IrisFSmallYellow
9JackMSmallBlue
\n", 127 | "
" 128 | ], 129 | "text/plain": [ 130 | " Name Gender Size Color\n", 131 | "0 Alex F Small Blue\n", 132 | "1 Ben M Large Yellow\n", 133 | "2 Cam M Medium Red\n", 134 | "3 Dave M Small Red\n", 135 | "4 Eli F Medium Yellow\n", 136 | "5 Frank M Large Red\n", 137 | "6 Grace F Large Blue\n", 138 | "7 Henry M Large Yellow\n", 139 | "8 Iris F Small Yellow\n", 140 | "9 Jack M Small Blue" 141 | ] 142 | }, 143 | "execution_count": 6, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "#Our dataset\n", 150 | "shirt_order = pd.DataFrame({'Name':['Alex', 'Ben', 'Cam', 'Dave', 'Eli', 'Frank', 'Grace', 'Henry', 'Iris', 'Jack'],\n", 151 | " 'Gender':['F', 'M', 'M', 'M', 'F', 'M', 'F', 'M', 'F', 'M'],\n", 152 | " 'Size':['Small', 'Large', 'Medium', 'Small', 'Medium', 'Large', 'Large', 'Large', 'Small', 'Small'],\n", 153 | " 'Color':['Blue', 'Yellow', 'Red', 'Red', 'Yellow', 'Red', 'Blue', 'Yellow', 'Yellow', 'Blue']\n", 154 | " })\n", 155 | "#Visualize dataset\n", 156 | "shirt_order" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 8, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/html": [ 167 | "
\n", 168 | "\n", 181 | "\n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | "
NameGenderSizeColorGender Category
0AlexFSmallBlue0
1BenMLargeYellow1
2CamMMediumRed1
3DaveMSmallRed1
4EliFMediumYellow0
5FrankMLargeRed1
6GraceFLargeBlue0
7HenryMLargeYellow1
8IrisFSmallYellow0
9JackMSmallBlue1
\n", 275 | "
" 276 | ], 277 | "text/plain": [ 278 | " Name Gender Size Color Gender Category\n", 279 | "0 Alex F Small Blue 0\n", 280 | "1 Ben M Large Yellow 1\n", 281 | "2 Cam M Medium Red 1\n", 282 | "3 Dave M Small Red 1\n", 283 | "4 Eli F Medium Yellow 0\n", 284 | "5 Frank M Large Red 1\n", 285 | "6 Grace F Large Blue 0\n", 286 | "7 Henry M Large Yellow 1\n", 287 | "8 Iris F Small Yellow 0\n", 288 | "9 Jack M Small Blue 1" 289 | ] 290 | }, 291 | "execution_count": 8, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": [ 297 | "#LABEL ENCODING\n", 298 | "from sklearn.preprocessing import LabelEncoder\n", 299 | "#Create an object for Label Encoder\n", 300 | "LE=LabelEncoder()\n", 301 | "#Create a new column that will show you the encoded require column\n", 302 | "shirt_order['Gender Category']=LE.fit_transform(shirt_order.Gender)\n", 303 | "#Check\n", 304 | "shirt_order" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 10, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/plain": [ 315 | "array(['Small', 'Large', 'Medium'], dtype=object)" 316 | ] 317 | }, 318 | "execution_count": 10, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "shirt_order.Size.unique()\n" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 13, 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "data": { 334 | "text/html": [ 335 | "
\n", 336 | "\n", 349 | "\n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | "
NameGenderSizeColorGender CategorySize Category
0AlexFSmallBlue00
1BenMLargeYellow12
2CamMMediumRed11
3DaveMSmallRed10
4EliFMediumYellow01
5FrankMLargeRed12
6GraceFLargeBlue02
7HenryMLargeYellow12
8IrisFSmallYellow00
9JackMSmallBlue10
\n", 454 | "
" 455 | ], 456 | "text/plain": [ 457 | " Name Gender Size Color Gender Category Size Category\n", 458 | "0 Alex F Small Blue 0 0\n", 459 | "1 Ben M Large Yellow 1 2\n", 460 | "2 Cam M Medium Red 1 1\n", 461 | "3 Dave M Small Red 1 0\n", 462 | "4 Eli F Medium Yellow 0 1\n", 463 | "5 Frank M Large Red 1 2\n", 464 | "6 Grace F Large Blue 0 2\n", 465 | "7 Henry M Large Yellow 1 2\n", 466 | "8 Iris F Small Yellow 0 0\n", 467 | "9 Jack M Small Blue 1 0" 468 | ] 469 | }, 470 | "execution_count": 13, 471 | "metadata": {}, 472 | "output_type": "execute_result" 473 | } 474 | ], 475 | "source": [ 476 | "#ORDINAL ENCODING\n", 477 | "mapping_dict={'Small':0,'Medium':1,'Large':2}\n", 478 | "shirt_order['Size Category']=shirt_order.Size.map(mapping_dict)\n", 479 | "#check\n", 480 | "shirt_order\n" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": 18, 486 | "metadata": {}, 487 | "outputs": [ 488 | { 489 | "data": { 490 | "text/html": [ 491 | "
\n", 492 | "\n", 505 | "\n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | "
NameGenderSizeColorGender CategorySize CategoryColor_BlueColor_RedColor_Yellow
0AlexFSmallBlue00100
1BenMLargeYellow12001
2CamMMediumRed11010
3DaveMSmallRed10010
4EliFMediumYellow01001
5FrankMLargeRed12010
6GraceFLargeBlue02100
7HenryMLargeYellow12001
8IrisFSmallYellow00001
9JackMSmallBlue10100
\n", 643 | "
" 644 | ], 645 | "text/plain": [ 646 | " Name Gender Size Color Gender Category Size Category Color_Blue \\\n", 647 | "0 Alex F Small Blue 0 0 1 \n", 648 | "1 Ben M Large Yellow 1 2 0 \n", 649 | "2 Cam M Medium Red 1 1 0 \n", 650 | "3 Dave M Small Red 1 0 0 \n", 651 | "4 Eli F Medium Yellow 0 1 0 \n", 652 | "5 Frank M Large Red 1 2 0 \n", 653 | "6 Grace F Large Blue 0 2 1 \n", 654 | "7 Henry M Large Yellow 1 2 0 \n", 655 | "8 Iris F Small Yellow 0 0 0 \n", 656 | "9 Jack M Small Blue 1 0 1 \n", 657 | "\n", 658 | " Color_Red Color_Yellow \n", 659 | "0 0 0 \n", 660 | "1 0 1 \n", 661 | "2 1 0 \n", 662 | "3 1 0 \n", 663 | "4 0 1 \n", 664 | "5 1 0 \n", 665 | "6 0 0 \n", 666 | "7 0 1 \n", 667 | "8 0 1 \n", 668 | "9 0 0 " 669 | ] 670 | }, 671 | "execution_count": 18, 672 | "metadata": {}, 673 | "output_type": "execute_result" 674 | } 675 | ], 676 | "source": [ 677 | "#ONE HOT ENCODING\n", 678 | "#duplicate color column for keeping the orginal values\n", 679 | "shirt_order['color_category']=shirt_order.Color\n", 680 | "shirt_order_onehotencoding=pd.get_dummies(shirt_order,columns=[\"color_category\"],prefix=[\"Color\"])\n", 681 | "#check\n", 682 | "shirt_order_onehotencoding\n" 683 | ] 684 | }, 685 | { 686 | "cell_type": "markdown", 687 | "metadata": {}, 688 | "source": [] 689 | } 690 | ], 691 | "metadata": { 692 | "kernelspec": { 693 | "display_name": "Python 3", 694 | "language": "python", 695 | "name": "python3" 696 | }, 697 | "language_info": { 698 | "codemirror_mode": { 699 | "name": "ipython", 700 | "version": 3 701 | }, 702 | "file_extension": ".py", 703 | "mimetype": "text/x-python", 704 | "name": "python", 705 | "nbconvert_exporter": "python", 706 | "pygments_lexer": "ipython3", 707 | "version": "3.7.1" 708 | } 709 | }, 710 | "nbformat": 4, 711 | "nbformat_minor": 2 712 | } 713 | -------------------------------------------------------------------------------- /Data Preprocessing/sf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suubh/Machine-Learning-in-Python/154596a9509dc7a066ae3caf5526b6f663a359cc/Data Preprocessing/sf.pdf -------------------------------------------------------------------------------- /Decision Tree/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 38, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import seaborn as sb" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 3, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/html": [ 23 | "
\n", 24 | "\n", 37 | "\n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n", 91 | "
" 92 | ], 93 | "text/plain": [ 94 | " sepal_length sepal_width petal_length petal_width species\n", 95 | "0 5.1 3.5 1.4 0.2 setosa\n", 96 | "1 4.9 3.0 1.4 0.2 setosa\n", 97 | "2 4.7 3.2 1.3 0.2 setosa\n", 98 | "3 4.6 3.1 1.5 0.2 setosa\n", 99 | "4 5.0 3.6 1.4 0.2 setosa" 100 | ] 101 | }, 102 | "execution_count": 3, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "df=pd.read_csv(\"utf-8''iris(1).csv\")\n", 109 | "df.head()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 4, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "array(['setosa', 'versicolor', 'virginica'], dtype=object)" 121 | ] 122 | }, 123 | "execution_count": 4, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "df.species.unique()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 6, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/html": [ 140 | "
\n", 141 | "\n", 154 | "\n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciesspecies_cat
145.84.01.20.2setosa0
985.12.53.01.1versicolor1
756.63.04.41.4versicolor1
165.43.91.30.4setosa0
1317.93.86.42.0virginica2
\n", 214 | "
" 215 | ], 216 | "text/plain": [ 217 | " sepal_length sepal_width petal_length petal_width species \\\n", 218 | "14 5.8 4.0 1.2 0.2 setosa \n", 219 | "98 5.1 2.5 3.0 1.1 versicolor \n", 220 | "75 6.6 3.0 4.4 1.4 versicolor \n", 221 | "16 5.4 3.9 1.3 0.4 setosa \n", 222 | "131 7.9 3.8 6.4 2.0 virginica \n", 223 | "\n", 224 | " species_cat \n", 225 | "14 0 \n", 226 | "98 1 \n", 227 | "75 1 \n", 228 | "16 0 \n", 229 | "131 2 " 230 | ] 231 | }, 232 | "execution_count": 6, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "from sklearn.preprocessing import LabelEncoder\n", 239 | "lr=LabelEncoder()\n", 240 | "df['species_cat']=lr.fit_transform(df.species)\n", 241 | "df.sample(5,random_state=1)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 16, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "14 0\n", 253 | "98 1\n", 254 | "75 1\n", 255 | "16 0\n", 256 | "131 2\n", 257 | "Name: species_cat, dtype: int32" 258 | ] 259 | }, 260 | "execution_count": 16, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 | "source": [ 266 | "X=df[['sepal_length','sepal_width','petal_length','petal_width']]\n", 267 | "y=df['species_cat']\n", 268 | "#X.sample(5,random_state=1)\n", 269 | "y.sample(5,random_state=1)\n" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 14, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "(150, 4)\n", 282 | "(150,)\n" 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "print(X.shape)\n", 288 | "print(y.shape)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 23, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "data": { 298 | "text/plain": [ 299 | "2 50\n", 300 | "1 50\n", 301 | "0 50\n", 302 | "Name: species_cat, dtype: int64" 303 | ] 304 | }, 305 | "execution_count": 23, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "df['species_cat'].value_counts()" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 27, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "data": { 321 | "text/plain": [ 322 | "( sepal_length sepal_width petal_length petal_width\n", 323 | " 77 6.7 3.0 5.0 1.7\n", 324 | " 29 4.7 3.2 1.6 0.2\n", 325 | " 92 5.8 2.6 4.0 1.2\n", 326 | " 23 5.1 3.3 1.7 0.5\n", 327 | " 128 6.4 2.8 5.6 2.1, 77 1\n", 328 | " 29 0\n", 329 | " 92 1\n", 330 | " 23 0\n", 331 | " 128 2\n", 332 | " Name: species_cat, dtype: int32)" 333 | ] 334 | }, 335 | "execution_count": 27, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "from sklearn.model_selection import train_test_split\n", 342 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.4,random_state=23)\n", 343 | "X_train.shape,X_test.shape\n", 344 | "X_train.head(),y_train.head()" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 28, 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "1 32\n", 356 | "2 29\n", 357 | "0 29\n", 358 | "Name: species_cat, dtype: int64" 359 | ] 360 | }, 361 | "execution_count": 28, 362 | "metadata": {}, 363 | "output_type": "execute_result" 364 | } 365 | ], 366 | "source": [ 367 | "y_train.value_counts()" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 22, 373 | "metadata": {}, 374 | 
"outputs": [ 375 | { 376 | "name": "stdout", 377 | "output_type": "stream", 378 | "text": [ 379 | "Decision Tree prediction accuracy = 96.7%\n" 380 | ] 381 | } 382 | ], 383 | "source": [ 384 | "from sklearn.tree import DecisionTreeClassifier\n", 385 | "model = DecisionTreeClassifier(random_state=23)\n", 386 | "model = model.fit(X_train, y_train)\n", 387 | "score = 100.0 * model.score(X_test, y_test)\n", 388 | "print(f\"Decision Tree prediction accuracy = {score:4.1f}%\")" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 30, 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "data": { 398 | "text/plain": [ 399 | "array([2, 2, 1, 0, 2, 1, 0, 2, 0, 1, 1, 0, 2, 0, 0, 1, 1, 1, 2, 0, 2, 0,\n", 400 | " 0, 0, 2, 0, 0, 2, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 1, 2, 2, 0, 1, 0,\n", 401 | " 1, 2, 1, 2, 0, 1, 2, 2, 0, 2, 1, 1, 2, 1, 0, 2])" 402 | ] 403 | }, 404 | "execution_count": 30, 405 | "metadata": {}, 406 | "output_type": "execute_result" 407 | } 408 | ], 409 | "source": [ 410 | "result=model.predict(X_test)\n", 411 | "result" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "CLASSIFICATION REPORT AND CONFUSION MATRIX" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": 34, 424 | "metadata": {}, 425 | "outputs": [ 426 | { 427 | "name": "stdout", 428 | "output_type": "stream", 429 | "text": [ 430 | " precision recall f1-score support\n", 431 | "\n", 432 | " Setosa 1.00 1.00 1.00 21\n", 433 | " Versicolor 0.90 1.00 0.95 18\n", 434 | " Virginica 1.00 0.90 0.95 21\n", 435 | "\n", 436 | " micro avg 0.97 0.97 0.97 60\n", 437 | " macro avg 0.97 0.97 0.97 60\n", 438 | "weighted avg 0.97 0.97 0.97 60\n", 439 | "\n", 440 | "[[21 0 0]\n", 441 | " [ 0 18 0]\n", 442 | " [ 0 2 19]]\n" 443 | ] 444 | } 445 | ], 446 | "source": [ 447 | "from sklearn.metrics import classification_report,confusion_matrix\n", 448 | "result=model.predict(X_test)\n", 449 | "Labels=['Setosa','Versicolor','Virginica']\n", 450 | "print(classification_report(y_test,result,target_names=Labels))\n", 451 | "print(confusion_matrix(y_test,result))" 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "metadata": {}, 457 | "source": [ 458 | "MAKING PLOT" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 39, 464 | "metadata": {}, 465 | "outputs": [ 466 | { 467 | "ename": "ModuleNotFoundError", 468 | "evalue": "No module named 'helper_code'", 469 | "output_type": "error", 470 | "traceback": [ 471 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 472 | "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", 473 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mhelper_code\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmlplots\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mml\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconfusion\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_test\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mLabels\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Decsion Tree Classifiaction'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 474 | "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'helper_code'" 475 | ] 476 | } 477 | ], 478 | "source": [ 479 | "from helper_code 
import mlplots as ml\n", 480 | "ml.confusion(y_test,result,Labels,'Decsion Tree Classifiaction')" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": null, 486 | "metadata": {}, 487 | "outputs": [], 488 | "source": [] 489 | } 490 | ], 491 | "metadata": { 492 | "kernelspec": { 493 | "display_name": "Python 3", 494 | "language": "python", 495 | "name": "python3" 496 | }, 497 | "language_info": { 498 | "codemirror_mode": { 499 | "name": "ipython", 500 | "version": 3 501 | }, 502 | "file_extension": ".py", 503 | "mimetype": "text/x-python", 504 | "name": "python", 505 | "nbconvert_exporter": "python", 506 | "pygments_lexer": "ipython3", 507 | "version": "3.7.1" 508 | } 509 | }, 510 | "nbformat": 4, 511 | "nbformat_minor": 2 512 | } 513 | -------------------------------------------------------------------------------- /Decision Tree/utf-8''iris(1).csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.2,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.6,1.4,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 
6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /K-NN/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "KNN ALGORITHM ON IRIS DATASET" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import seaborn as sb" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n", 98 | "
" 99 | ], 100 | "text/plain": [ 101 | " sepal_length sepal_width petal_length petal_width species\n", 102 | "0 5.1 3.5 1.4 0.2 setosa\n", 103 | "1 4.9 3.0 1.4 0.2 setosa\n", 104 | "2 4.7 3.2 1.3 0.2 setosa\n", 105 | "3 4.6 3.1 1.5 0.2 setosa\n", 106 | "4 5.0 3.6 1.4 0.2 setosa" 107 | ] 108 | }, 109 | "execution_count": 2, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "df=pd.read_csv(\"utf-8''iris(1).csv\")\n", 116 | "df.head()" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 3, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "array(['setosa', 'versicolor', 'virginica'], dtype=object)" 128 | ] 129 | }, 130 | "execution_count": 3, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "df.species.unique()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 4, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/html": [ 147 | "
\n", 148 | "\n", 161 | "\n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | "
sepal_lengthsepal_widthpetal_lengthpetal_widthspeciesspecies_cat
05.13.51.40.2setosa0
14.93.01.40.2setosa0
24.73.21.30.2setosa0
34.63.11.50.2setosa0
45.03.61.40.2setosa0
\n", 221 | "
" 222 | ], 223 | "text/plain": [ 224 | " sepal_length sepal_width petal_length petal_width species species_cat\n", 225 | "0 5.1 3.5 1.4 0.2 setosa 0\n", 226 | "1 4.9 3.0 1.4 0.2 setosa 0\n", 227 | "2 4.7 3.2 1.3 0.2 setosa 0\n", 228 | "3 4.6 3.1 1.5 0.2 setosa 0\n", 229 | "4 5.0 3.6 1.4 0.2 setosa 0" 230 | ] 231 | }, 232 | "execution_count": 4, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "from sklearn.preprocessing import LabelEncoder\n", 239 | "df['species_cat']=LabelEncoder().fit_transform(df.species)\n", 240 | "df.head()" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 5, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "versicolor 50\n", 252 | "setosa 50\n", 253 | "virginica 50\n", 254 | "Name: species, dtype: int64" 255 | ] 256 | }, 257 | "execution_count": 5, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "df.species.value_counts()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 7, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | " sepal_length sepal_width petal_length petal_width\n", 276 | "0 5.1 3.5 1.4 0.2\n", 277 | "1 4.9 3.0 1.4 0.2\n", 278 | "2 4.7 3.2 1.3 0.2\n", 279 | "3 4.6 3.1 1.5 0.2\n", 280 | "4 5.0 3.6 1.4 0.2\n", 281 | "0 0\n", 282 | "1 0\n", 283 | "2 0\n", 284 | "3 0\n", 285 | "4 0\n", 286 | "Name: species_cat, dtype: int32\n" 287 | ] 288 | } 289 | ], 290 | "source": [ 291 | "X=df[['sepal_length','sepal_width','petal_length','petal_width']]\n", 292 | "y=df['species_cat']\n", 293 | "print(X.head())\n", 294 | "print(y.head())" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 8, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "from sklearn.model_selection import train_test_split\n", 304 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.4,random_state=23)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 12, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "name": "stdout", 314 | "output_type": "stream", 315 | "text": [ 316 | "The acurrracy of the algorithm = 98.33333333333333\n" 317 | ] 318 | } 319 | ], 320 | "source": [ 321 | "from sklearn.neighbors import KNeighborsClassifier\n", 322 | "model=KNeighborsClassifier()\n", 323 | "model.fit(X_train,y_train)\n", 324 | "score=100*model.score(X_test,y_test)\n", 325 | "print(\"The acurrracy of the algorithm = {}\".format(score))" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "CLASSIFICATION REPORT " 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 14, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "name": "stdout", 342 | "output_type": "stream", 343 | "text": [ 344 | " precision recall f1-score support\n", 345 | "\n", 346 | " Setosa 1.00 1.00 1.00 21\n", 347 | " Versicolor 0.95 1.00 0.97 18\n", 348 | " Virginica 1.00 0.95 0.98 21\n", 349 | "\n", 350 | " micro avg 0.98 0.98 0.98 60\n", 351 | " macro avg 0.98 0.98 0.98 60\n", 352 | "weighted avg 0.98 0.98 0.98 60\n", 353 | "\n" 354 | ] 355 | }, 356 | { 357 | "data": { 358 | "text/plain": [ 359 | "array([[21, 0, 0],\n", 360 | " [ 0, 18, 0],\n", 361 | " [ 0, 1, 20]], dtype=int64)" 362 | ] 363 | }, 364 | "execution_count": 14, 365 | "metadata": {}, 366 | "output_type": "execute_result" 367 | } 368 | ], 369 | 
"source": [ 370 | "from sklearn.metrics import classification_report,confusion_matrix\n", 371 | "labels=['Setosa','Versicolor','Virginica']\n", 372 | "result=model.predict(X_test)\n", 373 | "print(classification_report(y_test,result,target_names=labels))\n", 374 | "confusion_matrix(y_test,result)" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 15, 380 | "metadata": {}, 381 | "outputs": [ 382 | { 383 | "ename": "ModuleNotFoundError", 384 | "evalue": "No module named 'helper_code'", 385 | "output_type": "error", 386 | "traceback": [ 387 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 388 | "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", 389 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mhelper_code\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmlplots\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mml\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m# Call confusion matrix plotting routine\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconfusion\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpredicted\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'KNN Classification'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 390 | "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'helper_code'" 391 | ] 392 | } 393 | ], 394 | "source": [ 395 | "from helper_code import mlplots as ml\n", 396 | "\n", 397 | "# Call confusion matrix plotting routine\n", 398 | "ml.confusion(l_test, predicted, labels, 'KNN Classification')" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [] 407 | } 408 | ], 409 | "metadata": { 410 | "kernelspec": { 411 | "display_name": "Python 3", 412 | "language": "python", 413 | "name": "python3" 414 | }, 415 | "language_info": { 416 | "codemirror_mode": { 417 | "name": "ipython", 418 | "version": 3 419 | }, 420 | "file_extension": ".py", 421 | "mimetype": "text/x-python", 422 | "name": "python", 423 | "nbconvert_exporter": "python", 424 | "pygments_lexer": "ipython3", 425 | "version": "3.7.1" 426 | } 427 | }, 428 | "nbformat": 4, 429 | "nbformat_minor": 2 430 | } 431 | -------------------------------------------------------------------------------- /K-NN/utf-8''iris(1).csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 
5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.2,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.6,1.4,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 
6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /K-means/utf-8''iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.2,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.6,1.4,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 
5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 suubh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Linear Regression/LinearRegressionMultipleVariables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Linear Resgression Multiple variables" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Import the Libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 17, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import pandas as pd\n", 25 | "import matplotlib.pyplot as plt\n", 26 | "import seaborn as sb" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Load the Dataset" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 18, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "dataset=pd.read_csv('ex1data2.txt')\n", 43 | "X=dataset.iloc[:,:-1].values\n", 44 | "y=dataset.iloc[:,-1].values" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 19, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "[[1600 3]\n", 57 | " [2400 3]\n", 58 | " [1416 2]\n", 59 | " [3000 4]\n", 60 | " [1985 4]\n", 61 | " [1534 3]\n", 62 | " [1427 3]\n", 63 | " [1380 3]\n", 64 | " [1494 3]\n", 65 | " [1940 4]\n", 66 | " [2000 3]\n", 67 | " [1890 3]\n", 68 | " [4478 5]\n", 69 | " [1268 3]\n", 70 | " [2300 4]\n", 71 | " [1320 2]\n", 72 | " [1236 3]\n", 73 | " [2609 4]\n", 74 | " [3031 4]\n", 75 | " [1767 3]\n", 76 | " [1888 2]\n", 77 | " [1604 3]\n", 78 | " [1962 4]\n", 79 | " [3890 3]\n", 80 | " [1100 3]\n", 81 | " [1458 3]\n", 82 | " [2526 3]\n", 83 | " [2200 3]\n", 84 | " [2637 3]\n", 85 | " [1839 2]\n", 86 | " [1000 1]\n", 87 | " [2040 4]\n", 88 | " [3137 3]\n", 89 | " [1811 4]\n", 90 | " [1437 3]\n", 91 | " [1239 3]\n", 92 | " [2132 4]\n", 93 | " [4215 4]\n", 94 | " [2162 4]\n", 95 | " [1664 2]\n", 96 | " [2238 3]\n", 97 | " [2567 4]\n", 98 | " [1200 3]\n", 99 | " [ 852 2]\n", 100 | " [1852 4]\n", 101 | " [1203 3]]\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "print(X)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 20, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "[329900 369000 232000 539900 299900 314900 198999 212000 242500 239999\n", 119 | " 347000 329999 699900 259900 449900 299900 199900 499998 599000 252900\n", 120 | " 255000 242900 259900 573900 249900 464500 469000 475000 299900 349900\n", 121 | " 169900 314900 579900 285900 249900 229900 345000 549000 287000 368500\n", 122 | " 329900 314000 299000 179900 299900 239500]\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "print(y)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "Splitting the Dataset" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 21, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "from sklearn.model_selection import train_test_split\n", 144 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 22, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "from sklearn.linear_model import LinearRegression\n", 154 | 
"model=LinearRegression(normalize=True)\n", 155 | "model.fit(X_train,y_train)\n", 156 | "result=model.predict(X_test)\n" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 23, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "[266611.31683851 324085.31639041 438749.980843 327443.40737608\n", 174 | " 345745.46093557 335473.6249505 338350.81825691 329685.02578672\n", 175 | " 345505.01209594 511751.95879232 231965.49347897 353286.10757014\n", 176 | " 228942.29621667 276591.14491179]\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "print(result)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 3", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.7.1" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 2 213 | } 214 | -------------------------------------------------------------------------------- /Linear Regression/LinearRegressionSingle Variables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Import the Libraries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "\n" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "Load the Dataset" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "dataset=pd.read_csv('ex1data1.txt')\n", 36 | "X=dataset.iloc[:,:-1].values\n", 37 | "y=dataset.iloc[:,-1].values" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "Split the dataset" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "from sklearn.model_selection import train_test_split\n", 54 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "Train Linear Regression using Skicit Library" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", 73 | " normalize=False)" 74 | ] 75 | }, 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "from sklearn.linear_model import LinearRegression\n", 83 | "model=LinearRegression()\n", 84 | "model.fit(X_train,y_train)\n" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Predict " 92 | ] 93 | }, 94 | { 
95 | "cell_type": "code", 96 | "execution_count": 9, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "array([ 8.88953046, 6.00803956, 4.21995151, 3.35897649, 19.97236945,\n", 103 | " 3.4552309 , 2.00775355, 1.98129835, 3.45901021, 8.03800372,\n", 104 | " 3.42818518, 1.92425432, 3.04245892, 2.61079038, 3.60935606,\n", 105 | " 3.73123894, 3.42027224, 18.32364361, 5.64652453, 1.91858535,\n", 106 | " 5.19017234, 13.5558025 , 2.8695553 , 3.03749857, 1.93700951,\n", 107 | " 2.49374974, 6.07996464, 2.3700953 , 2.07660793])" 108 | ] 109 | }, 110 | "execution_count": 9, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "result=model.predict(X_test)\n", 117 | "result" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "Plot the Training set" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 10, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X2cHFWd7/HPLw8o4TkhYHjIDCrrinu5AeYiT+4irCzhxYO4KuAgUVgi6+WuLK7Impe7qDeroguyuopRWCIZJYgogUVW5AICCjgJzxsQxEwIxJBATAJBQjK/+8epJj09Vd3V013d1V3f9+vVr+k+Xd39m0mnfnWqzvkdc3dERKS4xrU7ABERaS8lAhGRglMiEBEpOCUCEZGCUyIQESk4JQIRkYJTIpDCMLPxZvaSmU1v5rYinU6JQHIr2hGXbsNm9krZ4/5638/dt7j79u6+vJnb1svM/q+ZvWZmG6LbE2b2b2b2pjre424z+0izY5NiUiKQ3Ip2xNu7+/bAcuCEsraByu3NbELroxyzAXffAZgC/DWwNzBoZru3NywpIiUC6VjRkfVCM/uBmW0ATjezQ83sXjP7g5mtjI60J0bbTzAzN7Pe6PGC6PmfRkfmvzKzferdNnp+ppn9xszWmdnXzeyeNEfs7r7J3R8FPgD8Afj76P2mmNnNZrbazNaa2Y1mtmf03JeBQ4HLo97R16L2b5jZCjNbb2a/NrPDmvKHlq6nRCCd7mTg+8BOwEJgM/AJYFfgcOBY4GNVXv8h4LPAZEKv4wv1bmtmuwHXAp+KPvd3wMH1/BLuvhlYBLwrahoHfAeYDvQArwGXRdt+GvgVcE7UOzoves19wP5RfNcBPzSzN9QThxSTEoF0urvd/UZ3H3b3V9z91+5+n7tvdvengXnAX1R5/XXuPujurwEDwIwxbHs88KC73xA9dymwZgy/y3OEnTjuvtrdfxz9TuuBf6nxe+DuV7v7i1FSuRjYEXjrGOKQgumkc6oicZ4pf2Bmfwr8K3AQMInwHb+vyut/X3Z/I7D9GLbdozwOd3czW1Ez8tH2BF4EMLPtCD2AY4Cdo+d3qPZiM7sAOBOYBjiwHaGHIlKVegTS6SrL534beBR4q7vvCPwTYBnHsBLYq/TAzIywU0/NzMYDJwB3RU0XAPsAB0e/x1EVLxnxe5vZu4HzCReedwZ2AV4i+99duoASgXSbHYB1wMtm9naqXx9olpuAA83shGjk0ieAqWleaGYTzWw/4BrCaaGvRU/tQOh1rDWzKYSEVm4V8OayxzsQro+sASYCFxF6BCI1KRFIt/kkMAvYQOgdLMz6A919FXAKcAnwAvAW4AHg1Sov649GOq0FbiDs2PvcvXT66RLCBfAXgF8CP614/deA06LRUZcANwM/B54ElgHrCT0VkZpMC9OINFd0muc54P3uflet7UXaTT0CkSYws2PNbKdouOZnCadp7m9zWCKpKBGINMcRwNOEc/THAu9192qnhkRyQ6eGREQKTj0CEZGC64gJZbvuuqv39va2OwwRkY6yePHiNe5ecyhzZonAzPYGvge8CRgG5rn7ZWZ2EXA2sDra9DPufnO19+rt7WVwcDCrUEVEupKZDaXZLssewWbgk+6+xMx2ABab2a3Rc5e6+1cz/GwREUkps0Tg7iuJJrS4+wYzW0qd0+5FRCR7LblYHNV0P4Ctxb/ONbOHzexKM9sl4TWzzWzQzAZXr14dt4mIiDRB5onAzLYHfgScF5XT/RZhCv4MQo/hX+Ne5+7z3L3P3fumTk1VtkVERMYg00QQrQz1I8KyfNdDqMsSrQc7TFh4o64FPEREpLkySwRRKd4rgKXufklZ+7SyzU4mlAwWEZGSgQHo7YVx48LPgVFLdDdVlqOGDgc+DDxiZg9GbZ8hVEycQainvozWlAkWEekMAwMwezZs3BgeDw2FxwD9/Zl8ZEeUmOjr63PNIxCRQujtDTv/Sj09sGxZXW9lZovdva/WdioxISKSJ8uX19feBEoEIiJ5Mn16fe1NoEQgIpInc+fCpEkj2yZNCu0ZUSIQEcmT/n6YNy9cEzALP+fNy+xCMXRI9VERkULp7890x19JPQIRkYJTIhARKTglAhGRglMiEBEpOCUCEZGCUyIQESk4JQIRkYJTIhARKTglAhGRglMiEBHJmccfD9Ul9tgDNm3K/vOUCERESlq8MlilRx4JCeDtbw+PV66ECS0oBKREICICW1cGGxoC960rg7UgGSxeHBLA/vtvbbv++hDGuBbspZUIREQA5szZujxkycaNoT0jv/pVSAB9ZWuI/ed/hgRw8smZfewoqj4qIgItXRnszjvhyCNHtv3853D00U3/qFTUIxARgZasDHbrraEHUJ4EfvGL0ANoVxIAJQIRkSDDlcFuvDEkgGOO2dp2330hAbzrXQ2/fcOUCEREIJOVwa67LrzViSdubVuyJCSAgw9uQsxNomsEIiIlTVoZ7Ljj4Kc
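The cell above predicts profit for the held-out test cities, and the following cell plots the training data together with the fitted regression line (scatter of population vs. profit with the line `model.predict(X_train)` overlaid). As a rough sketch of how the fitted line could also be inspected numerically rather than only visually, assuming the single-feature setup already built above:

```python
# Inspect the fitted line y = intercept + slope * population (hypothetical follow-up cell).
slope = model.coef_[0]
intercept = model.intercept_
print(f"Profit ≈ {intercept:.3f} + {slope:.3f} * population (both in units of 10,000)")

# Predict profit for a hypothetical city of 70,000 people (population feature is in 10,000s).
print(model.predict([[7.0]]))
```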
/Hdn26KPwjnc0/NaZUCIQEWmSQw+Fe+8d2fbEE/Anf9KeeNLSqSERkQbtt184BVSeBH7+83AKKO9JANQjEBEZsze9CVatGtn2y1+GnkEnUSIQEanTNtvAa6+NbFu8GA48sD3xNEqJQEQkhaRyD489Fk4NdbLMrhGY2d5mdruZLTWzx8zsE1H7ZDO71cyejH7uklUMIiKNGh4O5/8rk8BTT4Xk0OlJALK9WLwZ+KS7vx04BPjfZrYfcCFwm7vvC9wWPRYRyZUtW0ICGD9+ZPszz4QE8Ja3tCeuLGSWCNx9pbsvie5vAJYCewInAfOjzeYD780qBhGRem3aFBJAZfnnVatCAthrr/bElaWWDB81s17gAOA+YHd3XwkhWQC7JbxmtpkNmtng6tWrWxGmiBTYhg0hAbzhDSPbX3ghJIDdYvdU3SHzRGBm2wM/As5z9/VpX+fu89y9z937pk6dml2AIlJoL74YEsCOO45sX78+JIDJk9sTVytlmgjMbCIhCQy4+/VR8yozmxY9Pw14PssYRETi/P73IQFMmTKyfd26kAB22KE9cbVDlqOGDLgCWOrul5Q9tQiYFd2fBdyQVQwiIpWWLQsJYNq0ke0bN4YEUNkzKIIs5xEcDnwYeMTMHozaPgN8CbjWzM4ClgMfyDAGEREAli6NH+q5aRNMnNj6ePIks0Tg7ncDlvB0G5dgEJEieeCB+Bm/mzePHhpaVCo6JyJd6e67wymgyiQwPBxOASkJbKVEICJd5ZZbQgKoXPmrlAAs6TxFgSkRiEhXKK0GNnPmyHZ3JYBalAhEpKP9x3+EnfwHKoadlBKA1KZEICId6bLLQgI488yR7UoA9VMZahHpKB/8IPzwh6PbtfMfO/UIRKQjHHVU6AFUJoFc9gAGBqC3N9Su7u0Nj3NMPQIRybX99guTwSrlbudfMjAAs2eHqcoAQ0PhMUB/f/viqkI9AhHJpcmTQw+gMgnksgdQbs6crUmgZOPG0J5T6hGISK4kDfPM9c6/3PLl9bXngHoEIpILZqOTwG67dUAPoNL06fW154ASgYi0VVwC2H//sPNftao9MTVk7lyYNGlk26RJoT2nlAhEpC3iEsB73hMSwEMPtSempujvh3nzoKcn/II9PeFxTi8UgxKBiLRYXAL40IdCAvjZz1K8QScMzezvDwsfDA+HnzlOAqCLxSLSInEXgT/xCfja1+p4kw4cmtkJ1CMQkUzF9QA+//nQA6grCUBHDs3sBOoRiEgm4noAX/86nHtuA2/agUMzO4F6BCLSNKVyz5VJ4Oqrw3MNJQHoyKGZnUCJQEQaNjwcdv7jKvYoixaFBHD66U36oA4cmtkJlAhEZMw2bw4JoHLZx9tvDwnghBOa/IEdODSzE+gagYjU7Y9/hG23Hd0+OAgHHZTxh/f3a8ffZEoEIpLahg2w446j25cuhT/909bHI81RMxGYmQEHA3sCDjwH3O/eUdU/RKQBa9bA1Kmj25ctC2dnpLNVTQRmdgzwTeBJ4NmoeS/grWb2cXdPMw9QRDrUs8/CXnuNbl+1KhSEk+5Q62LxZcBfuvtMd/+b6HYs8J7oORGpphPKIcR48slwLbYyCaxdGy4CKwl0l1qnhiYAK2LanwUmNj8ckS7SgeUQHnoIZswY3f7yy6NHbUr3qJUIrgR+bWbXAM9EbXsDpwJXZBmYSMerVg4hZ4ngnnvgiCNGt7/6KmyzTevjkdaqmgjc/YtmdgNwInAoYIQeQr+7/3cL4hPpXB1QDuGWW2DmzNHtmzePnhsg3avmqKFoh//fZjY5PPS1ad7YzK4Ejgeed/c/i9ouAs4GVkebfcbdbx5L4CK5N316OB0U195mCxfCqaeObi/NEJZiqXqx2Mymm9k1ZvY8cB9wv5k9H7X11njvq4BjY9ovdfcZ0U1JQLpXDsshfPvbYUdfmQRKy0EqCRRTrVFDC4EfA9PcfV933xeYBvwEuKbaC939F8CLTYlSpB0aHfGTo3IIF18cQjjnnJHtHbcesGTCqs0LM7Mno51/Xc+VbdML3FRxaugjwHpgEPhk0qkmM5sNzAaYPn36QUNxXWyRrFSO+IFwNN9hdW0uuAC+8pXR7dr5F4OZLXb3vprb1UgE1xCO6uczctTQLGBXd/9gjSB6GZkIdgfWEGYof4HQ0zizVpB9fX0+ODhYazOR5untjT+/39MTptPm3FlnwZVXjm5XAiiWtImg1sXiM4CzgM8RSkyURg0tYgzDR919VVmA3wFuqvc9RFqiA0b8xDnxRLjxxtHtSgBSTa3ho5uAb0W3hpnZNHdfGT08GXi0Ge8r0nQ5HvET553vhPvvH92uBCBp1Ko1NIHQI3gvI4vO3QBc4e6vVXntD4AjgV3NbAXwz8CRZjYjep9lwMca/xVEMjB3bvw1gpwtgJJ0BksJQOpR69TQ1cAfCKeGSqUm9iJcI1gAnJL0Qnc/LaZZs5GlM5QuCM+ZE04HTZ8ekkBOLhS/4Q2wadPodiUAGYtaieBAd39bRdsK4F4z+01GMYnkQw4XQIkb5//GN8Irr7Q+FuketeYRrDWzD5jZ69uZ2TgzOwVINcNYWqxDq11KdXELwvf2hh6AkoA0qlYiOBV4P7DKzH4T9QJ+D7wvek7ypDT2fWgo7CFK1S6VDJLlPHHGJYBDDgn/vL/7XXtiku5TdR7BiA3NpkTbr8k2pNE0jyClDh/73nI5njQWdwropJPgJz9pfSzSudLOI6jVI3idu79QSgJm1mdmezYSoGSgQ8e+t021MtFtEtcDOOus0ANQEpCspE4EFf4PcJOZLWxmMNKgpDHuOR373nY5SpxxCeDTnw4J4LvfbXk4UjBjSgTuPsvdDwD+psnxSCNyWO0y13KQOOMSwMUXhwTwpS+1LAwpuJqJwMx2MrNTzOx8M/v76P7OAO6+IfsQJbUcVbvsCG1MnHEJYN68kAA+9anMP15khFrrEZwBLCHMEJ4EbAe8G1gcPSd5098fLgwPD4efSgLJWpw4S/X+KxPAtdeG584+O5OPFampVvXRJ4B3uvsfKtp3Ae5z9z/JOD5Ao4aks23ZAhNipm7ecgv81V+1Ph4pjmZVHzVCXaBKw9FzIpLg1VfDrN9K99wDhx3W+nhEktRKBHOBJWb2M7auRzAdeA9hPQERqbBhA+y44+j2hx6C/fdvfTwitVS9RuDu84E+4E7gVWATcAfQ5+5XZR2ctEjOZ9d2ijVrwvn/yiTw8MPhGoCSgORVrR4B0VKSVdcnlg5WObu2VJYCdKE5pWeeiR9x+vTTsM8+rY9HpF5jnVCGmT3SzECkQWM9qs/h7NpO8cQToQdQmQRWrgw9ACUB6RS1FqZ5X9JTwJuaH46MSSNH9TmaXdspFi+GvphxGGvXws47tz4ekUbVOjW0EBggfuRQzHgIaYtqR/W1EkGHLcnYTjfeGNYErvTyy6PnpYl0klqJ4GHgq+4+am1hM/vLbEKSujVyVN8hSzK201VXwUc/Orp90yaYOLHl4Yg0Xa1rBOcB6xOeO7nJschYNVIzR2UpEl14YfiTVCaBLVvCNQAlAekWVXsE7n
5Xlec01TcvGj2qz+GSjO10xhlw9dWj24eH49cJEOl0aYrO7WZm20X3tzWzOWb2JTObln14koqO6pviqKPCn68yCbhvrRMk0o1qziMgzCH4CPAy8DlgKvA48H1CATrJAx3Vj9mECeF0T6WUi/eJdLxaw0dnAW8BjjQzA04BLgZeAnqiCqQPuvvDmUcq0mRJR/hKAFI0tXoEdwCvAEuBnYBVwI2EeQTnRs+vyy48keZTAhAZqdbF4iEzuwy4CZgInOHuy81sOrDG3TXrSDqGEoBIvDS1hr5lZlcDw+5eGpbyAnBappGJNIkSgEh1aS4W4+4vVTx+OZtwRJpHCUAknTEXnavFzK40s+fN7NGytslmdquZPRn93CWrz5fiilsOErYOAx1FZbil4DJLBMBVwLEVbRcCt7n7vsBt0WORpqg7AcDWgn1DQ2GjUsE+JQMpkMwSgbv/AnixovkkYH50fz7w3qw+X4pjTAmgRGW4RdIlAjN7X3Q6Z52ZrTezDWaWVIOomt3dfSVA9HO3Kp8528wGzWxw9erVY/go6XYNJYASleEWSd0juBg40d13cvcd3X0Hd49ZlbV53H2eu/e5e9/UqVOz/CjpME1JACWNFOwT6RJpE8Eqd1/ahM9bVapRFP18vgnvKQXR1ARQMnfu6MUEVIZbCiZtIhg0s4Vmdlp0muh9VVYvq2YRMCu6Pwu4YQzvIQWTSQIoUcE+kdSJYEdgI3AMcEJ0O77aC8zsB8CvgLeZ2QozOwv4EvAeM3sSeE/0uH00bDDXMk0A5fr7YdmyUGd62bL4JKDvinSxtBPKYtZnqvmapJnHR9f7XploZJ1fyYx72NdW2m03WLWq9fEA+q5I1zOvcmhlZhe4+8Vm9nVi1i1297/LMriSvr4+Hxxs8jo4vb3xa/X29ISjQmmpLVtCOehKhx8Od9/d+nhG0HdFOpSZLXb3vlrb1eoRlC4Qd99qZBo2mAuvvBK/8PsZZ8D8+aPb20LfFelytaqP3hj9zMt/yeaZPj3+KE/DBlvixRdhypTR7Z/9LHz+862Ppyp9V6TLZVliIt80bLAtli8PF4Ark8Dll4frA7lLAqDvinS94iYCDRtsqYcf3vpnLrdoUUgAH/tYe+JKRd8V6XJVLxa/vpHZ4e5+T622rGRysVha4vbbw6Lwle65Bw47rPXxiBRJ2ovFaXsEX0/ZJgLAtdeGg+fKJLB0aegBjDkJaDy/SNPVWrz+UOAwYKqZnV/21I7A+CwDk8506aVw/vmj2597DqZNa/DNNZ5fJBO1egTbANsTEsYOZbf1wPuzDU06yfnnhx5AZRJYty70ABpOAqCS0SIZqTV89E7gTjO7yt1jxs9J0Z11Flx55ej2V1+FbbZp8odpPL9IJqr2CMzsa9Hdb5jZospbC+KTvKg4N3/0O36P2egksGVL6AE0PQmASkaLZKTWzOLvRT+/mnUgkmNl5+Z7+R1DQ72jNmnJgvBz5468RgAazy/SBLWuEXwl+nmcu99Zecs6uLbpxJEpWcY8Zw628WUMZ4jeEU81vRJoNRrPL5KJWj2CaWb2F8CJZnYNMKIosLsvySyydunEkSkZxhzKQC8b1e6UakQPN/T+devvz++/g0iHqlV99P3AWcARjC485+4eM1Wo+Vo6oawTK01mEHPcOgAQJYAmvL+IZK8pE8rc/Tp3nwlc7O7vrri1JAm0XDtHpoz19E4jMVd8ZuJiMJO2G5kE4s7Nd+IpNREBd091A04kXDT+KnB82tc143bQQQd5y/T0lE57j7z19GT7uQsWuE+aNPIzJ00K7VnFXPaZcS8Pnb6ybXt63M3Cz/K4FixwnzJl9IvTxi8imQAGPc3+PdVG8EXgNuDM6HYr8MU0r23GraWJoJEdciMaSUBjjbmnp3YCGMtntzKBikiitIkgbdG5h4EZ7j4cPR4PPODu+ze9ixKj5UXnBgbCbNXly8MY9blzs79AOW5c/PAbs7CWbi11xlz1GkDaz4Tk6xPlH5T2vUSkqZq1Qlm5nYEXo/s7jSmqTtGOkSmNLn6SMuZUF4HrmaBV6zqEJnuJ5F7a6qNfBB4ws6vMbD6wGPiX7MIqoIwXP2noInA11Xb0muwl0hFqJgIzM+Bu4BDg+uh2qLtfk3FsxZLRZKnEBFCaCNboZ8YlMAhLkGmyl0hHSHuNYLG7H9SCeGJpYZr6JZ4CymIWcDuuqYhITc1emOZeM/tfDcbUWkUZ0552HkCWpSD6+8PEsuHh8FNJQKSjpL1Y/G7gHDNbBrxMKDXhrRo1VLdOLBMxFmW/p+EQc605s52/iHSNtD2CmcCbgaOAE4Djo5/51IoFTMbS42h2L6WsGFylhnoAee9N5T0+kU5TbZIB8EbgPOAbwMeACWkmJzT7VveEMrP4yU1m9b1PkgUL3LfZJnkCVdxEriZPVEucCNbo79muCXVp5T0+kRyhGTOLgYXAgigJ/AS4LM2bNvtWdyLIukxEXDmFWjumJsS0ZUuNBFDtPauViGhynJnKe3wiOdKsRPBI2f0JwJI0b1rzQ0Nd40eAB9MEWnciyPqosVoSSNoxpe2lxOyw//jHKgkgze9Zz98j695Uo/Ien0iONCsRLKn2eKy3KBHsmnb7MdUaSnsEPBZpEkHljinpSHb8+K2xVeyw1zA59iVvfnOdv2c9R9F5P+LOe3wiOdKsRLAFWB/dNgCby+6vT/MBCe+bfSLIUq1TQ3E7pmrF2UpH59FO7rfsE7vZiSeOMd56jqLzfg4+7/GJ5EhTEkFWN+B3wBJCqYrZCdvMJiyGMzh9+vSM/kyRensPCxa4T5yYnASSdkwLFoQeQELi+CWHxj51AV9u7Per9yg6y95UM+Q9PpGcyHsi2CP6uRvwEPDn1bbPtEcw1iPM8p3RlCnhlmbHFHN0/n1Ojd1Pf5uzt+6wG9n56ShapJBynQhGBAAXAf9QbZtME0GrzzmXfd5c/jH2oxdx/NYHZu5HH934jlxH0SKFkzYRpKo11Exmth0wzt03RPdvBT7v7rckvSazWkMDA3D66UmBZlNHf2CAv/3IRi7ffPaop+7lnbyT++Njift30prBIlJFFusRNMvuwI9DUVMmAN+vlgQyUyrPkCSDOvrvfjfcccfoEhdPsw/7sCz5hUnJemgozKxVkTcRaUDaEhNN4+5Pu/v/jG7vcPf2FKyPK0NR0qw6+lEphKm2GjO4446RT6/ddg8cq54EainVUWq0zILKNogUVssTQW5UW1mrGXX0Bwaw0/uxoWWsYeqIp159Fbynl51fWZn+/ZLqSkPjdZRKvaOhodD7aFZyEZGOUNxEkHTqp6cnfRJIOIo2Azt99HsMY3hPL9tsQ+0lHstNmgTnnBNiS1LP+1VqRZE+EcmtYiaCgQF46aXR7fWcEoo5irbT++PXAsBwLCwIWdph13MNYt48+OY3w4XhpGTQyDWNpCTSSHIRkY5RvERQ2oG/8MLI9nHjth4FpzklUnYUbdGuvlIpAYwwfXpyIopT2UPJYm3jpCSihedFCqF4iSDpInFpqOjQEHz4w/Dxj1d/n+XLkxOAgy8YiN9hH
3dcfCLafnuYOHH09pU7+CzWNs4iuYhI50gz2aDdt6ZOKEuquxN3+9u/HfnaaFJW0uajJqLFTeKqNoGtnZO+NOFMpOuQ1wllY9HUCWW9veGoPw0zuPrqcLQdjQKK8/rpn0mTah+djxsXPy8gqwlsIlJYzV68vnvEnQZJ4h6Wg0wYBeQYPn5CfadodD5eRHKmHTOL26u0o54zp2bPIHFB+PILwMPD9R3Jz537+oLzr9P5eBFpo+L1CCAkgypDMeseBVTvZzf7Yq+ISAOKmQhKKk4TjWkU0FiO5EuJaHg4/FQSEJE2KnYiiI7Oq/YAJm0Xxv3rSF5EulTxRg2VSSrfM+r0j8o9i0gH0qihOFFtILP4JOA2bnQSAJVaEJGuVpxEUFYNtFJpVlfihd9x41SJU0S6ViESwb77jp4HcAR3hWsAPb2hoVr9ny1bVJZZRLpWVyeCFSvCKaCnntradi0fwDHu4s9Dw9BQciG6cirLLCJdqqsnlC1evPX+knF9HDC8ePRG48dXX62snK4ViEgX6upEcNJJZWV9LCYJQDjtk3YHrzIQItKFuvrU0AhJC7r09KTbwasMhIh0qeIkgmo192sVopsypWnrGGuBeBHJm64+NTRCebG55ctDL+C447Y+njwZtt02XDAePz6cMurpCUmiGbOHSxekS9ciSgvEl8cmItIGxZ1ZXLljhnTrCYxV0joImrUsIhnRzOJa4kYKZTlEVAvEi0hOFTMRDAwkr0WQ1Y5ZC9KISE4VLxGUTgklGTcum4u5WiBeRHKqeImg1uSxLVvC5IPSxdw0ySDNaCCVsRaRnCpeIqjn1E+aawalHsbQUO0EksWCNBqSKiINaksiMLNjzewJM3vKzC5s6YfXe06+VuJo9UXncvUkIRGRBC1PBGY2Hvh3YCawH3Came3XsgBqTR6rVCtxtHM0UDuTkIh0jXb0CA4GnnL3p919E3ANcFLLPr10rj6NNBdz2zkaSENSRaQJ2pEI9gSeKXu8Imobwcxmm9mgmQ2uXr26uRH09yfXHho/vr6Lue0cDaQhqSLSBO1IBHErBY+a3uzu89y9z937pk6d2vwoknbg8+fXdzG3naOBNCRVRJqgHYlgBbB32eO9gOdaHkUzd+BZjAZK+7kakioiDWp5rSEzmwD8BjgaeBb4NfAhd38s6TUN1xoaGBhZbK5ZheRERHIsba2hllcfdffNZnYu8F/AeODKakmgYar6KSJSVfdXH1XVTxEpKFUfLUkaSplUdE5EpGC6PxEkDaU00wxcERGKkAjmzg07/UrumoErIkIREkF/f9jpx9EMXBGRAiQCSJ5FrBm4IiIFSQSagSsikqgYiUAzcEVEErV8Qlnb9Pdrxy8iEqMYPQIREUmkRCAiUnBKBCIiBde9iUCLuouIpNKdF4tVcVREJLXu7BFoUXcRkdS6MxFoUXcRkdS6MxFoUXcRkdS6MxGopISISGrdmQhUUkJEJLXuHDUEKikhIpJSd/YIREQkNSUCEZEDeIvkAAAIqUlEQVSCUyIQESk4JQIRkYJTIhARKTjzpIXdc8TMVgNDY3z5rsCaJoaTNcWbvU6LWfFmq9PihfQx97j71FobdUQiaISZDbp7X7vjSEvxZq/TYla82eq0eKH5MevUkIhIwSkRiIgUXBESwbx2B1AnxZu9TotZ8War0+KFJsfc9dcIRESkuiL0CEREpAolAhGRguuaRGBmy8zsETN70MwGY543M/s3M3vKzB42swPbEWcUy9uiOEu39WZ2XsU2R5rZurJt/qnFMV5pZs+b2aNlbZPN7FYzezL6uUvCa2dF2zxpZrPaHPNXzOzx6N/8x2a2c8Jrq35/WhjvRWb2bNm/+3EJrz3WzJ6Ivs8XtjHehWWxLjOzBxNe246/795mdruZLTWzx8zsE1F7Lr/HVeLN/jvs7l1xA5YBu1Z5/jjgp4ABhwD3tTvmKK7xwO8JEz/K248EbmpjXH8OHAg8WtZ2MXBhdP9C4Msxr5sMPB393CW6v0sbYz4GmBDd/3JczGm+Py2M9yLgH1J8Z34LvBnYBngI2K8d8VY8/6/AP+Xo7zsNODC6vwPwG2C/vH6Pq8Sb+Xe4a3oEKZwEfM+De4GdzWxau4MCjgZ+6+5jnTmdCXf/BfBiRfNJwPzo/nzgvTEv/SvgVnd/0d3XArcCx2YWaJm4mN39Z+6+OXp4L7BXK2JJI+FvnMbBwFPu/rS7bwKuIfzbZKpavGZmwAeBH2QdR1ruvtLdl0T3NwBLgT3J6fc4Kd5WfIe7KRE48DMzW2xms2Oe3xN4puzxiqit3U4l+T/PoWb2kJn91Mze0cqgEuzu7ishfGmB3WK2yevfGeBMQq8wTq3vTyudG50GuDLhtEUe/8bvAla5+5MJz7f172tmvcABwH10wPe4It5ymXyHu2mFssPd/Tkz2w241cwej45gSizmNW0dO2tm2wAnAv8Y8/QSwumil6LzxD8B9m1lfGOUu78zgJnNATYDAwmb1Pr+tMq3gC8Q/mZfIJxuObNimzz+jU+jem+gbX9fM9se+BFwnruvD52X2i+LaWvJ37gy3rL2zL7DXdMjcPfnop/PAz8mdJ/LrQD2Lnu8F/Bca6JLNBNY4u6rKp9w9/Xu/lJ0/2Zgopnt2uoAK6wqnU6Lfj4fs03u/s7Rhb7jgX6PTqZWSvH9aQl3X+XuW9x9GPhOQhy5+hub2QTgfcDCpG3a9fc1s4mEneqAu18fNef2e5wQb+bf4a5IBGa2nZntULpPuLjyaMVmi4AzLDgEWFfqHrZR4lGUmb0pOu+KmR1M+Ld6oYWxxVkElEZPzAJuiNnmv4BjzGyX6LTGMVFbW5jZscCngRPdfWPCNmm+Py1Rcd3q5IQ4fg3sa2b7RL3KUwn/Nu3yl8Dj7r4i7sl2/X2j/z9XAEvd/ZKyp3L5PU6KtyXf4SyvgrfqRhg98VB0ewyYE7WfA5wT3Tfg3wmjLR4B+toc8yTCjn2nsrbyeM+NfpeHCBeIDmtxfD8AVgKvEY6OzgKmALcBT0Y/J0fb9gHfLXvtmcBT0e2jbY75KcK53gej2+XRtnsAN1f7/rQp3quj7+fDhB3WtMp4o8fHEUaV/Lad8UbtV5W+t2Xb5uHvewThdM7DZf/+x+X1e1wl3sy/wyoxISJScF1xakhERMZOiUBEpOCUCERECk6JQESk4JQIREQKTolA2sLMtkRVEh81sx+a2aQmv/9HzOwbNbY50swOK3t8jpmd0YTP3sPMrqvzNedaqCTq5RMHo3kvNavmWkI10miuwX1RBc2F0bwDzOwN0eOnoud7x/bbSjdQIpB2ecXdZ7j7nwGbCHMoWu1I4PVE4O6Xu/v3Gn1Td3/O3d9f58vuIUzMqiw+OJNQWmRfYDahBMUIZjaeMEdmJqFa5Wlmtl/09JeBS919X2AtYa4C0c+17v5W4NJoOykoJQLJg7uAtwKY2flRL+FRi9ZoMLNeC/XY50dHxdeVehAWarDvGt3vM7M7Kt/czE6IjnofMLOfm9nu0RHwOcDfRz2Td1lYC+AfotfMMLN7bWsN+F2i9jvM7Mtm
dr+Z/cbM3hXzeb0W1eyPeibXm9kt0VH5xXF/AHd/wN2XxTyVpmpubDXSaKbqUUCpd1JeabO8Aud1wNGlmexSPEoE0lYW6tTMBB4xs4OAjwLvJKwZcbaZHRBt+jZgnrvvD6wHPl7Hx9wNHOLuBxB2khdEO93LCUfLM9z9rorXfA/4dPR5jwD/XPbcBHc/GDivoj3JDOAU4H8Ap5jZ3jW2L5emCmbSNlOAP/jWEsblr339NdHz66LtpYCUCKRdtrWwmtUgsJxQY+UI4Mfu/rKHgnvXE8obAzzj7vdE9xdE26a1F/BfZvYI8CmgaklvM9sJ2Nnd74ya5hMWZSkpFQNbDPSm+Pzb3H2du/8R+G+gp47Y01TBTNqm2mvzWMFU2qSbylBLZ3nF3WeUN9Q4NVG5kyo93szWA5o3Jrz268Al7r7IzI4krALWiFejn1tI93/o1bL7aV9TkqYKZtI2awinkiZER/3lry29ZkXUK9uJsS2SI11APQLJk18A7zWzSVEFxZMJ1w8AppvZodH90wineyAsz3dQdP+vE953J+DZ6H752rMbCEsCjuDu64C1Zef/PwzcWbldiyRWzTWzx6NtYquReigkdjtQunBdXmmzvALn+4H/5yo8VlhKBJIbHpbpuwq4n7Ay03fd/YHo6aXALDN7mLCObGn0zOeAy8zsLsLRdpyLgB9G26wpa78ROLl0sbjiNbOAr0SfNwP4fCO/Wy1m9ndmtoJw1P6wmX03eupmwnq5TxHWJ/h4tP2uRKd3oqP9cwllkpcC17r7Y9HrPw2cb2ZPEa4BXBG1XwFMidrPJ6zdKwWl6qOSe9EIn5uioaYCmNnxwJvd/d/aHYt0Pl0jEOlA7n5Tu2OQ7qEegYhIwekagYhIwSkRiIgUnBKBiEjBKRGIiBScEoGISMH9f8PpdrCFcwwVAAAAAElFTkSuQmCC\n", 135 | "text/plain": [ 136 | "
" 137 | ] 138 | }, 139 | "metadata": { 140 | "needs_background": "light" 141 | }, 142 | "output_type": "display_data" 143 | } 144 | ], 145 | "source": [ 146 | "plt.scatter(X_train,y_train,color='red')\n", 147 | "plt.plot(X_train,model.predict(X_train),color='blue')\n", 148 | "plt.xlabel('Population in 10,000')\n", 149 | "plt.ylabel('Profit in $10,000')\n", 150 | "plt.title('Training Data')\n", 151 | "plt.show()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 12, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xu0XHV99/H3h4T7HRIhQkJEaJWnIMgpDxfBeKNAC0LVgk8eibeH0ooFsQrKkqos2gooorVSFBagUQIUSsrCC6ImSgU9XAwB1ERMIFxy4ZIQMJDkfJ8/fnvInDN7zsyZM3uun9das2b2b/ae/c2cyf7u32X/tiICMzPrX5u1OwAzM2svJwIzsz7nRGBm1uecCMzM+pwTgZlZn3MiMDPrc04EZmZ9zonAeoaktWWPIUl/LFueOY7PvUvS/x3l/ddJirJ9PSVprqS3jGEfp0v6UaMxmo2HE4H1jIjYrvQAHgWOLyubXfDuN5bt+yBgPnCrpFMK3q/ZuDkRWN+QNEHSZyQ9ImmVpNmSdsre21bSdZKekfScpLsl7Szpi8CfA9/Mzva/WGs/EfFkRFwC/Atwcdn+z5f0B0nPS1oo6S+z8oOALwMzSjWKrPwkSb+WtEbSUkmfbv63YuZEYP3lE8DRwJuAPYH1wKXZex8GJgJ7AJOAM4CXI+LjwK+AD2dn/B8fw/5uAvaU9Jps+bfA4cCOwBeA6yRNioj7gLOAn2b72D1bfw3wf4CdgJOAf5R0TAP/brNRORFYP/lb4NyIeCIi1gGfA06WJFJSmAy8NiI2RMSvIuKFce7viex5F4CImJPVFoYi4lvA48DB1TaOiDsi4sFs/XuB64E3jzMmswpOBNYXsoP9VOC2rOnnOeA+0v+BXYErgXnAjZKWSfpnSRPGuds9sudnshg+JGlB2f73IdU+qsV8hKR5klZKWg28f7T1zRrlRGB9IdI0u48Db42IncoeW0XEqoh4KSLOj4jXAUcB7wFKHb2NTtF7ErAsIv4g6U+ArwKnAbtExE7AYkCj7ON6YA4wNSJ2BK4uW9+saZwIrJ9cDvyrpKkAkl4l6fjs9dsl7SdpM1Lb/AZgY7bdcmDvenciaXdJHwM+BZybFW8HDAErgc0knU6qEZQsB6ZK2jz7DGXbPB0R6yQdTkpOZk3nRGD95CLgR8CPJT0P/A/wxuy9PYBbgOeBhcBtpDNySB3Kp0p6VtJFVT57Qjbi5wXg18DbgHeWhq1mbfyXA4PAk8Brstcl3weWACskLctqMKcDl2SxfhK4YZz/frNc8o1pzMz6m2sEZmZ9zonAzKzPORGYmfU5JwIzsz43sd0B1GPSpEkxffr0dodhZtZV7rnnnlURMbnWel2RCKZPn87g4GDtFc3M7BWSltaznpuGzMz6nBOBmVmfcyIwM+tzTgRmZn3OicDMrM85EZhZl5kNTCcdvqZnyzYeXTF81MwsmU26pcOL2fLSbBlgZlsi6gWuEZhZFzmPTUmg5MWs3BrlRGBmXeTRMZZbPZwIzKyLTBtjudXDicDMusiFwDYjyrbJyq1RTgRm1kVmAlcAewHKnq+gFzuKb78dpkyBNWuK35dHDZlZl5lJLx74S377W3jd6zYtP/ggHHZYsft0IjAz6wDPPAN77w2rV28qGxyEgw8uft9uGjIza6P162HGDNh1101J4IYbIKI1SQCcCMzM2iICzjoLttgC5s1LZZ//fCp/97tbG4ubhszMWuzKK+HDH960/K53wfXXw2ZtOjV3IjAza5H58+HNb960/NrXwv33w3bbtS8mcCIwMyvcI4+kg365pUthWodcB+c+AjOzCs2Z4XT1anj1q4cngV/8IvUDdEoSANcIzMxGGP8Mpy+9BFttNbzs29+GmR16+YNrBGZmw4xvhlNpeBL49KdTDaBTkwC4RmBmNkJjM5wedxx873vDy9atgy23bE5URXKNwMxsmLHNcHrppakWUJ4EHn001QK6IQmAE4GZ2Qj1zXD685+nBHD22ZvKbr89JYCpU4uOsbmcCMzMhhl9htOnnkoJ4MgjN21xwQUpAbz97S0PtincR2BmVqFyhtMNG2DzzYevdfjhcOedrYuqKIUlAklTgWuB3YEh4IqIuEzSLsAc0uDcJcDfRMSzRcVhZjZeEyfCxo3Dy4aGUs2gFxTZNLQB+HhEvB44FPiIpP2Ac4E7ImJf4I5s2cys45x8cjrYlyeBF15IzUC9kgSgwEQQEU9GxL3Z6+eBh4E9gHcC12SrXQOcWFQMZmaN+I//SAf666/fVLZ4cUoA24zsR+4BLeksljQdOAi4G9gtIp6ElCyAV1XZ5jRJg5IGV65c2YowzazP3XJLSgCnn76pbO7clABGzhXUSwpPBJK2A/4TOCsi6r77ZkRcEREDETEwefLk4gI0s7732GMpAZxY1j5xzjkpARx/fPviapVCRw1J2pyUBGZHxE1Z8XJJUyLiSUlTgBVFxmBmVk3eSCBICaCfFFYjkCTgSuDhiPhS2VtzgVnZ61nALUXFYGZWjVSZBIaG+i8JQLFNQ0cA7wPeKun+7HEc8K/AOyQtAt6RLZuZtcRuu1WO+Fm1qvdGAo1FYU1DEfFz0mV5ed5W1H7NzPJ88pNw8cXDy+bNg6OOak88ncRTTJhZT/vRj9KZfnkSOP/8VANwEkg8xYSZ9aTly2H33YeX7b03/P737YmnkzkRmFlPGRqCCRMqy/uxE7heTgRm1jPyOns3boTN3Ag+Kn89Ztb1Xv/6yiTwxBOpFuAkUJu/IjPrWhdckBLAb36zqez7308JYMqU9sXVbdw0ZGZdZ948mDFjeNmZZ8KXv9yWcLqeE4GZdY28kUA77ACrV7cnnl7hRGBmHa9aW79HAjWH+wjMrKNJlUngpZecBJrJicDMOpJUORLooYdSAthii/bE1KucCMyso3zkI5UJ4CtfSQng9a9vT0y9zn0EZtYRf
vxjeNuI6Sj33x8WLGhPPP3EicDM2urZZ2GXXSrL3QfQOk4EZtY2eVNCOAG0nvsIzKzl8jqCX3jBSaBdnAjMrGV23LEyAQwOpgSwzTbticmcCMysBT7zmZQA1qzZVPb5z6cEcPDB7YvLEvcRmFlh7r4bDj10eNluu8FTT7UnHsvnRGBmTbd2LWy/fWW5+wA6kxOBmTWVRwJ1H/cRmFlT5I0EWr3aSaAbOBGY2bjsu29lApg/PyWAHXZoT0w2Nk4EZtaQL34xJYDFizeVfexjKQEceWT74rKxcx+BmY3JAw/AAQdUlrsJqHs5EZhZXV56CbbaqrLcCaD7ORGYWU15I4GGhvLLrfu4j8DMqsobCbRyZaoFOAn0DicCM6tw+OGVB/rbbksJYNKk9sRkxXEiMLNXfPObKQH84hebyt7//pQAjj22bWFZwdxHYGYsXpyuBxjJHcH9oWYikCTgEGAPIIAngF9G+Cdi1u02bIDNN68s9//u/jJq05Cko4FFwGeB44C/BD4HLMreG23bqyStkLSwrOyzkh6XdH/2OG7c/wIza4hUmQQ2bnQS6Ee1agSXAW+PiCXlhZJeA9wGvH6Uba8G/g24dkT5pRFxydjCNLNmyRvts2wZ7LFH62OxzlCrs3gisCyn/HEgp0K5SUTMB55pMC4za7ITTqhMAnPmpBqAk0B/q1UjuAr4laTrgMeysqnAKcCVDe7zDEmnAoPAxyPi2byVJJ0GnAYwbdq0BndlZtdfDyefPLzs+ONh7tz2xGOdR7X6fCXtB5xA6iwWqYYwNyIeqvnh0nTg1oj4s2x5N2AVqdP5AmBKRHyw1ucMDAzE4OBgrdXMrMzjj8Oee1aWuw+gf0i6JyIGaq1Xc9RQdsB/SNIuaTH/DL4eEbG8LMBvALc2+llmlm9oCCZMqCx3ArBqao0amibpOkkrgLuBX2Yjga7LzvbHRNKUssWTgIXV1jWzsZMqk8D69U4CNrpaNYI5wJeBmRGxEUDSBOA9wHXAodU2lPRdYAYwSdIy4J+AGZIOJDUNLQH+dpzxmxn5I4EWLYJ99ml9LNZ9ao0amhQRc0pJACAiNkbEdcCuo20YEe+NiCkRsXlE7BkRV0bE+yJi/4g4ICJOiIgnm/GPMOtXH/hAZRK44opUA3ASsHrVqhHcI+nfgWsYPmpoFnBfkYGZWXXf+x4cN+JyzEMPHT5HkFm9aiWCU4EPka4mHjZqiMaHj5pZg1atgsmTK8vdB2DjMWoiiIiXga9nDzNrkwjYLKch1wnAmmHURCBpIqlGcCLDJ527BbgyItYXHqFZn8vrCF63DrbcsvWxWG+q1TT0LeA5UtNQaaqJPUl9BN8GTq6ynZmNU14CWLAA9t+/9bFYb6s1auiNEfF3EXFXRCzLHndFxN8BB7UiQLN+c/bZlUng4otTM5CTgBWhVo3gWUnvAf4zIoYAJG1Guo6g4SuMzazS/Pnw5jcPL3vta9NNY8yKVCsRnAJ8Afh3SaUD/07AT7L3zGycVq+GnXaqLHdHsLVKrVFDS8j6ASTtSpqkblUL4jLrC3n9AE4A1mp137w+Ip4uJQFJA5I8g7lZg6TKJLB2rZOAtUfdiWCEjwK3SprTzGDMet3uu1cmgLvvTglg223bE5NZQ4kgImZFxEHAh5scj1lPuuCClACWL99U9pnPpARwyCHti8sM6rgfgaQdgWMYfkHZDyLiuYh4vuD4zLravHkwY8bwsh13hOeea0s4Zrlq3Y/gVOBe0nTS2wDbAm8hTUZ3auHRmXWptWtTDWBkEohwErDOU6tGcB5wcEQM++lK2pl0o5priwrMrFt5JJB1m1p9BCI1B400lL1nZpm8kUArVjSSBGYD00n/Padny2bFqZUILgTulfR1SZ/OHpeTmosuLD48a4wPJK2UlwBuvDElgLwpo0c3GzgNWEo6B1uaLftvaMUZNRFExDXAADAPeAl4GfgpMBARVxcdXO9o5YHZB5JWOffcygRw9NEpAbzrXY1+6nnAiyPKXszKzYpRc9RQRDxLuj+xNaR0YC795y4dmAFmFrC/0Q4kReyv/9xzDwwMVJY3px/g0TGWm41foxeUIemBZgbSu1p9hucDSVHWrUs1gJFJIKKZncHTxlhuNn61bkzz19XeAnZvfji9qNUH5mmkWkdeuTUqbyTQ0FB++fhcyPAaJKSR2+6Ss+LUahqaQ2rbyDvf2ar54fSiVh+YfSBpprwD/aOPwtSpRe2x1Hx3HulkYRrpb+dmPStOrUSwALgkIhaOfEPS24sJqde0+sDsA0kz5CWAiy6CT3yiFXufif9e1kq1EsFZwJoq753U5Fh6VDsOzD6QNGrmTPjOd4aXbbYZbNzYnnjMWqHW/Qh+Nsp7g80Pp1f5wNzp7roLDjusstxXBFs/qGfSuVcBL0TEC5K2Bs4Gtgcui4gniw7QrEjr18MWW1SWOwFYP6ln+Oh1wK7Z688B+5DuV/ydqluYdQGpMgmsX+8kYP2n1uyjs4DXAjOy1ycDg8BTwF6STpV0QPFhmjVP3pQQP/tZSgATa9aRzXpPrRrBT4E/Ag8DjwPLgf/Oyp/OnvPGRpp1nLwEcOyxKQG86U3ticmsE9TqLF4q6TLgVmBz4NSIeFTSNGBVRPhyVet4p54K3/pWZbmbgMySeuYa+rqkbwFDEVEaDP808N5CIzMbp/vvh4MOqix3AjAbrq4W0YhYO2L5hWLCMRu/oSGYMKGy3AnALF/Dk87VIukqSSskLSwr20XS7ZIWZc87F7V/609SZRJ44QUnAbPRFJYIgKtJN70vdy5wR0TsC9yRLZuNW15H8Ny5KQFss017YjLrFoUlgoiYDzwzovidwDXZ62uAE4vav/WHvASw//4pARx/fHtiMus2dSUCSX+dNeeslrRG0vOSqs1BNJrdSlcjZ8+vGmWfp0kalDS4cuXKBnZlveycc6rfJH7BgtbHY9bN6r185iLg+Ih4uMhgykXEFcAVAAMDA27hNQAeeAAOyLmE0X0AZo2rNxEsb1ISWC5pSkQ8KWkKsKIJn2l9ICLNAppXbmbjU28iGJQ0B/gv0k3sAYiIm8a4v7nALOBfs+dbxri99aG8JqAVK2Dy5NbHYtaL6u0s3oF0Z5WjgeOzx1+NtoGk7wK/AP5U0jJJHyIlgHdIWgS8I1s2y5XXEfzP/5xqAU4CZs1T7wVlHxjrB0dEtSuP3zbWz7L+Uu0+wG4GMitGrZvXfzIiLpL0VXLuWxwR/1BYZNZ3zj4bLr20stwJwKxYtWoEpQ5i343MCrN0KUyfXlnuBGDWGrVmH/3v7Pma0dYza1S1awHMrHWKnGLCrKq8juA//MFJwKwdnAispfISwEc/mhJAXvOQmRWvrlFDko6IiDtrlZlV45FAZp2r3hrBV+ssMxvmkkuq9wM4CZh1hlrDRw8DDgcmSzq77K0dgJxbf5glTz8NkyZVlvvgb9Z5ajUNbQFsl623fVn5GuDdRQVl3S2vBjA0VL15yMzaq9bw0XnAPElXR8TSFsVkXWk20syK0vvvhze8oQ3hmFndajUNfTkizgL+TVLelcUnFBaZ
dY10pj88CZx44lxuvvn5inIz6zy1moauzZ4vKToQ6z677ZZmAR0potQGtBdOBGadr1YiuJg0SdxxEXFOC+KxLnDjjfCe91SWb0oAJY+2JB4zG59aiWCKpDcDJ0i6Dhj2Pz0i7i0sMus4L74I225bWR4xHcjrQppWcERm1gy1EsH5wLnAnsCXRrwXwFuLCMo6T96Inw0bYMIEgAuB00i3rCjZJis3s0436gVlEXFjRBwLXBQRbxnxcBLoA3lTQtx5Z7oeYMIrV5LMJN1eei9SpXGvbLlW/8BsYDrpZzg9WzazVqv3xjQXSDoBOCor+mlE3FpcWNZueTWAN7whDQfNN5OxdQzPZngtYmm2XPosM2uVuqaYkPQvwJnAQ9njzKysR/Xvmeq221afEqJ6EmjEeQxvSiJbPq+ZOzGzOtQ719BfAu+IiKsi4irgmKysB5XOVJeSukFKZ6rtSgatSUpz5qQE8OKIY3NxcwJVG1HkkUZmrTaWaah3Knu9Y7MD6RxjPVMt8kBdKymNf98vv5wSwCmnDC8vflK4aiOKPNLIrNXq6iMA/gW4T9JPSL2BRwGfKiyqthrLmWrR7dy1ktL49p3XBPTii7D11mMMsyEeaWTWKWrWCCQJ+DlwKHBT9jgsIq4rOLY2GcuZatHt3KMlpcb3nTcS6GtfSzWA1iQBaHykkZk1W80aQUSEpP+KiIOBuS2Iqc3GcqZadDv3NKpfqDX2fXfezWHGOtLIzIpQbx/BXZL+vNBIOsZYzlSLbue+kJSEypWSUv37zqsBgG8OY2ZJvYngLaRk8HtJCyQ9IGlBkYG110xgCTCUPVc7a807UAOspTmdxqMlpdGSRHLrrU4AZlZbvZ3FxxYaRdcqJYgzgafLyp+meZ3G1ZpPSmXnkZqDppGSwEyGhsqv+t3EB38zy6MY5eggaSvgdGAf4AHgyojY0KLYXjEwMBCDg4Ot3u0YTCe/LX8vUo2idfJqACtWwOTJLQ3DzDqApHsiYqDWerWahq4BBkhJ4Fjgi02IrQe1/+KovH6AM85ItQAnATMbTa2mof0iYn8ASVcCvyw+pG402uieYnXeSCAz6za1agTrSy/a0STUPWp33DbbtGnuCDaz5qiVCN4gaU32eB44oPRa0ppWBNgdWndx1P/8T0oAjz02vNwJwMwaVet+BBMiYofssX1ETCx7vUOrgmxcUfMA5X1uvUNOGyfBEUcML3MCMLPxqnf4aFNJWgI8D2wENtTTqz12Rc0D1Pp59POagH73O9h330J2Z2Z9ZiyzjzbbWyLiwGKSABQ3D1Dr5tHPGwl05JGpBuAkYGbN0pYaQWsUNaSz+KGiHglkZq3UrhpBAD+UdI+k0/JWkHSapEFJgytXrmxgF0XNA1Tc/EIHHeSRQGbWeu1KBEdExBtJF6l9RNJRI1eIiCsiYiAiBiY3dEVUUUM6m/+5CxemBDDyVpBOAGbWCm1JBBHxRPa8ArgZOKT5eylqSGdzP1eC/fcfXuYEYGat1PJEIGlbSduXXgNHAwuL2VtRQzrH/7l5HcHz5zcjAbTmHsdm1jva0Vm8G3BzuvEZE4HvRMT32xBHWxTbEdz6oa1m1v1anggi4hHgDa3eb7u1ZiTQaENbnQjMLF87ryPoC7NmtXIkUPtnQTWz7tPjiaB97eVPPpkSwLXXDi8vtiO46Ftnmlkv6uFEUGovX0q6bKHUXl58MpDg1a8eXjY01IqRQK2fBdXMul8PJ4LWTQVRkjcS6IYbUgKo1kfQXK2bBdXMeoenmGiCzpoSoto9js3M8vVwjaD49vK8GgD4gjAz6y49nAiKay//1KecAMysd/Rw01CpeeQ8UnPQNFISaLzZZO1a2H77ynIf/M2sm/VwIoBmtpfn1QDWrYMtt2zKx5uZtU0PNw1BM64jyOsH+NKXUi3AScDMekEP1wjGN+9OZ40EMjMrTg/XCBq7jsAjgcys3/RwIhjbdQQ33FBvAvA0z2bWW3q4aWgaqTkor3yTjRthYs63kH/272mezaz39HCNoPZ1BFJlElizZrQmoNZPW2FmVrQeTgSleXd2LSvbGsjvB/jKV1ICyLtOYBNP82xmvaeHE0HJH1959bGPnYc0vAnn4INTAvjoR+v5LE/zbGa9p4f7CKDUlLNo0T78yZ8sqnh37KOALmR4HwF4mmcz63Y9nQheeukpttqq8mgfsRnpxvNj1fxpK8zM2q2nE8EJJ/zwlddbbrmOdeu2zpb2GseneppnM+stPd1H8I1vPM373vcd1q+fWJYEmtGU42sJzKx39HQimDbtJK69Npg4cU+ad8eu9t0C08ysCD2dCJKZwBJSn8ASxp4ERp79n4mvJTCzXtLTfQTjl3clcTW+lsDMulMf1AhGU6utP+9K4mp8LYGZdac+rhHUM2/QaDWAcr6WwMy6Vx8ngmrzBs0qWxapQ3ikXYHt8LUEZtYL+jQRzKb62f5GUs1ga/KTgIDLstelC8tKHcVOBmbWffqgj2BkP8Dfs6kJqJoXgaervFdKDh5Cama9occTQd6Y/8upvwM4z154Omoz6yU9ngjyDtj1zjS3K9XvZ+DpqM2sd/R4Imj0wLwNqR/gClINYORVyZ6O2sx6R1sSgaRjJP1W0mJJ5xa3p3oPzFuQagAjD/jVrkquffczM7Nu0fJEIGkC8DXgWGA/4L2S9itmb7UOzKUD/1XAKuqfhqJ097O82oKZWXdpx/DRQ4DFEfEIgKTrgHcCDzV/VzNJcwPljQDai3TQH89n+8BvZt2vHU1DewCPlS0vy8qGkXSapEFJgytXrhzH7i7DzThmZtW1IxEop6xiKE9EXBERAxExMHny5HHszs04ZmajaUfT0DJgatnynsATxe7SzThmZtW0o0bwK2BfSa+RtAVwCjC3DXGYmRltqBFExAZJZwA/ACYAV0XEg62Ow8zMkrZMOhcRtwG3tWPfZmY2XI9fWWxmZrU4EZiZ9TknAjOzPudE0LBa9zs2M+sOfXqHsvGq537HZmbdwTWChvjGNGbWO5wIGuIb05hZ73AiaIhvTGNmvcOJoCG+MY2Z9Q4ngoZ4RlMz6x0eNdQwz2hqZr3BNQIzsz7nRGBm1uecCMzM+pwTgZlZn3MiMDPrc4qouG98x5G0kjShTyMmAauaGE7RHG/xui1mx1usbosX6o95r4iYXGulrkgE4yFpMCIG2h1HvRxv8botZsdbrG6LF5ofs5uGzMz6nBOBmVmf64dEcEW7Axgjx1u8bovZ8Rar2+KFJsfc830EZmY2un6oEZiZ2SicCMzM+lzPJAJJSyQ9IOl+SYM570vSVyQtlrRA0hvbEWcWy59mcZYeaySdNWKdGZJWl61zfotjvErSCkkLy8p2kXS7pEXZ885Vtp2VrbNI0qw2x3yxpN9kf/ObJe1UZdtRfz8tjPezkh4v+7sfV2XbYyT9Nvs9n9vGeOeUxbpE0v1Vtm3H9ztV0k8kPSzpQUlnZuUd+TseJd7if8MR0RMPYAkwaZT3jwO+R7qBwKHA3e2OOYtrAvAU6cKP8vIZwK1tjOso4I3AwrKyi4Bzs9fnAl/I2W4
X4JHseefs9c5tjPloYGL2+gt5Mdfz+2lhvJ8F/rGO38zvgb2BLYBfA/u1I94R738ROL+Dvt8pwBuz19sDvwP269Tf8SjxFv4b7pkaQR3eCVwbyV3ATpKmtDso4G3A7yOi0SunCxER84FnRhS/E7gme30NcGLOpn8B3B4Rz0TEs8DtwDGFBVomL+aI+GFEbMgW7wL2bEUs9ajyHdfjEGBxRDwSES8D15H+NoUaLV5JAv4G+G7RcdQrIp6MiHuz188DDwN70KG/42rxtuI33EuJIIAfSrpH0mk57+8BPFa2vCwra7dTqP6f5zBJv5b0PUn/q5VBVbFbRDwJ6UcLvCpnnU79ngE+SKoV5qn1+2mlM7JmgKuqNFt04nd8JLA8IhZVeb+t36+k6cBBwN10we94RLzlCvkN99Idyo6IiCckvQq4XdJvsjOYEuVs09axs5K2AE4APpXz9r2k5qK1WTvxfwH7tjK+BnXc9wwg6TxgAzC7yiq1fj+t8nXgAtJ3dgGpueWDI9bpxO/4vYxeG2jb9ytpO+A/gbMiYk2qvNTeLKesJd/xyHjLygv7DfdMjSAinsieVwA3k6rP5ZYBU8uW9wSeaE10VR0L3BsRy0e+ERFrImJt9vo2YHNJk1od4AjLS81p2fOKnHU67nvOOvr+CpgZWWPqSHX8floiIpZHxMaIGAK+USWOjvqOJU0E/hqYU22ddn2/kjYnHVRnR8RNWXHH/o6rxFv4b7gnEoGkbSVtX3pN6lxZOGK1ucCpSg4FVpeqh21U9SxK0u5ZuyuSDiH9rZ5uYWx55gKl0ROzgFty1vkBcLSknbNmjaOzsraQdAxwDnBCRLxYZZ16fj8tMaLf6qQqcfwK2FfSa7Ja5Smkv027vB34TUQsy3uzXd9v9v/nSuDhiPhS2Vsd+TuuFm9LfsNF9oK36kEaPfHr7PEgcF5WfjpwevZawNdIoy0eAAbaHPM2pAP7jmVl5fGekf1bfk3qIDq8xfF9F3gSWE86O/oQsCtwB7Aoe94lW3cA+GbZth8EFme8EHkOAAAENElEQVSPD7Q55sWktt77s8fl2bqvBm4b7ffTpni/lf0+F5AOWFNGxpstH0caVfL7dsablV9d+t2WrdsJ3++bSM05C8r+/sd16u94lHgL/w17igkzsz7XE01DZmbWOCcCM7M+50RgZtbnnAjMzPqcE4GZWZ9zIrC2kLQxmyVxoaQbJG3T5M9/v6R/q7HODEmHly2fLunUJuz71ZJuHOM2ZyjNJBrlFw5m173UnDVXVWYjza41uDubQXNOdt0BkrbMlhdn709v7F9rvcCJwNrljxFxYET8GfAy6RqKVpsBvJIIIuLyiLh2vB8aEU9ExLvHuNmdpAuzRk4+eCxpapF9gdNIU1AMI2kC6RqZY0mzVb5X0n7Z218ALo2IfYFnSdcqkD0/GxH7AJdm61mfciKwTvAzYB8ASWdntYSFyu7RIGm60nzs12RnxTeWahBKc7BPyl4PSPrpyA+XdHx21nufpB9J2i07Az4d+FhWMzlS6V4A/5htc6Cku7RpDvids/KfSvqCpF9K+p2kI3P2N13ZnP1ZzeQmSd/PzsovyvsCIuK+iFiS81Y9s+bmzkaaXan6VqBUOymfabN8Bs4bgbeVrmS3/uNEYG2lNE/NscADkg4GPgD8b9I9I/6fpIOyVf8UuCIiDgDWAH8/ht38HDg0Ig4iHSQ/mR10LyedLR8YET8bsc21wDnZ/h4A/qnsvYkRcQhw1ojyag4ETgb2B06WNLXG+uXqmQWz2jq7As/FpimMy7d9ZZvs/dXZ+taHnAisXbZWupvVIPAoaY6VNwE3R8QLkSbcu4k0vTHAYxFxZ/b629m69doT+IGkB4BPAKNO6S1pR2CniJiXFV1DuilLSWkysHuA6XXs/46IWB0R64CHgL3GEHs9s2BWW2e0bTtxBlNrk16ahtq6yx8j4sDyghpNEyMPUqXlDWw6odmqyrZfBb4UEXMlzSDdBWw8XsqeN1Lf/6GXyl7Xu01JPbNgVltnFakpaWJ21l++bWmbZVmtbEcau0mO9QDXCKyTzAdOlLRNNoPiSaT+A4Bpkg7LXr+X1NwD6fZ8B2ev31Xlc3cEHs9el9979nnSLQGHiYjVwLNl7f/vA+aNXK9Fqs6aK+k32Tq5s5FGmkjsJ0Cp47p8ps3yGTjfDfw4PPFY33IisI4R6TZ9VwO/JN2Z6ZsRcV/29sPALEkLSPeRLY2e+RxwmaSfkc6283wWuCFbZ1VZ+X8DJ5U6i0dsMwu4ONvfgcDnx/Nvq0XSP0haRjprXyDpm9lbt5Hul7uYdH+Cv8/Wn0TWvJOd7Z9Bmib5YeD6iHgw2/4c4GxJi0l9AFdm5VcCu2blZ5Pu3Wt9yrOPWsfLRvjcmg01NUDSXwF7R8RX2h2LdT/3EZh1oYi4td0xWO9wjcDMrM+5j8DMrM85EZiZ9TknAjOzPudEYGbW55wIzMz63P8Ht7TcPey9OBYAAAAASUVORK5CYII=\n", 162 | "text/plain": [ 163 | "
" 164 | ] 165 | }, 166 | "metadata": { 167 | "needs_background": "light" 168 | }, 169 | "output_type": "display_data" 170 | } 171 | ], 172 | "source": [ 173 | "plt.scatter(X_test,y_test,color='yellow')\n", 174 | "plt.plot(X_train,model.predict(X_train),color='blue')\n", 175 | "plt.xlabel('Population in 10,000')\n", 176 | "plt.ylabel('Profit in $10,000')\n", 177 | "plt.title('Test Data')\n", 178 | "plt.show()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [] 187 | } 188 | ], 189 | "metadata": { 190 | "kernelspec": { 191 | "display_name": "Python 3", 192 | "language": "python", 193 | "name": "python3" 194 | }, 195 | "language_info": { 196 | "codemirror_mode": { 197 | "name": "ipython", 198 | "version": 3 199 | }, 200 | "file_extension": ".py", 201 | "mimetype": "text/x-python", 202 | "name": "python", 203 | "nbconvert_exporter": "python", 204 | "pygments_lexer": "ipython3", 205 | "version": "3.7.1" 206 | } 207 | }, 208 | "nbformat": 4, 209 | "nbformat_minor": 2 210 | } 211 | -------------------------------------------------------------------------------- /Linear Regression/ex1data1.txt: -------------------------------------------------------------------------------- 1 | 6.1101,17.592 2 | 5.5277,9.1302 3 | 8.5186,13.662 4 | 7.0032,11.854 5 | 5.8598,6.8233 6 | 8.3829,11.886 7 | 7.4764,4.3483 8 | 8.5781,12 9 | 6.4862,6.5987 10 | 5.0546,3.8166 11 | 5.7107,3.2522 12 | 14.164,15.505 13 | 5.734,3.1551 14 | 8.4084,7.2258 15 | 5.6407,0.71618 16 | 5.3794,3.5129 17 | 6.3654,5.3048 18 | 5.1301,0.56077 19 | 6.4296,3.6518 20 | 7.0708,5.3893 21 | 6.1891,3.1386 22 | 20.27,21.767 23 | 5.4901,4.263 24 | 6.3261,5.1875 25 | 5.5649,3.0825 26 | 18.945,22.638 27 | 12.828,13.501 28 | 10.957,7.0467 29 | 13.176,14.692 30 | 22.203,24.147 31 | 5.2524,-1.22 32 | 6.5894,5.9966 33 | 9.2482,12.134 34 | 5.8918,1.8495 35 | 8.2111,6.5426 36 | 7.9334,4.5623 37 | 8.0959,4.1164 38 | 5.6063,3.3928 39 | 12.836,10.117 40 | 6.3534,5.4974 41 | 5.4069,0.55657 42 | 6.8825,3.9115 43 | 11.708,5.3854 44 | 5.7737,2.4406 45 | 7.8247,6.7318 46 | 7.0931,1.0463 47 | 5.0702,5.1337 48 | 5.8014,1.844 49 | 11.7,8.0043 50 | 5.5416,1.0179 51 | 7.5402,6.7504 52 | 5.3077,1.8396 53 | 7.4239,4.2885 54 | 7.6031,4.9981 55 | 6.3328,1.4233 56 | 6.3589,-1.4211 57 | 6.2742,2.4756 58 | 5.6397,4.6042 59 | 9.3102,3.9624 60 | 9.4536,5.4141 61 | 8.8254,5.1694 62 | 5.1793,-0.74279 63 | 21.279,17.929 64 | 14.908,12.054 65 | 18.959,17.054 66 | 7.2182,4.8852 67 | 8.2951,5.7442 68 | 10.236,7.7754 69 | 5.4994,1.0173 70 | 20.341,20.992 71 | 10.136,6.6799 72 | 7.3345,4.0259 73 | 6.0062,1.2784 74 | 7.2259,3.3411 75 | 5.0269,-2.6807 76 | 6.5479,0.29678 77 | 7.5386,3.8845 78 | 5.0365,5.7014 79 | 10.274,6.7526 80 | 5.1077,2.0576 81 | 5.7292,0.47953 82 | 5.1884,0.20421 83 | 6.3557,0.67861 84 | 9.7687,7.5435 85 | 6.5159,5.3436 86 | 8.5172,4.2415 87 | 9.1802,6.7981 88 | 6.002,0.92695 89 | 5.5204,0.152 90 | 5.0594,2.8214 91 | 5.7077,1.8451 92 | 7.6366,4.2959 93 | 5.8707,7.2029 94 | 5.3054,1.9869 95 | 8.2934,0.14454 96 | 13.394,9.0551 97 | 5.4369,0.61705 98 | -------------------------------------------------------------------------------- /Linear Regression/ex1data2.txt: -------------------------------------------------------------------------------- 1 | 2104,3,399900 2 | 1600,3,329900 3 | 2400,3,369000 4 | 1416,2,232000 5 | 3000,4,539900 6 | 1985,4,299900 7 | 1534,3,314900 8 | 1427,3,198999 9 | 1380,3,212000 10 | 1494,3,242500 11 | 1940,4,239999 12 | 2000,3,347000 13 | 1890,3,329999 14 
| 4478,5,699900 15 | 1268,3,259900 16 | 2300,4,449900 17 | 1320,2,299900 18 | 1236,3,199900 19 | 2609,4,499998 20 | 3031,4,599000 21 | 1767,3,252900 22 | 1888,2,255000 23 | 1604,3,242900 24 | 1962,4,259900 25 | 3890,3,573900 26 | 1100,3,249900 27 | 1458,3,464500 28 | 2526,3,469000 29 | 2200,3,475000 30 | 2637,3,299900 31 | 1839,2,349900 32 | 1000,1,169900 33 | 2040,4,314900 34 | 3137,3,579900 35 | 1811,4,285900 36 | 1437,3,249900 37 | 1239,3,229900 38 | 2132,4,345000 39 | 4215,4,549000 40 | 2162,4,287000 41 | 1664,2,368500 42 | 2238,3,329900 43 | 2567,4,314000 44 | 1200,3,299000 45 | 852,2,179900 46 | 1852,4,299900 47 | 1203,3,239500 48 | -------------------------------------------------------------------------------- /Logistic Regression/Logistic/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "IMPORTING THE LIBRARIES " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import seaborn as sb" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "TRAINING A LOGISTIC REGRESSION MODEL TO PREDICT EHETHER A STUDENT GET SELECTED ON THE BASIS OF TWO EXAMS" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/html": [ 37 | "
\n", 38 | "\n", 51 | "\n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | "
34.6236596245169778.02469281536240
7547.26426988.4758651
\n", 69 | "
" 70 | ], 71 | "text/plain": [ 72 | " 34.62365962451697 78.0246928153624 0\n", 73 | "75 47.264269 88.475865 1" 74 | ] 75 | }, 76 | "execution_count": 2, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "dataset=pd.read_csv('ex2data1.txt')\n", 83 | "dataset.sample()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 3, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "\n", 96 | "RangeIndex: 99 entries, 0 to 98\n", 97 | "Data columns (total 3 columns):\n", 98 | "34.62365962451697 99 non-null float64\n", 99 | "78.0246928153624 99 non-null float64\n", 100 | "0 99 non-null int64\n", 101 | "dtypes: float64(2), int64(1)\n", 102 | "memory usage: 2.4 KB\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "dataset.info()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 4, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/html": [ 118 | "
\n", 119 | "\n", 132 | "\n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | "
34.6236596245169778.02469281536240
count99.00000099.00000099.000000
mean65.95761466.1027790.606061
std19.30200918.6388750.491108
min30.05882230.6032630.000000
25%51.29773647.9781250.000000
50%67.31925766.5893531.000000
75%80.23487779.8764231.000000
max99.82785898.8694361.000000
\n", 192 | "
" 193 | ], 194 | "text/plain": [ 195 | " 34.62365962451697 78.0246928153624 0\n", 196 | "count 99.000000 99.000000 99.000000\n", 197 | "mean 65.957614 66.102779 0.606061\n", 198 | "std 19.302009 18.638875 0.491108\n", 199 | "min 30.058822 30.603263 0.000000\n", 200 | "25% 51.297736 47.978125 0.000000\n", 201 | "50% 67.319257 66.589353 1.000000\n", 202 | "75% 80.234877 79.876423 1.000000\n", 203 | "max 99.827858 98.869436 1.000000" 204 | ] 205 | }, 206 | "execution_count": 4, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "dataset.describe()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "CREATE FEATURE MATRIX AND DEPENDENT MATRIX" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 7, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "X=dataset.iloc[:,:-1].values\n", 229 | "y=dataset.iloc[:,-1].values" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 12, 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "[[30.28671077 43.89499752]\n", 242 | " [35.84740877 72.90219803]\n", 243 | " [60.18259939 86.3085521 ]\n", 244 | " [79.03273605 75.34437644]\n", 245 | " [45.08327748 56.31637178]\n", 246 | " [61.10666454 96.51142588]\n", 247 | " [75.02474557 46.55401354]\n", 248 | " [76.0987867 87.42056972]\n", 249 | " [84.43281996 43.53339331]\n", 250 | " [95.86155507 38.22527806]\n", 251 | " [75.01365839 30.60326323]\n", 252 | " [82.30705337 76.4819633 ]\n", 253 | " [69.36458876 97.71869196]\n", 254 | " [39.53833914 76.03681085]\n", 255 | " [53.97105215 89.20735014]\n", 256 | " [69.07014406 52.74046973]\n", 257 | " [67.94685548 46.67857411]\n", 258 | " [70.66150955 92.92713789]\n", 259 | " [76.97878373 47.57596365]\n", 260 | " [67.37202755 42.83843832]\n", 261 | " [89.67677575 65.79936593]\n", 262 | " [50.53478829 48.85581153]\n", 263 | " [34.21206098 44.2095286 ]\n", 264 | " [77.92409145 68.97235999]\n", 265 | " [62.27101367 69.95445795]\n", 266 | " [80.19018075 44.82162893]\n", 267 | " [93.1143888 38.80067034]\n", 268 | " [61.83020602 50.25610789]\n", 269 | " [38.7858038 64.99568096]\n", 270 | " [61.37928945 72.80788731]\n", 271 | " [85.40451939 57.05198398]\n", 272 | " [52.10797973 63.12762377]\n", 273 | " [52.04540477 69.43286012]\n", 274 | " [40.23689374 71.16774802]\n", 275 | " [54.63510555 52.21388588]\n", 276 | " [33.91550011 98.86943574]\n", 277 | " [64.17698887 80.90806059]\n", 278 | " [74.78925296 41.57341523]\n", 279 | " [34.18364003 75.23772034]\n", 280 | " [83.90239366 56.30804622]\n", 281 | " [51.54772027 46.85629026]\n", 282 | " [94.44336777 65.56892161]\n", 283 | " [82.36875376 40.61825516]\n", 284 | " [51.04775177 45.82270146]\n", 285 | " [62.22267576 52.06099195]\n", 286 | " [77.19303493 70.4582 ]\n", 287 | " [97.77159928 86.72782233]\n", 288 | " [62.0730638 96.76882412]\n", 289 | " [91.5649745 88.69629255]\n", 290 | " [79.94481794 74.16311935]\n", 291 | " [99.27252693 60.999031 ]\n", 292 | " [90.54671411 43.39060181]\n", 293 | " [34.52451385 60.39634246]\n", 294 | " [50.28649612 49.80453881]\n", 295 | " [49.58667722 59.80895099]\n", 296 | " [97.64563396 68.86157272]\n", 297 | " [32.57720017 95.59854761]\n", 298 | " [74.24869137 69.82457123]\n", 299 | " [71.79646206 78.45356225]\n", 300 | " [75.39561147 85.75993667]\n", 301 | " [35.28611282 47.02051395]\n", 302 | " [56.2538175 39.26147251]\n", 303 | " [30.05882245 49.59297387]\n", 304 
| " [44.66826172 66.45008615]\n", 305 | " [66.56089447 41.09209808]\n", 306 | " [40.45755098 97.53518549]\n", 307 | " [49.07256322 51.88321182]\n", 308 | " [80.27957401 92.11606081]\n", 309 | " [66.74671857 60.99139403]\n", 310 | " [32.72283304 43.30717306]\n", 311 | " [64.03932042 78.03168802]\n", 312 | " [72.34649423 96.22759297]\n", 313 | " [60.45788574 73.0949981 ]\n", 314 | " [58.84095622 75.85844831]\n", 315 | " [99.8278578 72.36925193]\n", 316 | " [47.26426911 88.475865 ]\n", 317 | " [50.4581598 75.80985953]\n", 318 | " [60.45555629 42.50840944]\n", 319 | " [82.22666158 42.71987854]\n", 320 | " [88.91389642 69.8037889 ]\n", 321 | " [94.83450672 45.6943068 ]\n", 322 | " [67.31925747 66.58935318]\n", 323 | " [57.23870632 59.51428198]\n", 324 | " [80.366756 90.9601479 ]\n", 325 | " [68.46852179 85.5943071 ]\n", 326 | " [42.07545454 78.844786 ]\n", 327 | " [75.47770201 90.424539 ]\n", 328 | " [78.63542435 96.64742717]\n", 329 | " [52.34800399 60.76950526]\n", 330 | " [94.09433113 77.15910509]\n", 331 | " [90.44855097 87.50879176]\n", 332 | " [55.48216114 35.57070347]\n", 333 | " [74.49269242 84.84513685]\n", 334 | " [89.84580671 45.35828361]\n", 335 | " [83.48916274 48.3802858 ]\n", 336 | " [42.26170081 87.10385094]\n", 337 | " [99.31500881 68.77540947]\n", 338 | " [55.34001756 64.93193801]\n", 339 | " [74.775893 89.5298129 ]]\n" 340 | ] 341 | } 342 | ], 343 | "source": [ 344 | "print(X)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 13, 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "name": "stdout", 354 | "output_type": "stream", 355 | "text": [ 356 | "[0 0 1 1 0 1 1 1 1 0 0 1 1 0 1 1 0 1 1 0 1 0 0 1 1 1 0 0 0 1 1 0 1 0 0 0 1\n", 357 | " 0 0 1 0 1 0 0 0 1 1 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 0 0 0 1 0 1 1 0 1 1 1 1\n", 358 | " 1 1 1 0 0 1 1 1 1 1 1 0 1 1 0 1 1 0 1 1 1 1 1 1 1]\n" 359 | ] 360 | } 361 | ], 362 | "source": [ 363 | "print(y)" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": {}, 369 | "source": [ 370 | "SPLITTING THE DATASET INTO TRAINING SET AND TEST SET" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 14, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "from sklearn.model_selection import train_test_split\n", 380 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)\n" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "TRAIN USING LOGISTIC REGRESSION SCIKIT LIBRARY" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 15, 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "name": "stderr", 397 | "output_type": "stream", 398 | "text": [ 399 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. 
Specify a solver to silence this warning.\n", 400 | " FutureWarning)\n" 401 | ] 402 | }, 403 | { 404 | "data": { 405 | "text/plain": [ 406 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", 407 | " intercept_scaling=1, max_iter=100, multi_class='warn',\n", 408 | " n_jobs=None, penalty='l2', random_state=None, solver='warn',\n", 409 | " tol=0.0001, verbose=0, warm_start=False)" 410 | ] 411 | }, 412 | "execution_count": 15, 413 | "metadata": {}, 414 | "output_type": "execute_result" 415 | } 416 | ], 417 | "source": [ 418 | "from sklearn.linear_model import LogisticRegression\n", 419 | "model=LogisticRegression()\n", 420 | "model.fit(X_train,y_train)" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 17, 426 | "metadata": {}, 427 | "outputs": [ 428 | { 429 | "data": { 430 | "text/plain": [ 431 | "array([1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,\n", 432 | " 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)" 433 | ] 434 | }, 435 | "execution_count": 17, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "predicted=model.predict(X_test)\n", 442 | "predicted" 443 | ] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "metadata": {}, 448 | "source": [ 449 | "PREDICTING BY PROVIDING INPUT" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 19, 455 | "metadata": {}, 456 | "outputs": [ 457 | { 458 | "name": "stdout", 459 | "output_type": "stream", 460 | "text": [ 461 | "[0]\n" 462 | ] 463 | } 464 | ], 465 | "source": [ 466 | "print(model.predict([[25,85]]))" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": {}, 472 | "source": [ 473 | "FINAL REPORT " 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 21, 479 | "metadata": {}, 480 | "outputs": [ 481 | { 482 | "name": "stdout", 483 | "output_type": "stream", 484 | "text": [ 485 | "90.0\n" 486 | ] 487 | }, 488 | { 489 | "data": { 490 | "text/plain": [ 491 | "' precision recall f1-score support\\n\\n 0 1.00 0.73 0.84 11\\n 1 0.86 1.00 0.93 19\\n\\n micro avg 0.90 0.90 0.90 30\\n macro avg 0.93 0.86 0.88 30\\nweighted avg 0.91 0.90 0.90 30\\n'" 492 | ] 493 | }, 494 | "execution_count": 21, 495 | "metadata": {}, 496 | "output_type": "execute_result" 497 | } 498 | ], 499 | "source": [ 500 | "from sklearn import metrics\n", 501 | "score=100*metrics.accuracy_score(y_test,predicted)\n", 502 | "print(score)\n", 503 | "report=metrics.classification_report(y_test,predicted)\n", 504 | "report" 505 | ] 506 | }, 507 | { 508 | "cell_type": "markdown", 509 | "metadata": {}, 510 | "source": [] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": null, 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [] 525 | } 526 | ], 527 | "metadata": { 528 | "kernelspec": { 529 | "display_name": "Python 3", 530 | "language": "python", 531 | "name": "python3" 532 | }, 533 | "language_info": { 534 | "codemirror_mode": { 535 | "name": "ipython", 536 | "version": 3 537 | }, 538 | "file_extension": ".py", 539 | "mimetype": "text/x-python", 540 | "name": "python", 541 | "nbconvert_exporter": "python", 542 | "pygments_lexer": "ipython3", 543 | "version": "3.7.1" 544 | } 545 | }, 546 | "nbformat": 4, 547 | "nbformat_minor": 2 548 | } 549 | -------------------------------------------------------------------------------- /Logistic 
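A note on the cells in this notebook: the dataset `ex2data1.txt` has no header row, so the first data row is being read as the column names (which is why the columns appear as `34.62365962451697`, `78.0246928153624`, and `0`, and why only 99 of the 100 rows are loaded). The `FutureWarning` above can also be silenced by naming the solver explicitly. A minimal sketch of both fixes, assuming the same file and exam-score columns:

```python
# Hypothetical cleanup of the loading and training cells above.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# header=None keeps all 100 rows; give the columns meaningful names.
dataset = pd.read_csv('ex2data1.txt', header=None,
                      names=['exam1', 'exam2', 'admitted'])
X = dataset[['exam1', 'exam2']].values
y = dataset['admitted'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Passing solver='lbfgs' avoids the FutureWarning shown in the original output.
model = LogisticRegression(solver='lbfgs')
model.fit(X_train, y_train)
print(model.predict([[25, 85]]))  # predicted class for a student scoring 25 and 85
```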
Regression/Logistic/ex2data1.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 
88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /Logistic Regression/Logistic/ex2data2.txt: -------------------------------------------------------------------------------- 1 | 0.051267,0.69956,1 2 | -0.092742,0.68494,1 3 | -0.21371,0.69225,1 4 | -0.375,0.50219,1 5 | -0.51325,0.46564,1 6 | -0.52477,0.2098,1 7 | -0.39804,0.034357,1 8 | -0.30588,-0.19225,1 9 | 0.016705,-0.40424,1 10 | 0.13191,-0.51389,1 11 | 0.38537,-0.56506,1 12 | 0.52938,-0.5212,1 13 | 0.63882,-0.24342,1 14 | 0.73675,-0.18494,1 15 | 0.54666,0.48757,1 16 | 0.322,0.5826,1 17 | 0.16647,0.53874,1 18 | -0.046659,0.81652,1 19 | -0.17339,0.69956,1 20 | -0.47869,0.63377,1 21 | -0.60541,0.59722,1 22 | -0.62846,0.33406,1 23 | -0.59389,0.005117,1 24 | -0.42108,-0.27266,1 25 | -0.11578,-0.39693,1 26 | 0.20104,-0.60161,1 27 | 0.46601,-0.53582,1 28 | 0.67339,-0.53582,1 29 | -0.13882,0.54605,1 30 | -0.29435,0.77997,1 31 | -0.26555,0.96272,1 32 | -0.16187,0.8019,1 33 | -0.17339,0.64839,1 34 | -0.28283,0.47295,1 35 | -0.36348,0.31213,1 36 | -0.30012,0.027047,1 37 | -0.23675,-0.21418,1 38 | -0.06394,-0.18494,1 39 | 0.062788,-0.16301,1 40 | 0.22984,-0.41155,1 41 | 0.2932,-0.2288,1 42 | 0.48329,-0.18494,1 43 | 0.64459,-0.14108,1 44 | 0.46025,0.012427,1 45 | 0.6273,0.15863,1 46 | 0.57546,0.26827,1 47 | 0.72523,0.44371,1 48 | 0.22408,0.52412,1 49 | 0.44297,0.67032,1 50 | 0.322,0.69225,1 51 | 0.13767,0.57529,1 52 | -0.0063364,0.39985,1 53 | -0.092742,0.55336,1 54 | -0.20795,0.35599,1 55 | -0.20795,0.17325,1 56 | -0.43836,0.21711,1 57 | -0.21947,-0.016813,1 58 | -0.13882,-0.27266,1 59 | 0.18376,0.93348,0 60 | 0.22408,0.77997,0 61 | 0.29896,0.61915,0 62 | 0.50634,0.75804,0 63 | 0.61578,0.7288,0 64 | 0.60426,0.59722,0 65 | 0.76555,0.50219,0 66 | 0.92684,0.3633,0 67 | 0.82316,0.27558,0 68 | 0.96141,0.085526,0 69 | 0.93836,0.012427,0 70 | 0.86348,-0.082602,0 71 | 0.89804,-0.20687,0 72 | 0.85196,-0.36769,0 73 | 0.82892,-0.5212,0 74 | 0.79435,-0.55775,0 75 | 0.59274,-0.7405,0 76 | 0.51786,-0.5943,0 77 | 0.46601,-0.41886,0 78 | 0.35081,-0.57968,0 79 | 0.28744,-0.76974,0 80 | 0.085829,-0.75512,0 81 | 0.14919,-0.57968,0 82 | -0.13306,-0.4481,0 83 | -0.40956,-0.41155,0 84 | -0.39228,-0.25804,0 85 | -0.74366,-0.25804,0 86 | -0.69758,0.041667,0 87 | -0.75518,0.2902,0 88 | -0.69758,0.68494,0 89 | -0.4038,0.70687,0 90 | -0.38076,0.91886,0 91 | -0.50749,0.90424,0 92 | -0.54781,0.70687,0 93 | 0.10311,0.77997,0 94 | 0.057028,0.91886,0 95 | -0.10426,0.99196,0 96 | -0.081221,1.1089,0 97 | 0.28744,1.087,0 98 | 0.39689,0.82383,0 99 | 0.63882,0.88962,0 100 | 0.82316,0.66301,0 101 | 0.67339,0.64108,0 102 | 1.0709,0.10015,0 103 | -0.046659,-0.57968,0 104 | -0.23675,-0.63816,0 105 | 
-0.15035,-0.36769,0 106 | -0.49021,-0.3019,0 107 | -0.46717,-0.13377,0 108 | -0.28859,-0.060673,0 109 | -0.61118,-0.067982,0 110 | -0.66302,-0.21418,0 111 | -0.59965,-0.41886,0 112 | -0.72638,-0.082602,0 113 | -0.83007,0.31213,0 114 | -0.72062,0.53874,0 115 | -0.59389,0.49488,0 116 | -0.48445,0.99927,0 117 | -0.0063364,0.99927,0 118 | 0.63265,-0.030612,0 119 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning in Python 2 | This repository contains Machine Learning Projects in Python programming language. 3 | All the projects are done on Jupyter Notebooks. 4 | 5 | ## Libraries Required 6 | The following libraries are required to successfully implement the projects. 7 | - Python 3.6+ 8 | - NumPy (for Linear Algebra) 9 | - Pandas (for Data Preprocesssing) 10 | - Scikit-learn (for ML models) 11 | - Matplotlib (for Data Visualization) 12 | - Seaborn (for statistical data visualization) 13 | 14 | The projects are divided into various categories listed below - 15 | 16 | ## Supervised Learning 17 | - [**Linear Regression**]() 18 | - [Linear Regression Single Variables.](https://github.com/suubh/Machine-Learning-in-Python/blob/master/Linear%20Regression/LinearRegressionSingle%20Variables.ipynb) : A Simple Linear Regression Model to model the linear relationship between Population and Profit for plot sales. 19 | - [Linear Regression Multiple Variables.](https://github.com/suubh/Machine-Learning-in-Python/blob/master/Linear%20Regression/LinearRegressionMultipleVariables.ipynb) : In this project, I build a Linear Regression Model for multiple variables for predicting the House price based on acres and number of rooms. 20 | 21 | - [**Logistic Regression**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/Logistic%20Regression/Logistic/Untitled.ipynb) : In this project, I train a binary Logistic Regression classifier to predict whether a student will get selected on the basis of mid semester and end semester marks. 22 | 23 | - [**Support Vector Machine**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/SVM/Untitled.ipynb) : In this project, I build a Support Vector Machines classifier for predicting Social Network Ads . It predicts whether a user with age and estimated salary will buy the product after watching the ads or not. It uses the Radial Basic Function Kernal of SVM. 24 | 25 | - [**K Nearest Neighbours**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/K-NN/Untitled.ipynb) : K Nearest Neighbours or KNN is the simplest of all machine learning algorithms. In this project, I build a kNN classifier on the Iris Species Dataset which predict the three species of Iris with four features *sepal_length, sepal_width, petal_length* and *petal_width*. 26 | 27 | - [**Naive Bayes**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/TextClassification/Textclassification.ipynb) : In this project, I build a Naïve Bayes Classifier to classify the different class of a message from sklearn dataset called [*fetch_20newsgroups*](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_20newsgroups.html). 28 | 29 | - [**Decision Tree Classification**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/Decision%20Tree/Untitled.ipynb) : In this project, I used the Iris Dataset and tried a Decision Tree Classifier which give an accuracy of 96.7% which is less than KNN. 
30 | 31 | - [**Random Forest Classification**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/RandomForest/RandomForest.ipynb) : In this project, I used a Random Forest Classifier and a Random Forest Regressor on the Social Network Ads dataset. 32 | 33 | ## Unsupervised Learning 34 | - [**K Means Clustering**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/K-means/creditcard.ipynb) : K-Means clustering is used to find intrinsic groups within an unlabelled dataset and draw inferences from them. It is one of the most detailed projects: I implement K-Means clustering on a credit card dataset to cluster different credit card users based on their features. I scaled the data using *StandardScaler*, because standardizing the features improves convergence. I also used the [*Elbow Method*](https://en.wikipedia.org/wiki/Elbow_method_(clustering)) to search for the best number of clusters. For visualizing the dataset I used [*PCA (Principal Component Analysis)*](https://en.wikipedia.org/wiki/Principal_component_analysis) for dimensionality reduction, as the dataset has a large number of features. Finally, I computed the [*Silhouette Score*](https://en.wikipedia.org/wiki/Silhouette_(clustering)), which measures clustering quality on a scale from -1 to 1; the model scored 0.203. 35 | 36 | ## NLP (Natural Language Processing) 37 | - [**Text Analytics**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/TextAnalytics/textAnalytics.ipynb) : An introductory project on text analytics in NLP. I performed the important preprocessing steps - 38 | - ***Tokenization*** 39 | - ***Removal of special characters*** 40 | - ***Lowercasing*** 41 | - ***Removing stopwords*** 42 | - ***Stemming*** 43 | - ***Count Vectorizer*** (which performs most of the steps above, except stemming) 44 | - ***DTM (Document-Term Matrix)*** 45 | - ***TF-IDF (Term Frequency-Inverse Document Frequency)*** 46 | 47 | - [**Sentiment Analysis**](https://github.com/suubh/Machine-Learning-in-Python/tree/master/Sentiment%20Analysis) : I applied sentiment analysis to the MovieReview dataset (from the nltk library) and the RestaurantReview dataset to predict positive and negative reviews. I used a Naive Bayes classifier (78.8%) and Logistic Regression (84.3%) to build the models and make predictions. 48 | 49 | ## Data Cleaning and Preprocessing 50 | - [**Data Preprocessing**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/Data%20Preprocessing/Untitled.ipynb) : I perform the data preprocessing and cleaning methods mentioned below - 51 | - ***Label Encoding*** : Converts each category into a unique numeric value ranging from 0 to n-1, where n is the number of categories. 52 | - ***Ordinal Encoding*** : Maps categories to ordered numerical values. 53 | - ***One-Hot Encoding*** : Creates a dummy (0/1) column for each unique category value, so extra columns are created.
54 | 55 | ## Some Comparisons on Datasets 56 | 57 | | **Social Network Ads** | **Accuracy**| 58 | | ----------- | ----------- | 59 | | Support Vector Machine | 90.83% | 60 | | Random Forest Classifier | 90.0% | 61 | | Random Forest Regressor | 61.8% | 62 | 63 | | **Iris Dataset** | **Accuracy** | 64 | | ----------- | ----------- | 65 | | KNN | 98.3% | 66 | | Decision Tree | 96.7% | 67 | 68 | ## Kaggle 69 | 70 | 71 | ![Screenshot from 2021-08-05 06-34-18](https://user-images.githubusercontent.com/47265493/128275262-3e8c18c3-3710-4fbf-aff6-7d511bf1a7fb.png) 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /RandomForest/RandomForest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "RANDOM FOREST" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 13, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import matplotlib.pyplot as plt" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 14, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | "
AgeEstimatedSalaryPurchased
019190000
135200000
226430000
327570000
419760000
\n", 85 | "
" 86 | ], 87 | "text/plain": [ 88 | " Age EstimatedSalary Purchased\n", 89 | "0 19 19000 0\n", 90 | "1 35 20000 0\n", 91 | "2 26 43000 0\n", 92 | "3 27 57000 0\n", 93 | "4 19 76000 0" 94 | ] 95 | }, 96 | "execution_count": 14, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "df=pd.read_csv('Social_Network_Ads.csv')\n", 103 | "df.head()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 16, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "0 257\n", 115 | "1 143\n", 116 | "Name: Purchased, dtype: int64" 117 | ] 118 | }, 119 | "execution_count": 16, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "df.Purchased.value_counts()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "NOW MAKE THE DEPENDENT MATRIX AND INDEPENDENT MATRIX" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 21, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | " Age EstimatedSalary\n", 145 | "0 19 19000\n", 146 | "1 35 20000\n", 147 | "2 26 43000\n", 148 | "3 27 57000\n", 149 | "4 19 76000\n" 150 | ] 151 | }, 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "0 0\n", 156 | "1 0\n", 157 | "2 0\n", 158 | "3 0\n", 159 | "4 0\n", 160 | "Name: Purchased, dtype: int64" 161 | ] 162 | }, 163 | "execution_count": 21, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "X=df[['Age','EstimatedSalary']]\n", 170 | "y=df['Purchased']\n", 171 | "print(X.head())\n", 172 | "y.head()\n" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "NOW SPLIT THE DATASET INTO TRAINING SET AND TEST SET" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 22, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "from sklearn.model_selection import train_test_split\n", 189 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=23)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "TRAIN THE MODEL" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 23, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "name": "stderr", 208 | "output_type": "stream", 209 | "text": [ 210 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", 211 | " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" 212 | ] 213 | }, 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", 218 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", 219 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 220 | " min_samples_leaf=1, min_samples_split=2,\n", 221 | " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,\n", 222 | " oob_score=False, random_state=None, verbose=0,\n", 223 | " warm_start=False)" 224 | ] 225 | }, 226 | "execution_count": 23, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "from sklearn.ensemble import RandomForestClassifier\n", 233 | 
"model=RandomForestClassifier()\n", 234 | "model.fit(X_train,y_train)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "NOW PREDICT " 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 30, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "name": "stdout", 251 | "output_type": "stream", 252 | "text": [ 253 | "[0 1 0 0 1 0 1 1 1 1 0 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 0 0\n", 254 | " 0 1 0 1 0 1 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 1 0 0 0 1 0 0 0\n", 255 | " 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0\n", 256 | " 0 0 0 0 0 1 0 1 0]\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "result=model.predict(X_test)\n", 262 | "print(result)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "NOW FIND THE SCORE AND THE CLASSIFICATION REPORT" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 31, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "90.0\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "score=100*model.score(X_test,y_test)\n", 287 | "print(score)" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 32, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "name": "stdout", 297 | "output_type": "stream", 298 | "text": [ 299 | " precision recall f1-score support\n", 300 | "\n", 301 | " 0 0.92 0.92 0.92 79\n", 302 | " 1 0.85 0.85 0.85 41\n", 303 | "\n", 304 | " micro avg 0.90 0.90 0.90 120\n", 305 | " macro avg 0.89 0.89 0.89 120\n", 306 | "weighted avg 0.90 0.90 0.90 120\n", 307 | "\n" 308 | ] 309 | } 310 | ], 311 | "source": [ 312 | "from sklearn.metrics import classification_report\n", 313 | "print(classification_report(y_test,result))" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "WE CAN FIND WHICH FEATURE IMPORTANT OR IMPORTANCE OF EACH OF THE FEATURE" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 52, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "ename": "AttributeError", 330 | "evalue": "module 'pandas' has no attribute 'df'", 331 | "output_type": "error", 332 | "traceback": [ 333 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 334 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", 335 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfeature_importance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfeature_importance\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Feature'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Importance'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m 
\u001b[0mfeature_importance\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'Importance'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 336 | "\u001b[1;31mAttributeError\u001b[0m: module 'pandas' has no attribute 'df'" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "feature_importance=pd.dataframe(list(zip(X_train.columns,df.feature_importance)),columns=['Feature','Importance'])\n", 342 | "feature_importance.sort_values(by='Importance',ascending=False)\n", 343 | "#It can show error like pandas has no attribute df sometimes .Try to update the library." 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "NOW WE WILL USE RANDOM FOREST REGRESSION" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 54, 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | "name": "stderr", 360 | "output_type": "stream", 361 | "text": [ 362 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", 363 | " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" 364 | ] 365 | }, 366 | { 367 | "data": { 368 | "text/plain": [ 369 | "RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n", 370 | " max_features='auto', max_leaf_nodes=None,\n", 371 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 372 | " min_samples_leaf=1, min_samples_split=2,\n", 373 | " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,\n", 374 | " oob_score=False, random_state=None, verbose=0, warm_start=False)" 375 | ] 376 | }, 377 | "execution_count": 54, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "from sklearn.ensemble import RandomForestRegressor\n", 384 | "reg_model=RandomForestRegressor()\n", 385 | "reg_model.fit(X_train,y_train)" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 59, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "data": { 395 | "text/plain": [ 396 | "61.886003910672" 397 | ] 398 | }, 399 | "execution_count": 59, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "result=reg_model.predict(X_test)\n", 406 | "score=100*reg_model.score(X_test,y_test)\n", 407 | "score" 408 | ] 409 | } 410 | ], 411 | "metadata": { 412 | "kernelspec": { 413 | "display_name": "Python 3", 414 | "language": "python", 415 | "name": "python3" 416 | }, 417 | "language_info": { 418 | "codemirror_mode": { 419 | "name": "ipython", 420 | "version": 3 421 | }, 422 | "file_extension": ".py", 423 | "mimetype": "text/x-python", 424 | "name": "python", 425 | "nbconvert_exporter": "python", 426 | "pygments_lexer": "ipython3", 427 | "version": "3.7.1" 428 | } 429 | }, 430 | "nbformat": 4, 431 | "nbformat_minor": 2 432 | } 433 | -------------------------------------------------------------------------------- /RandomForest/Social_Network_Ads.csv: -------------------------------------------------------------------------------- 1 | Age,EstimatedSalary,Purchased 2 | 19,19000,0 3 | 35,20000,0 4 | 26,43000,0 5 | 27,57000,0 6 | 19,76000,0 7 | 27,58000,0 8 | 27,84000,0 9 | 32,150000,1 10 | 25,33000,0 11 | 35,65000,0 12 | 26,80000,0 13 | 26,52000,0 14 | 
20,86000,0 15 | 32,18000,0 16 | 18,82000,0 17 | 29,80000,0 18 | 47,25000,1 19 | 45,26000,1 20 | 46,28000,1 21 | 48,29000,1 22 | 45,22000,1 23 | 47,49000,1 24 | 48,41000,1 25 | 45,22000,1 26 | 46,23000,1 27 | 47,20000,1 28 | 49,28000,1 29 | 47,30000,1 30 | 29,43000,0 31 | 31,18000,0 32 | 31,74000,0 33 | 27,137000,1 34 | 21,16000,0 35 | 28,44000,0 36 | 27,90000,0 37 | 35,27000,0 38 | 33,28000,0 39 | 30,49000,0 40 | 26,72000,0 41 | 27,31000,0 42 | 27,17000,0 43 | 33,51000,0 44 | 35,108000,0 45 | 30,15000,0 46 | 28,84000,0 47 | 23,20000,0 48 | 25,79000,0 49 | 27,54000,0 50 | 30,135000,1 51 | 31,89000,0 52 | 24,32000,0 53 | 18,44000,0 54 | 29,83000,0 55 | 35,23000,0 56 | 27,58000,0 57 | 24,55000,0 58 | 23,48000,0 59 | 28,79000,0 60 | 22,18000,0 61 | 32,117000,0 62 | 27,20000,0 63 | 25,87000,0 64 | 23,66000,0 65 | 32,120000,1 66 | 59,83000,0 67 | 24,58000,0 68 | 24,19000,0 69 | 23,82000,0 70 | 22,63000,0 71 | 31,68000,0 72 | 25,80000,0 73 | 24,27000,0 74 | 20,23000,0 75 | 33,113000,0 76 | 32,18000,0 77 | 34,112000,1 78 | 18,52000,0 79 | 22,27000,0 80 | 28,87000,0 81 | 26,17000,0 82 | 30,80000,0 83 | 39,42000,0 84 | 20,49000,0 85 | 35,88000,0 86 | 30,62000,0 87 | 31,118000,1 88 | 24,55000,0 89 | 28,85000,0 90 | 26,81000,0 91 | 35,50000,0 92 | 22,81000,0 93 | 30,116000,0 94 | 26,15000,0 95 | 29,28000,0 96 | 29,83000,0 97 | 35,44000,0 98 | 35,25000,0 99 | 28,123000,1 100 | 35,73000,0 101 | 28,37000,0 102 | 27,88000,0 103 | 28,59000,0 104 | 32,86000,0 105 | 33,149000,1 106 | 19,21000,0 107 | 21,72000,0 108 | 26,35000,0 109 | 27,89000,0 110 | 26,86000,0 111 | 38,80000,0 112 | 39,71000,0 113 | 37,71000,0 114 | 38,61000,0 115 | 37,55000,0 116 | 42,80000,0 117 | 40,57000,0 118 | 35,75000,0 119 | 36,52000,0 120 | 40,59000,0 121 | 41,59000,0 122 | 36,75000,0 123 | 37,72000,0 124 | 40,75000,0 125 | 35,53000,0 126 | 41,51000,0 127 | 39,61000,0 128 | 42,65000,0 129 | 26,32000,0 130 | 30,17000,0 131 | 26,84000,0 132 | 31,58000,0 133 | 33,31000,0 134 | 30,87000,0 135 | 21,68000,0 136 | 28,55000,0 137 | 23,63000,0 138 | 20,82000,0 139 | 30,107000,1 140 | 28,59000,0 141 | 19,25000,0 142 | 19,85000,0 143 | 18,68000,0 144 | 35,59000,0 145 | 30,89000,0 146 | 34,25000,0 147 | 24,89000,0 148 | 27,96000,1 149 | 41,30000,0 150 | 29,61000,0 151 | 20,74000,0 152 | 26,15000,0 153 | 41,45000,0 154 | 31,76000,0 155 | 36,50000,0 156 | 40,47000,0 157 | 31,15000,0 158 | 46,59000,0 159 | 29,75000,0 160 | 26,30000,0 161 | 32,135000,1 162 | 32,100000,1 163 | 25,90000,0 164 | 37,33000,0 165 | 35,38000,0 166 | 33,69000,0 167 | 18,86000,0 168 | 22,55000,0 169 | 35,71000,0 170 | 29,148000,1 171 | 29,47000,0 172 | 21,88000,0 173 | 34,115000,0 174 | 26,118000,0 175 | 34,43000,0 176 | 34,72000,0 177 | 23,28000,0 178 | 35,47000,0 179 | 25,22000,0 180 | 24,23000,0 181 | 31,34000,0 182 | 26,16000,0 183 | 31,71000,0 184 | 32,117000,1 185 | 33,43000,0 186 | 33,60000,0 187 | 31,66000,0 188 | 20,82000,0 189 | 33,41000,0 190 | 35,72000,0 191 | 28,32000,0 192 | 24,84000,0 193 | 19,26000,0 194 | 29,43000,0 195 | 19,70000,0 196 | 28,89000,0 197 | 34,43000,0 198 | 30,79000,0 199 | 20,36000,0 200 | 26,80000,0 201 | 35,22000,0 202 | 35,39000,0 203 | 49,74000,0 204 | 39,134000,1 205 | 41,71000,0 206 | 58,101000,1 207 | 47,47000,0 208 | 55,130000,1 209 | 52,114000,0 210 | 40,142000,1 211 | 46,22000,0 212 | 48,96000,1 213 | 52,150000,1 214 | 59,42000,0 215 | 35,58000,0 216 | 47,43000,0 217 | 60,108000,1 218 | 49,65000,0 219 | 40,78000,0 220 | 46,96000,0 221 | 59,143000,1 222 | 41,80000,0 223 | 35,91000,1 224 | 37,144000,1 225 | 60,102000,1 226 | 
35,60000,0 227 | 37,53000,0 228 | 36,126000,1 229 | 56,133000,1 230 | 40,72000,0 231 | 42,80000,1 232 | 35,147000,1 233 | 39,42000,0 234 | 40,107000,1 235 | 49,86000,1 236 | 38,112000,0 237 | 46,79000,1 238 | 40,57000,0 239 | 37,80000,0 240 | 46,82000,0 241 | 53,143000,1 242 | 42,149000,1 243 | 38,59000,0 244 | 50,88000,1 245 | 56,104000,1 246 | 41,72000,0 247 | 51,146000,1 248 | 35,50000,0 249 | 57,122000,1 250 | 41,52000,0 251 | 35,97000,1 252 | 44,39000,0 253 | 37,52000,0 254 | 48,134000,1 255 | 37,146000,1 256 | 50,44000,0 257 | 52,90000,1 258 | 41,72000,0 259 | 40,57000,0 260 | 58,95000,1 261 | 45,131000,1 262 | 35,77000,0 263 | 36,144000,1 264 | 55,125000,1 265 | 35,72000,0 266 | 48,90000,1 267 | 42,108000,1 268 | 40,75000,0 269 | 37,74000,0 270 | 47,144000,1 271 | 40,61000,0 272 | 43,133000,0 273 | 59,76000,1 274 | 60,42000,1 275 | 39,106000,1 276 | 57,26000,1 277 | 57,74000,1 278 | 38,71000,0 279 | 49,88000,1 280 | 52,38000,1 281 | 50,36000,1 282 | 59,88000,1 283 | 35,61000,0 284 | 37,70000,1 285 | 52,21000,1 286 | 48,141000,0 287 | 37,93000,1 288 | 37,62000,0 289 | 48,138000,1 290 | 41,79000,0 291 | 37,78000,1 292 | 39,134000,1 293 | 49,89000,1 294 | 55,39000,1 295 | 37,77000,0 296 | 35,57000,0 297 | 36,63000,0 298 | 42,73000,1 299 | 43,112000,1 300 | 45,79000,0 301 | 46,117000,1 302 | 58,38000,1 303 | 48,74000,1 304 | 37,137000,1 305 | 37,79000,1 306 | 40,60000,0 307 | 42,54000,0 308 | 51,134000,0 309 | 47,113000,1 310 | 36,125000,1 311 | 38,50000,0 312 | 42,70000,0 313 | 39,96000,1 314 | 38,50000,0 315 | 49,141000,1 316 | 39,79000,0 317 | 39,75000,1 318 | 54,104000,1 319 | 35,55000,0 320 | 45,32000,1 321 | 36,60000,0 322 | 52,138000,1 323 | 53,82000,1 324 | 41,52000,0 325 | 48,30000,1 326 | 48,131000,1 327 | 41,60000,0 328 | 41,72000,0 329 | 42,75000,0 330 | 36,118000,1 331 | 47,107000,1 332 | 38,51000,0 333 | 48,119000,1 334 | 42,65000,0 335 | 40,65000,0 336 | 57,60000,1 337 | 36,54000,0 338 | 58,144000,1 339 | 35,79000,0 340 | 38,55000,0 341 | 39,122000,1 342 | 53,104000,1 343 | 35,75000,0 344 | 38,65000,0 345 | 47,51000,1 346 | 47,105000,1 347 | 41,63000,0 348 | 53,72000,1 349 | 54,108000,1 350 | 39,77000,0 351 | 38,61000,0 352 | 38,113000,1 353 | 37,75000,0 354 | 42,90000,1 355 | 37,57000,0 356 | 36,99000,1 357 | 60,34000,1 358 | 54,70000,1 359 | 41,72000,0 360 | 40,71000,1 361 | 42,54000,0 362 | 43,129000,1 363 | 53,34000,1 364 | 47,50000,1 365 | 42,79000,0 366 | 42,104000,1 367 | 59,29000,1 368 | 58,47000,1 369 | 46,88000,1 370 | 38,71000,0 371 | 54,26000,1 372 | 60,46000,1 373 | 60,83000,1 374 | 39,73000,0 375 | 59,130000,1 376 | 37,80000,0 377 | 46,32000,1 378 | 46,74000,0 379 | 42,53000,0 380 | 41,87000,1 381 | 58,23000,1 382 | 42,64000,0 383 | 48,33000,1 384 | 44,139000,1 385 | 49,28000,1 386 | 57,33000,1 387 | 56,60000,1 388 | 49,39000,1 389 | 39,71000,0 390 | 47,34000,1 391 | 48,35000,1 392 | 48,33000,1 393 | 47,23000,1 394 | 45,45000,1 395 | 60,42000,1 396 | 39,59000,0 397 | 46,41000,1 398 | 51,23000,1 399 | 50,20000,1 400 | 36,33000,0 401 | 49,36000,1 -------------------------------------------------------------------------------- /RandomForest/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "RANDOM FOREST" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 13, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import 
matplotlib.pyplot as plt" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 14, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | "
AgeEstimatedSalaryPurchased
019190000
135200000
226430000
327570000
419760000
\n", 85 | "
" 86 | ], 87 | "text/plain": [ 88 | " Age EstimatedSalary Purchased\n", 89 | "0 19 19000 0\n", 90 | "1 35 20000 0\n", 91 | "2 26 43000 0\n", 92 | "3 27 57000 0\n", 93 | "4 19 76000 0" 94 | ] 95 | }, 96 | "execution_count": 14, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "df=pd.read_csv('Social_Network_Ads.csv')\n", 103 | "df.head()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 16, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "0 257\n", 115 | "1 143\n", 116 | "Name: Purchased, dtype: int64" 117 | ] 118 | }, 119 | "execution_count": 16, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "df.Purchased.value_counts()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "NOW MAKE THE DEPENDENT MATRIX AND INDEPENDENT MATRIX" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 21, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | " Age EstimatedSalary\n", 145 | "0 19 19000\n", 146 | "1 35 20000\n", 147 | "2 26 43000\n", 148 | "3 27 57000\n", 149 | "4 19 76000\n" 150 | ] 151 | }, 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "0 0\n", 156 | "1 0\n", 157 | "2 0\n", 158 | "3 0\n", 159 | "4 0\n", 160 | "Name: Purchased, dtype: int64" 161 | ] 162 | }, 163 | "execution_count": 21, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "X=df[['Age','EstimatedSalary']]\n", 170 | "y=df['Purchased']\n", 171 | "print(X.head())\n", 172 | "y.head()\n" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "NOW SPLIT THE DATASET INTO TRAINING SET AND TEST SET" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 22, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "from sklearn.model_selection import train_test_split\n", 189 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=23)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "TRAIN THE MODEL" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 23, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "name": "stderr", 208 | "output_type": "stream", 209 | "text": [ 210 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", 211 | " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" 212 | ] 213 | }, 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", 218 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", 219 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 220 | " min_samples_leaf=1, min_samples_split=2,\n", 221 | " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,\n", 222 | " oob_score=False, random_state=None, verbose=0,\n", 223 | " warm_start=False)" 224 | ] 225 | }, 226 | "execution_count": 23, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "from sklearn.ensemble import RandomForestClassifier\n", 233 | 
"model=RandomForestClassifier()\n", 234 | "model.fit(X_train,y_train)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "NOW PREDICT " 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 30, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "name": "stdout", 251 | "output_type": "stream", 252 | "text": [ 253 | "[0 1 0 0 1 0 1 1 1 1 0 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 0 0\n", 254 | " 0 1 0 1 0 1 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 1 0 0 0 1 0 0 0\n", 255 | " 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0\n", 256 | " 0 0 0 0 0 1 0 1 0]\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "result=model.predict(X_test)\n", 262 | "print(result)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "NOW FIND THE SCORE AND THE CLASSIFICATION REPORT" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 31, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "90.0\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "score=100*model.score(X_test,y_test)\n", 287 | "print(score)" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 32, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "name": "stdout", 297 | "output_type": "stream", 298 | "text": [ 299 | " precision recall f1-score support\n", 300 | "\n", 301 | " 0 0.92 0.92 0.92 79\n", 302 | " 1 0.85 0.85 0.85 41\n", 303 | "\n", 304 | " micro avg 0.90 0.90 0.90 120\n", 305 | " macro avg 0.89 0.89 0.89 120\n", 306 | "weighted avg 0.90 0.90 0.90 120\n", 307 | "\n" 308 | ] 309 | } 310 | ], 311 | "source": [ 312 | "from sklearn.metrics import classification_report\n", 313 | "print(classification_report(y_test,result))" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "WE CAN FIND WHICH FEATURE IMPORTANT OR IMPORTANCE OF EACH OF THE FEATURE" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 52, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "ename": "AttributeError", 330 | "evalue": "module 'pandas' has no attribute 'df'", 331 | "output_type": "error", 332 | "traceback": [ 333 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 334 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", 335 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfeature_importance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfeature_importance\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Feature'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Importance'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m 
\u001b[0mfeature_importance\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'Importance'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 336 | "\u001b[1;31mAttributeError\u001b[0m: module 'pandas' has no attribute 'df'" 337 | ] 338 | } 339 | ], 340 | "source": [ 341 | "feature_importance=pd.dataframe(list(zip(X_train.columns,df.feature_importance)),columns=['Feature','Importance'])\n", 342 | "feature_importance.sort_values(by='Importance',ascending=False)\n", 343 | "#It can show error like pandas has no attribute df sometimes .Try to update the library." 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "NOW WE WILL USE RANDOM FOREST REGRESSION" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 54, 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | "name": "stderr", 360 | "output_type": "stream", 361 | "text": [ 362 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n", 363 | " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n" 364 | ] 365 | }, 366 | { 367 | "data": { 368 | "text/plain": [ 369 | "RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n", 370 | " max_features='auto', max_leaf_nodes=None,\n", 371 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 372 | " min_samples_leaf=1, min_samples_split=2,\n", 373 | " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,\n", 374 | " oob_score=False, random_state=None, verbose=0, warm_start=False)" 375 | ] 376 | }, 377 | "execution_count": 54, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "from sklearn.ensemble import RandomForestRegressor\n", 384 | "reg_model=RandomForestRegressor()\n", 385 | "reg_model.fit(X_train,y_train)" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 59, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "data": { 395 | "text/plain": [ 396 | "61.886003910672" 397 | ] 398 | }, 399 | "execution_count": 59, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "result=reg_model.predict(X_test)\n", 406 | "score=100*reg_model.score(X_test,y_test)\n", 407 | "score" 408 | ] 409 | } 410 | ], 411 | "metadata": { 412 | "kernelspec": { 413 | "display_name": "Python 3", 414 | "language": "python", 415 | "name": "python3" 416 | }, 417 | "language_info": { 418 | "codemirror_mode": { 419 | "name": "ipython", 420 | "version": 3 421 | }, 422 | "file_extension": ".py", 423 | "mimetype": "text/x-python", 424 | "name": "python", 425 | "nbconvert_exporter": "python", 426 | "pygments_lexer": "ipython3", 427 | "version": "3.7.1" 428 | } 429 | }, 430 | "nbformat": 4, 431 | "nbformat_minor": 2 432 | } 433 | -------------------------------------------------------------------------------- /SVM/Social_Network_Ads.csv: -------------------------------------------------------------------------------- 1 | Age,EstimatedSalary,Purchased 2 | 19,19000,0 3 | 35,20000,0 4 | 26,43000,0 5 | 27,57000,0 6 | 19,76000,0 7 | 27,58000,0 8 | 27,84000,0 9 | 32,150000,1 10 | 25,33000,0 11 | 35,65000,0 12 | 26,80000,0 13 | 26,52000,0 14 | 20,86000,0 15 
| 32,18000,0 16 | 18,82000,0 17 | 29,80000,0 18 | 47,25000,1 19 | 45,26000,1 20 | 46,28000,1 21 | 48,29000,1 22 | 45,22000,1 23 | 47,49000,1 24 | 48,41000,1 25 | 45,22000,1 26 | 46,23000,1 27 | 47,20000,1 28 | 49,28000,1 29 | 47,30000,1 30 | 29,43000,0 31 | 31,18000,0 32 | 31,74000,0 33 | 27,137000,1 34 | 21,16000,0 35 | 28,44000,0 36 | 27,90000,0 37 | 35,27000,0 38 | 33,28000,0 39 | 30,49000,0 40 | 26,72000,0 41 | 27,31000,0 42 | 27,17000,0 43 | 33,51000,0 44 | 35,108000,0 45 | 30,15000,0 46 | 28,84000,0 47 | 23,20000,0 48 | 25,79000,0 49 | 27,54000,0 50 | 30,135000,1 51 | 31,89000,0 52 | 24,32000,0 53 | 18,44000,0 54 | 29,83000,0 55 | 35,23000,0 56 | 27,58000,0 57 | 24,55000,0 58 | 23,48000,0 59 | 28,79000,0 60 | 22,18000,0 61 | 32,117000,0 62 | 27,20000,0 63 | 25,87000,0 64 | 23,66000,0 65 | 32,120000,1 66 | 59,83000,0 67 | 24,58000,0 68 | 24,19000,0 69 | 23,82000,0 70 | 22,63000,0 71 | 31,68000,0 72 | 25,80000,0 73 | 24,27000,0 74 | 20,23000,0 75 | 33,113000,0 76 | 32,18000,0 77 | 34,112000,1 78 | 18,52000,0 79 | 22,27000,0 80 | 28,87000,0 81 | 26,17000,0 82 | 30,80000,0 83 | 39,42000,0 84 | 20,49000,0 85 | 35,88000,0 86 | 30,62000,0 87 | 31,118000,1 88 | 24,55000,0 89 | 28,85000,0 90 | 26,81000,0 91 | 35,50000,0 92 | 22,81000,0 93 | 30,116000,0 94 | 26,15000,0 95 | 29,28000,0 96 | 29,83000,0 97 | 35,44000,0 98 | 35,25000,0 99 | 28,123000,1 100 | 35,73000,0 101 | 28,37000,0 102 | 27,88000,0 103 | 28,59000,0 104 | 32,86000,0 105 | 33,149000,1 106 | 19,21000,0 107 | 21,72000,0 108 | 26,35000,0 109 | 27,89000,0 110 | 26,86000,0 111 | 38,80000,0 112 | 39,71000,0 113 | 37,71000,0 114 | 38,61000,0 115 | 37,55000,0 116 | 42,80000,0 117 | 40,57000,0 118 | 35,75000,0 119 | 36,52000,0 120 | 40,59000,0 121 | 41,59000,0 122 | 36,75000,0 123 | 37,72000,0 124 | 40,75000,0 125 | 35,53000,0 126 | 41,51000,0 127 | 39,61000,0 128 | 42,65000,0 129 | 26,32000,0 130 | 30,17000,0 131 | 26,84000,0 132 | 31,58000,0 133 | 33,31000,0 134 | 30,87000,0 135 | 21,68000,0 136 | 28,55000,0 137 | 23,63000,0 138 | 20,82000,0 139 | 30,107000,1 140 | 28,59000,0 141 | 19,25000,0 142 | 19,85000,0 143 | 18,68000,0 144 | 35,59000,0 145 | 30,89000,0 146 | 34,25000,0 147 | 24,89000,0 148 | 27,96000,1 149 | 41,30000,0 150 | 29,61000,0 151 | 20,74000,0 152 | 26,15000,0 153 | 41,45000,0 154 | 31,76000,0 155 | 36,50000,0 156 | 40,47000,0 157 | 31,15000,0 158 | 46,59000,0 159 | 29,75000,0 160 | 26,30000,0 161 | 32,135000,1 162 | 32,100000,1 163 | 25,90000,0 164 | 37,33000,0 165 | 35,38000,0 166 | 33,69000,0 167 | 18,86000,0 168 | 22,55000,0 169 | 35,71000,0 170 | 29,148000,1 171 | 29,47000,0 172 | 21,88000,0 173 | 34,115000,0 174 | 26,118000,0 175 | 34,43000,0 176 | 34,72000,0 177 | 23,28000,0 178 | 35,47000,0 179 | 25,22000,0 180 | 24,23000,0 181 | 31,34000,0 182 | 26,16000,0 183 | 31,71000,0 184 | 32,117000,1 185 | 33,43000,0 186 | 33,60000,0 187 | 31,66000,0 188 | 20,82000,0 189 | 33,41000,0 190 | 35,72000,0 191 | 28,32000,0 192 | 24,84000,0 193 | 19,26000,0 194 | 29,43000,0 195 | 19,70000,0 196 | 28,89000,0 197 | 34,43000,0 198 | 30,79000,0 199 | 20,36000,0 200 | 26,80000,0 201 | 35,22000,0 202 | 35,39000,0 203 | 49,74000,0 204 | 39,134000,1 205 | 41,71000,0 206 | 58,101000,1 207 | 47,47000,0 208 | 55,130000,1 209 | 52,114000,0 210 | 40,142000,1 211 | 46,22000,0 212 | 48,96000,1 213 | 52,150000,1 214 | 59,42000,0 215 | 35,58000,0 216 | 47,43000,0 217 | 60,108000,1 218 | 49,65000,0 219 | 40,78000,0 220 | 46,96000,0 221 | 59,143000,1 222 | 41,80000,0 223 | 35,91000,1 224 | 37,144000,1 225 | 60,102000,1 226 | 35,60000,0 227 | 
37,53000,0 228 | 36,126000,1 229 | 56,133000,1 230 | 40,72000,0 231 | 42,80000,1 232 | 35,147000,1 233 | 39,42000,0 234 | 40,107000,1 235 | 49,86000,1 236 | 38,112000,0 237 | 46,79000,1 238 | 40,57000,0 239 | 37,80000,0 240 | 46,82000,0 241 | 53,143000,1 242 | 42,149000,1 243 | 38,59000,0 244 | 50,88000,1 245 | 56,104000,1 246 | 41,72000,0 247 | 51,146000,1 248 | 35,50000,0 249 | 57,122000,1 250 | 41,52000,0 251 | 35,97000,1 252 | 44,39000,0 253 | 37,52000,0 254 | 48,134000,1 255 | 37,146000,1 256 | 50,44000,0 257 | 52,90000,1 258 | 41,72000,0 259 | 40,57000,0 260 | 58,95000,1 261 | 45,131000,1 262 | 35,77000,0 263 | 36,144000,1 264 | 55,125000,1 265 | 35,72000,0 266 | 48,90000,1 267 | 42,108000,1 268 | 40,75000,0 269 | 37,74000,0 270 | 47,144000,1 271 | 40,61000,0 272 | 43,133000,0 273 | 59,76000,1 274 | 60,42000,1 275 | 39,106000,1 276 | 57,26000,1 277 | 57,74000,1 278 | 38,71000,0 279 | 49,88000,1 280 | 52,38000,1 281 | 50,36000,1 282 | 59,88000,1 283 | 35,61000,0 284 | 37,70000,1 285 | 52,21000,1 286 | 48,141000,0 287 | 37,93000,1 288 | 37,62000,0 289 | 48,138000,1 290 | 41,79000,0 291 | 37,78000,1 292 | 39,134000,1 293 | 49,89000,1 294 | 55,39000,1 295 | 37,77000,0 296 | 35,57000,0 297 | 36,63000,0 298 | 42,73000,1 299 | 43,112000,1 300 | 45,79000,0 301 | 46,117000,1 302 | 58,38000,1 303 | 48,74000,1 304 | 37,137000,1 305 | 37,79000,1 306 | 40,60000,0 307 | 42,54000,0 308 | 51,134000,0 309 | 47,113000,1 310 | 36,125000,1 311 | 38,50000,0 312 | 42,70000,0 313 | 39,96000,1 314 | 38,50000,0 315 | 49,141000,1 316 | 39,79000,0 317 | 39,75000,1 318 | 54,104000,1 319 | 35,55000,0 320 | 45,32000,1 321 | 36,60000,0 322 | 52,138000,1 323 | 53,82000,1 324 | 41,52000,0 325 | 48,30000,1 326 | 48,131000,1 327 | 41,60000,0 328 | 41,72000,0 329 | 42,75000,0 330 | 36,118000,1 331 | 47,107000,1 332 | 38,51000,0 333 | 48,119000,1 334 | 42,65000,0 335 | 40,65000,0 336 | 57,60000,1 337 | 36,54000,0 338 | 58,144000,1 339 | 35,79000,0 340 | 38,55000,0 341 | 39,122000,1 342 | 53,104000,1 343 | 35,75000,0 344 | 38,65000,0 345 | 47,51000,1 346 | 47,105000,1 347 | 41,63000,0 348 | 53,72000,1 349 | 54,108000,1 350 | 39,77000,0 351 | 38,61000,0 352 | 38,113000,1 353 | 37,75000,0 354 | 42,90000,1 355 | 37,57000,0 356 | 36,99000,1 357 | 60,34000,1 358 | 54,70000,1 359 | 41,72000,0 360 | 40,71000,1 361 | 42,54000,0 362 | 43,129000,1 363 | 53,34000,1 364 | 47,50000,1 365 | 42,79000,0 366 | 42,104000,1 367 | 59,29000,1 368 | 58,47000,1 369 | 46,88000,1 370 | 38,71000,0 371 | 54,26000,1 372 | 60,46000,1 373 | 60,83000,1 374 | 39,73000,0 375 | 59,130000,1 376 | 37,80000,0 377 | 46,32000,1 378 | 46,74000,0 379 | 42,53000,0 380 | 41,87000,1 381 | 58,23000,1 382 | 42,64000,0 383 | 48,33000,1 384 | 44,139000,1 385 | 49,28000,1 386 | 57,33000,1 387 | 56,60000,1 388 | 49,39000,1 389 | 39,71000,0 390 | 47,34000,1 391 | 48,35000,1 392 | 48,33000,1 393 | 47,23000,1 394 | 45,45000,1 395 | 60,42000,1 396 | 39,59000,0 397 | 46,41000,1 398 | 51,23000,1 399 | 50,20000,1 400 | 36,33000,0 401 | 49,36000,1 -------------------------------------------------------------------------------- /Sentiment Analysis/moviereview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import seaborn as sb\n", 12 | "import matplotlib.pyplot as plt" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | 
"source": [ 19 | "# Using Movie Reviews dataset of nltk library" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Importing the moview_reviews from nltk" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "Help on LazyCorpusLoader in module nltk.corpus.util object:\n", 39 | "\n", 40 | "movie_reviews = class LazyCorpusLoader(builtins.object)\n", 41 | " | movie_reviews(name, reader_cls, *args, **kwargs)\n", 42 | " | \n", 43 | " | To see the API documentation for this lazily loaded corpus, first\n", 44 | " | run corpus.ensure_loaded(), and then run help(this_corpus).\n", 45 | " | \n", 46 | " | LazyCorpusLoader is a proxy object which is used to stand in for a\n", 47 | " | corpus object before the corpus is loaded. This allows NLTK to\n", 48 | " | create an object for each corpus, but defer the costs associated\n", 49 | " | with loading those corpora until the first time that they're\n", 50 | " | actually accessed.\n", 51 | " | \n", 52 | " | The first time this object is accessed in any way, it will load\n", 53 | " | the corresponding corpus, and transform itself into that corpus\n", 54 | " | (by modifying its own ``__class__`` and ``__dict__`` attributes).\n", 55 | " | \n", 56 | " | If the corpus can not be found, then accessing this object will\n", 57 | " | raise an exception, displaying installation instructions for the\n", 58 | " | NLTK data package. Once they've properly installed the data\n", 59 | " | package (or modified ``nltk.data.path`` to point to its location),\n", 60 | " | they can then use the corpus object without restarting python.\n", 61 | " | \n", 62 | " | :param name: The name of the corpus\n", 63 | " | :type name: str\n", 64 | " | :param reader_cls: The specific CorpusReader class, e.g. PlaintextCorpusReader, WordListCorpusReader\n", 65 | " | :type reader: nltk.corpus.reader.api.CorpusReader\n", 66 | " | :param nltk_data_subdir: The subdirectory where the corpus is stored.\n", 67 | " | :type nltk_data_subdir: str\n", 68 | " | :param *args: Any other non-keywords arguments that `reader_cls` might need.\n", 69 | " | :param *kargs: Any other keywords arguments that `reader_cls` might need.\n", 70 | " | \n", 71 | " | Methods defined here:\n", 72 | " | \n", 73 | " | __getattr__(self, attr)\n", 74 | " | \n", 75 | " | __init__(self, name, reader_cls, *args, **kwargs)\n", 76 | " | Initialize self. 
See help(type(self)) for accurate signature.\n", 77 | " | \n", 78 | " | __repr__(self)\n", 79 | " | Return repr(self).\n", 80 | " | \n", 81 | " | __unicode__ = __str__(self, /)\n", 82 | " | Return str(self).\n", 83 | " | \n", 84 | " | unicode_repr = __repr__(self)\n", 85 | " | \n", 86 | " | ----------------------------------------------------------------------\n", 87 | " | Data descriptors defined here:\n", 88 | " | \n", 89 | " | __dict__\n", 90 | " | dictionary for instance variables (if defined)\n", 91 | " | \n", 92 | " | __weakref__\n", 93 | " | list of weak references to the object (if defined)\n", 94 | "\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "import random\n", 100 | "from nltk.corpus import movie_reviews\n", 101 | "help(movie_reviews)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "## Preparing the document " 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 16, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "Number of Reviews: 2000\n", 121 | "Number of Positive review: 1000\n", 122 | "Number of Negative review: 1000\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "#loading moview_reviews\n", 128 | "#import nltk\n", 129 | "#nltk.download('movie_reviews')\n", 130 | "documents = [(list(movie_reviews.words(fileid)), category)\n", 131 | " for category in movie_reviews.categories()\n", 132 | " for fileid in movie_reviews.fileids(category)]\n", 133 | "print(\"Number of Reviews:\",len(documents))\n", 134 | "\n", 135 | "#This loaded document is a list of tokens eg-['don',''','t',i,movie,was,good]\n", 136 | "\n", 137 | "#For shuffling the document (not so important ,just to increase reproductibility)\n", 138 | "random.seed\n", 139 | "random.shuffle(documents)\n", 140 | "\n", 141 | "#list to store all review text and label\n", 142 | "text_data=[]\n", 143 | "label=[]\n", 144 | "for i in range(len(documents)):\n", 145 | " text_data.append(' '.join(documents[i][0]))\n", 146 | " label.append(0 if documents[i][1]=='neg' else 1)\n", 147 | " \n", 148 | "print(\"Number of Positive review:\",label.count(1))\n", 149 | "print(\"Number of Negative review:\",label.count(0))\n", 150 | "\n" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "## Splitting the Dataset" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 29, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "eddie murphy has a lot riding on harlem nights . as the movie ' s writer , director , executive producer , and star , murphy will shoulder all of the blame if harlem nights fails . but at the same time , he ' ll receive all of the credit if it succeeds . should you sacrifice your hard - earned cash to support murphy ' s risky gamble ? well , that depends on whom you trust more : me or eddie murphy . here ' s what murphy thinks : \" i think the audience is expecting a good time . they gonna get sexy . they gonna get funny . they gonna get drama . they gonna get all of that . i think it ' s the best movie i ' ve done \" ( paramount radio network ) . here ' s what i think : harlem nights is charmless , unoriginal , disappointing , and almost without question , the worst film of the actor ' s career ( i haven ' t seen best defense ) . and guess who ' s to blame ? ! 
the movie ' s problem is not murphy ' s direction : harlem nights is a fairly good looking film . no , the project was probably doomed even before the cameras rolled . murphy ' s awful script is the culprit . let ' s count the mistakes he makes in his first attempt at screenwriting : ( 1 ) murphy shatters the record for the most profanity in a motion picture . yes , he even outdoes his own work in raw . practically every line of dialogue in harlem nights contains at least one four letter word . and after 15 minutes , it gets irritating . ( 2 ) murphy wastes the talents of his fine cast . richard pryor , redd foxx , michael lerner , and della reese face the impossible task of carving out credible characters from a script riddled with stereotypes . each of them shines occasionally , but basically what we have are good performers stuck in a bad vehicle . ( 3 ) the movie demeans women by depicting them solely as sexual objects and as pawns in power struggles between men . murphy has admitted in interviews that he is weary of women in his private life , which is really neither here nor there . but when murphy puts his bitter feelings on 3 , 000 movie screens across the country , it ' s another matter altogether . you ' re forced to swallow some pretty gruesome stuff . for instance , murphy punches della reese in the stomach . and he shoots jasmine guy in the head . this is a mean - spirited movie , folks ! lovely newcomer lela rochon gets off easy in her role as a common whore , but only because she doesn ' t have any scenes with murphy . thank god : he might have run her over with a bulldozer . ( 4 ) murphy has written for himself perhaps his blandest role to date . the loveable eddie murphy charisma emerges only once or twice during the film . murphy would rather give his character a spiffy wardrobe than a spiffy personality . sometimes it seems as if murphy made harlem nights just so he could wear fancy suits and look debonair . ( 5 ) the plot is a shameless rip - off of the sting . if you ' re going to make another sting movie , you ' ve got to do something original . murphy ' s tale of warring nightclub owners in harlem ( circa 1938 ) fails to add anything new to the formula . ( 6 ) to get laughs , murphy makes fun of stuttering . you know a comedy is digging deep when it resorts to ridiculing the handicapped . ( 7 ) murphy ' s idea of drama is a scene in which his character apologizes for the first time in his life . for what ? for shooting reese ' s little toe off ! needless to say , murphy shows little , if any , promise or imagination as a screenwriter . in all fairness , however , a few rays of sunshine do manage to break through the gloomy cloud surrounding the movie . danny aiello is fun to watch as a dirty cop on the take . aiello stands out in the large , ensemble cast : he obviously relishes the opportunity to play such a nasty character ( a racist detective with mob ties ) . aiello ' s zesty performance gives harlem nights some much needed spice . another bright spot is arsenio hall , who has a hilarious , show - stopping cameo as a cry - baby gangster ; hall virtually steals the spotlight from murphy . in fact , hall ' s ten minutes on screen are the funniest ten minutes in the movie . unfortunately , his character is completely irrelevant to the plot ; murphy should have given hall a much bigger role . of course , i ' ve already mentioned that i didn ' t care for murphy ' s character , but i have to admit that i did love his neckties . 
they are simply spectacular -- almost worth the price of admission .\n", 170 | "\n", 171 | "0\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "from sklearn.model_selection import train_test_split\n", 177 | "X_train,X_test,y_train,y_test=train_test_split(text_data,label,test_size=0.25,random_state=23)\n", 178 | "print(X_train[3])\n", 179 | "print()\n", 180 | "print(y_train[34])" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "## Preparing the Bag of words(DTM) ,Fitting the model , Calculating the Score" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 54, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "The accuracy of the Naive bayes: 78.8\n", 200 | "Classification Report:\n", 201 | " precision recall f1-score support\n", 202 | "\n", 203 | " Negative 0.72 0.89 0.80 238\n", 204 | " Positive 0.88 0.69 0.77 262\n", 205 | "\n", 206 | " micro avg 0.79 0.79 0.79 500\n", 207 | " macro avg 0.80 0.79 0.79 500\n", 208 | "weighted avg 0.81 0.79 0.79 500\n", 209 | "\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "from sklearn import metrics\n", 215 | "from sklearn.naive_bayes import MultinomialNB\n", 216 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 217 | "\n", 218 | "tf_cv=TfidfVectorizer(stop_words='english')\n", 219 | "train_dtm_tf=tf_cv.fit_transform(X_train)\n", 220 | "test_dtm_tf=tf_cv.transform(X_test)\n", 221 | "\n", 222 | "nb=MultinomialNB()\n", 223 | "nb=nb.fit(train_dtm_tf,y_train)\n", 224 | "predicted=nb.predict(test_dtm_tf)\n", 225 | "score=100.0* nb.score(test_dtm_tf,y_test)\n", 226 | "print(\"The accuracy of the Naive bayes:\",score)\n", 227 | "print(\"Classification Report:\")\n", 228 | "report=metrics.classification_report(y_test,predicted, target_names = ['Negative', 'Positive'])\n", 229 | "print(report)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 46, 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "data": { 239 | "text/plain": [ 240 | "array([[213, 25],\n", 241 | " [ 81, 181]], dtype=int64)" 242 | ] 243 | }, 244 | "execution_count": 46, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "metrics.confusion_matrix(y_test,predicted)\n", 251 | "#confusion(y_test, y_pred, ['Negative', 'Positive'], 'Naive Bayes Model')" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "## Top 20 positive words " 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 47, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "name": "stdout", 268 | "output_type": "stream", 269 | "text": [ 270 | "['film', 'movie', 'like', 'life', 'story', 'good', 'just', 'time', 'character', 'characters', 'films', 'great', 'way', 'people', 'best', 'really', 'does', 'love', 'man', 'world']\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "all_words=np.array(tf_cv.get_feature_names())\n", 276 | "top_word_index=np.argsort(nb.coef_[0])[-20:]\n", 277 | "tn_lst=[word for word in all_words[top_word_index]]\n", 278 | "tn_lst.reverse()\n", 279 | "print(tn_lst)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "## Using Logistic Regression" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 53, 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "name": "stdout", 296 | "output_type": "stream", 297 | 
"text": [ 298 | "Accuracy of Logistic Regression: 84.39999999999999\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "from sklearn.linear_model import LogisticRegression\n", 304 | "lr=LogisticRegression(C=1000)\n", 305 | "\n", 306 | "lr=lr.fit(train_dtm_tf,y_train)\n", 307 | "predicted=lr.predict(test_dtm_tf)\n", 308 | "scr = 100.0 * lr.score(test_dtm_tf, y_test)\n", 309 | "print(\"Accuracy of Logistic Regression:\",scr)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "### Top 20 Positive Word " 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 55, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "name": "stdout", 326 | "output_type": "stream", 327 | "text": [ 328 | "['great', 'fun', 'overall', 'life', 'memorable', 'definitely', 'quite', 'frank', 'performance', 'seen', 'excellent', 'hilarious', 'titanic', 'terrific', 'enjoyed', 'job', 'rob', 'family', 'different', 'performances']\n" 329 | ] 330 | } 331 | ], 332 | "source": [ 333 | "top_word_index=np.argsort(lr.coef_[0])[-20:]\n", 334 | "tn_lst=[word for word in all_words[top_word_index]]\n", 335 | "tn_lst.reverse()\n", 336 | "print(tn_lst)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "### Top 20 Negative Word" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 56, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "name": "stderr", 353 | "output_type": "stream", 354 | "text": [ 355 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", 356 | " FutureWarning)\n" 357 | ] 358 | }, 359 | { 360 | "name": "stdout", 361 | "output_type": "stream", 362 | "text": [ 363 | "['bad', 'plot', 'unfortunately', 'boring', 'worst', 'reason', 'supposed', 'awful', 'poor', 'waste', 'stupid', 'script', 'ridiculous', 'fails', 'harry', 'dull', 'carpenter', 'terrible', 'mess', 'poorly']\n" 364 | ] 365 | } 366 | ], 367 | "source": [ 368 | "y_train_reverse = [0 if y==1 else 1 for y in y_train]\n", 369 | "lr = lr.fit(train_dtm_tf, y_train_reverse)\n", 370 | "\n", 371 | "top_word_index = np.argsort(lr.coef_[0])[-20:]\n", 372 | "tn_lst = [word for word in all_words[top_word_index]]\n", 373 | "tn_lst.reverse()\n", 374 | "print(tn_lst)" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "# Now using Stemming " 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "## As Countvectorizer and TF-IDF dont do stemming " 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 5, 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "ename": "NameError", 398 | "evalue": "name 'X_train' is not defined", 399 | "output_type": "error", 400 | "traceback": [ 401 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 402 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", 403 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 12\u001b[0m 
\u001b[0mtf_cv\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTfidfVectorizer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtokenize\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 13\u001b[1;33m \u001b[0mtrain_dtm_tf\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtf_cv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 14\u001b[0m \u001b[0mtest_dtm_tf\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtf_cv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 404 | "\u001b[1;31mNameError\u001b[0m: name 'X_train' is not defined" 405 | ] 406 | } 407 | ], 408 | "source": [ 409 | "import string,nltk\n", 410 | "from nltk.stem import PorterStemmer\n", 411 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 412 | "from sklearn.linear_model import LogisticRegression\n", 413 | "\n", 414 | "def tokenize(text):\n", 415 | " tokens=nltk.word_tokenize(text)\n", 416 | " tokens=[token for token in tokens if token not in string.punctuation]\n", 417 | " ps=PorterStemmer()\n", 418 | " stems=map(stemmer.stem,tokens)\n", 419 | " return stems\n", 420 | "\n", 421 | "tf_cv=TfidfVectorizer(tokenizer=tokenize)\n", 422 | "train_dtm_tf=tf_cv.fit_transform(X_train)\n", 423 | "test_dtm_tf=tf_cv.transform(X_test)\n", 424 | "\n", 425 | "lr=LogisticRegression(C=1000)\n", 426 | "lr=lr.fit(train_dtm_tf,y_train)\n", 427 | "predicted=lr.predict(test_dtm_tf)\n", 428 | "\n", 429 | "scr=100.0 * lr.score(test_dtm_tf,y_test)\n", 430 | "print(\"Accuracy after applying stemming:\",scr)" 431 | ] 432 | } 433 | ], 434 | "metadata": { 435 | "kernelspec": { 436 | "display_name": "Python 3", 437 | "language": "python", 438 | "name": "python3" 439 | }, 440 | "language_info": { 441 | "codemirror_mode": { 442 | "name": "ipython", 443 | "version": 3 444 | }, 445 | "file_extension": ".py", 446 | "mimetype": "text/x-python", 447 | "name": "python", 448 | "nbconvert_exporter": "python", 449 | "pygments_lexer": "ipython3", 450 | "version": "3.7.1" 451 | } 452 | }, 453 | "nbformat": 4, 454 | "nbformat_minor": 2 455 | } 456 | -------------------------------------------------------------------------------- /Sentiment Analysis/restaurentreview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import seaborn as sb\n", 12 | "import matplotlib.pyplot as plt" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# Restaurant Reviews using Logistic Regression" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 4, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/html": [ 30 | "
\n", 31 | "\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | "
ReviewLiked
0Wow... Loved this place.1
1Crust is not good.0
2Not tasty and the texture was just nasty.0
3Stopped by during the late May bank holiday of...1
4The selection on the menu was great and so wer...1
\n", 80 | "
" 81 | ], 82 | "text/plain": [ 83 | " Review Liked\n", 84 | "0 Wow... Loved this place. 1\n", 85 | "1 Crust is not good. 0\n", 86 | "2 Not tasty and the texture was just nasty. 0\n", 87 | "3 Stopped by during the late May bank holiday of... 1\n", 88 | "4 The selection on the menu was great and so wer... 1" 89 | ] 90 | }, 91 | "execution_count": 4, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "document= pd.read_csv('Restaurant_Reviews.tsv', delimiter = '\\t', quoting = 3)\n", 98 | "document.head()" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 5, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "\n", 111 | "RangeIndex: 1000 entries, 0 to 999\n", 112 | "Data columns (total 2 columns):\n", 113 | "Review 1000 non-null object\n", 114 | "Liked 1000 non-null int64\n", 115 | "dtypes: int64(1), object(1)\n", 116 | "memory usage: 15.7+ KB\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "document.info()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 6, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/plain": [ 132 | "1 500\n", 133 | "0 500\n", 134 | "Name: Liked, dtype: int64" 135 | ] 136 | }, 137 | "execution_count": 6, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "document['Liked'].value_counts()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 10, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "(0 Wow... Loved this place.\n", 155 | " 1 Crust is not good.\n", 156 | " 2 Not tasty and the texture was just nasty.\n", 157 | " 3 Stopped by during the late May bank holiday of...\n", 158 | " 4 The selection on the menu was great and so wer...\n", 159 | " Name: Review, dtype: object, 0 1\n", 160 | " 1 0\n", 161 | " 2 0\n", 162 | " 3 1\n", 163 | " 4 1\n", 164 | " Name: Liked, dtype: int64)" 165 | ] 166 | }, 167 | "execution_count": 10, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "X=document['Review']\n", 174 | "y=document['Liked']\n", 175 | "X.head(),y.head()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 11, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "from sklearn.model_selection import train_test_split\n", 185 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=23)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 15, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "The accuracy using Logistic Regression 73.33333333333333\n" 198 | ] 199 | }, 200 | { 201 | "name": "stderr", 202 | "output_type": "stream", 203 | "text": [ 204 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. 
Specify a solver to silence this warning.\n", 205 | " FutureWarning)\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "from sklearn.linear_model import LogisticRegression\n", 211 | "from sklearn.feature_extraction.text import CountVectorizer\n", 212 | "\n", 213 | "cv=CountVectorizer(stop_words='english')\n", 214 | "train_dtm=cv.fit_transform(X_train)\n", 215 | "test_dtm=cv.transform(X_test)\n", 216 | "\n", 217 | "lr=LogisticRegression(C=1000)\n", 218 | "lr=lr.fit(train_dtm,y_train)\n", 219 | "predicted=lr.predict(test_dtm)\n", 220 | "\n", 221 | "scr=lr.score(test_dtm,y_test)\n", 222 | "print(\"The accuracy using Logistic Regression\",scr*100.0)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 16, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "name": "stdout", 232 | "output_type": "stream", 233 | "text": [ 234 | "['great', 'eclectic', 'loved', 'glad', 'amazing', 'heart', 'gyros', 'awesome', 'inside', 'delicious', 'fantastic', 'prompt', 'enjoyed', 'check', 'excellent', 'beat', 'complaints', 'nachos', 'friendly', 'complain']\n" 235 | ] 236 | } 237 | ], 238 | "source": [ 239 | "#top 20 positive words\n", 240 | "all_words=np.array(cv.get_feature_names())\n", 241 | "top_word_index=np.argsort(lr.coef_[0])[-20:]\n", 242 | "tn_lst=[word for word in all_words[top_word_index]]\n", 243 | "tn_lst.reverse()\n", 244 | "print(tn_lst)" 245 | ] 246 | } 247 | ], 248 | "metadata": { 249 | "kernelspec": { 250 | "display_name": "Python 3", 251 | "language": "python", 252 | "name": "python3" 253 | }, 254 | "language_info": { 255 | "codemirror_mode": { 256 | "name": "ipython", 257 | "version": 3 258 | }, 259 | "file_extension": ".py", 260 | "mimetype": "text/x-python", 261 | "name": "python", 262 | "nbconvert_exporter": "python", 263 | "pygments_lexer": "ipython3", 264 | "version": "3.7.1" 265 | } 266 | }, 267 | "nbformat": 4, 268 | "nbformat_minor": 2 269 | } 270 | -------------------------------------------------------------------------------- /TextClassification/20news-bydate_py3.pkz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suubh/Machine-Learning-in-Python/154596a9509dc7a066ae3caf5526b6f663a359cc/TextClassification/20news-bydate_py3.pkz -------------------------------------------------------------------------------- /TextClassification/Textclassification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Text Classification and Naive Bayes Classifier" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as ps\n", 17 | "import numpy as np\n", 18 | "import seaborn as sb\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import warnings\n", 21 | "warnings.filterwarnings(\"ignore\")" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from sklearn.datasets import fetch_20newsgroups" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 6, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "#help(fetch_20newsgroups)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "train=fetch_20newsgroups(data_home='.', subset='train')" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 
53 | "execution_count": 5, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "test=fetch_20newsgroups(data_home='.', subset='test')" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 12, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "(dict_keys(['data', 'filenames', 'target_names', 'target', 'DESCR']),\n", 69 | " dict_keys(['data', 'filenames', 'target_names', 'target', 'DESCR']))" 70 | ] 71 | }, 72 | "execution_count": 12, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "train.keys(),test.keys()" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "There are 5 keys in the train object:\n", 86 | "\n", 87 | "data: List of text messages\n", 88 | "filenames: List of file names, for each message in data, there's a corresponding file name. This is normally not needed in classification.\n", 89 | "target: Numeric code for the 20 news groups, from 0 to 19. Each message has a corresponding target, that is used as label or class in classification.\n", 90 | "target_names: String, name of the 20 targets.\n", 91 | "DESCR: Description of the dataset.\n" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 19, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "(11314, 7532)" 103 | ] 104 | }, 105 | "execution_count": 19, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "len(train['data']),len(test['data'])" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 20, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "name": "stdout", 121 | "output_type": "stream", 122 | "text": [ 123 | "Class 0=alt.atheism\n", 124 | "Class 1=comp.graphics\n", 125 | "Class 2=comp.os.ms-windows.misc\n", 126 | "Class 3=comp.sys.ibm.pc.hardware\n", 127 | "Class 4=comp.sys.mac.hardware\n", 128 | "Class 5=comp.windows.x\n", 129 | "Class 6=misc.forsale\n", 130 | "Class 7=rec.autos\n", 131 | "Class 8=rec.motorcycles\n", 132 | "Class 9=rec.sport.baseball\n", 133 | "Class10=rec.sport.hockey\n", 134 | "Class11=sci.crypt\n", 135 | "Class12=sci.electronics\n", 136 | "Class13=sci.med\n", 137 | "Class14=sci.space\n", 138 | "Class15=soc.religion.christian\n", 139 | "Class16=talk.politics.guns\n", 140 | "Class17=talk.politics.mideast\n", 141 | "Class18=talk.politics.misc\n", 142 | "Class19=talk.religion.misc\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "for i ,label in enumerate(train['target_names']):\n", 148 | " print(f'Class{i:2d}={label}')" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "# Printing a random message" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 28, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "Class number=0\n", 168 | "Class name=alt.atheism\n", 169 | "\n", 170 | "From: keith@cco.caltech.edu (Keith Allan Schneider)\n", 171 | "Subject: Re: <>The \"`little' things\" above were in reference to Germany, clearly. People\n", 180 | ">>said that there were similar things in Germany, but no one could name any.\n", 181 | ">That's not true. I gave you two examples. One was the rather\n", 182 | ">pevasive anti-semitism in German Christianity well before Hitler\n", 183 | ">arrived. 
The other was the system of social ranks that were used\n", 184 | ">in Imperail Germany and Austria to distinguish Jews from the rest \n", 185 | ">of the population.\n", 186 | "\n", 187 | "These don't seem like \"little things\" to me. At least, they are orders\n", 188 | "worse than the motto. Do you think that the motto is a \"little thing\"\n", 189 | "that will lead to worse things?\n", 190 | "\n", 191 | "keith\n", 192 | "\n" 193 | ] 194 | } 195 | ], 196 | "source": [ 197 | "#From training set\n", 198 | "item_num=20\n", 199 | "class_num=train['target'][item_num]\n", 200 | "print(f'Class number={class_num}')\n", 201 | "print(f'Class name={train[\"target_names\"][class_num]}')\n", 202 | "print()\n", 203 | "print(train['data'][item_num])" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 32, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "Class Number = 0\n", 216 | "Class Name = alt.atheism\n", 217 | "From: mathew \n", 218 | "Subject: Re: STRONG & weak Atheism\n", 219 | "Organization: Mantis Consultants, Cambridge. UK.\n", 220 | "X-Newsreader: rusnews v1.02\n", 221 | "Lines: 9\n", 222 | "\n", 223 | "acooper@mac.cc.macalstr.edu (Turin Turambar, ME Department of Utter Misery) writes:\n", 224 | "> Did that FAQ ever got modified to re-define strong atheists as not those who\n", 225 | "> assert the nonexistence of God, but as those who assert that they BELIEVE in \n", 226 | "> the nonexistence of God?\n", 227 | "\n", 228 | "In a word, yes.\n", 229 | "\n", 230 | "\n", 231 | "mathew\n", 232 | "\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "#From Testing set\n", 238 | "test_message = 2\n", 239 | "class_num = test['target'][test_message]\n", 240 | "print(f'Class Number = {class_num}')\n", 241 | "print(f'Class Name = {test[\"target_names\"][class_num]}')\n", 242 | "print(test['data'][test_message])" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "# Naive Bayes Classifier" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "## 1) Using CountVectorizer to make the DTM" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 6, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "Accuracy of the model is : 80.23101433882103\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "from sklearn.feature_extraction.text import CountVectorizer\n", 274 | "from sklearn.naive_bayes import MultinomialNB\n", 275 | "\n", 276 | "#make the DTM\n", 277 | "cv=CountVectorizer(stop_words='english')\n", 278 | "train_dtm=cv.fit_transform(train['data'])\n", 279 | "test_dtm=cv.transform(test['data'])\n", 280 | "\n", 281 | "#Fit the model\n", 282 | "nb=MultinomialNB()\n", 283 | "nb=nb.fit(train_dtm,train['target'])\n", 284 | "\n", 285 | "\n", 286 | "predicted = nb.predict(test_dtm)\n", 287 | "score=100.0 * nb.score(test_dtm,test['target'])\n", 288 | "print('Accuracy of the model is :',score)\n" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 52, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "name": "stdout", 298 | "output_type": "stream", 299 | "text": [ 300 | " precision recall f1-score support\n", 301 | "\n", 302 | " alt.atheism 0.80 0.81 0.80 319\n", 303 | " comp.graphics 0.65 0.80 0.72 389\n", 304 | " comp.os.ms-windows.misc 0.80 0.04 0.08 394\n", 305 | "comp.sys.ibm.pc.hardware 
0.55 0.80 0.65 392\n", 306 | " comp.sys.mac.hardware 0.85 0.79 0.82 385\n", 307 | " comp.windows.x 0.69 0.84 0.76 395\n", 308 | " misc.forsale 0.89 0.74 0.81 390\n", 309 | " rec.autos 0.89 0.92 0.91 396\n", 310 | " rec.motorcycles 0.95 0.94 0.95 398\n", 311 | " rec.sport.baseball 0.95 0.92 0.93 397\n", 312 | " rec.sport.hockey 0.92 0.97 0.94 399\n", 313 | " sci.crypt 0.80 0.96 0.87 396\n", 314 | " sci.electronics 0.79 0.70 0.74 393\n", 315 | " sci.med 0.88 0.87 0.87 396\n", 316 | " sci.space 0.84 0.92 0.88 394\n", 317 | " soc.religion.christian 0.81 0.95 0.87 398\n", 318 | " talk.politics.guns 0.72 0.93 0.81 364\n", 319 | " talk.politics.mideast 0.93 0.94 0.94 376\n", 320 | " talk.politics.misc 0.68 0.62 0.65 310\n", 321 | " talk.religion.misc 0.88 0.44 0.59 251\n", 322 | "\n", 323 | " micro avg 0.80 0.80 0.80 7532\n", 324 | " macro avg 0.81 0.79 0.78 7532\n", 325 | " weighted avg 0.81 0.80 0.78 7532\n", 326 | "\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "#Classification Report\n", 332 | "from sklearn import metrics\n", 333 | "print(metrics.classification_report(test['target'],predicted,target_names=test['target_names']))" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 7, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "ename": "ModuleNotFoundError", 343 | "evalue": "No module named 'mlplots'", 344 | "output_type": "error", 345 | "traceback": [ 346 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 347 | "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", 348 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m#Confusion Matrix\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mmlplots\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mml\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mfig\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0max\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msubplots\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m13\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m10\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# Call confusion matrix plotting routine\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 349 | "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'mlplots'" 350 | ] 351 | } 352 | ], 353 | "source": [ 354 | "#Confusion Matrix\n", 355 | "import mlplots as ml\n", 356 | "\n", 357 | "fig, ax = plt.subplots(figsize=(13, 10))\n", 358 | "# Call confusion matrix plotting routine\n", 359 | "ml.confusion(test['target'], predicted, test['target_names'], 'Naive Bayes Model')" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 8, 365 | "metadata": {}, 366 | "outputs": [ 367 | { 368 | "ename": "InvalidArgument", 369 | "evalue": "y_true must contain only values of 0 or 1", 370 | "output_type": "error", 371 | "traceback": [ 372 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 373 | "\u001b[1;31mInvalidArgument\u001b[0m Traceback (most recent call last)", 374 | "\u001b[1;32m\u001b[0m in 
\u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mmlplot\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mevaluation\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mClassificationEvaluation\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0meval\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mClassificationEvaluation\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'target'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpredicted\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtest\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'target_names'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Naive Bayes'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0meval\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconfusion_matrix\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mthreshold\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.5\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m#confusion matrix\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 375 | "\u001b[1;32m~\\Documents\\New folder\\lib\\site-packages\\mlplot\\evaluation\\classification.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, y_true, y_pred, class_names, model_name)\u001b[0m\n\u001b[0;32m 30\u001b[0m \u001b[0mtrue_values\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munique\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0my_true\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrue_values\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m2\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mequal\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrue_values\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 32\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mInvalidArgument\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'y_true must contain only values of 0 or 1'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 33\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[1;31m# Check y_pred values\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 376 | "\u001b[1;31mInvalidArgument\u001b[0m: y_true must contain only values of 0 or 1" 377 | ] 378 | } 379 | ], 380 | "source": [ 381 | "from mlplot.evaluation import ClassificationEvaluation\n", 382 | "eval = ClassificationEvaluation(test['target'], predicted,test['target_names'],'Naive Bayes')\n", 383 | "eval.confusion_matrix(threshold=0.5)\n", 384 | "\n", 385 | "#confusion 
matrix" 386 | ] 387 | } 388 | ], 389 | "metadata": { 390 | "kernelspec": { 391 | "display_name": "Python 3", 392 | "language": "python", 393 | "name": "python3" 394 | }, 395 | "language_info": { 396 | "codemirror_mode": { 397 | "name": "ipython", 398 | "version": 3 399 | }, 400 | "file_extension": ".py", 401 | "mimetype": "text/x-python", 402 | "name": "python", 403 | "nbconvert_exporter": "python", 404 | "pygments_lexer": "ipython3", 405 | "version": "3.7.1" 406 | } 407 | }, 408 | "nbformat": 4, 409 | "nbformat_minor": 2 410 | } 411 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman --------------------------------------------------------------------------------