├── .gitignore └── SPARK_TASK_1_.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /SPARK_TASK_1_.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "SPARK_TASK_1 .ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "authorship_tag": "ABX9TyMqrE/S3DQSQkR1Eaz5PBwv", 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | }, 16 | "language_info": { 17 | "name": "python" 18 | } 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "metadata": { 34 | "id": "88dV5rGW5Baa" 35 | }, 36 | "source": [ 37 | "" 38 | ], 39 | "execution_count": null, 40 | "outputs": [] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": { 45 | "id": "Y5KNclE35Ftj" 46 | }, 47 | "source": [ 48 | "#THE SPARKS FOUNDATION\n", 49 | "Name : BHARADWAJ S\n", 50 | "\n", 51 | "#GRIPNOVEMBER21\n", 52 | "\n", 53 | "Task-1 : Prediction Using Supervised ML\n", 54 | "\n", 55 | "Problem: Predict the percentage of a student based on the number of study hours" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "metadata": { 61 | "id": 
"Gl25ERJo5QkO" 62 | }, 63 | "source": [ 64 | "#Importing the Libraries required for the problem\n", 65 | "import pandas as pd\n", 66 | "import matplotlib.pyplot as plt\n", 67 | "import seaborn as sns" 68 | ], 69 | "execution_count": 1, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "metadata": { 75 | "id": "cw7WpnNG5WJ-" 76 | }, 77 | "source": [ 78 | "#reading data\n", 79 | "data= pd.read_csv('https://raw.githubusercontent.com/AdiPersonalWorks/Random/master/student_scores%20-%20student_scores.csv')" 80 | ], 81 | "execution_count": 2, 82 | "outputs": [] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "metadata": { 87 | "colab": { 88 | "base_uri": "https://localhost:8080/", 89 | "height": 817 90 | }, 91 | "id": "wqLPjy705WMb", 92 | "outputId": "0be25dc4-43c4-41d7-dbd5-9793df338fea" 93 | }, 94 | "source": [ 95 | "data" 96 | ], 97 | "execution_count": 3, 98 | "outputs": [ 99 | { 100 | "output_type": "execute_result", 101 | "data": { 102 | "text/html": [ 103 | "
\n", 104 | "\n", 117 | "\n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | "
HoursScores
02.521
15.147
23.227
38.575
43.530
51.520
69.288
75.560
88.381
92.725
107.785
115.962
124.541
133.342
141.117
158.995
162.530
171.924
186.167
197.469
202.730
214.854
223.835
236.976
247.886
\n", 253 | "
" 254 | ], 255 | "text/plain": [ 256 | " Hours Scores\n", 257 | "0 2.5 21\n", 258 | "1 5.1 47\n", 259 | "2 3.2 27\n", 260 | "3 8.5 75\n", 261 | "4 3.5 30\n", 262 | "5 1.5 20\n", 263 | "6 9.2 88\n", 264 | "7 5.5 60\n", 265 | "8 8.3 81\n", 266 | "9 2.7 25\n", 267 | "10 7.7 85\n", 268 | "11 5.9 62\n", 269 | "12 4.5 41\n", 270 | "13 3.3 42\n", 271 | "14 1.1 17\n", 272 | "15 8.9 95\n", 273 | "16 2.5 30\n", 274 | "17 1.9 24\n", 275 | "18 6.1 67\n", 276 | "19 7.4 69\n", 277 | "20 2.7 30\n", 278 | "21 4.8 54\n", 279 | "22 3.8 35\n", 280 | "23 6.9 76\n", 281 | "24 7.8 86" 282 | ] 283 | }, 284 | "metadata": {}, 285 | "execution_count": 3 286 | } 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "metadata": { 292 | "colab": { 293 | "base_uri": "https://localhost:8080/" 294 | }, 295 | "id": "kEBfp80s5WPE", 296 | "outputId": "b4124c77-3ffb-4dee-99b5-9008fcda46d7" 297 | }, 298 | "source": [ 299 | "\n", 300 | "data.shape" 301 | ], 302 | "execution_count": 4, 303 | "outputs": [ 304 | { 305 | "output_type": "execute_result", 306 | "data": { 307 | "text/plain": [ 308 | "(25, 2)" 309 | ] 310 | }, 311 | "metadata": {}, 312 | "execution_count": 4 313 | } 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "metadata": { 319 | "colab": { 320 | "base_uri": "https://localhost:8080/" 321 | }, 322 | "id": "G7BajaN85WR2", 323 | "outputId": "20aec55f-139f-4910-952f-f49b7e49ddb6" 324 | }, 325 | "source": [ 326 | "\n", 327 | "data.info" 328 | ], 329 | "execution_count": 5, 330 | "outputs": [ 331 | { 332 | "output_type": "execute_result", 333 | "data": { 334 | "text/plain": [ 335 | "" 361 | ] 362 | }, 363 | "metadata": {}, 364 | "execution_count": 5 365 | } 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "metadata": { 371 | "colab": { 372 | "base_uri": "https://localhost:8080/", 373 | "height": 356 374 | }, 375 | "id": "KcWx4bXj5WUj", 376 | "outputId": "6dc14eb4-b3ee-4cfe-da32-106ab42403cc" 377 | }, 378 | "source": [ 379 | "data.head(10)" 380 | ], 381 | "execution_count": 
6, 382 | "outputs": [ 383 | { 384 | "output_type": "execute_result", 385 | "data": { 386 | "text/html": [ 387 | "
\n", 388 | "\n", 401 | "\n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | "
HoursScores
02.521
15.147
23.227
38.575
43.530
51.520
69.288
75.560
88.381
92.725
\n", 462 | "
" 463 | ], 464 | "text/plain": [ 465 | " Hours Scores\n", 466 | "0 2.5 21\n", 467 | "1 5.1 47\n", 468 | "2 3.2 27\n", 469 | "3 8.5 75\n", 470 | "4 3.5 30\n", 471 | "5 1.5 20\n", 472 | "6 9.2 88\n", 473 | "7 5.5 60\n", 474 | "8 8.3 81\n", 475 | "9 2.7 25" 476 | ] 477 | }, 478 | "metadata": {}, 479 | "execution_count": 6 480 | } 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "metadata": { 486 | "colab": { 487 | "base_uri": "https://localhost:8080/", 488 | "height": 294 489 | }, 490 | "id": "1-AaOezN5WYj", 491 | "outputId": "67b22a81-ca46-4092-b97f-a7d194df7f24" 492 | }, 493 | "source": [ 494 | "\n", 495 | "data.describe()" 496 | ], 497 | "execution_count": 7, 498 | "outputs": [ 499 | { 500 | "output_type": "execute_result", 501 | "data": { 502 | "text/html": [ 503 | "
\n", 504 | "\n", 517 | "\n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | "
HoursScores
count25.00000025.000000
mean5.01200051.480000
std2.52509425.286887
min1.10000017.000000
25%2.70000030.000000
50%4.80000047.000000
75%7.40000075.000000
max9.20000095.000000
\n", 568 | "
" 569 | ], 570 | "text/plain": [ 571 | " Hours Scores\n", 572 | "count 25.000000 25.000000\n", 573 | "mean 5.012000 51.480000\n", 574 | "std 2.525094 25.286887\n", 575 | "min 1.100000 17.000000\n", 576 | "25% 2.700000 30.000000\n", 577 | "50% 4.800000 47.000000\n", 578 | "75% 7.400000 75.000000\n", 579 | "max 9.200000 95.000000" 580 | ] 581 | }, 582 | "metadata": {}, 583 | "execution_count": 7 584 | } 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "metadata": { 590 | "colab": { 591 | "base_uri": "https://localhost:8080/", 592 | "height": 300 593 | }, 594 | "id": "ImNGT6Xz5WaJ", 595 | "outputId": "d01cd333-3bf6-4baf-d49e-def08358214e" 596 | }, 597 | "source": [ 598 | "\n", 599 | "#Visualizing the data\n", 600 | "sns.set_style('darkgrid')\n", 601 | "sns.scatterplot(y=data['Scores'],x=data['Hours'])\n", 602 | "plt.title('Hours Studied vs Percentage Score',size=20)\n", 603 | "plt.xlabel('Hours Studied')\n", 604 | "plt.ylabel('Percentage Score')\n", 605 | "plt.show()" 606 | ], 607 | "execution_count": 8, 608 | "outputs": [ 609 | { 610 | "output_type": "display_data", 611 | "data": { 612 | "image/png": "\n", 613 | "text/plain": [ 614 | "
" 615 | ] 616 | }, 617 | "metadata": {} 618 | } 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "metadata": { 624 | "id": "RjalYNm25Wcn" 625 | }, 626 | "source": [ 627 | "\n", 628 | "#From the above graph, we can see a positive linear relation between the hours studied and the percentage obtained(score).\n", 629 | "\n", 630 | "#Training the Model\n", 631 | "\n", 632 | "#1. Preparing the Data" 633 | ], 634 | "execution_count": null, 635 | "outputs": [] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "metadata": { 640 | "id": "nOmd_Sim5WfZ" 641 | }, 642 | "source": [ 643 | "X =data.iloc[:, :-1].values \n", 644 | "y =data.iloc[:, 1].values" 645 | ], 646 | "execution_count": 9, 647 | "outputs": [] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "metadata": { 652 | "id": "x6nbtoJN5Wh-" 653 | }, 654 | "source": [ 655 | "#the next step is to split this data into training and test sets.\n", 656 | "from sklearn.model_selection import train_test_split\n", 657 | "X_train, X_test, y_train, y_test =train_test_split(X, y,test_size=0.2, random_state=0)" 658 | ], 659 | "execution_count": 10, 660 | "outputs": [] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "metadata": { 665 | "colab": { 666 | "base_uri": "https://localhost:8080/" 667 | }, 668 | "id": "-ahPNhAi504N", 669 | "outputId": "2f7e78f8-e676-4668-f8d7-70c9371bab60" 670 | }, 671 | "source": [ 672 | "from sklearn.linear_model import LinearRegression \n", 673 | "regressor = LinearRegression() \n", 674 | "regressor.fit(X_train, y_train) \n", 675 | "print(\"Training complete.\")" 676 | ], 677 | "execution_count": 11, 678 | "outputs": [ 679 | { 680 | "output_type": "stream", 681 | "name": "stdout", 682 | "text": [ 683 | "Training complete.\n" 684 | ] 685 | } 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "metadata": { 691 | "colab": { 692 | "base_uri": "https://localhost:8080/", 693 | "height": 265 694 | }, 695 | "id": "YNn4gAiS51DG", 696 | "outputId": "c06db9d2-9ac2-4369-e1cc-eff04edfc135" 697 | }, 
698 | "source": [ 699 | "\n", 700 | "# Plotting the regression line\n", 701 | "line = regressor.coef_*X+regressor.intercept_\n", 702 | "\n", 703 | "# Plotting all data points with the fitted line\n", 704 | "plt.scatter(X, y)\n", 705 | "plt.plot(X, line);\n", 706 | "plt.show()" 707 | ], 708 | "execution_count": 12, 709 | "outputs": [ 710 | { 711 | "output_type": "display_data", 712 | "data": { 713 | "image/png": "\n", 714 | "text/plain": [ 715 | "
" 716 | ] 717 | }, 718 | "metadata": {} 719 | } 720 | ] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "metadata": { 725 | "colab": { 726 | "base_uri": "https://localhost:8080/", 727 | "height": 302 728 | }, 729 | "id": "z1CXd5Hl51Fx", 730 | "outputId": "f122cd18-6248-4cfa-80a0-3419a82152b8" 731 | }, 732 | "source": [ 733 | "data.plot.bar(x=\"Hours\",y=\"Scores\")" 734 | ], 735 | "execution_count": 13, 736 | "outputs": [ 737 | { 738 | "output_type": "execute_result", 739 | "data": { 740 | "text/plain": [ 741 | "" 742 | ] 743 | }, 744 | "metadata": {}, 745 | "execution_count": 13 746 | }, 747 | { 748 | "output_type": "display_data", 749 | "data": { 750 | "image/png": "\n", 751 | "text/plain": [ 752 | "
" 753 | ] 754 | }, 755 | "metadata": {} 756 | } 757 | ] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "metadata": { 762 | "colab": { 763 | "base_uri": "https://localhost:8080/", 764 | "height": 302 765 | }, 766 | "id": "5FvwAFfZ51Ia", 767 | "outputId": "6b497b2d-77af-43e9-a187-b5514cb7d9c0" 768 | }, 769 | "source": [ 770 | "\n", 771 | "#sorting the data\n", 772 | "data.sort_values([\"Hours\"], axis=0, ascending=[True],inplace=True)\n", 773 | "\n", 774 | "#plotting the data\n", 775 | "data.plot.bar(x=\"Hours\",y=\"Scores\")" 776 | ], 777 | "execution_count": 14, 778 | "outputs": [ 779 | { 780 | "output_type": "execute_result", 781 | "data": { 782 | "text/plain": [ 783 | "" 784 | ] 785 | }, 786 | "metadata": {}, 787 | "execution_count": 14 788 | }, 789 | { 790 | "output_type": "display_data", 791 | "data": { 792 | "image/png": "\n", 793 | "text/plain": [ 794 | "
" 795 | ] 796 | }, 797 | "metadata": {} 798 | } 799 | ] 800 | }, 801 | { 802 | "cell_type": "code", 803 | "metadata": { 804 | "id": "5pXdqEP251LW" 805 | }, 806 | "source": [ 807 | "x = data.iloc[:,:-1].values\n", 808 | "y = data.iloc[:,1].values" 809 | ], 810 | "execution_count": 15, 811 | "outputs": [] 812 | }, 813 | { 814 | "cell_type": "code", 815 | "metadata": { 816 | "colab": { 817 | "base_uri": "https://localhost:8080/" 818 | }, 819 | "id": "4UR7d3ob51N_", 820 | "outputId": "6858c395-5c8f-458e-c24e-e5b3519ff6c1" 821 | }, 822 | "source": [ 823 | "print(x)" 824 | ], 825 | "execution_count": 16, 826 | "outputs": [ 827 | { 828 | "output_type": "stream", 829 | "name": "stdout", 830 | "text": [ 831 | "[[1.1]\n", 832 | " [1.5]\n", 833 | " [1.9]\n", 834 | " [2.5]\n", 835 | " [2.5]\n", 836 | " [2.7]\n", 837 | " [2.7]\n", 838 | " [3.2]\n", 839 | " [3.3]\n", 840 | " [3.5]\n", 841 | " [3.8]\n", 842 | " [4.5]\n", 843 | " [4.8]\n", 844 | " [5.1]\n", 845 | " [5.5]\n", 846 | " [5.9]\n", 847 | " [6.1]\n", 848 | " [6.9]\n", 849 | " [7.4]\n", 850 | " [7.7]\n", 851 | " [7.8]\n", 852 | " [8.3]\n", 853 | " [8.5]\n", 854 | " [8.9]\n", 855 | " [9.2]]\n" 856 | ] 857 | } 858 | ] 859 | }, 860 | { 861 | "cell_type": "code", 862 | "metadata": { 863 | "id": "IBi8Pv-A6FTs" 864 | }, 865 | "source": [ 866 | "\n", 867 | "#Now , we are dividing the data for training and testing the model\n", 868 | "#importing the train_test_split\n", 869 | "\n", 870 | "from sklearn.model_selection import train_test_split\n", 871 | "\n", 872 | "# splitting the data into X_train, X_test, y_train, y_test\n", 873 | "\n", 874 | "X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2,random_state=0)" 875 | ], 876 | "execution_count": 17, 877 | "outputs": [] 878 | }, 879 | { 880 | "cell_type": "code", 881 | "metadata": { 882 | "colab": { 883 | "base_uri": "https://localhost:8080/" 884 | }, 885 | "id": "gxf_gHRR6FX6", 886 | "outputId": "5fb82b3f-b186-46bd-c117-71f0e6a6eff5" 887 | }, 888 | "source": [ 
889 | "print(X_train.shape)" 890 | ], 891 | "execution_count": 18, 892 | "outputs": [ 893 | { 894 | "output_type": "stream", 895 | "name": "stdout", 896 | "text": [ 897 | "(20, 1)\n" 898 | ] 899 | } 900 | ] 901 | }, 902 | { 903 | "cell_type": "code", 904 | "metadata": { 905 | "colab": { 906 | "base_uri": "https://localhost:8080/" 907 | }, 908 | "id": "7Wx3imgI6KST", 909 | "outputId": "5a9cdfdf-8186-4658-fec5-75e2d3d0a3d0" 910 | }, 911 | "source": [ 912 | "print(X_test.shape)" 913 | ], 914 | "execution_count": 19, 915 | "outputs": [ 916 | { 917 | "output_type": "stream", 918 | "name": "stdout", 919 | "text": [ 920 | "(5, 1)\n" 921 | ] 922 | } 923 | ] 924 | }, 925 | { 926 | "cell_type": "code", 927 | "metadata": { 928 | "colab": { 929 | "base_uri": "https://localhost:8080/" 930 | }, 931 | "id": "ZVEJ_C406KVg", 932 | "outputId": "22c6158f-8401-4e2b-c335-82e5fa79149c" 933 | }, 934 | "source": [ 935 | "print(y_train.shape)\n" 936 | ], 937 | "execution_count": 20, 938 | "outputs": [ 939 | { 940 | "output_type": "stream", 941 | "name": "stdout", 942 | "text": [ 943 | "(20,)\n" 944 | ] 945 | } 946 | ] 947 | }, 948 | { 949 | "cell_type": "code", 950 | "metadata": { 951 | "colab": { 952 | "base_uri": "https://localhost:8080/" 953 | }, 954 | "id": "ehp8x_Fv6KY5", 955 | "outputId": "2ef1525c-d88d-4217-9ec4-fc6ca772be26" 956 | }, 957 | "source": [ 958 | "print(y_test.shape)" 959 | ], 960 | "execution_count": 21, 961 | "outputs": [ 962 | { 963 | "output_type": "stream", 964 | "name": "stdout", 965 | "text": [ 966 | "(5,)\n" 967 | ] 968 | } 969 | ] 970 | }, 971 | { 972 | "cell_type": "code", 973 | "metadata": { 974 | "colab": { 975 | "base_uri": "https://localhost:8080/" 976 | }, 977 | "id": "MB0M8cKb6Kbf", 978 | "outputId": "ec5a6976-2214-42f7-d6c1-79572f26c634" 979 | }, 980 | "source": [ 981 | "#Predicting the % score\n", 982 | "print(X_test)\n", 983 | "y_pred = regressor.predict(X_test)\n" 984 | ], 985 | "execution_count": 22, 986 | "outputs": [ 987 | { 988 | "output_type": 
"stream", 989 | "name": "stdout", 990 | "text": [ 991 | "[[2.7]\n", 992 | " [1.9]\n", 993 | " [7.7]\n", 994 | " [6.1]\n", 995 | " [4.5]]\n" 996 | ] 997 | } 998 | ] 999 | }, 1000 | { 1001 | "cell_type": "code", 1002 | "metadata": { 1003 | "colab": { 1004 | "base_uri": "https://localhost:8080/", 1005 | "height": 202 1006 | }, 1007 | "id": "CtvWYggA51Qm", 1008 | "outputId": "76f97010-77ad-4f31-a32a-4582867917c3" 1009 | }, 1010 | "source": [ 1011 | "#Comparing the result with actual data\n", 1012 | "df= pd.DataFrame({'ACTUAL' : y_test, 'PREDICTION' : y_pred})\n", 1013 | "df" 1014 | ], 1015 | "execution_count": 23, 1016 | "outputs": [ 1017 | { 1018 | "output_type": "execute_result", 1019 | "data": { 1020 | "text/html": [ 1021 | "
\n", 1022 | "\n", 1035 | "\n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | "
ACTUALPREDICTION
03028.776933
12420.848407
28578.330215
36762.473165
44146.616114
\n", 1071 | "
" 1072 | ], 1073 | "text/plain": [ 1074 | " ACTUAL PREDICTION\n", 1075 | "0 30 28.776933\n", 1076 | "1 24 20.848407\n", 1077 | "2 85 78.330215\n", 1078 | "3 67 62.473165\n", 1079 | "4 41 46.616114" 1080 | ] 1081 | }, 1082 | "metadata": {}, 1083 | "execution_count": 23 1084 | } 1085 | ] 1086 | }, 1087 | { 1088 | "cell_type": "code", 1089 | "metadata": { 1090 | "colab": { 1091 | "base_uri": "https://localhost:8080/" 1092 | }, 1093 | "id": "xOVpPyP-6agJ", 1094 | "outputId": "f8c47533-ebb8-4049-ce9e-244d9293eb5a" 1095 | }, 1096 | "source": [ 1097 | "\n", 1098 | "#Custom input(9.25 hours) and the prediction of percentage\n", 1099 | "hours = [9.25]\n", 1100 | "own_pred = regressor.predict([hours])\n", 1101 | "print(\"No of Hours = {}\".format(hours))\n", 1102 | "print(\"Predicted Score = {}\".format(own_pred[0]))" 1103 | ], 1104 | "execution_count": 24, 1105 | "outputs": [ 1106 | { 1107 | "output_type": "stream", 1108 | "name": "stdout", 1109 | "text": [ 1110 | "No of Hours = [9.25]\n", 1111 | "Predicted Score = 93.69173248737539\n" 1112 | ] 1113 | } 1114 | ] 1115 | }, 1116 | { 1117 | "cell_type": "code", 1118 | "metadata": { 1119 | "colab": { 1120 | "base_uri": "https://localhost:8080/" 1121 | }, 1122 | "id": "TDBgWdNA6aji", 1123 | "outputId": "8f671470-3050-4f85-a3d8-587d512d9d9e" 1124 | }, 1125 | "source": [ 1126 | "#Evaluating the Model(Accuracy)\n", 1127 | "from sklearn import metrics \n", 1128 | "print('Mean Absolute Error:', \n", 1129 | " metrics.mean_absolute_error(y_test, y_pred))" 1130 | ], 1131 | "execution_count": 25, 1132 | "outputs": [ 1133 | { 1134 | "output_type": "stream", 1135 | "name": "stdout", 1136 | "text": [ 1137 | "Mean Absolute Error: 4.237478958953777\n" 1138 | ] 1139 | } 1140 | ] 1141 | }, 1142 | { 1143 | "cell_type": "code", 1144 | "metadata": { 1145 | "colab": { 1146 | "base_uri": "https://localhost:8080/" 1147 | }, 1148 | "id": "GvFLQgy76amW", 1149 | "outputId": "5f185206-ce13-4026-defb-51936d891f61" 1150 | }, 1151 | "source": [ 1152 | "# 
importing LinearRegression\n", 1153 | "from sklearn.linear_model import LinearRegression\n", 1154 | "\n", 1155 | "#creating an object for LinearRegression\n", 1156 | "model = LinearRegression()\n", 1157 | "\n", 1158 | "# fitting the model\n", 1159 | "model.fit(X_train, y_train)" 1160 | ], 1161 | "execution_count": 26, 1162 | "outputs": [ 1163 | { 1164 | "output_type": "execute_result", 1165 | "data": { 1166 | "text/plain": [ 1167 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" 1168 | ] 1169 | }, 1170 | "metadata": {}, 1171 | "execution_count": 26 1172 | } 1173 | ] 1174 | }, 1175 | { 1176 | "cell_type": "code", 1177 | "metadata": { 1178 | "colab": { 1179 | "base_uri": "https://localhost:8080/", 1180 | "height": 265 1181 | }, 1182 | "id": "ZyM0-n6j6apS", 1183 | "outputId": "75c8b0d9-77f2-4e82-dc9f-f85ff25e12ba" 1184 | }, 1185 | "source": [ 1186 | "#Fitting The Regression Line\n", 1187 | "# plotting the regression line\n", 1188 | "line = model.coef_ * x + model.intercept_\n", 1189 | "\n", 1190 | "#plotting all data points with the fitted line\n", 1191 | "plt.scatter(x,y,c=\"g\")\n", 1192 | "plt.plot(x,line,c=\"r\")\n", 1193 | "plt.show()" 1194 | ], 1195 | "execution_count": 27, 1196 | "outputs": [ 1197 | { 1198 | "output_type": "display_data", 1199 | "data": { 1200 | "image/png": "\n", 1201 | "text/plain": [ 1202 | "
" 1203 | ] 1204 | }, 1205 | "metadata": {} 1206 | } 1207 | ] 1208 | }, 1209 | { 1210 | "cell_type": "code", 1211 | "metadata": { 1212 | "colab": { 1213 | "base_uri": "https://localhost:8080/" 1214 | }, 1215 | "id": "FtxSRQ316asF", 1216 | "outputId": "979e1428-1195-4d07-b6b3-c35e6f69dad2" 1217 | }, 1218 | "source": [ 1219 | "#Making Predictions\n", 1220 | "# testing the model\n", 1221 | "y_pred = model.predict(X_test)\n", 1222 | "\n", 1223 | "#checking accuracy of our model\n", 1224 | "data = pd.DataFrame({\"Actual\" : y_test,\"Predicted\":y_pred})\n", 1225 | "print(data)" 1226 | ], 1227 | "execution_count": 28, 1228 | "outputs": [ 1229 | { 1230 | "output_type": "stream", 1231 | "name": "stdout", 1232 | "text": [ 1233 | " Actual Predicted\n", 1234 | "0 30 28.617714\n", 1235 | "1 24 20.888033\n", 1236 | "2 85 76.928222\n", 1237 | "3 67 61.468859\n", 1238 | "4 41 46.009497\n" 1239 | ] 1240 | } 1241 | ] 1242 | }, 1243 | { 1244 | "cell_type": "code", 1245 | "metadata": { 1246 | "colab": { 1247 | "base_uri": "https://localhost:8080/" 1248 | }, 1249 | "id": "Umd2Wep96oqJ", 1250 | "outputId": "05d3bee2-5e64-488b-8301-9307bff3f73d" 1251 | }, 1252 | "source": [ 1253 | "#Evaluating the model\n", 1254 | "from sklearn import metrics as mts\n", 1255 | "\n", 1256 | "#mean absolute error\n", 1257 | "mean_abs_error = mts.mean_absolute_error(y_test,y_pred)\n", 1258 | "\n", 1259 | "print(\"Mean Absolute Error : \",mean_abs_error)" 1260 | ], 1261 | "execution_count": 29, 1262 | "outputs": [ 1263 | { 1264 | "output_type": "stream", 1265 | "name": "stdout", 1266 | "text": [ 1267 | "Mean Absolute Error : 4.621333622532767\n" 1268 | ] 1269 | } 1270 | ] 1271 | }, 1272 | { 1273 | "cell_type": "code", 1274 | "metadata": { 1275 | "id": "Af7BR4H46xjK" 1276 | }, 1277 | "source": [ 1278 | "" 1279 | ], 1280 | "execution_count": null, 1281 | "outputs": [] 1282 | } 1283 | ] 1284 | } --------------------------------------------------------------------------------