├── .gitignore
└── SPARK_TASK_1_.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/SPARK_TASK_1_.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "SPARK_TASK_1 .ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "authorship_tag": "ABX9TyMqrE/S3DQSQkR1Eaz5PBwv",
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "language_info": {
17 | "name": "python"
18 | }
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
28 | "
"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "metadata": {
34 | "id": "88dV5rGW5Baa"
35 | },
36 | "source": [
37 | ""
38 | ],
39 | "execution_count": null,
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {
45 | "id": "Y5KNclE35Ftj"
46 | },
47 | "source": [
48 | "#THE SPARKS FOUNDATION\n",
49 | "Name : BHARADWAJ S\n",
50 | "\n",
51 | "#GRIPNOVEMBER21\n",
52 | "\n",
53 | "Task-1 : Prediction Using Supervised ML\n",
54 | "\n",
55 | "Problem: Predict the percentage of a student based on the number of study hours"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "metadata": {
61 | "id": "Gl25ERJo5QkO"
62 | },
63 | "source": [
64 | "#Importing the Libraries required for the problem\n",
65 | "import pandas as pd\n",
66 | "import matplotlib.pyplot as plt\n",
67 | "import seaborn as sns"
68 | ],
69 | "execution_count": 1,
70 | "outputs": []
71 | },
72 | {
73 | "cell_type": "code",
74 | "metadata": {
75 | "id": "cw7WpnNG5WJ-"
76 | },
77 | "source": [
78 | "#reading data\n",
79 | "data= pd.read_csv('https://raw.githubusercontent.com/AdiPersonalWorks/Random/master/student_scores%20-%20student_scores.csv')"
80 | ],
81 | "execution_count": 2,
82 | "outputs": []
83 | },
84 | {
85 | "cell_type": "code",
86 | "metadata": {
87 | "colab": {
88 | "base_uri": "https://localhost:8080/",
89 | "height": 817
90 | },
91 | "id": "wqLPjy705WMb",
92 | "outputId": "0be25dc4-43c4-41d7-dbd5-9793df338fea"
93 | },
94 | "source": [
95 | "data"
96 | ],
97 | "execution_count": 3,
98 | "outputs": [
99 | {
100 | "output_type": "execute_result",
101 | "data": {
102 | "text/html": [
103 | "
\n",
104 | "\n",
117 | "
\n",
118 | " \n",
119 | " \n",
120 | " | \n",
121 | " Hours | \n",
122 | " Scores | \n",
123 | "
\n",
124 | " \n",
125 | " \n",
126 | " \n",
127 | " | 0 | \n",
128 | " 2.5 | \n",
129 | " 21 | \n",
130 | "
\n",
131 | " \n",
132 | " | 1 | \n",
133 | " 5.1 | \n",
134 | " 47 | \n",
135 | "
\n",
136 | " \n",
137 | " | 2 | \n",
138 | " 3.2 | \n",
139 | " 27 | \n",
140 | "
\n",
141 | " \n",
142 | " | 3 | \n",
143 | " 8.5 | \n",
144 | " 75 | \n",
145 | "
\n",
146 | " \n",
147 | " | 4 | \n",
148 | " 3.5 | \n",
149 | " 30 | \n",
150 | "
\n",
151 | " \n",
152 | " | 5 | \n",
153 | " 1.5 | \n",
154 | " 20 | \n",
155 | "
\n",
156 | " \n",
157 | " | 6 | \n",
158 | " 9.2 | \n",
159 | " 88 | \n",
160 | "
\n",
161 | " \n",
162 | " | 7 | \n",
163 | " 5.5 | \n",
164 | " 60 | \n",
165 | "
\n",
166 | " \n",
167 | " | 8 | \n",
168 | " 8.3 | \n",
169 | " 81 | \n",
170 | "
\n",
171 | " \n",
172 | " | 9 | \n",
173 | " 2.7 | \n",
174 | " 25 | \n",
175 | "
\n",
176 | " \n",
177 | " | 10 | \n",
178 | " 7.7 | \n",
179 | " 85 | \n",
180 | "
\n",
181 | " \n",
182 | " | 11 | \n",
183 | " 5.9 | \n",
184 | " 62 | \n",
185 | "
\n",
186 | " \n",
187 | " | 12 | \n",
188 | " 4.5 | \n",
189 | " 41 | \n",
190 | "
\n",
191 | " \n",
192 | " | 13 | \n",
193 | " 3.3 | \n",
194 | " 42 | \n",
195 | "
\n",
196 | " \n",
197 | " | 14 | \n",
198 | " 1.1 | \n",
199 | " 17 | \n",
200 | "
\n",
201 | " \n",
202 | " | 15 | \n",
203 | " 8.9 | \n",
204 | " 95 | \n",
205 | "
\n",
206 | " \n",
207 | " | 16 | \n",
208 | " 2.5 | \n",
209 | " 30 | \n",
210 | "
\n",
211 | " \n",
212 | " | 17 | \n",
213 | " 1.9 | \n",
214 | " 24 | \n",
215 | "
\n",
216 | " \n",
217 | " | 18 | \n",
218 | " 6.1 | \n",
219 | " 67 | \n",
220 | "
\n",
221 | " \n",
222 | " | 19 | \n",
223 | " 7.4 | \n",
224 | " 69 | \n",
225 | "
\n",
226 | " \n",
227 | " | 20 | \n",
228 | " 2.7 | \n",
229 | " 30 | \n",
230 | "
\n",
231 | " \n",
232 | " | 21 | \n",
233 | " 4.8 | \n",
234 | " 54 | \n",
235 | "
\n",
236 | " \n",
237 | " | 22 | \n",
238 | " 3.8 | \n",
239 | " 35 | \n",
240 | "
\n",
241 | " \n",
242 | " | 23 | \n",
243 | " 6.9 | \n",
244 | " 76 | \n",
245 | "
\n",
246 | " \n",
247 | " | 24 | \n",
248 | " 7.8 | \n",
249 | " 86 | \n",
250 | "
\n",
251 | " \n",
252 | "
\n",
253 | "
"
254 | ],
255 | "text/plain": [
256 | " Hours Scores\n",
257 | "0 2.5 21\n",
258 | "1 5.1 47\n",
259 | "2 3.2 27\n",
260 | "3 8.5 75\n",
261 | "4 3.5 30\n",
262 | "5 1.5 20\n",
263 | "6 9.2 88\n",
264 | "7 5.5 60\n",
265 | "8 8.3 81\n",
266 | "9 2.7 25\n",
267 | "10 7.7 85\n",
268 | "11 5.9 62\n",
269 | "12 4.5 41\n",
270 | "13 3.3 42\n",
271 | "14 1.1 17\n",
272 | "15 8.9 95\n",
273 | "16 2.5 30\n",
274 | "17 1.9 24\n",
275 | "18 6.1 67\n",
276 | "19 7.4 69\n",
277 | "20 2.7 30\n",
278 | "21 4.8 54\n",
279 | "22 3.8 35\n",
280 | "23 6.9 76\n",
281 | "24 7.8 86"
282 | ]
283 | },
284 | "metadata": {},
285 | "execution_count": 3
286 | }
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "metadata": {
292 | "colab": {
293 | "base_uri": "https://localhost:8080/"
294 | },
295 | "id": "kEBfp80s5WPE",
296 | "outputId": "b4124c77-3ffb-4dee-99b5-9008fcda46d7"
297 | },
298 | "source": [
299 | "\n",
300 | "data.shape"
301 | ],
302 | "execution_count": 4,
303 | "outputs": [
304 | {
305 | "output_type": "execute_result",
306 | "data": {
307 | "text/plain": [
308 | "(25, 2)"
309 | ]
310 | },
311 | "metadata": {},
312 | "execution_count": 4
313 | }
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "metadata": {
319 | "colab": {
320 | "base_uri": "https://localhost:8080/"
321 | },
322 | "id": "G7BajaN85WR2",
323 | "outputId": "20aec55f-139f-4910-952f-f49b7e49ddb6"
324 | },
325 | "source": [
326 | "\n",
327 | "data.info"
328 | ],
329 | "execution_count": 5,
330 | "outputs": [
331 | {
332 | "output_type": "execute_result",
333 | "data": {
334 | "text/plain": [
335 | ""
361 | ]
362 | },
363 | "metadata": {},
364 | "execution_count": 5
365 | }
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "metadata": {
371 | "colab": {
372 | "base_uri": "https://localhost:8080/",
373 | "height": 356
374 | },
375 | "id": "KcWx4bXj5WUj",
376 | "outputId": "6dc14eb4-b3ee-4cfe-da32-106ab42403cc"
377 | },
378 | "source": [
379 | "data.head(10)"
380 | ],
381 | "execution_count": 6,
382 | "outputs": [
383 | {
384 | "output_type": "execute_result",
385 | "data": {
386 | "text/html": [
387 | "\n",
388 | "\n",
401 | "
\n",
402 | " \n",
403 | " \n",
404 | " | \n",
405 | " Hours | \n",
406 | " Scores | \n",
407 | "
\n",
408 | " \n",
409 | " \n",
410 | " \n",
411 | " | 0 | \n",
412 | " 2.5 | \n",
413 | " 21 | \n",
414 | "
\n",
415 | " \n",
416 | " | 1 | \n",
417 | " 5.1 | \n",
418 | " 47 | \n",
419 | "
\n",
420 | " \n",
421 | " | 2 | \n",
422 | " 3.2 | \n",
423 | " 27 | \n",
424 | "
\n",
425 | " \n",
426 | " | 3 | \n",
427 | " 8.5 | \n",
428 | " 75 | \n",
429 | "
\n",
430 | " \n",
431 | " | 4 | \n",
432 | " 3.5 | \n",
433 | " 30 | \n",
434 | "
\n",
435 | " \n",
436 | " | 5 | \n",
437 | " 1.5 | \n",
438 | " 20 | \n",
439 | "
\n",
440 | " \n",
441 | " | 6 | \n",
442 | " 9.2 | \n",
443 | " 88 | \n",
444 | "
\n",
445 | " \n",
446 | " | 7 | \n",
447 | " 5.5 | \n",
448 | " 60 | \n",
449 | "
\n",
450 | " \n",
451 | " | 8 | \n",
452 | " 8.3 | \n",
453 | " 81 | \n",
454 | "
\n",
455 | " \n",
456 | " | 9 | \n",
457 | " 2.7 | \n",
458 | " 25 | \n",
459 | "
\n",
460 | " \n",
461 | "
\n",
462 | "
"
463 | ],
464 | "text/plain": [
465 | " Hours Scores\n",
466 | "0 2.5 21\n",
467 | "1 5.1 47\n",
468 | "2 3.2 27\n",
469 | "3 8.5 75\n",
470 | "4 3.5 30\n",
471 | "5 1.5 20\n",
472 | "6 9.2 88\n",
473 | "7 5.5 60\n",
474 | "8 8.3 81\n",
475 | "9 2.7 25"
476 | ]
477 | },
478 | "metadata": {},
479 | "execution_count": 6
480 | }
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "metadata": {
486 | "colab": {
487 | "base_uri": "https://localhost:8080/",
488 | "height": 294
489 | },
490 | "id": "1-AaOezN5WYj",
491 | "outputId": "67b22a81-ca46-4092-b97f-a7d194df7f24"
492 | },
493 | "source": [
494 | "\n",
495 | "data.describe()"
496 | ],
497 | "execution_count": 7,
498 | "outputs": [
499 | {
500 | "output_type": "execute_result",
501 | "data": {
502 | "text/html": [
503 | "\n",
504 | "\n",
517 | "
\n",
518 | " \n",
519 | " \n",
520 | " | \n",
521 | " Hours | \n",
522 | " Scores | \n",
523 | "
\n",
524 | " \n",
525 | " \n",
526 | " \n",
527 | " | count | \n",
528 | " 25.000000 | \n",
529 | " 25.000000 | \n",
530 | "
\n",
531 | " \n",
532 | " | mean | \n",
533 | " 5.012000 | \n",
534 | " 51.480000 | \n",
535 | "
\n",
536 | " \n",
537 | " | std | \n",
538 | " 2.525094 | \n",
539 | " 25.286887 | \n",
540 | "
\n",
541 | " \n",
542 | " | min | \n",
543 | " 1.100000 | \n",
544 | " 17.000000 | \n",
545 | "
\n",
546 | " \n",
547 | " | 25% | \n",
548 | " 2.700000 | \n",
549 | " 30.000000 | \n",
550 | "
\n",
551 | " \n",
552 | " | 50% | \n",
553 | " 4.800000 | \n",
554 | " 47.000000 | \n",
555 | "
\n",
556 | " \n",
557 | " | 75% | \n",
558 | " 7.400000 | \n",
559 | " 75.000000 | \n",
560 | "
\n",
561 | " \n",
562 | " | max | \n",
563 | " 9.200000 | \n",
564 | " 95.000000 | \n",
565 | "
\n",
566 | " \n",
567 | "
\n",
568 | "
"
569 | ],
570 | "text/plain": [
571 | " Hours Scores\n",
572 | "count 25.000000 25.000000\n",
573 | "mean 5.012000 51.480000\n",
574 | "std 2.525094 25.286887\n",
575 | "min 1.100000 17.000000\n",
576 | "25% 2.700000 30.000000\n",
577 | "50% 4.800000 47.000000\n",
578 | "75% 7.400000 75.000000\n",
579 | "max 9.200000 95.000000"
580 | ]
581 | },
582 | "metadata": {},
583 | "execution_count": 7
584 | }
585 | ]
586 | },
587 | {
588 | "cell_type": "code",
589 | "metadata": {
590 | "colab": {
591 | "base_uri": "https://localhost:8080/",
592 | "height": 300
593 | },
594 | "id": "ImNGT6Xz5WaJ",
595 | "outputId": "d01cd333-3bf6-4baf-d49e-def08358214e"
596 | },
597 | "source": [
598 | "\n",
599 | "#Visualizing the data\n",
600 | "sns.set_style('darkgrid')\n",
601 | "sns.scatterplot(y=data['Scores'],x=data['Hours'])\n",
602 | "plt.title('Hours Studied vs Percentage Score',size=20)\n",
603 | "plt.xlabel('Hours Studied')\n",
604 | "plt.ylabel('Percentage Score')\n",
605 | "plt.show()"
606 | ],
607 | "execution_count": 8,
608 | "outputs": [
609 | {
610 | "output_type": "display_data",
611 | "data": {
612 | "image/png": "\n",
613 | "text/plain": [
614 | ""
615 | ]
616 | },
617 | "metadata": {}
618 | }
619 | ]
620 | },
621 | {
622 | "cell_type": "code",
623 | "metadata": {
624 | "id": "RjalYNm25Wcn"
625 | },
626 | "source": [
627 | "\n",
628 | "#From the above graph, we can see a positive linear relation between the hours studied and the percentage obtained(score).\n",
629 | "\n",
630 | "#Training the Model\n",
631 | "\n",
632 | "#1. Preparing the Data"
633 | ],
634 | "execution_count": null,
635 | "outputs": []
636 | },
637 | {
638 | "cell_type": "code",
639 | "metadata": {
640 | "id": "nOmd_Sim5WfZ"
641 | },
642 | "source": [
643 | "X =data.iloc[:, :-1].values \n",
644 | "y =data.iloc[:, 1].values"
645 | ],
646 | "execution_count": 9,
647 | "outputs": []
648 | },
649 | {
650 | "cell_type": "code",
651 | "metadata": {
652 | "id": "x6nbtoJN5Wh-"
653 | },
654 | "source": [
655 | "#the next step is to split this data into training and test sets.\n",
656 | "from sklearn.model_selection import train_test_split\n",
657 | "X_train, X_test, y_train, y_test =train_test_split(X, y,test_size=0.2, random_state=0)"
658 | ],
659 | "execution_count": 10,
660 | "outputs": []
661 | },
662 | {
663 | "cell_type": "code",
664 | "metadata": {
665 | "colab": {
666 | "base_uri": "https://localhost:8080/"
667 | },
668 | "id": "-ahPNhAi504N",
669 | "outputId": "2f7e78f8-e676-4668-f8d7-70c9371bab60"
670 | },
671 | "source": [
672 | "from sklearn.linear_model import LinearRegression \n",
673 | "regressor = LinearRegression() \n",
674 | "regressor.fit(X_train, y_train) \n",
675 | "print(\"Training complete.\")"
676 | ],
677 | "execution_count": 11,
678 | "outputs": [
679 | {
680 | "output_type": "stream",
681 | "name": "stdout",
682 | "text": [
683 | "Training complete.\n"
684 | ]
685 | }
686 | ]
687 | },
688 | {
689 | "cell_type": "code",
690 | "metadata": {
691 | "colab": {
692 | "base_uri": "https://localhost:8080/",
693 | "height": 265
694 | },
695 | "id": "YNn4gAiS51DG",
696 | "outputId": "c06db9d2-9ac2-4369-e1cc-eff04edfc135"
697 | },
698 | "source": [
699 | "\n",
700 | "# Plotting the regression line\n",
701 | "line = regressor.coef_*X+regressor.intercept_\n",
702 | "\n",
703 | "# Plotting for the test data\n",
704 | "plt.scatter(X, y)\n",
705 | "plt.plot(X, line);\n",
706 | "plt.show()"
707 | ],
708 | "execution_count": 12,
709 | "outputs": [
710 | {
711 | "output_type": "display_data",
712 | "data": {
713 | "image/png": "\n",
714 | "text/plain": [
715 | ""
716 | ]
717 | },
718 | "metadata": {}
719 | }
720 | ]
721 | },
722 | {
723 | "cell_type": "code",
724 | "metadata": {
725 | "colab": {
726 | "base_uri": "https://localhost:8080/",
727 | "height": 302
728 | },
729 | "id": "z1CXd5Hl51Fx",
730 | "outputId": "f122cd18-6248-4cfa-80a0-3419a82152b8"
731 | },
732 | "source": [
733 | "data.plot.bar(x=\"Hours\",y=\"Scores\")"
734 | ],
735 | "execution_count": 13,
736 | "outputs": [
737 | {
738 | "output_type": "execute_result",
739 | "data": {
740 | "text/plain": [
741 | ""
742 | ]
743 | },
744 | "metadata": {},
745 | "execution_count": 13
746 | },
747 | {
748 | "output_type": "display_data",
749 | "data": {
750 | "image/png": "\n",
751 | "text/plain": [
752 | ""
753 | ]
754 | },
755 | "metadata": {}
756 | }
757 | ]
758 | },
759 | {
760 | "cell_type": "code",
761 | "metadata": {
762 | "colab": {
763 | "base_uri": "https://localhost:8080/",
764 | "height": 302
765 | },
766 | "id": "5FvwAFfZ51Ia",
767 | "outputId": "6b497b2d-77af-43e9-a187-b5514cb7d9c0"
768 | },
769 | "source": [
770 | "\n",
771 | "#sorting the data\n",
772 | "data.sort_values([\"Hours\"], axis=0, ascending=[True],inplace=True)\n",
773 | "\n",
774 | "#plotting the data\n",
775 | "data.plot.bar(x=\"Hours\",y=\"Scores\")"
776 | ],
777 | "execution_count": 14,
778 | "outputs": [
779 | {
780 | "output_type": "execute_result",
781 | "data": {
782 | "text/plain": [
783 | ""
784 | ]
785 | },
786 | "metadata": {},
787 | "execution_count": 14
788 | },
789 | {
790 | "output_type": "display_data",
791 | "data": {
792 | "image/png": "\n",
793 | "text/plain": [
794 | ""
795 | ]
796 | },
797 | "metadata": {}
798 | }
799 | ]
800 | },
801 | {
802 | "cell_type": "code",
803 | "metadata": {
804 | "id": "5pXdqEP251LW"
805 | },
806 | "source": [
807 | "x = data.iloc[:,:-1].values\n",
808 | "y = data.iloc[:,1].values"
809 | ],
810 | "execution_count": 15,
811 | "outputs": []
812 | },
813 | {
814 | "cell_type": "code",
815 | "metadata": {
816 | "colab": {
817 | "base_uri": "https://localhost:8080/"
818 | },
819 | "id": "4UR7d3ob51N_",
820 | "outputId": "6858c395-5c8f-458e-c24e-e5b3519ff6c1"
821 | },
822 | "source": [
823 | "print(x)"
824 | ],
825 | "execution_count": 16,
826 | "outputs": [
827 | {
828 | "output_type": "stream",
829 | "name": "stdout",
830 | "text": [
831 | "[[1.1]\n",
832 | " [1.5]\n",
833 | " [1.9]\n",
834 | " [2.5]\n",
835 | " [2.5]\n",
836 | " [2.7]\n",
837 | " [2.7]\n",
838 | " [3.2]\n",
839 | " [3.3]\n",
840 | " [3.5]\n",
841 | " [3.8]\n",
842 | " [4.5]\n",
843 | " [4.8]\n",
844 | " [5.1]\n",
845 | " [5.5]\n",
846 | " [5.9]\n",
847 | " [6.1]\n",
848 | " [6.9]\n",
849 | " [7.4]\n",
850 | " [7.7]\n",
851 | " [7.8]\n",
852 | " [8.3]\n",
853 | " [8.5]\n",
854 | " [8.9]\n",
855 | " [9.2]]\n"
856 | ]
857 | }
858 | ]
859 | },
860 | {
861 | "cell_type": "code",
862 | "metadata": {
863 | "id": "IBi8Pv-A6FTs"
864 | },
865 | "source": [
866 | "\n",
867 | "#Now , we are dividing the data for training and testing the model\n",
868 | "#importing the train_test_split\n",
869 | "\n",
870 | "from sklearn.model_selection import train_test_split\n",
871 | "\n",
872 | "# splitting the data into X_train, X_test, y_train, y_test\n",
873 | "\n",
874 | "X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.2,random_state=0)"
875 | ],
876 | "execution_count": 17,
877 | "outputs": []
878 | },
879 | {
880 | "cell_type": "code",
881 | "metadata": {
882 | "colab": {
883 | "base_uri": "https://localhost:8080/"
884 | },
885 | "id": "gxf_gHRR6FX6",
886 | "outputId": "5fb82b3f-b186-46bd-c117-71f0e6a6eff5"
887 | },
888 | "source": [
889 | "print(X_train.shape)"
890 | ],
891 | "execution_count": 18,
892 | "outputs": [
893 | {
894 | "output_type": "stream",
895 | "name": "stdout",
896 | "text": [
897 | "(20, 1)\n"
898 | ]
899 | }
900 | ]
901 | },
902 | {
903 | "cell_type": "code",
904 | "metadata": {
905 | "colab": {
906 | "base_uri": "https://localhost:8080/"
907 | },
908 | "id": "7Wx3imgI6KST",
909 | "outputId": "5a9cdfdf-8186-4658-fec5-75e2d3d0a3d0"
910 | },
911 | "source": [
912 | "print(X_test.shape)"
913 | ],
914 | "execution_count": 19,
915 | "outputs": [
916 | {
917 | "output_type": "stream",
918 | "name": "stdout",
919 | "text": [
920 | "(5, 1)\n"
921 | ]
922 | }
923 | ]
924 | },
925 | {
926 | "cell_type": "code",
927 | "metadata": {
928 | "colab": {
929 | "base_uri": "https://localhost:8080/"
930 | },
931 | "id": "ZVEJ_C406KVg",
932 | "outputId": "22c6158f-8401-4e2b-c335-82e5fa79149c"
933 | },
934 | "source": [
935 | "print(y_train.shape)\n"
936 | ],
937 | "execution_count": 20,
938 | "outputs": [
939 | {
940 | "output_type": "stream",
941 | "name": "stdout",
942 | "text": [
943 | "(20,)\n"
944 | ]
945 | }
946 | ]
947 | },
948 | {
949 | "cell_type": "code",
950 | "metadata": {
951 | "colab": {
952 | "base_uri": "https://localhost:8080/"
953 | },
954 | "id": "ehp8x_Fv6KY5",
955 | "outputId": "2ef1525c-d88d-4217-9ec4-fc6ca772be26"
956 | },
957 | "source": [
958 | "print(y_test.shape)"
959 | ],
960 | "execution_count": 21,
961 | "outputs": [
962 | {
963 | "output_type": "stream",
964 | "name": "stdout",
965 | "text": [
966 | "(5,)\n"
967 | ]
968 | }
969 | ]
970 | },
971 | {
972 | "cell_type": "code",
973 | "metadata": {
974 | "colab": {
975 | "base_uri": "https://localhost:8080/"
976 | },
977 | "id": "MB0M8cKb6Kbf",
978 | "outputId": "ec5a6976-2214-42f7-d6c1-79572f26c634"
979 | },
980 | "source": [
981 | "#Predicting the % score\n",
982 | "print(X_test)\n",
983 | "y_pred = regressor.predict(X_test)\n"
984 | ],
985 | "execution_count": 22,
986 | "outputs": [
987 | {
988 | "output_type": "stream",
989 | "name": "stdout",
990 | "text": [
991 | "[[2.7]\n",
992 | " [1.9]\n",
993 | " [7.7]\n",
994 | " [6.1]\n",
995 | " [4.5]]\n"
996 | ]
997 | }
998 | ]
999 | },
1000 | {
1001 | "cell_type": "code",
1002 | "metadata": {
1003 | "colab": {
1004 | "base_uri": "https://localhost:8080/",
1005 | "height": 202
1006 | },
1007 | "id": "CtvWYggA51Qm",
1008 | "outputId": "76f97010-77ad-4f31-a32a-4582867917c3"
1009 | },
1010 | "source": [
1011 | "#Comparing the result with acutal data\n",
1012 | "df= pd.DataFrame({'ACTUAL' : y_test, 'PREDICTION' : y_pred})\n",
1013 | "df"
1014 | ],
1015 | "execution_count": 23,
1016 | "outputs": [
1017 | {
1018 | "output_type": "execute_result",
1019 | "data": {
1020 | "text/html": [
1021 | "\n",
1022 | "\n",
1035 | "
\n",
1036 | " \n",
1037 | " \n",
1038 | " | \n",
1039 | " ACTUAL | \n",
1040 | " PREDICTION | \n",
1041 | "
\n",
1042 | " \n",
1043 | " \n",
1044 | " \n",
1045 | " | 0 | \n",
1046 | " 30 | \n",
1047 | " 28.776933 | \n",
1048 | "
\n",
1049 | " \n",
1050 | " | 1 | \n",
1051 | " 24 | \n",
1052 | " 20.848407 | \n",
1053 | "
\n",
1054 | " \n",
1055 | " | 2 | \n",
1056 | " 85 | \n",
1057 | " 78.330215 | \n",
1058 | "
\n",
1059 | " \n",
1060 | " | 3 | \n",
1061 | " 67 | \n",
1062 | " 62.473165 | \n",
1063 | "
\n",
1064 | " \n",
1065 | " | 4 | \n",
1066 | " 41 | \n",
1067 | " 46.616114 | \n",
1068 | "
\n",
1069 | " \n",
1070 | "
\n",
1071 | "
"
1072 | ],
1073 | "text/plain": [
1074 | " ACTUAL PREDICTION\n",
1075 | "0 30 28.776933\n",
1076 | "1 24 20.848407\n",
1077 | "2 85 78.330215\n",
1078 | "3 67 62.473165\n",
1079 | "4 41 46.616114"
1080 | ]
1081 | },
1082 | "metadata": {},
1083 | "execution_count": 23
1084 | }
1085 | ]
1086 | },
1087 | {
1088 | "cell_type": "code",
1089 | "metadata": {
1090 | "colab": {
1091 | "base_uri": "https://localhost:8080/"
1092 | },
1093 | "id": "xOVpPyP-6agJ",
1094 | "outputId": "f8c47533-ebb8-4049-ce9e-244d9293eb5a"
1095 | },
1096 | "source": [
1097 | "\n",
1098 | "#Custom input(9.25 hours) and the prediction of percentage\n",
1099 | "hours = [9.25]\n",
1100 | "own_pred = regressor.predict([hours])\n",
1101 | "print(\"No of Hours = {}\".format(hours))\n",
1102 | "print(\"Predicted Score = {}\".format(own_pred[0]))"
1103 | ],
1104 | "execution_count": 24,
1105 | "outputs": [
1106 | {
1107 | "output_type": "stream",
1108 | "name": "stdout",
1109 | "text": [
1110 | "No of Hours = [9.25]\n",
1111 | "Predicted Score = 93.69173248737539\n"
1112 | ]
1113 | }
1114 | ]
1115 | },
1116 | {
1117 | "cell_type": "code",
1118 | "metadata": {
1119 | "colab": {
1120 | "base_uri": "https://localhost:8080/"
1121 | },
1122 | "id": "TDBgWdNA6aji",
1123 | "outputId": "8f671470-3050-4f85-a3d8-587d512d9d9e"
1124 | },
1125 | "source": [
1126 | "#Evaluating the Model(Accuracy)\n",
1127 | "from sklearn import metrics \n",
1128 | "print('Mean Absolute Error:', \n",
1129 | " metrics.mean_absolute_error(y_test, y_pred))"
1130 | ],
1131 | "execution_count": 25,
1132 | "outputs": [
1133 | {
1134 | "output_type": "stream",
1135 | "name": "stdout",
1136 | "text": [
1137 | "Mean Absolute Error: 4.237478958953777\n"
1138 | ]
1139 | }
1140 | ]
1141 | },
1142 | {
1143 | "cell_type": "code",
1144 | "metadata": {
1145 | "colab": {
1146 | "base_uri": "https://localhost:8080/"
1147 | },
1148 | "id": "GvFLQgy76amW",
1149 | "outputId": "5f185206-ce13-4026-defb-51936d891f61"
1150 | },
1151 | "source": [
1152 | "# importing LinearRegression\n",
1153 | "from sklearn.linear_model import LinearRegression\n",
1154 | "\n",
1155 | "#creating an object for LinearRegression\n",
1156 | "model = LinearRegression()\n",
1157 | "\n",
1158 | "# fitting the model\n",
1159 | "model.fit(X_train, y_train)"
1160 | ],
1161 | "execution_count": 26,
1162 | "outputs": [
1163 | {
1164 | "output_type": "execute_result",
1165 | "data": {
1166 | "text/plain": [
1167 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
1168 | ]
1169 | },
1170 | "metadata": {},
1171 | "execution_count": 26
1172 | }
1173 | ]
1174 | },
1175 | {
1176 | "cell_type": "code",
1177 | "metadata": {
1178 | "colab": {
1179 | "base_uri": "https://localhost:8080/",
1180 | "height": 265
1181 | },
1182 | "id": "ZyM0-n6j6apS",
1183 | "outputId": "75c8b0d9-77f2-4e82-dc9f-f85ff25e12ba"
1184 | },
1185 | "source": [
1186 | "#Fitting The Regression Line\n",
1187 | "# plotting the regression line\n",
1188 | "line = model.coef_ * x + model.intercept_\n",
1189 | "\n",
1190 | "#plotting for test data\n",
1191 | "plt.scatter(x,y,c=\"g\")\n",
1192 | "plt.plot(x,line,c=\"r\")\n",
1193 | "plt.show()"
1194 | ],
1195 | "execution_count": 27,
1196 | "outputs": [
1197 | {
1198 | "output_type": "display_data",
1199 | "data": {
1200 | "image/png": "\n",
1201 | "text/plain": [
1202 | ""
1203 | ]
1204 | },
1205 | "metadata": {}
1206 | }
1207 | ]
1208 | },
1209 | {
1210 | "cell_type": "code",
1211 | "metadata": {
1212 | "colab": {
1213 | "base_uri": "https://localhost:8080/"
1214 | },
1215 | "id": "FtxSRQ316asF",
1216 | "outputId": "979e1428-1195-4d07-b6b3-c35e6f69dad2"
1217 | },
1218 | "source": [
1219 | "#Making Predictions\n",
1220 | "# testing the model\n",
1221 | "y_pred = model.predict(X_test)\n",
1222 | "\n",
1223 | "#checking accuracy of our model\n",
1224 | "data = pd.DataFrame({\"Actual\" : y_test,\"Predicted\":y_pred})\n",
1225 | "print(data)"
1226 | ],
1227 | "execution_count": 28,
1228 | "outputs": [
1229 | {
1230 | "output_type": "stream",
1231 | "name": "stdout",
1232 | "text": [
1233 | " Actual Predicted\n",
1234 | "0 30 28.617714\n",
1235 | "1 24 20.888033\n",
1236 | "2 85 76.928222\n",
1237 | "3 67 61.468859\n",
1238 | "4 41 46.009497\n"
1239 | ]
1240 | }
1241 | ]
1242 | },
1243 | {
1244 | "cell_type": "code",
1245 | "metadata": {
1246 | "colab": {
1247 | "base_uri": "https://localhost:8080/"
1248 | },
1249 | "id": "Umd2Wep96oqJ",
1250 | "outputId": "05d3bee2-5e64-488b-8301-9307bff3f73d"
1251 | },
1252 | "source": [
1253 | "#Evaluating the model\n",
1254 | "from sklearn import metrics as mts\n",
1255 | "\n",
1256 | "#mean abolute error\n",
1257 | "mean_abs_error = mts.mean_absolute_error(y_test,y_pred)\n",
1258 | "\n",
1259 | "print(\"Mean Absolute Error : \",mean_abs_error)"
1260 | ],
1261 | "execution_count": 29,
1262 | "outputs": [
1263 | {
1264 | "output_type": "stream",
1265 | "name": "stdout",
1266 | "text": [
1267 | "Mean Absolute Error : 4.621333622532767\n"
1268 | ]
1269 | }
1270 | ]
1271 | },
1272 | {
1273 | "cell_type": "code",
1274 | "metadata": {
1275 | "id": "Af7BR4H46xjK"
1276 | },
1277 | "source": [
1278 | ""
1279 | ],
1280 | "execution_count": null,
1281 | "outputs": []
1282 | }
1283 | ]
1284 | }
--------------------------------------------------------------------------------