├── README.md
├── TASK 1.ipynb
└── TASK-2.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Spark-Foundation
2 | The Sparks Foundation internship
3 | Hello everyone! I am doing an internship at The Sparks Foundation, and this repository contains the tasks they assigned. I hope you like it, and if you do, please follow me on LinkedIn.
4 | ## Mandatory Task
5 | Improve your LinkedIn account: www.linkedin.com/in/shruthi-jain-81b4571ab
6 | ## PROJECTS(TASKS)
7 | Many tasks were given: a few are beginner level and some are intermediate or advanced level. I have not completed all of them, but I want to list
8 | all the tasks so that you can open the corresponding file.
9 | * Task-1: Prediction Using Supervised Learning, predicting the percentage score of a student based on the number of hours studied.
10 | * Task-2: Prediction Using Unsupervised Learning. (iris dataset)
11 | * Task-3: Exploratory Data Analysis - Retail (As a manager figure out which area in business needs more attention)
12 | * Task-4: Perform ‘Exploratory Data Analysis’ on dataset ‘Global Terrorism’
13 | * Task-5: Perform ‘Exploratory Data Analysis’ on dataset ‘Indian Premier League’
14 | * Task-6: Prediction using Decision Tree Algorithm
15 | * Task-7: Stock Market Prediction using Numerical and Textual Analysis
16 | * Task-8: Timeline Analysis : Covid-19
17 | # THANK YOU!!!!
18 |
--------------------------------------------------------------------------------
/TASK 1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# ***The Sparks Foundation - Task-1*** "
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## ***Prediction using supervised learning: predict the percentage of a student based on the number of study hours.***"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "### ***Import The Required Libraries***"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 67,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "import numpy as np\n",
31 | "import pandas as pd\n",
32 | "import matplotlib.pyplot as plt\n",
33 | "%matplotlib inline\n",
34 | "from sklearn.model_selection import train_test_split\n",
35 | "from sklearn.linear_model import LinearRegression\n",
36 | "from sklearn.metrics import mean_absolute_error"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "### ***Reading The CSV file***"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 2,
49 | "metadata": {},
50 | "outputs": [
51 | {
52 | "data": {
53 | "text/html": [
54 | "
\n",
55 | "\n",
68 | "
\n",
69 | " \n",
70 | " \n",
71 | " \n",
72 | " Hours \n",
73 | " Scores \n",
74 | " \n",
75 | " \n",
76 | " \n",
77 | " \n",
78 | " 0 \n",
79 | " 2.5 \n",
80 | " 21 \n",
81 | " \n",
82 | " \n",
83 | " 1 \n",
84 | " 5.1 \n",
85 | " 47 \n",
86 | " \n",
87 | " \n",
88 | " 2 \n",
89 | " 3.2 \n",
90 | " 27 \n",
91 | " \n",
92 | " \n",
93 | " 3 \n",
94 | " 8.5 \n",
95 | " 75 \n",
96 | " \n",
97 | " \n",
98 | " 4 \n",
99 | " 3.5 \n",
100 | " 30 \n",
101 | " \n",
102 | " \n",
103 | " 5 \n",
104 | " 1.5 \n",
105 | " 20 \n",
106 | " \n",
107 | " \n",
108 | " 6 \n",
109 | " 9.2 \n",
110 | " 88 \n",
111 | " \n",
112 | " \n",
113 | " 7 \n",
114 | " 5.5 \n",
115 | " 60 \n",
116 | " \n",
117 | " \n",
118 | " 8 \n",
119 | " 8.3 \n",
120 | " 81 \n",
121 | " \n",
122 | " \n",
123 | " 9 \n",
124 | " 2.7 \n",
125 | " 25 \n",
126 | " \n",
127 | " \n",
128 | " 10 \n",
129 | " 7.7 \n",
130 | " 85 \n",
131 | " \n",
132 | " \n",
133 | " 11 \n",
134 | " 5.9 \n",
135 | " 62 \n",
136 | " \n",
137 | " \n",
138 | " 12 \n",
139 | " 4.5 \n",
140 | " 41 \n",
141 | " \n",
142 | " \n",
143 | " 13 \n",
144 | " 3.3 \n",
145 | " 42 \n",
146 | " \n",
147 | " \n",
148 | " 14 \n",
149 | " 1.1 \n",
150 | " 17 \n",
151 | " \n",
152 | " \n",
153 | " 15 \n",
154 | " 8.9 \n",
155 | " 95 \n",
156 | " \n",
157 | " \n",
158 | " 16 \n",
159 | " 2.5 \n",
160 | " 30 \n",
161 | " \n",
162 | " \n",
163 | " 17 \n",
164 | " 1.9 \n",
165 | " 24 \n",
166 | " \n",
167 | " \n",
168 | " 18 \n",
169 | " 6.1 \n",
170 | " 67 \n",
171 | " \n",
172 | " \n",
173 | " 19 \n",
174 | " 7.4 \n",
175 | " 69 \n",
176 | " \n",
177 | " \n",
178 | " 20 \n",
179 | " 2.7 \n",
180 | " 30 \n",
181 | " \n",
182 | " \n",
183 | " 21 \n",
184 | " 4.8 \n",
185 | " 54 \n",
186 | " \n",
187 | " \n",
188 | " 22 \n",
189 | " 3.8 \n",
190 | " 35 \n",
191 | " \n",
192 | " \n",
193 | " 23 \n",
194 | " 6.9 \n",
195 | " 76 \n",
196 | " \n",
197 | " \n",
198 | " 24 \n",
199 | " 7.8 \n",
200 | " 86 \n",
201 | " \n",
202 | " \n",
203 | "
\n",
204 | "
"
205 | ],
206 | "text/plain": [
207 | " Hours Scores\n",
208 | "0 2.5 21\n",
209 | "1 5.1 47\n",
210 | "2 3.2 27\n",
211 | "3 8.5 75\n",
212 | "4 3.5 30\n",
213 | "5 1.5 20\n",
214 | "6 9.2 88\n",
215 | "7 5.5 60\n",
216 | "8 8.3 81\n",
217 | "9 2.7 25\n",
218 | "10 7.7 85\n",
219 | "11 5.9 62\n",
220 | "12 4.5 41\n",
221 | "13 3.3 42\n",
222 | "14 1.1 17\n",
223 | "15 8.9 95\n",
224 | "16 2.5 30\n",
225 | "17 1.9 24\n",
226 | "18 6.1 67\n",
227 | "19 7.4 69\n",
228 | "20 2.7 30\n",
229 | "21 4.8 54\n",
230 | "22 3.8 35\n",
231 | "23 6.9 76\n",
232 | "24 7.8 86"
233 | ]
234 | },
235 | "execution_count": 2,
236 | "metadata": {},
237 | "output_type": "execute_result"
238 | }
239 | ],
240 | "source": [
241 | "url=\"https://raw.githubusercontent.com/AdiPersonalWorks/Random/master/student_scores%20-%20student_scores.csv\"\n",
242 | "df=pd.read_csv(url)\n",
243 | "df"
244 | ]
245 | },
246 | {
247 | "cell_type": "markdown",
248 | "metadata": {},
249 | "source": [
250 | "### ***Checking how many null values there are in the dataset***"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": 3,
256 | "metadata": {},
257 | "outputs": [
258 | {
259 | "data": {
260 | "text/plain": [
261 | "Hours 0\n",
262 | "Scores 0\n",
263 | "dtype: int64"
264 | ]
265 | },
266 | "execution_count": 3,
267 | "metadata": {},
268 | "output_type": "execute_result"
269 | }
270 | ],
271 | "source": [
272 | "df.isna().sum()"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "### ***Descriptive analysis of the dataset using describe function***"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": 4,
285 | "metadata": {},
286 | "outputs": [
287 | {
288 | "data": {
289 | "text/html": [
290 | "\n",
291 | "\n",
304 | "
\n",
305 | " \n",
306 | " \n",
307 | " \n",
308 | " Hours \n",
309 | " Scores \n",
310 | " \n",
311 | " \n",
312 | " \n",
313 | " \n",
314 | " count \n",
315 | " 25.000000 \n",
316 | " 25.000000 \n",
317 | " \n",
318 | " \n",
319 | " mean \n",
320 | " 5.012000 \n",
321 | " 51.480000 \n",
322 | " \n",
323 | " \n",
324 | " std \n",
325 | " 2.525094 \n",
326 | " 25.286887 \n",
327 | " \n",
328 | " \n",
329 | " min \n",
330 | " 1.100000 \n",
331 | " 17.000000 \n",
332 | " \n",
333 | " \n",
334 | " 25% \n",
335 | " 2.700000 \n",
336 | " 30.000000 \n",
337 | " \n",
338 | " \n",
339 | " 50% \n",
340 | " 4.800000 \n",
341 | " 47.000000 \n",
342 | " \n",
343 | " \n",
344 | " 75% \n",
345 | " 7.400000 \n",
346 | " 75.000000 \n",
347 | " \n",
348 | " \n",
349 | " max \n",
350 | " 9.200000 \n",
351 | " 95.000000 \n",
352 | " \n",
353 | " \n",
354 | "
\n",
355 | "
"
356 | ],
357 | "text/plain": [
358 | " Hours Scores\n",
359 | "count 25.000000 25.000000\n",
360 | "mean 5.012000 51.480000\n",
361 | "std 2.525094 25.286887\n",
362 | "min 1.100000 17.000000\n",
363 | "25% 2.700000 30.000000\n",
364 | "50% 4.800000 47.000000\n",
365 | "75% 7.400000 75.000000\n",
366 | "max 9.200000 95.000000"
367 | ]
368 | },
369 | "execution_count": 4,
370 | "metadata": {},
371 | "output_type": "execute_result"
372 | }
373 | ],
374 | "source": [
375 | "df.describe()"
376 | ]
377 | },
378 | {
379 | "cell_type": "markdown",
380 | "metadata": {},
381 | "source": [
382 | "### ***information about the dataset***"
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "execution_count": 5,
388 | "metadata": {},
389 | "outputs": [
390 | {
391 | "name": "stdout",
392 | "output_type": "stream",
393 | "text": [
394 | "\n",
395 | "RangeIndex: 25 entries, 0 to 24\n",
396 | "Data columns (total 2 columns):\n",
397 | " # Column Non-Null Count Dtype \n",
398 | "--- ------ -------------- ----- \n",
399 | " 0 Hours 25 non-null float64\n",
400 | " 1 Scores 25 non-null int64 \n",
401 | "dtypes: float64(1), int64(1)\n",
402 | "memory usage: 528.0 bytes\n"
403 | ]
404 | }
405 | ],
406 | "source": [
407 | "df.info()"
408 | ]
409 | },
410 | {
411 | "cell_type": "markdown",
412 | "metadata": {},
413 | "source": [
414 | "### ***Plotting a scatter plot showing relationship between No of Hours vs scores*** "
415 | ]
416 | },
417 | {
418 | "cell_type": "code",
419 | "execution_count": 6,
420 | "metadata": {},
421 | "outputs": [
422 | {
423 | "data": {
424 | "image/png": "\n",
425 | "text/plain": [
426 | ""
427 | ]
428 | },
429 | "metadata": {
430 | "needs_background": "light"
431 | },
432 | "output_type": "display_data"
433 | }
434 | ],
435 | "source": [
436 | "plt.xlabel('Hours')\n",
437 | "plt.ylabel('Scores')\n",
438 | "plt.title('Hours VS Scores')\n",
439 | "plt.scatter(df.Hours,df.Scores,color='green')\n",
440 | "plt.show()"
441 | ]
442 | },
443 | {
444 | "cell_type": "markdown",
445 | "metadata": {},
446 | "source": [
447 | "### ***Two variables for the regression***"
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": 10,
453 | "metadata": {},
454 | "outputs": [],
455 | "source": [
456 | "x=np.array(df.Hours)\n",
457 | "y=np.array(df.Scores)"
458 | ]
459 | },
460 | {
461 | "cell_type": "markdown",
462 | "metadata": {},
463 | "source": [
464 | "### ***Reshaping the NumPy arrays into column vectors (one value per row)***"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 14,
470 | "metadata": {},
471 | "outputs": [],
472 | "source": [
473 | "x=x.reshape(-1,1)\n",
474 | "y=y.reshape(-1,1)"
475 | ]
476 | },
477 | {
478 | "cell_type": "markdown",
479 | "metadata": {},
480 | "source": [
481 | "### ***Splitting the data into test data and train data***"
482 | ]
483 | },
484 | {
485 | "cell_type": "code",
486 | "execution_count": 16,
487 | "metadata": {},
488 | "outputs": [],
489 | "source": [
490 | "X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)"
491 | ]
492 | },
493 | {
494 | "cell_type": "markdown",
495 | "metadata": {},
496 | "source": [
497 | "### ***Creating the linear regression model, reshaping the data, and fitting the model to the training data***"
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": 39,
503 | "metadata": {},
504 | "outputs": [
505 | {
506 | "data": {
507 | "text/plain": [
508 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
509 | ]
510 | },
511 | "execution_count": 39,
512 | "metadata": {},
513 | "output_type": "execute_result"
514 | }
515 | ],
516 | "source": [
517 | "reg=LinearRegression()\n",
518 | "X_test=X_test.reshape(-1,1)\n",
519 | "y_test=y_test.reshape(-1,1)\n",
520 | "X_train=X_train.reshape(-1,1)\n",
521 | "y_train=y_train.reshape(-1,1)\n",
522 | "reg.fit(X_train,y_train)"
523 | ]
524 | },
525 | {
526 | "cell_type": "markdown",
527 | "metadata": {},
528 | "source": [
529 | "### ***Formula of simple linear regression***"
530 | ]
531 | },
532 | {
533 | "cell_type": "code",
534 | "execution_count": 40,
535 | "metadata": {},
536 | "outputs": [],
537 | "source": [
538 | "line1=reg.coef_*X_train+reg.intercept_"
539 | ]
540 | },
541 | {
542 | "cell_type": "markdown",
543 | "metadata": {},
544 | "source": [
545 | "### ***Plotting a scatter plot for training dataset***"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": 41,
551 | "metadata": {},
552 | "outputs": [
553 | {
554 | "data": {
555 | "text/plain": [
556 | "Text(0.5, 1.0, 'TRAINING DATA')"
557 | ]
558 | },
559 | "execution_count": 41,
560 | "metadata": {},
561 | "output_type": "execute_result"
562 | },
563 | {
564 | "data": {
565 | "image/png": "\n",
566 | "text/plain": [
567 | ""
568 | ]
569 | },
570 | "metadata": {
571 | "needs_background": "light"
572 | },
573 | "output_type": "display_data"
574 | }
575 | ],
576 | "source": [
577 | "plt.scatter(X_train,y_train,color='green')\n",
578 | "plt.plot(X_train,line1,color='red',linewidth=2)\n",
579 | "plt.title('TRAINING DATA')"
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
584 | "execution_count": 44,
585 | "metadata": {},
586 | "outputs": [],
587 | "source": [
588 | "line2=reg.coef_*X_test+reg.intercept_"
589 | ]
590 | },
591 | {
592 | "cell_type": "markdown",
593 | "metadata": {},
594 | "source": [
595 | "### ***Plotting a scatter plot for testing dataset***"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": 45,
601 | "metadata": {},
602 | "outputs": [
603 | {
604 | "data": {
605 | "text/plain": [
606 | "Text(0.5, 1.0, 'TESTING DATA')"
607 | ]
608 | },
609 | "execution_count": 45,
610 | "metadata": {},
611 | "output_type": "execute_result"
612 | },
613 | {
614 | "data": {
615 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAEICAYAAABGaK+TAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3deXxU9dXH8c9JCEtYZIuIQAyKilYtKlKXanGtOzy1ioqUKjVWoUpREUFNA0VFLRR3UaxYo4ioFXmwiBTclbIpCiLCwyYgiyCrLMl5/pgBc0kge27u5Pt+veY1mZNZzrB8c/K7c+81d0dERKInKewGRESkdBTgIiIRpQAXEYkoBbiISEQpwEVEIkoBLiISUQpwEZGIUoBLhTCzzfkueWa2Ld/trmb2FzPbudf9NuR7fCczm21mG81srZlNNrMMM3sy3/137PUcb8Xv42ZWI/48z8Vvd8j33G3MzPfq91wzm2Jmm8xsXfy17zCz2vt4f8/FX39T/PKFmd1nZgcUct+O8R765qudnq/vLfHv5/+zSN/rtXaZ2cFl+1uRRKMAlwrh7vV2X4ClwCX5ajnxu72c/37u3hBiAQs8D9wKHAC0Bh4H8tz9j/me9969nuOCfbTzPfDXffVqZpcDY4EXgUPcvQnQBWgJtNrP23zA3esDacC1wMnAh2ZWd6/7dY/30D3fn8/7+d7Hz+Llhvney9J4b3WBy4AfgK776UWqIQW4VEXtgP9z98kes8ndX90daqUwCjjOzH619zfMzIChwEB3f9rdvwdw9/nu/id3X1DUk7v7j+7+X+BSoAmxMN/9/KnAb4GewOFm1r6EvV8GbAAGku8HgAgowKVqmgm0NbNhZnammdUr4/NtJTatDy7ke0cSm7RfLeNr4O6bgEnA6fnKlwGbgVeAicDvSvi03YGXgNHE/kxOKGufkjgU4BKmK8xsQ77LFAB3XwR0BFoAY4C18XXgsgT5U0C6me29zNI0fr1qd8HMRsf72Wpm3Ur4OiuAxvludye2zJNLbInmKjNLKc4TxdfBzwRedPfvgMloCpd8FOASpjHu3jDf5czd33D3T9z9CndPIzbRngEMKO0Luft2YFD8Yvm+tS5+3Tzffa+Mr8fPBJJL+FItiK13Y2atiAXw7jX/N4DawEXFfK5uwDx3nx2/nQNcXdwfAJL4FOBS5cXXl18DjinjU/2D2EbR/8lX+wr4FvhNGZ+b+G8I5wDvx0vdiP0fe9PMVgGLiAV4cZdRfgccamar4o8fSuw3hn1trJVqpkbYDYjszcx+CRwFvOHuq82sLbENhKPK8rzuvsvM/gI8nK/mZnYr8LSZbST2aZQNQBugWTH7rUXsh8sQYD2xHxQQC+Bs4Ml8d+8AvGJmTdx9HftgZqcAhwHHA2vyfetvxJZRxhWnN0lsmsAlTF32+uzzZjM7kFiAXgrMMbPNwL+B14EHyuE1XwJW5i+4+8vAFcA1wDJgLbG19xHENj7uS18z20RsyeR5YAZwqrtvMbOTgQzgMXdfle8yDvgGuKqIPrsT+wE2J//jgeHAxWbWuIjHSzVgOqGDiEg0aQIXEYkoBbiISEQpwEVEIkoBLiISUZX6McKmTZt6RkZGZb6kiEjkzZgxY218p7aASg3wjIwMpk+fXpkvKSISeWa2pLC6llBERCJKAS4iElEKcBGRiFKAi4hElAJcRCSiFOAiIhGlABcRiSgFuIhIBZnz3Rws22gxtEWFPL9O6CAiUs7cnfNzzufthW8DsGLTigp5HQW4iEg5+mjZR5z27GmB2tTuUyvktRTgIiLlIDcvlxNHnMhn3322p3bGIWcwpfsUkqxiVqsV4CIiZfTWgre48MULA7UZmTM4ofkJFfq6CnARkVLavms7GcMzWLV51Z7aFT+7gtGXjcbMKvz1FeAiIqXw0pyXuPq1qwO1+b3mc0STIyqtBwW4iEgJbN6xmfr31Q/U
ep3Ui0cufKTSe1GAi4gU0+P/fZyeE3oGasv+vIyWDVqG0o8CXESkCOu2rqPpg00Dtb/86i9kdcwKqaMYBbiIyH4MfHcgWVODQb329rU0SW0SUkc/UYCLiBRi+cbltBrWKlB79IJH6dmh5z4eUfkU4CIie+k1oReP/fexQG3TnZuoV7NeSB0VTgEuIhL39bqvOfLRIwO1F3/zIlcde1VIHe2fAlxEqj13p8vYLrwy95U9tYPqHcTiWxZTq0atEDvbPwW4iFRrs1bO4oQRwV3eJ1w9gQsOvyCkjoqvyAA3syOBl/OVDgXuAZ6P1zOAxcAV7r6+/FsUESl/7k7HUR15b8l7e2rtDmrH9Ounk5yUHGJnxVfkIbLcfb67t3P3dsCJwFbgdaAfMNndDwcmx2+LiFR57y5+l6SBSYHw/vC6D5l1w6zIhDeUfAnlbGChuy8xs05Ax3h9FDAVuKP8WhMRKV+78nZxzOPHMH/d/D21Xx/2a97q+lalHHyqvJU0wK8EXop/3czdVwK4+0ozO7CwB5hZJpAJkJ6eXto+RUTKZNz8cXQa3SlQ+/yPn3Nss2ND6qjsin2UcTOrCVwKvFLUffNz9xHu3t7d26elpZW0PxGRMvlx1480GtIoEN7djuuGZ3nFh3dODmRkQFJS7Donp1yfviQT+AXATHf/Ln77OzNrHp++mwOry7UzEZEyGjV7FL9/4/eB2sKbF3Joo0Mr/sVzciAzE7Zujd1esiR2G6Br13J5iZKc5+cqflo+ARgHdI9/3R14o1w6EhEpo43bN2LZFgjv2065Dc/yyglvgAEDfgrv3bZujdXLSbEmcDNLBc4FbshXvh8YY2Y9gKXA5eXWlYhIKQ37eBh93u4TqK28dSUH1TuochtZurRk9VIoVoC7+1agyV61dcQ+lSIiErrVW1bT7KFmgdr9Z9/PHb8M6cNx6emxZZPC6uWkYk6VLCJSiQZMHlAgvNffsT688AYYPBhSU4O11NRYvZxoV3oRiawlG5aQMTwjUHvmkmfocUKPcBrKb/eGygEDYssm6emx8C6nDZigABeRiLp+3PU8M+uZPbdTU1JZc/saUlNS9/OoSta1a7kG9t4U4CISKXPXzOVnj/8sUBt7+VguO/qykDoKjwJcRCLB3ek0uhNvfv3mnlrrhq2Z32s+KckpIXYWHgW4iFR5076dxi+e+UWg9k63dzj70Or9QTgFuIhUWXmexykjT2Hat9P21E5ueTIfXvchSaYP0SnARaRKmrRwEue9cF6gNu0P0zipxUkhdVT1KMBFpErZmbuTNo+0YekPP+2x2LltZ1674rVIHvK1IinARaTKGDt3LJe/Ejwqx9yb5nJU2lEhdVS1KcBFJHRbd26l0ZBG7MjdsaeWeUImT13yVIhdVX0KcBEJ1YgZI7hh/A2B2pLeS0g/QCeAKYoCXERCsX7beho/0DhQu+v0uxh01qCQOooeBbiIVLr73r+P/v/pH6itvm01aXV11q6SUICLSKVZuWklBw89OFAb9uth9D65d0gdRZsCXEQqRZ+JfRj2ybBAbWO/jdSvVT+kjqJPAS4iFWrh9wtp80ibQO35zs/T7efdQuoocSjARaTCXPPaNeTM+elM7E3qNGF5n+XUrlE7xK4Shw4mICLFk5MDGRmQlBS7zsnZ510/W/UZlm2B8H7zqjdZ23etwrscaQIXkaLl5EBm5k9nWV+yJHYbAicscHfOe+E83ln0zp7aUU2P4vMbP6dGkuKmvGkCF5GiDRjwU3jvtnVrrB73wdIPSBqYFAjvd3//LnN7zlV4VxD9qYpI0ZYu3Wc9Ny+Xdk+144vVX+wpn5lxJpN/N1kHn6pgCnARKVp6emzZZC8TTk3jokHBGJl1wyzaHdSusjqr1rSEIiJFGzwYUn86WfD2ZDjwdrjo3NV7alcecyWe5QrvSqQJXESKtntD5YAB5BywhGt+E/z2172+5vAmh1d+X9VcsQLczBoCzwDHAA5cB8wHXgYygMXA
Fe6+vkK6FJHQrfuf82n6zTWB2s0dbmb4BcND6kiKu4QyHPi3u7cFfg7MA/oBk939cGBy/LaIJKBur3ej6YNNA7Vv+3yr8A5ZkRO4mTUAzgB+D+DuO4AdZtYJ6Bi/2yhgKnBHRTQpIuFY9sMy0v8ePC73eYedx8RrJobUkeRXnCWUQ4E1wD/M7OfADOAWoJm7rwRw95VmdmBhDzazTCATID1dB2gXiYqOz3Xk3SXvBmrL/7ycFg1ahNSR7K04Syg1gBOAJ9z9eGALJVgucfcR7t7e3dunpelYvyJV3Zerv8SyLRDe17W7Ds9yhXcVU5wJfDmw3N0/jd8eSyzAvzOz5vHpuzmwep/PICKR0Hp4axZvWByofd/3exrVaRROQ7JfRU7g7r4KWGZmR8ZLZwNzgXFA93itO/BGhXQoIhXuo2UfYdkWCO+7z7gbz3KFdxVW3M+B/wnIMbOawCLgWmLhP8bMegBLgcsrpkURqSjuTtLAgnPc1v5bqZNSJ4SOpCSKFeDuPhtoX8i3zi7fdkSksoz/ejyXvHRJoPbYhY9x00k3hdSRlJT2xBSpZvI8j+SByQXqO+/eqaMGRoyOhSJSjTw3+7kC4T3mt2PwLFd4R5D+xkSqge27tlN7cMEz4eTdk6dDvkaYJnCRBHf/B/cXCO/Jv5uMZ7nCO+I0gYskqE3bN9Hg/gaBWqPajfj+ju9D6kjKmyZwkQR0y1u3FAjvmZkzFd4JRhO4SAJZvWU1zR5qFqgdf9DxzLxhZkgdSUVSgIskiMvGXMZr814L1BbevJBDGx0aUkdS0RTgIhG3aP0iDnv4sECtc9vOvN7l9ZA6ksqiABeJsJOePonpK6YHaqtuXUWzes328QhJJNqIKRJBs1fNxrItEN5/6vAnPMsV3tWIJnCRiGn6QFPWbVsXqP3Q7wca1Gqwj0dIotIELhIRU/5vCpZtgfC+7+z78CxXeFdTmsBFqrh9HfL1xwE/UqtGrRA6kqpCE7hIFfbq3FcLhPfIS0fiWa7wFk3gIlXRrrxdpAxKKVDPvSeXJNPcJTH6lyBSxTzx3ycKhPe4K8fhWa7wlgBN4CJVxLad20i9N7VAXYd8lX3Rj3ORKiBrSlaB8P7g2g90yFfZL03gIiHa8OMGGg0JnvW9VYNWLP3z0pA6kijRBC4SkuvHXV8gvL+48QuFtxSbJnCRSrZi0wpaDG0RqJ2efjrvXfteSB1JVCnARSrR+S+cz8SFEwO1Jb2XkH5AekgdSZQpwEUqwfy182n7WNtAreuxXXnhNy+E1JEkAgW4SAVr+2hb5q+bH6itvX0tTVKbhNSRJIpibcQ0s8VmNsfMZpvZ9HitsZlNMrMF8etGRT2PSHUy7dtpWLYFwrvvqX3xLFd4S7koyQR+pruvzXe7HzDZ3e83s37x23eUa3ciEVVzUE125u0M1DbfuZm6NeuG1JEkorJ8jLATMCr+9Sigc9nbEYm2id9MxLItEN7Dfj0Mz3KFt5S74k7gDrxtZg485e4jgGbuvhLA3Vea2YGFPdDMMoFMgPR0bWmXxJTneSQPTC5Q33HXDlKSCx6USqQ8FHcCP83dTwAuAHqa2RnFfQF3H+Hu7d29fVpaWqmaFKnKXvj8hQLhnfObHDzLFd5SoYo1gbv7ivj1ajN7HegAfGdmzePTd3NgdQX2KVLl7MzdSc2/1ixQ18GnpLIUOYGbWV0zq7/7a+A84AtgHNA9frfuwBsV1aRIVTP046EFwnviNRN18CmpVMWZwJsBr8f/UdYAXnT3f5vZf4ExZtYDWApcXnFtilQNW3Zsod599QK12jVqs23AtpA6kuqsyAB390XAzwuprwPOroimRKqivpP68uBHDwZq0/4wjZNanBRSR1LdaU9MkSKs3bqWtAeDG+CPTjuaL2/6MqSORGIU4CL7cfWrV/PSFy8FavN7zeeIJkeE1JHITxTgIoVYsmEJGcMzArUL2lzAhK4TwmlIpBAKcJG9nP6P0/lg6QeB2oo+K2he
v3lIHYkUTmfkEYn7YvUXWLYFwjvzhEw8yxXeUiVpAhcBWg1rxfKNywO19Xesp2HthiF1JFI0TeBSrX2w9AMs2wLhnfWrLDzLFd5S5WkCl2rJ3UkaWHB+2TZgG7Vr1A6hI5GS0wQu1c64+eMKhPeTFz2JZ7nCWyJFE7hUG7l5udQYVPCf/K67d5GcVPBQsCJVnSZwqRZGzhxZILxfveJVPMsV3hJZmsAloW3ftZ3agwsui+iQr5IINIFLwrr3/XsLhPeU7lN0yFdJGJrAJeFs3L6RA+4/IFBLS01j9e0654gkFk3gklB6TehVILxn3zBb4S0JSRO4JIRVm1fR/G/B3d07tOjAp3/4NKSORCqeAlwir9PoToybPy5QW3TzIlo3ah1SRyKVQwEukbXw+4W0eaRNoPbbo3/LK5e/ElJHIpVLAS6RdPxTxzN71exAbfVtq0mrm7aPR4gkHm3ElEiZtXIWlm2B8O79i954liu8pdrRBC6R0WhIIzb8uCFQ29hvI/Vr1Q+pI5FwaQKXKm/yoslYtgXC+4FzHsCzXOEt1ZomcKmy9nXI1+13badmcs0QOhKpWjSBS5U05ssxBcL7uU7P4Vmu8BaJ0wQuVcquvF2kDEopUM+9J5ck07whkl+x/0eYWbKZzTKz8fHbrc3sUzNbYGYvm5nGIimTx6Y9ViC8x181Hs9yhbdIIUoygd8CzAMaxG8PAYa5+2gzexLoATxRzv1JNbBt5zZS700N1Awj955cHTVQZD+KNdaYWUvgIuCZ+G0DzgLGxu8yCuhcEQ1KYrvrP3cVCO+PrvuIvCwdr1ukKMWdwP8O9AV2f2arCbDB3XfFby8HWhT2QDPLBDIB0tPTS9+pJJT129bT+IHGgdphjQ7jm5u/CakjkegpcgI3s4uB1e4+I3+5kLt6YY939xHu3t7d26elaU85gWvfuLZAeM+9aa7CW6SEijOBnwZcamYXArWJrYH/HWhoZjXiU3hLYEXFtSmJYPnG5bQa1ipQO6v1WUz+3eSQOhKJtiIncHe/091bunsGcCXwH3fvCkwBfhu/W3fgjQrrUiLv3H+eWyC8l/15mcJbpAzK8tmsO4A+ZvYNsTXxkeXTkiSSeWvmYdnGO4ve2VPr/vPueJbTskHLEDsTib4S7cjj7lOBqfGvFwEdyr8lSRSHP3I433wfXNde13cdjes03scjRKQktHeElLtPln+CZVsgvPv/sj+e5QpvkXKkXeml3Lg7yQOT8b0+kLSl/xZSU1L38SgRKS1N4FIu3lrwFkkDkwLh/fD5D+NZrvAWqSCawKVM8jyP5IHJBeo7795JjST98xKpSJrApdSe/+z5AuE9+rLReJYrvEUqgf6XSYntyN1Brb/WKlDPu0fHLxGpTJrApUQe/PDBAuE9qdskPMsV3iKVTBO4FMvmHZupf1/w/JP1atZj052bQupIRDSBS5H6TOxTILxnZM5QeIuETBO47NOaLWs48KEDA7Xjmh3HZ3/8LKSORCQ/BbgUqsvYLoz5ckygtuBPC2jTuE1IHYnI3hTgErB4w2JaD28dqF1yxCWMu2pcSB2JyL4owGWPU0eeysfLPw7UVt66koPqHRRSRyKyP9qIKXz+3edYtgXC+6b2N+FZrvAWqcI0gVdzB//tYFZuXhmo/dDvBxrUahBSRyJSXJrAq6n3lryHZVsgvAedOQjPcoW3SERoAq9m3J2kgQV/bm8bsI3aNWqH0JGIlJYm8GrkX1/9q0B4P33J03iWK7xFIkgTeDWQm5dLjUEF/6p33b2L5KSCh4IVkWjQBJ7gRswYUSC8/9XlX3iWK7xFIk4TeIL6cdeP1Blcp0Bdh3wVSRyawBPQoHcHFQjv937/ng75KpJgNIEnkB9+/IGGQxoGas3rNWfFrStC6khEKpIm8ARx4/gbC4T353/8XOEtksCKnMDNrDbwHlArfv+x7p5lZq2B0UBjYCbQzd13VGSzUtDKTSs5eOjBgdqprU7lw+s+DKkjEaks
xZnAtwNnufvPgXbA+WZ2MjAEGObuhwPrgR4V12YCycmBjAxISopd5+SU+qkueemSAuG9+JbFCm+RaqLIAPeYzfGbKfGLA2cBY+P1UUDnCukwkeTkQGYmLFkC7rHrzMwSh/iCdQuwbGP81+P31Lr8rAue5RzS8JDy7lpEqqhibcQ0s2RgBtAGeAxYCGxw913xuywHWlRIh4lkwADYujVY27o1Vu/atVhPcdwTxzFn9ZxAbc3ta2ia2rS8uhSRiCjWRkx3z3X3dkBLoANwVGF3K+yxZpZpZtPNbPqaNWtK32kiWLq0ZPV8ZqyYgWVbILz7nNwHz3KFt0g1VaKPEbr7BjObCpwMNDSzGvEpvCVQ6Mcd3H0EMAKgffv2hYZ8tZGeHls2Kay+H/XurceWnVsCtU13bqJezXrl2Z2IREyRE7iZpZlZw/jXdYBzgHnAFOC38bt1B96oqCYTxuDBkJoarKWmxuqFeGfRO1i2BcL7oXMfwrNc4S0ixZrAmwOj4uvgScAYdx9vZnOB0Wb2V2AWMLIC+0wMu9e5BwyILZukp8fCe6/1730d8nX7XdupmVyzMjoVkQgw98pb1Wjfvr1Pnz690l4vikZ/MZqrXr0qUPvn//yTa467JqSORCRsZjbD3dvvXdeu9FXErrxdpAxKKVDPvSeXJNMOsyJSkJKhCnj404cLhPdbXd/Cs1zhLSL7pHQI0dadW7Fs45Z/37KnlpKUgmc557c5v3KaKMc9Q0WkcinAQ9J/cn/q3ls3UPukxyfsuLsSDydTTnuGikg4tBGzku3I3UGtv9YK1I5ocgTze82v/GYyMgr/XPohh8DixZXdjYjsgzZiVgHTvp3GtW9cG6jN6zmPtk3bhtNQGfYMFZHwaQmlEmzduZXb3r6NU0aewtw1cwHI+lUWnuXhhTfsew/QIvYMFZGqQRN4BZu6eCp/GPcHFq5fSJIlcfupt5PdMZs6KQXPV1npBg+OrXnnP8DWfvYMFZGqRQFeQTZu30jfSX15asZTABx74LGMvHQkJ7U4KeTO8inmnqEiUjUpwCvAhAUTuGH8DSzfuJyUpBTuOuMu+v2yX9XcDb5rVwW2SEQpwMvR2q1r6f3v3uTMiX0Mr0OLDoy8dCTHHHhMyJ2JSCJSgJcDd+eVua/Qa0Iv1mxdQ50adRh05iB6n9yb5KTksNsTkQSlAC+jFZtW0HNCT/711b8A6JjRkacveZo2jduE3JmIJDoFeCm5O/+Y/Q/6TOzDD9t/oH7N+jx47oNcf+L1On6JiFQKBXgpLN6wmMw3M5m0aBIAFx5+IU9e9CStDmgVcmciUp0owEsgz/N4dNqj9J/cny07t9CkThOGnz+cq4+9GjMLuz0RqWYU4MX01dqv6DGuBx8t+wiAK352BY9c8AgH1j0w5M5EpLpSgBdhZ+5OHvroIbLfzWZ77naa12vO4xc9Tue2ncNuTUSqOQX4fsxaOYse43owa9UsAK5rdx0PnfcQjeo0CrkzEREFeKF+3PUjg94dxJAPh5DruWQ0zGDExSM497Bzw25NRGQPBfhePl72MdeNu46v1n6FYdzc4WYGnz2YejXrhd2aiEiAAjxuy44tDPjPAB7+9GEc58gmRzLy0pGcln5a2K2JiBRKAQ68s+gdrn/zehZvWEyyJXPHaXdw96/upnaN2mG3JiKyT9U6wDf8uIHb3r6NkbNGAtDuoHY8e+mzHN/8+JA7ExEpWrUN8HHzx3Hj/97Iik0rqJlck6xfZXH7qbeTkpwSdmsiIsVS5EE7zKyVmU0xs3lm9qWZ3RKvNzazSWa2IH5dcZ+ty8mJnYA3KSl2XYazpq/ZsoYrx15Jp9GdWLFpBae2OpXP/vgZ/U/vr/AWkUgpzlGXdgG3uvtRwMlATzM7GugHTHb3w4HJ8dvlLycndtqvJUvAPXadmVniEHd3XpzzIkc9dhQvf/kyqSmpDD9/OO/9/r1wz0spIlJK5u4le4DZG8Cj8UtHd19pZs2Bqe5+5P4e
2759e58+fXrJOszIiIX23g45BBYvLtZTLN+4nBv/90bGfz0egHMOPYcRF4+gdaPWJetFRCQEZjbD3dvvXS/RGriZZQDHA58Czdx9JUA8xAs9KIiZZQKZAOmlOdv50qUlq+fj7jw982lun3Q7G7dv5IBaBzD010O5tt21OviUiEResQPczOoBrwK93X1jcQPQ3UcAIyA2gZe4w/T0wifwIn4YLPx+Ide/eT1TFk8BoNORnXj8osc5uP7BJW5BRKQqKtaZB8wshVh457j7a/Hyd/GlE+LXqyukw8GDITU1WEtNjdULkZuXy9CPh3LsE8cyZfEU0lLTGH3ZaF7v8rrCW0QSSnE+hWLASGCeuw/N961xQPf4192BN8q/PWJnTB8xIrbmbRa7HjGi0DOpf7n6S0579jRufftWtu3aRtdjuzK351y6HNNFSyYiknCK3IhpZr8E3gfmAHnxcn9i6+BjgHRgKXC5u3+/v+cq1UbMYtiRu4MhHwxh0HuD2Jm3kxb1W/DUxU9x0REXlftriYhUtlJvxHT3D4B9ja9nl7Wxspq+Yjo9xvXg8+8+B+CGE29gyDlDOKD2ASF3JiJSsSK7J+a2ndv4y9S/8NDHD5HneRza6FCeueQZzmx9ZtitiYhUikgG+PtL3qfHuB4s+H4BSZZEn5P7MOisQaSmpBb9YBGRBBGpAN+0fRP93unH49MfB+DotKN59tJn+UXLX4TcmYhI5YtMgE/8ZiKZ4zNZ+sNSaiTV4M5f3smA0wdQq0atsFsTEQlFJAK876S+PPjRgwCc2PxEnu30LMc1Oy7krkREwhWJAD+11anUrlGb7I7Z9DmlDzWSItG2iEiFikQSdm7bmYU3L9SelCIi+RRrV/qqQOEtIhIUmQAXEZEgBbiISEQpwEVEIkoBLiISUQpwEZGIUoCLiESUAlxEJKJKfFb6Mr2Y2RqgkBNcVilNgbVhN1GOEun9JNJ7gcR6P3ovFesQd0/bu1ipAR4FZja9sDNfRFUivZ9Eei+QWO9H7yUcWkIREYkoBbiISEQpwAsaEXYD5SyR3k8ivRdIrPej9xICrYGLiESUJnARkYhSgIuIRJQCPM7MWpnZFDObZ2ZfmtktYfdUWmZW28ymmdln8feSHXZPZQF5hPoAAAKZSURBVGVmyWY2y8zGh91LWZnZYjObY2azzWx62P2UlZk1NLOxZvZV/P/PKWH3VBpmdmT872T3ZaOZ9Q67r/3RGnicmTUHmrv7TDOrD8wAOrv73JBbKzEzM6Cuu282sxTgA+AWd/8k5NZKzcz6AO2BBu5+cdj9lIWZLQbau3tV21mkVMxsFPC+uz9jZjWBVHffEHZfZWFmycC3wC/cvcrufKgJPM7dV7r7zPjXm4B5QItwuyodj9kcv5kSv0T2J7WZtQQuAp4JuxcJMrMGwBnASAB33xH18I47G1hYlcMbFOCFMrMM4Hjg03A7Kb34ksNsYDUwyd0j+16AvwN9gbywGyknDrxtZjPMLDPsZsroUGAN8I/4EtczZlY37KbKwZXAS2E3URQF+F7MrB7wKtDb3TeG3U9puXuuu7cDWgIdzOyYsHsqDTO7GFjt7jPC7qUcnebuJwAXAD3N7IywGyqDGsAJwBPufjywBegXbktlE18GuhR4JexeiqIAzye+XvwqkOPur4XdT3mI/zo7FTg/5FZK6zTg0vi68WjgLDN7IdyWysbdV8SvVwOvAx3C7ahMlgPL8/2GN5ZYoEfZBcBMd/8u7EaKogCPi2/4GwnMc/ehYfdTFmaWZmYN41/XAc4Bvgq3q9Jx9zvdvaW7ZxD7tfY/7n5NyG2VmpnVjW8kJ77UcB7wRbhdlZ67rwKWmdmR8dLZQOQ2/O/lKiKwfAKxX38k5jSgGzAnvnYM0N/dJ4TYU2k1B0bFt6QnAWPcPfIfv0sQzYDXY/MCNYAX3f3f4bZUZn8CcuJLD4uAa0Pup9TMLBU4F7gh7F6KQx8jFBGJKC2hiIhElAJcRCSiFOAi
IhGlABcRiSgFuIhIRCnARUQiSgEuIhJR/w8X2Aq1Ddq8RgAAAABJRU5ErkJggg==\n",
616 | "text/plain": [
617 | ""
618 | ]
619 | },
620 | "metadata": {
621 | "needs_background": "light"
622 | },
623 | "output_type": "display_data"
624 | }
625 | ],
626 | "source": [
627 | "plt.scatter(X_test,y_test,color='red')\n",
628 | "plt.plot(X_test,line2,color='green',linewidth=2)\n",
629 | "plt.title('TESTING DATA')"
630 | ]
631 | },
632 | {
633 | "cell_type": "markdown",
634 | "metadata": {},
635 | "source": [
636 | "### ***Predicting the test values***"
637 | ]
638 | },
639 | {
640 | "cell_type": "code",
641 | "execution_count": 49,
642 | "metadata": {},
643 | "outputs": [
644 | {
645 | "data": {
646 | "text/plain": [
647 | "array([[16.88414476],\n",
648 | " [33.73226078],\n",
649 | " [75.357018 ],\n",
650 | " [26.79480124],\n",
651 | " [60.49103328]])"
652 | ]
653 | },
654 | "execution_count": 49,
655 | "metadata": {},
656 | "output_type": "execute_result"
657 | }
658 | ],
659 | "source": [
660 | "y_predict=reg.predict(X_test)\n",
661 | "y_predict"
662 | ]
663 | },
664 | {
665 | "cell_type": "markdown",
666 | "metadata": {},
667 | "source": [
668 | "### ***Comparing the actuals with the prediction to see the difference***"
669 | ]
670 | },
671 | {
672 | "cell_type": "code",
673 | "execution_count": 56,
674 | "metadata": {},
675 | "outputs": [
676 | {
677 | "name": "stdout",
678 | "output_type": "stream",
679 | "text": [
680 | "Comparing the actuals with prediction: \n"
681 | ]
682 | },
683 | {
684 | "data": {
685 | "text/html": [
686 | "\n",
687 | "\n",
700 | "
\n",
701 | " \n",
702 | " \n",
703 | " \n",
704 | " ACTUAL \n",
705 | " prediction \n",
706 | " \n",
707 | " \n",
708 | " \n",
709 | " \n",
710 | " 0 \n",
711 | " 20 \n",
712 | " 16.884145 \n",
713 | " \n",
714 | " \n",
715 | " 1 \n",
716 | " 27 \n",
717 | " 33.732261 \n",
718 | " \n",
719 | " \n",
720 | " 2 \n",
721 | " 69 \n",
722 | " 75.357018 \n",
723 | " \n",
724 | " \n",
725 | " 3 \n",
726 | " 30 \n",
727 | " 26.794801 \n",
728 | " \n",
729 | " \n",
730 | " 4 \n",
731 | " 62 \n",
732 | " 60.491033 \n",
733 | " \n",
734 | " \n",
735 | "
\n",
736 | "
"
737 | ],
738 | "text/plain": [
739 | " ACTUAL prediction\n",
740 | "0 20 16.884145\n",
741 | "1 27 33.732261\n",
742 | "2 69 75.357018\n",
743 | "3 30 26.794801\n",
744 | "4 62 60.491033"
745 | ]
746 | },
747 | "execution_count": 56,
748 | "metadata": {},
749 | "output_type": "execute_result"
750 | }
751 | ],
752 | "source": [
753 | "# Flatten the 2-D column vectors so each one becomes a single DataFrame column.\n",
754 | "actual_scores=y_test.flatten()\n",
755 | "predicted_scores=y_predict.flatten()\n",
756 | "print(\"Comparing the actuals with prediction: \")\n",
757 | "# New name keeps the dataset in `df` intact; the default 0..n-1 index replaces the hard-coded set of labels.\n",
758 | "comparison=pd.DataFrame({\"ACTUAL\":actual_scores,\"prediction\":predicted_scores})\n",
759 | "comparison\n"
760 | ]
761 | },
762 | {
763 | "cell_type": "markdown",
764 | "metadata": {},
765 | "source": [
766 | "### ***Evaluating the model***\n",
767 | "#### ***Evaluation is a very important step: the mean absolute error (MAE) is used to measure the accuracy of the model***"
768 | ]
769 | },
770 | {
771 | "cell_type": "code",
772 | "execution_count": 69,
773 | "metadata": {},
774 | "outputs": [
775 | {
776 | "data": {
777 | "text/plain": [
778 | "4.183859899002975"
779 | ]
780 | },
781 | "execution_count": 69,
782 | "metadata": {},
783 | "output_type": "execute_result"
784 | }
785 | ],
786 | "source": [
787 | "mae=mean_absolute_error(y_test,y_predict)\n",
788 | "mae"
789 | ]
790 | },
791 | {
792 | "cell_type": "markdown",
793 | "metadata": {},
794 | "source": [
795 | "## ***prediction***\n",
796 | "### ***What will be the predicted score if a student studies for 9.25 hrs/day?***"
797 | ]
798 | },
799 | {
800 | "cell_type": "code",
801 | "execution_count": 71,
802 | "metadata": {},
803 | "outputs": [
804 | {
805 | "data": {
806 | "text/plain": [
807 | "array([[93.69173249]])"
808 | ]
809 | },
810 | "execution_count": 71,
811 | "metadata": {},
812 | "output_type": "execute_result"
813 | }
814 | ],
815 | "source": [
816 | "data_predict=reg.predict([[9.25]])\n",
817 | "data_predict"
818 | ]
819 | }
820 | ],
821 | "metadata": {
822 | "kernelspec": {
823 | "display_name": "Python 3",
824 | "language": "python",
825 | "name": "python3"
826 | },
827 | "language_info": {
828 | "codemirror_mode": {
829 | "name": "ipython",
830 | "version": 3
831 | },
832 | "file_extension": ".py",
833 | "mimetype": "text/x-python",
834 | "name": "python",
835 | "nbconvert_exporter": "python",
836 | "pygments_lexer": "ipython3",
837 | "version": "3.7.6"
838 | }
839 | },
840 | "nbformat": 4,
841 | "nbformat_minor": 4
842 | }
843 |
--------------------------------------------------------------------------------
/TASK-2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# ***The Sparks Foundation Task-2***"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## ***From the given ‘Iris’ dataset, predict the optimum number of clusters and represent it visually.***\n"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "### ***Import all the required libraries***"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 14,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "import pandas as pd\n",
31 | "import numpy as np\n",
32 | "from sklearn.cluster import KMeans\n",
33 | "from sklearn.preprocessing import MinMaxScaler\n",
34 | "import matplotlib.pyplot as plt\n",
35 | "%matplotlib inline"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "### ***Read the CSV file***"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 9,
48 | "metadata": {},
49 | "outputs": [
50 | {
51 | "data": {
52 | "text/html": [
53 | "\n",
54 | "\n",
67 | "
\n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " Id \n",
72 | " SepalLengthCm \n",
73 | " SepalWidthCm \n",
74 | " PetalLengthCm \n",
75 | " PetalWidthCm \n",
76 | " Species \n",
77 | " \n",
78 | " \n",
79 | " \n",
80 | " \n",
81 | " 0 \n",
82 | " 1 \n",
83 | " 5.1 \n",
84 | " 3.5 \n",
85 | " 1.4 \n",
86 | " 0.2 \n",
87 | " Iris-setosa \n",
88 | " \n",
89 | " \n",
90 | " 1 \n",
91 | " 2 \n",
92 | " 4.9 \n",
93 | " 3.0 \n",
94 | " 1.4 \n",
95 | " 0.2 \n",
96 | " Iris-setosa \n",
97 | " \n",
98 | " \n",
99 | " 2 \n",
100 | " 3 \n",
101 | " 4.7 \n",
102 | " 3.2 \n",
103 | " 1.3 \n",
104 | " 0.2 \n",
105 | " Iris-setosa \n",
106 | " \n",
107 | " \n",
108 | " 3 \n",
109 | " 4 \n",
110 | " 4.6 \n",
111 | " 3.1 \n",
112 | " 1.5 \n",
113 | " 0.2 \n",
114 | " Iris-setosa \n",
115 | " \n",
116 | " \n",
117 | " 4 \n",
118 | " 5 \n",
119 | " 5.0 \n",
120 | " 3.6 \n",
121 | " 1.4 \n",
122 | " 0.2 \n",
123 | " Iris-setosa \n",
124 | " \n",
125 | " \n",
126 | " ... \n",
127 | " ... \n",
128 | " ... \n",
129 | " ... \n",
130 | " ... \n",
131 | " ... \n",
132 | " ... \n",
133 | " \n",
134 | " \n",
135 | " 145 \n",
136 | " 146 \n",
137 | " 6.7 \n",
138 | " 3.0 \n",
139 | " 5.2 \n",
140 | " 2.3 \n",
141 | " Iris-virginica \n",
142 | " \n",
143 | " \n",
144 | " 146 \n",
145 | " 147 \n",
146 | " 6.3 \n",
147 | " 2.5 \n",
148 | " 5.0 \n",
149 | " 1.9 \n",
150 | " Iris-virginica \n",
151 | " \n",
152 | " \n",
153 | " 147 \n",
154 | " 148 \n",
155 | " 6.5 \n",
156 | " 3.0 \n",
157 | " 5.2 \n",
158 | " 2.0 \n",
159 | " Iris-virginica \n",
160 | " \n",
161 | " \n",
162 | " 148 \n",
163 | " 149 \n",
164 | " 6.2 \n",
165 | " 3.4 \n",
166 | " 5.4 \n",
167 | " 2.3 \n",
168 | " Iris-virginica \n",
169 | " \n",
170 | " \n",
171 | " 149 \n",
172 | " 150 \n",
173 | " 5.9 \n",
174 | " 3.0 \n",
175 | " 5.1 \n",
176 | " 1.8 \n",
177 | " Iris-virginica \n",
178 | " \n",
179 | " \n",
180 | "
\n",
181 | "
150 rows × 6 columns
\n",
182 | "
"
183 | ],
184 | "text/plain": [
185 | " Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm \\\n",
186 | "0 1 5.1 3.5 1.4 0.2 \n",
187 | "1 2 4.9 3.0 1.4 0.2 \n",
188 | "2 3 4.7 3.2 1.3 0.2 \n",
189 | "3 4 4.6 3.1 1.5 0.2 \n",
190 | "4 5 5.0 3.6 1.4 0.2 \n",
191 | ".. ... ... ... ... ... \n",
192 | "145 146 6.7 3.0 5.2 2.3 \n",
193 | "146 147 6.3 2.5 5.0 1.9 \n",
194 | "147 148 6.5 3.0 5.2 2.0 \n",
195 | "148 149 6.2 3.4 5.4 2.3 \n",
196 | "149 150 5.9 3.0 5.1 1.8 \n",
197 | "\n",
198 | " Species \n",
199 | "0 Iris-setosa \n",
200 | "1 Iris-setosa \n",
201 | "2 Iris-setosa \n",
202 | "3 Iris-setosa \n",
203 | "4 Iris-setosa \n",
204 | ".. ... \n",
205 | "145 Iris-virginica \n",
206 | "146 Iris-virginica \n",
207 | "147 Iris-virginica \n",
208 | "148 Iris-virginica \n",
209 | "149 Iris-virginica \n",
210 | "\n",
211 | "[150 rows x 6 columns]"
212 | ]
213 | },
214 | "execution_count": 9,
215 | "metadata": {},
216 | "output_type": "execute_result"
217 | }
218 | ],
219 | "source": [
220 | "# Load the Iris measurements; the relative path is resolved against the kernel's working directory.\ndf = pd.read_csv(\"iris.csv\")\n",
221 | "df"
222 | ]
223 | },
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {},
227 | "source": [
228 | "### ***Dropping the unnecessary columns***"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 16,
234 | "metadata": {},
235 | "outputs": [],
236 | "source": [
237 | "# Keep only the four measurement columns: 'Id' is a row counter and 'Species' is the label.\n# Reassigning with errors='ignore' (instead of inplace=True) keeps this cell safe to re-run.\ndf = df.drop(columns=['Id', 'Species'], errors='ignore')"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 44,
243 | "metadata": {},
244 | "outputs": [
245 | {
246 | "data": {
247 | "text/html": [
248 | "\n",
249 | "\n",
262 | "
\n",
263 | " \n",
264 | " \n",
265 | " \n",
266 | " SepalLengthCm \n",
267 | " SepalWidthCm \n",
268 | " PetalLengthCm \n",
269 | " PetalWidthCm \n",
270 | " cluster \n",
271 | " \n",
272 | " \n",
273 | " \n",
274 | " \n",
275 | " 0 \n",
276 | " 5.1 \n",
277 | " 3.5 \n",
278 | " 1.4 \n",
279 | " 0.2 \n",
280 | " 1 \n",
281 | " \n",
282 | " \n",
283 | " 1 \n",
284 | " 4.9 \n",
285 | " 3.0 \n",
286 | " 1.4 \n",
287 | " 0.2 \n",
288 | " 1 \n",
289 | " \n",
290 | " \n",
291 | " 2 \n",
292 | " 4.7 \n",
293 | " 3.2 \n",
294 | " 1.3 \n",
295 | " 0.2 \n",
296 | " 1 \n",
297 | " \n",
298 | " \n",
299 | " 3 \n",
300 | " 4.6 \n",
301 | " 3.1 \n",
302 | " 1.5 \n",
303 | " 0.2 \n",
304 | " 1 \n",
305 | " \n",
306 | " \n",
307 | " 4 \n",
308 | " 5.0 \n",
309 | " 3.6 \n",
310 | " 1.4 \n",
311 | " 0.2 \n",
312 | " 1 \n",
313 | " \n",
314 | " \n",
315 | " ... \n",
316 | " ... \n",
317 | " ... \n",
318 | " ... \n",
319 | " ... \n",
320 | " ... \n",
321 | " \n",
322 | " \n",
323 | " 145 \n",
324 | " 6.7 \n",
325 | " 3.0 \n",
326 | " 5.2 \n",
327 | " 2.3 \n",
328 | " 0 \n",
329 | " \n",
330 | " \n",
331 | " 146 \n",
332 | " 6.3 \n",
333 | " 2.5 \n",
334 | " 5.0 \n",
335 | " 1.9 \n",
336 | " 2 \n",
337 | " \n",
338 | " \n",
339 | " 147 \n",
340 | " 6.5 \n",
341 | " 3.0 \n",
342 | " 5.2 \n",
343 | " 2.0 \n",
344 | " 0 \n",
345 | " \n",
346 | " \n",
347 | " 148 \n",
348 | " 6.2 \n",
349 | " 3.4 \n",
350 | " 5.4 \n",
351 | " 2.3 \n",
352 | " 0 \n",
353 | " \n",
354 | " \n",
355 | " 149 \n",
356 | " 5.9 \n",
357 | " 3.0 \n",
358 | " 5.1 \n",
359 | " 1.8 \n",
360 | " 2 \n",
361 | " \n",
362 | " \n",
363 | "
\n",
364 | "
150 rows × 5 columns
\n",
365 | "
"
366 | ],
367 | "text/plain": [
368 | " SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm cluster\n",
369 | "0 5.1 3.5 1.4 0.2 1\n",
370 | "1 4.9 3.0 1.4 0.2 1\n",
371 | "2 4.7 3.2 1.3 0.2 1\n",
372 | "3 4.6 3.1 1.5 0.2 1\n",
373 | "4 5.0 3.6 1.4 0.2 1\n",
374 | ".. ... ... ... ... ...\n",
375 | "145 6.7 3.0 5.2 2.3 0\n",
376 | "146 6.3 2.5 5.0 1.9 2\n",
377 | "147 6.5 3.0 5.2 2.0 0\n",
378 | "148 6.2 3.4 5.4 2.3 0\n",
379 | "149 5.9 3.0 5.1 1.8 2\n",
380 | "\n",
381 | "[150 rows x 5 columns]"
382 | ]
383 | },
384 | "execution_count": 44,
385 | "metadata": {},
386 | "output_type": "execute_result"
387 | }
388 | ],
389 | "source": [
390 | "df"
391 | ]
392 | },
393 | {
394 | "cell_type": "markdown",
395 | "metadata": {},
396 | "source": [
397 | "### ***K-Means***"
398 | ]
399 | },
400 | {
401 | "cell_type": "code",
402 | "execution_count": 37,
403 | "metadata": {},
404 | "outputs": [],
405 | "source": [
406 | "# Elbow method: fit K-Means for k = 1..9 and record the inertia (within-cluster SSE) for each k.\nx=df.iloc[:,[0,1,2,3]].values\n",
407 | "sse = []\n",
408 | "k_rng = range(1,10)\n",
409 | "for k in k_rng:\n",
410 | " # Fixed seed and explicit n_init keep the curve reproducible across runs and sklearn versions.\n km = KMeans(n_clusters=k, n_init=10, random_state=42)\n",
411 | " km.fit(x)\n",
412 | " sse.append(km.inertia_)"
413 | ]
414 | },
415 | {
416 | "cell_type": "markdown",
417 | "metadata": {},
418 | "source": [
419 | "### ***Plotting an ELBOW GRAPH to find the correct number of clusters***"
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": 31,
425 | "metadata": {},
426 | "outputs": [
427 | {
428 | "data": {
429 | "text/plain": [
430 | "[]"
431 | ]
432 | },
433 | "execution_count": 31,
434 | "metadata": {},
435 | "output_type": "execute_result"
436 | },
437 | {
438 | "data": {
439 | "image/png": "\n",
440 | "text/plain": [
441 | ""
442 | ]
443 | },
444 | "metadata": {
445 | "needs_background": "light"
446 | },
447 | "output_type": "display_data"
448 | }
449 | ],
450 | "source": [
451 | "# The x-axis is the candidate number of clusters k, not the algorithm name.\nplt.xlabel('NUMBER OF CLUSTERS (K)')\n",
452 | "plt.ylabel('SUM OF SQUARED ERROR')\n",
453 | "plt.title('THE ELBOW ANALYSIS')\n",
454 | "plt.plot(k_rng,sse,color='purple',marker='o');  # trailing ';' hides the Line2D repr"
455 | ]
456 | },
457 | {
458 | "cell_type": "markdown",
459 | "metadata": {},
460 | "source": [
461 | "#### ***This shows that 3 is the optimum number of clusters to form in the Iris dataset***"
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "execution_count": 38,
467 | "metadata": {},
468 | "outputs": [
469 | {
470 | "data": {
471 | "text/plain": [
472 | "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
473 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
474 | " 1, 1, 1, 1, 1, 1, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
475 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
476 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0,\n",
477 | " 0, 0, 0, 2, 2, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0,\n",
478 | " 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2])"
479 | ]
480 | },
481 | "execution_count": 38,
482 | "metadata": {},
483 | "output_type": "execute_result"
484 | }
485 | ],
486 | "source": [
487 | "# Seeded so the cluster ids (which are otherwise arbitrary) stay the same on every run.\nkm = KMeans(n_clusters=3, n_init=10, random_state=42)\n",
488 | "y_predict = km.fit_predict(x)\n",
489 | "y_predict"
490 | ]
491 | },
492 | {
493 | "cell_type": "markdown",
494 | "metadata": {},
495 | "source": [
496 | "## ***Adding a cluster column to show which cluster each sample belongs to***"
497 | ]
498 | },
499 | {
500 | "cell_type": "code",
501 | "execution_count": 43,
502 | "metadata": {},
503 | "outputs": [
504 | {
505 | "data": {
506 | "text/html": [
507 | "\n",
508 | "\n",
521 | "
\n",
522 | " \n",
523 | " \n",
524 | " \n",
525 | " SepalLengthCm \n",
526 | " SepalWidthCm \n",
527 | " PetalLengthCm \n",
528 | " PetalWidthCm \n",
529 | " cluster \n",
530 | " \n",
531 | " \n",
532 | " \n",
533 | " \n",
534 | " 0 \n",
535 | " 5.1 \n",
536 | " 3.5 \n",
537 | " 1.4 \n",
538 | " 0.2 \n",
539 | " 1 \n",
540 | " \n",
541 | " \n",
542 | " 1 \n",
543 | " 4.9 \n",
544 | " 3.0 \n",
545 | " 1.4 \n",
546 | " 0.2 \n",
547 | " 1 \n",
548 | " \n",
549 | " \n",
550 | " 2 \n",
551 | " 4.7 \n",
552 | " 3.2 \n",
553 | " 1.3 \n",
554 | " 0.2 \n",
555 | " 1 \n",
556 | " \n",
557 | " \n",
558 | " 3 \n",
559 | " 4.6 \n",
560 | " 3.1 \n",
561 | " 1.5 \n",
562 | " 0.2 \n",
563 | " 1 \n",
564 | " \n",
565 | " \n",
566 | " 4 \n",
567 | " 5.0 \n",
568 | " 3.6 \n",
569 | " 1.4 \n",
570 | " 0.2 \n",
571 | " 1 \n",
572 | " \n",
573 | " \n",
574 | " ... \n",
575 | " ... \n",
576 | " ... \n",
577 | " ... \n",
578 | " ... \n",
579 | " ... \n",
580 | " \n",
581 | " \n",
582 | " 145 \n",
583 | " 6.7 \n",
584 | " 3.0 \n",
585 | " 5.2 \n",
586 | " 2.3 \n",
587 | " 0 \n",
588 | " \n",
589 | " \n",
590 | " 146 \n",
591 | " 6.3 \n",
592 | " 2.5 \n",
593 | " 5.0 \n",
594 | " 1.9 \n",
595 | " 2 \n",
596 | " \n",
597 | " \n",
598 | " 147 \n",
599 | " 6.5 \n",
600 | " 3.0 \n",
601 | " 5.2 \n",
602 | " 2.0 \n",
603 | " 0 \n",
604 | " \n",
605 | " \n",
606 | " 148 \n",
607 | " 6.2 \n",
608 | " 3.4 \n",
609 | " 5.4 \n",
610 | " 2.3 \n",
611 | " 0 \n",
612 | " \n",
613 | " \n",
614 | " 149 \n",
615 | " 5.9 \n",
616 | " 3.0 \n",
617 | " 5.1 \n",
618 | " 1.8 \n",
619 | " 2 \n",
620 | " \n",
621 | " \n",
622 | "
\n",
623 | "
150 rows × 5 columns
\n",
624 | "
"
625 | ],
626 | "text/plain": [
627 | " SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm cluster\n",
628 | "0 5.1 3.5 1.4 0.2 1\n",
629 | "1 4.9 3.0 1.4 0.2 1\n",
630 | "2 4.7 3.2 1.3 0.2 1\n",
631 | "3 4.6 3.1 1.5 0.2 1\n",
632 | "4 5.0 3.6 1.4 0.2 1\n",
633 | ".. ... ... ... ... ...\n",
634 | "145 6.7 3.0 5.2 2.3 0\n",
635 | "146 6.3 2.5 5.0 1.9 2\n",
636 | "147 6.5 3.0 5.2 2.0 0\n",
637 | "148 6.2 3.4 5.4 2.3 0\n",
638 | "149 5.9 3.0 5.1 1.8 2\n",
639 | "\n",
640 | "[150 rows x 5 columns]"
641 | ]
642 | },
643 | "execution_count": 43,
644 | "metadata": {},
645 | "output_type": "execute_result"
646 | }
647 | ],
648 | "source": [
649 | "# Attach each row's K-Means label as a new 'cluster' column.\ndf['cluster'] = y_predict\n",
650 | "df"
651 | ]
652 | },
653 | {
654 | "cell_type": "code",
655 | "execution_count": 23,
656 | "metadata": {},
657 | "outputs": [
658 | {
659 | "data": {
660 | "text/plain": [
661 | "array([1, 0, 2])"
662 | ]
663 | },
664 | "execution_count": 23,
665 | "metadata": {},
666 | "output_type": "execute_result"
667 | }
668 | ],
669 | "source": [
670 | "# Distinct cluster ids present in the frame.\ndf['cluster'].unique()"
671 | ]
672 | },
673 | {
674 | "cell_type": "code",
675 | "execution_count": 32,
676 | "metadata": {},
677 | "outputs": [],
678 | "source": [
679 | "# One sub-frame per cluster id.\ndf1 = df.loc[df['cluster'] == 0]\n",
680 | "df2 = df.loc[df['cluster'] == 1]\n",
681 | "df3 = df.loc[df['cluster'] == 2]"
682 | ]
683 | },
684 | {
685 | "cell_type": "markdown",
686 | "metadata": {},
687 | "source": [
688 | "## ***Plotting a scatter plot showing the clusters***"
689 | ]
690 | },
691 | {
692 | "cell_type": "code",
693 | "execution_count": 55,
694 | "metadata": {},
695 | "outputs": [
696 | {
697 | "data": {
698 | "text/plain": [
699 | ""
700 | ]
701 | },
702 | "execution_count": 55,
703 | "metadata": {},
704 | "output_type": "execute_result"
705 | },
706 | {
707 | "data": {
708 | "image/png": "\n",
709 | "text/plain": [
710 | ""
711 | ]
712 | },
713 | "metadata": {
714 | "needs_background": "light"
715 | },
716 | "output_type": "display_data"
717 | }
718 | ],
719 | "source": [
720 | "# K-Means cluster ids are arbitrary and need not line up with species, so the legend uses neutral labels\n# (in the recorded run the first 50 rows, Iris-setosa, were assigned cluster 1, not cluster 0).\nplt.title('K Means Clustering')\n",
721 | "plt.scatter(x[y_predict==0,0],x[y_predict==0,1],c='red',label='Cluster 0')\n",
722 | "plt.scatter(x[y_predict==1,0],x[y_predict==1,1],c='green',label='Cluster 1')\n",
723 | "plt.scatter(x[y_predict==2,0],x[y_predict==2,1],c='yellow',label='Cluster 2')\n",
724 | "# Columns 0 and 1 of x are the sepal length and sepal width.\nplt.xlabel('SepalLengthCm')\nplt.ylabel('SepalWidthCm')\nplt.legend(loc='best');"
725 | ]
726 | },
727 | {
728 | "cell_type": "markdown",
729 | "metadata": {},
730 | "source": [
731 | "# ***THANK YOU FOR WATCHING***"
732 | ]
733 | }
734 | ],
735 | "metadata": {
736 | "kernelspec": {
737 | "display_name": "Python 3",
738 | "language": "python",
739 | "name": "python3"
740 | },
741 | "language_info": {
742 | "codemirror_mode": {
743 | "name": "ipython",
744 | "version": 3
745 | },
746 | "file_extension": ".py",
747 | "mimetype": "text/x-python",
748 | "name": "python",
749 | "nbconvert_exporter": "python",
750 | "pygments_lexer": "ipython3",
751 | "version": "3.7.6"
752 | }
753 | },
754 | "nbformat": 4,
755 | "nbformat_minor": 4
756 | }
757 |
--------------------------------------------------------------------------------