├── .gitattributes
├── student_scores.csv
├── .ipynb_checkpoints
│   └── linear-checkpoint.ipynb
└── linear.ipynb
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/student_scores.csv:
--------------------------------------------------------------------------------
1 | Hours,Scores
2 | 2.5,21
3 | 5.1,47
4 | 3.2,27
5 | 8.5,75
6 | 3.5,30
7 | 1.5,20
8 | 9.2,88
9 | 5.5,60
10 | 8.3,81
11 | 2.7,25
12 | 7.7,85
13 | 5.9,62
14 | 4.5,41
15 | 3.3,42
16 | 1.1,17
17 | 8.9,95
18 | 2.5,30
19 | 1.9,24
20 | 6.1,67
21 | 7.4,69
22 | 2.7,30
23 | 4.8,54
24 | 3.8,35
25 | 6.9,76
26 | 7.8,86
27 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/linear-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\nimport numpy as np\n",
10 | "import matplotlib.pyplot as plt\n",
11 | "%matplotlib inline"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 6,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/plain": [
22 | "(25, 2)"
23 | ]
24 | },
25 | "execution_count": 6,
26 | "metadata": {},
27 | "output_type": "execute_result"
28 | }
29 | ],
30 | "source": [
31 | "dataset = pd.read_csv('student_scores.csv')\n",
32 | "dataset.shape"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 7,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "data": {
42 | "text/html": [
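43 | "<div>\n",
44 | "<style scoped>\n",
45 | "    .dataframe tbody tr th:only-of-type {\n",
46 | "        vertical-align: middle;\n",
47 | "    }\n",
48 | "\n",
49 | "    .dataframe tbody tr th {\n",
50 | "        vertical-align: top;\n",
51 | "    }\n",
52 | "\n",
53 | "    .dataframe thead th {\n",
54 | "        text-align: right;\n",
55 | "    }\n",
56 | "</style>\n",
57 | "<table border=\"1\" class=\"dataframe\">\n",
58 | "  <thead>\n",
59 | "    <tr style=\"text-align: right;\">\n",
60 | "      <th></th>\n",
61 | "      <th>Hours</th>\n",
62 | "      <th>Scores</th>\n",
63 | "    </tr>\n",
64 | "  </thead>\n",
65 | "  <tbody>\n",
66 | "    <tr>\n",
67 | "      <th>0</th>\n",
68 | "      <td>2.5</td>\n",
69 | "      <td>21</td>\n",
70 | "    </tr>\n",
71 | "    <tr>\n",
72 | "      <th>1</th>\n",
73 | "      <td>5.1</td>\n",
74 | "      <td>47</td>\n",
75 | "    </tr>\n",
76 | "    <tr>\n",
77 | "      <th>2</th>\n",
78 | "      <td>3.2</td>\n",
79 | "      <td>27</td>\n",
80 | "    </tr>\n",
81 | "    <tr>\n",
82 | "      <th>3</th>\n",
83 | "      <td>8.5</td>\n",
84 | "      <td>75</td>\n",
85 | "    </tr>\n",
86 | "    <tr>\n",
87 | "      <th>4</th>\n",
88 | "      <td>3.5</td>\n",
89 | "      <td>30</td>\n",
90 | "    </tr>\n",
91 | "  </tbody>\n",
92 | "</table>\n",
93 | "</div>"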
94 | ],
95 | "text/plain": [
96 | " Hours Scores\n",
97 | "0 2.5 21\n",
98 | "1 5.1 47\n",
99 | "2 3.2 27\n",
100 | "3 8.5 75\n",
101 | "4 3.5 30"
102 | ]
103 | },
104 | "execution_count": 7,
105 | "metadata": {},
106 | "output_type": "execute_result"
107 | }
108 | ],
109 | "source": [
110 | "dataset.head()"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 8,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "data": {
120 | "text/html": [
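121 | "<div>\n",
122 | "<style scoped>\n",
123 | "    .dataframe tbody tr th:only-of-type {\n",
124 | "        vertical-align: middle;\n",
125 | "    }\n",
126 | "\n",
127 | "    .dataframe tbody tr th {\n",
128 | "        vertical-align: top;\n",
129 | "    }\n",
130 | "\n",
131 | "    .dataframe thead th {\n",
132 | "        text-align: right;\n",
133 | "    }\n",
134 | "</style>\n",
135 | "<table border=\"1\" class=\"dataframe\">\n",
136 | "  <thead>\n",
137 | "    <tr style=\"text-align: right;\">\n",
138 | "      <th></th>\n",
139 | "      <th>Hours</th>\n",
140 | "      <th>Scores</th>\n",
141 | "    </tr>\n",
142 | "  </thead>\n",
143 | "  <tbody>\n",
144 | "    <tr>\n",
145 | "      <th>count</th>\n",
146 | "      <td>25.000000</td>\n",
147 | "      <td>25.000000</td>\n",
148 | "    </tr>\n",
149 | "    <tr>\n",
150 | "      <th>mean</th>\n",
151 | "      <td>5.012000</td>\n",
152 | "      <td>51.480000</td>\n",
153 | "    </tr>\n",
154 | "    <tr>\n",
155 | "      <th>std</th>\n",
156 | "      <td>2.525094</td>\n",
157 | "      <td>25.286887</td>\n",
158 | "    </tr>\n",
159 | "    <tr>\n",
160 | "      <th>min</th>\n",
161 | "      <td>1.100000</td>\n",
162 | "      <td>17.000000</td>\n",
163 | "    </tr>\n",
164 | "    <tr>\n",
165 | "      <th>25%</th>\n",
166 | "      <td>2.700000</td>\n",
167 | "      <td>30.000000</td>\n",
168 | "    </tr>\n",
169 | "    <tr>\n",
170 | "      <th>50%</th>\n",
171 | "      <td>4.800000</td>\n",
172 | "      <td>47.000000</td>\n",
173 | "    </tr>\n",
174 | "    <tr>\n",
175 | "      <th>75%</th>\n",
176 | "      <td>7.400000</td>\n",
177 | "      <td>75.000000</td>\n",
178 | "    </tr>\n",
179 | "    <tr>\n",
180 | "      <th>max</th>\n",
181 | "      <td>9.200000</td>\n",
182 | "      <td>95.000000</td>\n",
183 | "    </tr>\n",
184 | "  </tbody>\n",
185 | "</table>\n",
186 | "</div>"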
187 | ],
188 | "text/plain": [
189 | " Hours Scores\n",
190 | "count 25.000000 25.000000\n",
191 | "mean 5.012000 51.480000\n",
192 | "std 2.525094 25.286887\n",
193 | "min 1.100000 17.000000\n",
194 | "25% 2.700000 30.000000\n",
195 | "50% 4.800000 47.000000\n",
196 | "75% 7.400000 75.000000\n",
197 | "max 9.200000 95.000000"
198 | ]
199 | },
200 | "execution_count": 8,
201 | "metadata": {},
202 | "output_type": "execute_result"
203 | }
204 | ],
205 | "source": [
206 | "dataset.describe() "
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": null,
212 | "metadata": {},
213 | "outputs": [],
214 | "source": []
215 | }
216 | ],
217 | "metadata": {
218 | "interpreter": {
219 | "hash": "2be5faf79681da6f2a61fdfdd5405d65d042280f7fba6178067603e3a2925119"
220 | },
221 | "kernelspec": {
222 | "display_name": "Python 3 (ipykernel)",
223 | "language": "python",
224 | "name": "python3"
225 | },
226 | "language_info": {
227 | "codemirror_mode": {
228 | "name": "ipython",
229 | "version": 3
230 | },
231 | "file_extension": ".py",
232 | "mimetype": "text/x-python",
233 | "name": "python",
234 | "nbconvert_exporter": "python",
235 | "pygments_lexer": "ipython3",
236 | "version": "3.9.7"
237 | }
238 | },
239 | "nbformat": 4,
240 | "nbformat_minor": 2
241 | }
242 |
--------------------------------------------------------------------------------
/linear.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "%matplotlib inline"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 6,
18 | "metadata": {},
19 | "outputs": [
20 | {
21 | "data": {
22 | "text/plain": [
23 | "(25, 2)"
24 | ]
25 | },
26 | "execution_count": 6,
27 | "metadata": {},
28 | "output_type": "execute_result"
29 | }
30 | ],
31 | "source": [
32 | "dataset = pd.read_csv('student_scores.csv')\n",
33 | "dataset.shape"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 7,
39 | "metadata": {},
40 | "outputs": [
41 | {
42 | "data": {
43 | "text/html": [
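44 | "<div>\n",
45 | "<style scoped>\n",
46 | "    .dataframe tbody tr th:only-of-type {\n",
47 | "        vertical-align: middle;\n",
48 | "    }\n",
49 | "\n",
50 | "    .dataframe tbody tr th {\n",
51 | "        vertical-align: top;\n",
52 | "    }\n",
53 | "\n",
54 | "    .dataframe thead th {\n",
55 | "        text-align: right;\n",
56 | "    }\n",
57 | "</style>\n",
58 | "<table border=\"1\" class=\"dataframe\">\n",
59 | "  <thead>\n",
60 | "    <tr style=\"text-align: right;\">\n",
61 | "      <th></th>\n",
62 | "      <th>Hours</th>\n",
63 | "      <th>Scores</th>\n",
64 | "    </tr>\n",
65 | "  </thead>\n",
66 | "  <tbody>\n",
67 | "    <tr>\n",
68 | "      <th>0</th>\n",
69 | "      <td>2.5</td>\n",
70 | "      <td>21</td>\n",
71 | "    </tr>\n",
72 | "    <tr>\n",
73 | "      <th>1</th>\n",
74 | "      <td>5.1</td>\n",
75 | "      <td>47</td>\n",
76 | "    </tr>\n",
77 | "    <tr>\n",
78 | "      <th>2</th>\n",
79 | "      <td>3.2</td>\n",
80 | "      <td>27</td>\n",
81 | "    </tr>\n",
82 | "    <tr>\n",
83 | "      <th>3</th>\n",
84 | "      <td>8.5</td>\n",
85 | "      <td>75</td>\n",
86 | "    </tr>\n",
87 | "    <tr>\n",
88 | "      <th>4</th>\n",
89 | "      <td>3.5</td>\n",
90 | "      <td>30</td>\n",
91 | "    </tr>\n",
92 | "  </tbody>\n",
93 | "</table>\n",
94 | "</div>"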
95 | ],
96 | "text/plain": [
97 | " Hours Scores\n",
98 | "0 2.5 21\n",
99 | "1 5.1 47\n",
100 | "2 3.2 27\n",
101 | "3 8.5 75\n",
102 | "4 3.5 30"
103 | ]
104 | },
105 | "execution_count": 7,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "dataset.head()"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 8,
117 | "metadata": {},
118 | "outputs": [
119 | {
120 | "data": {
121 | "text/html": [
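122 | "<div>\n",
123 | "<style scoped>\n",
124 | "    .dataframe tbody tr th:only-of-type {\n",
125 | "        vertical-align: middle;\n",
126 | "    }\n",
127 | "\n",
128 | "    .dataframe tbody tr th {\n",
129 | "        vertical-align: top;\n",
130 | "    }\n",
131 | "\n",
132 | "    .dataframe thead th {\n",
133 | "        text-align: right;\n",
134 | "    }\n",
135 | "</style>\n",
136 | "<table border=\"1\" class=\"dataframe\">\n",
137 | "  <thead>\n",
138 | "    <tr style=\"text-align: right;\">\n",
139 | "      <th></th>\n",
140 | "      <th>Hours</th>\n",
141 | "      <th>Scores</th>\n",
142 | "    </tr>\n",
143 | "  </thead>\n",
144 | "  <tbody>\n",
145 | "    <tr>\n",
146 | "      <th>count</th>\n",
147 | "      <td>25.000000</td>\n",
148 | "      <td>25.000000</td>\n",
149 | "    </tr>\n",
150 | "    <tr>\n",
151 | "      <th>mean</th>\n",
152 | "      <td>5.012000</td>\n",
153 | "      <td>51.480000</td>\n",
154 | "    </tr>\n",
155 | "    <tr>\n",
156 | "      <th>std</th>\n",
157 | "      <td>2.525094</td>\n",
158 | "      <td>25.286887</td>\n",
159 | "    </tr>\n",
160 | "    <tr>\n",
161 | "      <th>min</th>\n",
162 | "      <td>1.100000</td>\n",
163 | "      <td>17.000000</td>\n",
164 | "    </tr>\n",
165 | "    <tr>\n",
166 | "      <th>25%</th>\n",
167 | "      <td>2.700000</td>\n",
168 | "      <td>30.000000</td>\n",
169 | "    </tr>\n",
170 | "    <tr>\n",
171 | "      <th>50%</th>\n",
172 | "      <td>4.800000</td>\n",
173 | "      <td>47.000000</td>\n",
174 | "    </tr>\n",
175 | "    <tr>\n",
176 | "      <th>75%</th>\n",
177 | "      <td>7.400000</td>\n",
178 | "      <td>75.000000</td>\n",
179 | "    </tr>\n",
180 | "    <tr>\n",
181 | "      <th>max</th>\n",
182 | "      <td>9.200000</td>\n",
183 | "      <td>95.000000</td>\n",
184 | "    </tr>\n",
185 | "  </tbody>\n",
186 | "</table>\n",
187 | "</div>"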
188 | ],
189 | "text/plain": [
190 | " Hours Scores\n",
191 | "count 25.000000 25.000000\n",
192 | "mean 5.012000 51.480000\n",
193 | "std 2.525094 25.286887\n",
194 | "min 1.100000 17.000000\n",
195 | "25% 2.700000 30.000000\n",
196 | "50% 4.800000 47.000000\n",
197 | "75% 7.400000 75.000000\n",
198 | "max 9.200000 95.000000"
199 | ]
200 | },
201 | "execution_count": 8,
202 | "metadata": {},
203 | "output_type": "execute_result"
204 | }
205 | ],
206 | "source": [
207 | "dataset.describe() "
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 9,
213 | "metadata": {},
214 | "outputs": [
215 | {
216 | "data": {
217 | "image/png": "\n",
218 | "text/plain": [
219 | ""
220 | ]
221 | },
222 | "metadata": {
223 | "needs_background": "light"
224 | },
225 | "output_type": "display_data"
226 | }
227 | ],
228 | "source": [
229 | "dataset.plot(x='Hours', y='Scores', style='o')\n",
230 | "plt.title('Hours vs Percentage')\n",
231 | "plt.xlabel('Hours Studied')\n",
232 | "plt.ylabel('Percentage Score')\n",
233 | "plt.show()"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 10,
239 | "metadata": {},
240 | "outputs": [],
241 | "source": [
242 | "X = dataset.iloc[:, :-1].values\n",
243 | "y = dataset.iloc[:, 1].values"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 12,
249 | "metadata": {},
250 | "outputs": [
251 | {
252 | "data": {
253 | "text/plain": [
254 | "LinearRegression()"
255 | ]
256 | },
257 | "execution_count": 12,
258 | "metadata": {},
259 | "output_type": "execute_result"
260 | }
261 | ],
262 | "source": [
263 | "# Training the LinearRegression Algorithm\n",
264 | "from sklearn.model_selection import train_test_split\n",
265 | "from sklearn.linear_model import LinearRegression\n",
266 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
267 | "regressor = LinearRegression()\n",
268 | "regressor.fit(X_train, y_train)"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 15,
274 | "metadata": {},
275 | "outputs": [
276 | {
277 | "name": "stdout",
278 | "output_type": "stream",
279 | "text": [
280 | "Intercept: 2.018160041434662\n",
281 | "Slope: [9.91065648]\n"
282 | ]
283 | }
284 | ],
285 | "source": [
286 | "print('Intercept: {}'.format(regressor.intercept_))\n",
287 | "print('Slope: {}'.format(regressor.coef_))"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 19,
293 | "metadata": {},
294 | "outputs": [
295 | {
296 | "data": {
297 | "text/html": [
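298 | "<div>\n",
299 | "<style scoped>\n",
300 | "    .dataframe tbody tr th:only-of-type {\n",
301 | "        vertical-align: middle;\n",
302 | "    }\n",
303 | "\n",
304 | "    .dataframe tbody tr th {\n",
305 | "        vertical-align: top;\n",
306 | "    }\n",
307 | "\n",
308 | "    .dataframe thead th {\n",
309 | "        text-align: right;\n",
310 | "    }\n",
311 | "</style>\n",
312 | "<table border=\"1\" class=\"dataframe\">\n",
313 | "  <thead>\n",
314 | "    <tr style=\"text-align: right;\">\n",
315 | "      <th></th>\n",
316 | "      <th>Actual</th>\n",
317 | "      <th>Predicted</th>\n",
318 | "    </tr>\n",
319 | "  </thead>\n",
320 | "  <tbody>\n",
321 | "    <tr>\n",
322 | "      <th>0</th>\n",
323 | "      <td>20</td>\n",
324 | "      <td>16.884145</td>\n",
325 | "    </tr>\n",
326 | "    <tr>\n",
327 | "      <th>1</th>\n",
328 | "      <td>27</td>\n",
329 | "      <td>33.732261</td>\n",
330 | "    </tr>\n",
331 | "    <tr>\n",
332 | "      <th>2</th>\n",
333 | "      <td>69</td>\n",
334 | "      <td>75.357018</td>\n",
335 | "    </tr>\n",
336 | "    <tr>\n",
337 | "      <th>3</th>\n",
338 | "      <td>30</td>\n",
339 | "      <td>26.794801</td>\n",
340 | "    </tr>\n",
341 | "    <tr>\n",
342 | "      <th>4</th>\n",
343 | "      <td>62</td>\n",
344 | "      <td>60.491033</td>\n",
345 | "    </tr>\n",
346 | "  </tbody>\n",
347 | "</table>\n",
348 | "</div>"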
349 | ],
350 | "text/plain": [
351 | " Actual Predicted\n",
352 | "0 20 16.884145\n",
353 | "1 27 33.732261\n",
354 | "2 69 75.357018\n",
355 | "3 30 26.794801\n",
356 | "4 62 60.491033"
357 | ]
358 | },
359 | "execution_count": 19,
360 | "metadata": {},
361 | "output_type": "execute_result"
362 | }
363 | ],
364 | "source": [
365 | "# Making Predictions\n",
366 | "y_pred = regressor.predict(X_test)\n",
367 | "df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})\n",
368 | "df"
369 | ]
370 | },
371 | {
372 | "cell_type": "code",
373 | "execution_count": 20,
374 | "metadata": {},
375 | "outputs": [
376 | {
377 | "name": "stdout",
378 | "output_type": "stream",
379 | "text": [
380 | "Mean Absolute Error: 4.183859899002982\n",
381 | "Mean Squared Error: 21.598769307217456\n",
382 | "Root Mean Squared Error: 4.647447612100373\n"
383 | ]
384 | }
385 | ],
386 | "source": [
387 | "# Evaluating the Errors\n",
388 | "from sklearn import metrics\n",
389 | "print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))\n",
390 | "print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))\n",
391 | "print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))"
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": null,
397 | "metadata": {},
398 | "outputs": [],
399 | "source": []
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": null,
404 | "metadata": {},
405 | "outputs": [],
406 | "source": []
407 | }
408 | ],
409 | "metadata": {
410 | "interpreter": {
411 | "hash": "2be5faf79681da6f2a61fdfdd5405d65d042280f7fba6178067603e3a2925119"
412 | },
413 | "kernelspec": {
414 | "display_name": "Python 3 (ipykernel)",
415 | "language": "python",
416 | "name": "python3"
417 | },
418 | "language_info": {
419 | "codemirror_mode": {
420 | "name": "ipython",
421 | "version": 3
422 | },
423 | "file_extension": ".py",
424 | "mimetype": "text/x-python",
425 | "name": "python",
426 | "nbconvert_exporter": "python",
427 | "pygments_lexer": "ipython3",
428 | "version": "3.9.7"
429 | }
430 | },
431 | "nbformat": 4,
432 | "nbformat_minor": 2
433 | }
434 |
--------------------------------------------------------------------------------