├── 0. datasets link.txt
├── 7_1_4_Building_Linear_Regression_from_scratch_in_Python.ipynb
├── 7_1_5_Implementing_Linear_Regression_from_scratch_in_Python.ipynb
├── 7_1_5_Lin_Reg_implementation.ipynb
├── 7_2_5_Building_Logistic_Regression_from_scratch_in_Python.ipynb
├── 7_2_6_Implementing_Logistic_Regression_from_scratch_in_Python.ipynb
├── 7_2_6_Logistic_Regression_Implementation.ipynb
├── 7_3_6_Building_Support_Vector_Machine_Classifier_from_Scratch_in_Python.ipynb
├── 7_3_7_Implementing_SVM_Classifier_from_Scratch_in_Python.ipynb
├── 7_3_7_Implementing_SVM_from_Scratch.ipynb
├── 7_4_4_Building_Lasso_Regression_from_Scratch_in_Python.ipynb
└── 7_4_5_Implementing_Lasso_Regression_from_Scratch.ipynb
/0. datasets link.txt:
--------------------------------------------------------------------------------
1 | All Datasets link: https://drive.google.com/drive/folders/1BJLh_8Kx88V6ItrdLA5CQ63RIFcjKkFV?usp=sharing
2 |
--------------------------------------------------------------------------------
/7_1_4_Building_Linear_Regression_from_scratch_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "GmDgN3q0rw6y"
22 | },
23 | "source": [
24 | "Linear Regression:\n",
25 | "\n",
26 | "**Y = wX + b**\n",
27 | "\n",
28 | "Y --> Dependent Variable\n",
29 | "\n",
30 | "X --> Independent Variable\n",
31 | "\n",
32 | "w --> weight\n",
33 | "\n",
34 | "b --> bias"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {
40 | "id": "X8OJXGuPvDt2"
41 | },
42 | "source": [
43 | "**Gradient Descent:**\n",
44 | "\n",
45 | "Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n",
46 | "\n",
47 | "w = w - α*dw\n",
48 | "\n",
49 | "b = b - α*db"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {
55 | "id": "WSAfYP7WmECB"
56 | },
57 | "source": [
58 | "**Learning Rate:**\n",
59 | "\n",
60 | "Learning rate is a tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function."
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {
66 | "id": "rkCM1toLm7oz"
67 | },
68 | "source": [
69 | ""
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {
75 | "id": "eSdmQl4Sm_ft"
76 | },
77 | "source": [
78 | ""
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "metadata": {
84 | "id": "cNxq7tuqllsx"
85 | },
86 | "source": [
87 | "# importing numpy library\n",
88 | "import numpy as np"
89 | ],
90 | "execution_count": null,
91 | "outputs": []
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {
96 | "id": "VrbQ5F8etU0G"
97 | },
98 | "source": [
99 | "**Linear Regression**"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "metadata": {
105 | "id": "d4zf9fMJtTeW"
106 | },
107 | "source": [
108 | "class Linear_Regression():\n",
109 | "    \"\"\"Linear regression (Y = wX + b) trained with batch gradient descent.\"\"\"\n",
110 | "\n",
111 | "    def __init__(self, learning_rate, no_of_iterations):\n",
112 | "        \"\"\"Store the step size and the number of gradient descent steps.\"\"\"\n",
113 | "        self.learning_rate = learning_rate\n",
114 | "        self.no_of_iterations = no_of_iterations\n",
115 | "\n",
116 | "    def fit(self, X, Y):\n",
117 | "        \"\"\"Learn w and b from features X of shape (m, n) and m targets Y.\n",
118 | "\n",
119 | "        Raises ValueError if X is not 2-D.\n",
120 | "        \"\"\"\n",
121 | "        # coerce to float arrays so plain lists / DataFrames are accepted too\n",
122 | "        X = np.asarray(X, dtype=float)\n",
123 | "        Y = np.asarray(Y, dtype=float)\n",
124 | "        if X.ndim != 2:\n",
125 | "            raise ValueError('X must be 2-D: (examples, features)')\n",
126 | "\n",
127 | "        # number of training examples (rows) & number of features (columns)\n",
128 | "        self.m, self.n = X.shape\n",
129 | "\n",
130 | "        # initiating the weight and bias\n",
131 | "        self.w = np.zeros(self.n)\n",
132 | "        self.b = 0\n",
133 | "        self.X = X\n",
134 | "        self.Y = Y\n",
135 | "\n",
136 | "        # implementing Gradient Descent: one full-batch step per iteration\n",
137 | "        for _ in range(self.no_of_iterations):\n",
138 | "            self.update_weights()\n",
139 | "\n",
140 | "    def update_weights(self):\n",
141 | "        \"\"\"Take one gradient descent step on the mean squared error.\"\"\"\n",
142 | "        Y_prediction = self.predict(self.X)\n",
143 | "        residual = self.Y - Y_prediction\n",
144 | "\n",
145 | "        # gradients of mean((Y - Y_prediction)**2) with respect to w and b\n",
146 | "        dw = -2 * self.X.T.dot(residual) / self.m\n",
147 | "        db = -2 * np.sum(residual) / self.m\n",
148 | "\n",
149 | "        # updating the weights: w = w - α*dw, b = b - α*db\n",
150 | "        self.w = self.w - self.learning_rate * dw\n",
151 | "        self.b = self.b - self.learning_rate * db\n",
152 | "\n",
153 | "    def predict(self, X):\n",
154 | "        \"\"\"Return the predictions X.dot(w) + b for feature matrix X.\"\"\"\n",
155 | "        return X.dot(self.w) + self.b\n"
156 | ],
157 | "execution_count": null,
158 | "outputs": []
159 | }
160 | ]
161 | }
--------------------------------------------------------------------------------
/7_1_5_Implementing_Linear_Regression_from_scratch_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "GmDgN3q0rw6y"
21 | },
22 | "source": [
23 | "Linear Regression:\n",
24 | "\n",
25 | "**Y = wX + b**\n",
26 | "\n",
27 | "Y --> Dependent Variable\n",
28 | "\n",
29 | "X --> Independent Variable\n",
30 | "\n",
31 | "w --> weight\n",
32 | "\n",
33 | "b --> bias"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {
39 | "id": "X8OJXGuPvDt2"
40 | },
41 | "source": [
42 | "**Gradient Descent:**\n",
43 | "\n",
44 | "Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n",
45 | "\n",
46 | "w = w - α*dw\n",
47 | "\n",
48 | "b = b - α*db"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {
54 | "id": "m4IWNi9WwNI5"
55 | },
56 | "source": [
57 | "Importing the Dependencies"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "metadata": {
63 | "id": "WtObJGwFnINT"
64 | },
65 | "source": [
66 | "# Importing numpy library\n",
67 | "import numpy as np"
68 | ],
69 | "execution_count": null,
70 | "outputs": []
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {
75 | "id": "POvc3KnRNboc"
76 | },
77 | "source": [
78 | "**Linear Regression**"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "metadata": {
84 | "id": "BuflF8sGNQXF"
85 | },
86 | "source": [
87 | "class Linear_Regression():\n",
88 | "    \"\"\"Linear regression (Y = wX + b) fitted by batch gradient descent.\n",
89 | "\n",
90 | "    learning_rate    -- step size α applied in every update\n",
91 | "    no_of_iterations -- number of gradient descent steps run by fit()\n",
92 | "\n",
93 | "    After fit(), w holds one weight per feature and b holds the bias.\n",
94 | "    \"\"\"\n",
95 | "\n",
96 | "    def __init__(self, learning_rate, no_of_iterations):\n",
97 | "        self.learning_rate = learning_rate\n",
98 | "        self.no_of_iterations = no_of_iterations\n",
99 | "\n",
100 | "    def fit(self, X, Y):\n",
101 | "        \"\"\"Train the model on features X of shape (m, n) and m targets Y.\n",
102 | "\n",
103 | "        Raises ValueError if X is not 2-D.\n",
104 | "        \"\"\"\n",
105 | "        # coerce to float arrays so plain lists / DataFrames are accepted too\n",
106 | "        X = np.asarray(X, dtype=float)\n",
107 | "        Y = np.asarray(Y, dtype=float)\n",
108 | "        if X.ndim != 2:\n",
109 | "            raise ValueError('X must be 2-D: (examples, features)')\n",
110 | "\n",
111 | "        # no_of_training_examples, no_of_features\n",
112 | "        self.m, self.n = X.shape\n",
113 | "\n",
114 | "        # initiating the weight and bias\n",
115 | "        self.w = np.zeros(self.n)\n",
116 | "        self.b = 0\n",
117 | "        self.X = X\n",
118 | "        self.Y = Y\n",
119 | "\n",
120 | "        # implementing Gradient Descent for Optimization\n",
121 | "        for _ in range(self.no_of_iterations):\n",
122 | "            self.update_weights()\n",
123 | "\n",
124 | "    def update_weights(self):\n",
125 | "        \"\"\"Take one gradient descent step.\n",
126 | "\n",
127 | "        Moves w and b against the gradient of the mean squared error.\n",
128 | "        \"\"\"\n",
129 | "        Y_prediction = self.predict(self.X)\n",
130 | "        residual = self.Y - Y_prediction\n",
131 | "\n",
132 | "        # gradients of mean((Y - Y_prediction)**2) with respect to w and b\n",
133 | "        dw = -2 * self.X.T.dot(residual) / self.m\n",
134 | "        db = -2 * np.sum(residual) / self.m\n",
135 | "\n",
136 | "        # updating the weights: w = w - α*dw, b = b - α*db\n",
137 | "        self.w = self.w - self.learning_rate * dw\n",
138 | "        self.b = self.b - self.learning_rate * db\n",
139 | "\n",
140 | "    def predict(self, X):\n",
141 | "        \"\"\"Return the predictions X.dot(w) + b.\n",
142 | "\n",
143 | "        X must provide .dot (e.g. a 2-D NumPy array with one column per weight).\n",
144 | "        \"\"\"\n",
145 | "        return X.dot(self.w) + self.b\n"
146 | ],
147 | "execution_count": null,
148 | "outputs": []
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {
153 | "id": "099oR2Ip8qgA"
154 | },
155 | "source": [
156 | "Using Linear Regression model for Prediction"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "metadata": {
162 | "id": "JWDqO3zw8t6P"
163 | },
164 | "source": [
165 | "# importing the dependencies\n",
166 | "import pandas as pd\n",
167 | "from sklearn.model_selection import train_test_split\n",
168 | "import matplotlib.pyplot as plt"
169 | ],
170 | "execution_count": null,
171 | "outputs": []
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {
176 | "id": "iPX7bNko_QJO"
177 | },
178 | "source": [
179 | "Data Pre-Processing"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "metadata": {
185 | "id": "_xcrhXnm_K-m"
186 | },
187 | "source": [
188 | "# loading the data from csv file to a pandas dataframe\n",
189 | "\n",
190 | "salary_data = pd.read_csv('/content/salary_data.csv')"
191 | ],
192 | "execution_count": null,
193 | "outputs": []
194 | },
195 | {
196 | "cell_type": "code",
197 | "metadata": {
198 | "colab": {
199 | "base_uri": "https://localhost:8080/",
200 | "height": 206
201 | },
202 | "id": "gkF0Gvxm_sU6",
203 | "outputId": "d105a461-612e-4852-b8f2-f31d0ba79fb0"
204 | },
205 | "source": [
206 | "# printing the first 5 rows of the dataframe\n",
207 | "salary_data.head()"
208 | ],
209 | "execution_count": null,
210 | "outputs": [
211 | {
212 | "output_type": "execute_result",
213 | "data": {
214 | "text/html": [
215 | "<div>\n",
216 | "\n",
229 | "<table border=\"1\" class=\"dataframe\">\n",
230 | "  <thead>\n",
231 | "    <tr style=\"text-align: right;\">\n",
232 | "      <th></th>\n",
233 | "      <th>YearsExperience</th>\n",
234 | "      <th>Salary</th>\n",
235 | "    </tr>\n",
236 | "  </thead>\n",
237 | "  <tbody>\n",
238 | "    <tr>\n",
239 | "      <th>0</th>\n",
240 | "      <td>1.1</td>\n",
241 | "      <td>39343</td>\n",
242 | "    </tr>\n",
243 | "    <tr>\n",
244 | "      <th>1</th>\n",
245 | "      <td>1.3</td>\n",
246 | "      <td>46205</td>\n",
247 | "    </tr>\n",
248 | "    <tr>\n",
249 | "      <th>2</th>\n",
250 | "      <td>1.5</td>\n",
251 | "      <td>37731</td>\n",
252 | "    </tr>\n",
253 | "    <tr>\n",
254 | "      <th>3</th>\n",
255 | "      <td>2.0</td>\n",
256 | "      <td>43525</td>\n",
257 | "    </tr>\n",
258 | "    <tr>\n",
259 | "      <th>4</th>\n",
260 | "      <td>2.2</td>\n",
261 | "      <td>39891</td>\n",
262 | "    </tr>\n",
263 | "  </tbody>\n",
264 | "</table>\n",
265 | "</div>"
266 | ],
267 | "text/plain": [
268 | " YearsExperience Salary\n",
269 | "0 1.1 39343\n",
270 | "1 1.3 46205\n",
271 | "2 1.5 37731\n",
272 | "3 2.0 43525\n",
273 | "4 2.2 39891"
274 | ]
275 | },
276 | "metadata": {},
277 | "execution_count": 26
278 | }
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "metadata": {
284 | "colab": {
285 | "base_uri": "https://localhost:8080/",
286 | "height": 206
287 | },
288 | "id": "TZnQ8eyk_0yJ",
289 | "outputId": "2374a90b-6245-4a2f-8aa7-2d29d783c1e9"
290 | },
291 | "source": [
292 | "# last 5 rows of the dataframe\n",
293 | "salary_data.tail()"
294 | ],
295 | "execution_count": null,
296 | "outputs": [
297 | {
298 | "output_type": "execute_result",
299 | "data": {
300 | "text/html": [
301 | "<div>\n",
302 | "\n",
315 | "<table border=\"1\" class=\"dataframe\">\n",
316 | "  <thead>\n",
317 | "    <tr style=\"text-align: right;\">\n",
318 | "      <th></th>\n",
319 | "      <th>YearsExperience</th>\n",
320 | "      <th>Salary</th>\n",
321 | "    </tr>\n",
322 | "  </thead>\n",
323 | "  <tbody>\n",
324 | "    <tr>\n",
325 | "      <th>25</th>\n",
326 | "      <td>9.0</td>\n",
327 | "      <td>105582</td>\n",
328 | "    </tr>\n",
329 | "    <tr>\n",
330 | "      <th>26</th>\n",
331 | "      <td>9.5</td>\n",
332 | "      <td>116969</td>\n",
333 | "    </tr>\n",
334 | "    <tr>\n",
335 | "      <th>27</th>\n",
336 | "      <td>9.6</td>\n",
337 | "      <td>112635</td>\n",
338 | "    </tr>\n",
339 | "    <tr>\n",
340 | "      <th>28</th>\n",
341 | "      <td>10.3</td>\n",
342 | "      <td>122391</td>\n",
343 | "    </tr>\n",
344 | "    <tr>\n",
345 | "      <th>29</th>\n",
346 | "      <td>10.5</td>\n",
347 | "      <td>121872</td>\n",
348 | "    </tr>\n",
349 | "  </tbody>\n",
350 | "</table>\n",
351 | "</div>"
352 | ],
353 | "text/plain": [
354 | " YearsExperience Salary\n",
355 | "25 9.0 105582\n",
356 | "26 9.5 116969\n",
357 | "27 9.6 112635\n",
358 | "28 10.3 122391\n",
359 | "29 10.5 121872"
360 | ]
361 | },
362 | "metadata": {},
363 | "execution_count": 27
364 | }
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "metadata": {
370 | "colab": {
371 | "base_uri": "https://localhost:8080/"
372 | },
373 | "id": "TUfpF8gp_5ly",
374 | "outputId": "6e4fe4e5-f2ee-481f-dd0c-0a234556ad4a"
375 | },
376 | "source": [
377 | "# number of rows & columns in the dataframe\n",
378 | "salary_data.shape"
379 | ],
380 | "execution_count": null,
381 | "outputs": [
382 | {
383 | "output_type": "execute_result",
384 | "data": {
385 | "text/plain": [
386 | "(30, 2)"
387 | ]
388 | },
389 | "metadata": {},
390 | "execution_count": 28
391 | }
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "metadata": {
397 | "colab": {
398 | "base_uri": "https://localhost:8080/"
399 | },
400 | "id": "XvMCoPwIAO1u",
401 | "outputId": "2285d932-f61e-468b-997a-1c32b4659c92"
402 | },
403 | "source": [
404 | "# checking for missing values\n",
405 | "salary_data.isnull().sum()"
406 | ],
407 | "execution_count": null,
408 | "outputs": [
409 | {
410 | "output_type": "execute_result",
411 | "data": {
412 | "text/plain": [
413 | "YearsExperience 0\n",
414 | "Salary 0\n",
415 | "dtype: int64"
416 | ]
417 | },
418 | "metadata": {},
419 | "execution_count": 29
420 | }
421 | ]
422 | },
423 | {
424 | "cell_type": "markdown",
425 | "metadata": {
426 | "id": "ZwI59efnA5a1"
427 | },
428 | "source": [
429 | "Splitting the feature & target"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "metadata": {
435 | "id": "bV06CwPpAk-d"
436 | },
437 | "source": [
438 | "X = salary_data.iloc[:,:-1].values  # features: every column except the last, kept 2-D\n",
439 | "Y = salary_data.iloc[:,1].values  # target: the column at position 1 (Salary), 1-D"
440 | ],
441 | "execution_count": null,
442 | "outputs": []
443 | },
444 | {
445 | "cell_type": "code",
446 | "metadata": {
447 | "colab": {
448 | "base_uri": "https://localhost:8080/"
449 | },
450 | "id": "myjQFw1aB2EE",
451 | "outputId": "f8bcbf47-fe6a-4b63-8673-32105ed38948"
452 | },
453 | "source": [
454 | "print(X)"
455 | ],
456 | "execution_count": null,
457 | "outputs": [
458 | {
459 | "output_type": "stream",
460 | "name": "stdout",
461 | "text": [
462 | "[[ 1.1]\n",
463 | " [ 1.3]\n",
464 | " [ 1.5]\n",
465 | " [ 2. ]\n",
466 | " [ 2.2]\n",
467 | " [ 2.9]\n",
468 | " [ 3. ]\n",
469 | " [ 3.2]\n",
470 | " [ 3.2]\n",
471 | " [ 3.7]\n",
472 | " [ 3.9]\n",
473 | " [ 4. ]\n",
474 | " [ 4. ]\n",
475 | " [ 4.1]\n",
476 | " [ 4.5]\n",
477 | " [ 4.9]\n",
478 | " [ 5.1]\n",
479 | " [ 5.3]\n",
480 | " [ 5.9]\n",
481 | " [ 6. ]\n",
482 | " [ 6.8]\n",
483 | " [ 7.1]\n",
484 | " [ 7.9]\n",
485 | " [ 8.2]\n",
486 | " [ 8.7]\n",
487 | " [ 9. ]\n",
488 | " [ 9.5]\n",
489 | " [ 9.6]\n",
490 | " [10.3]\n",
491 | " [10.5]]\n"
492 | ]
493 | }
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "metadata": {
499 | "colab": {
500 | "base_uri": "https://localhost:8080/"
501 | },
502 | "id": "tt4B70LiB34L",
503 | "outputId": "91e8e8b4-ace1-494b-d660-4558ac06c461"
504 | },
505 | "source": [
506 | "print(Y)"
507 | ],
508 | "execution_count": null,
509 | "outputs": [
510 | {
511 | "output_type": "stream",
512 | "name": "stdout",
513 | "text": [
514 | "[ 39343 46205 37731 43525 39891 56642 60150 54445 64445 57189\n",
515 | " 63218 55794 56957 57081 61111 67938 66029 83088 81363 93940\n",
516 | " 91738 98273 101302 113812 109431 105582 116969 112635 122391 121872]\n"
517 | ]
518 | }
519 | ]
520 | },
521 | {
522 | "cell_type": "markdown",
523 | "metadata": {
524 | "id": "JEJ1qsdlCBdu"
525 | },
526 | "source": [
527 | "Splitting the dataset into training & test data"
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "metadata": {
533 | "id": "xAeWEMPjB6Id"
534 | },
535 | "source": [
536 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state = 2)"
537 | ],
538 | "execution_count": null,
539 | "outputs": []
540 | },
541 | {
542 | "cell_type": "markdown",
543 | "metadata": {
544 | "id": "KFIKZOGcDEN5"
545 | },
546 | "source": [
547 | "Training the Linear Regression model"
548 | ]
549 | },
550 | {
551 | "cell_type": "code",
552 | "metadata": {
553 | "id": "4ONMkF5ZC8wv"
554 | },
555 | "source": [
556 | "model = Linear_Regression(learning_rate = 0.02, no_of_iterations=1000)"
557 | ],
558 | "execution_count": null,
559 | "outputs": []
560 | },
561 | {
562 | "cell_type": "code",
563 | "metadata": {
564 | "id": "tCrA-LIlDu1Q"
565 | },
566 | "source": [
567 | "model.fit(X_train, Y_train)"
568 | ],
569 | "execution_count": null,
570 | "outputs": []
571 | },
572 | {
573 | "cell_type": "code",
574 | "metadata": {
575 | "colab": {
576 | "base_uri": "https://localhost:8080/"
577 | },
578 | "id": "RrK2D8R2EJz9",
579 | "outputId": "f14ce5a6-aad8-4a73-b933-3fdb9388a991"
580 | },
581 | "source": [
582 | "# printing the parameter values ( weights & bias)\n",
583 | "\n",
584 | "print('weight = ', model.w[0])\n",
585 | "print('bias = ', model.b)"
586 | ],
587 | "execution_count": null,
588 | "outputs": [
589 | {
590 | "output_type": "stream",
591 | "name": "stdout",
592 | "text": [
593 | "weight = 9514.400999035135\n",
594 | "bias = 23697.406507136307\n"
595 | ]
596 | }
597 | ]
598 | },
599 | {
600 | "cell_type": "markdown",
601 | "metadata": {
602 | "id": "69iNWWvkE9MF"
603 | },
604 | "source": [
605 | "y = 9514(x) + 23697\n",
606 | "\n",
607 | "\n",
608 | "salary = 9514(experience) + 23697"
609 | ]
610 | },
611 | {
612 | "cell_type": "markdown",
613 | "metadata": {
614 | "id": "WddqeITjFy_9"
615 | },
616 | "source": [
617 | "Predict the salary value for test data"
618 | ]
619 | },
620 | {
621 | "cell_type": "code",
622 | "metadata": {
623 | "id": "XWbdku1HE6b8"
624 | },
625 | "source": [
626 | "test_data_prediction = model.predict(X_test)"
627 | ],
628 | "execution_count": null,
629 | "outputs": []
630 | },
631 | {
632 | "cell_type": "code",
633 | "metadata": {
634 | "colab": {
635 | "base_uri": "https://localhost:8080/"
636 | },
637 | "id": "1xkqFPZPGTjm",
638 | "outputId": "c517f060-2dc5-409b-fda6-f904e18a0f1e"
639 | },
640 | "source": [
641 | "print(test_data_prediction)"
642 | ],
643 | "execution_count": null,
644 | "outputs": [
645 | {
646 | "output_type": "stream",
647 | "name": "stdout",
648 | "text": [
649 | "[ 36066.12780588 34163.24760607 66512.21100279 58900.69020357\n",
650 | " 91249.65360029 80783.81250135 101715.49469922 52240.60950424\n",
651 | " 42726.20850521 88395.33330058]\n"
652 | ]
653 | }
654 | ]
655 | },
656 | {
657 | "cell_type": "markdown",
658 | "metadata": {
659 | "id": "LnxLJFR1GZDT"
660 | },
661 | "source": [
662 | "Visualizing the predicted values & actual Values"
663 | ]
664 | },
665 | {
666 | "cell_type": "code",
667 | "metadata": {
668 | "colab": {
669 | "base_uri": "https://localhost:8080/",
670 | "height": 295
671 | },
672 | "id": "jDNBDfASGWCY",
673 | "outputId": "0d5c45ab-fd5c-4a10-aded-e23f967cb9a0"
674 | },
675 | "source": [
676 | "plt.scatter(X_test, Y_test, color = 'red')  # actual salaries of the test examples\n",
677 | "plt.plot(X_test, test_data_prediction, color='blue')  # model predictions for the same X_test\n",
678 | "plt.xlabel(' Work Experience')\n",
679 | "plt.ylabel('Salary')\n",
680 | "plt.title(' Salary vs Experience')\n",
681 | "plt.show()"
682 | ],
683 | "execution_count": null,
684 | "outputs": [
685 | {
686 | "output_type": "display_data",
687 | "data": {
688 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEWCAYAAACqitpwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZyWZdn/8c9XEBUVcSEeA2VcUFMr0wko+2lmKaiJP58sjZTMRNM2nzb7Wfnk0qZl2aLxiCmFC1omrsjjkm2og5qCG+PCJgjK4oIbcPz+OM9p7hlmhhm4577umfm+X6/7Ndd1XNtxU84x53le13kpIjAzMyunjYpOwMzMuh8XFzMzKzsXFzMzKzsXFzMzKzsXFzMzKzsXFzMzKzsXF+tRJIWkXYvOo6uRtKOkVyX1KjoX6xpcXKzLkTRa0sOSXpb0oqS7JO1UdF6dQdI9kt7Iv9gbPjdVOo+ImBsRW0TE6kpf27qm3kUnYNYRudUxETgauAvYAjgE6NRfepJ6R8SqzrxGG74YEZcVdO2iv7t1UW65WFezD/BsRNwZySsR8ceImAsgaZikf0paLmmhpF9J6tPSiSQdLumh3AKaJ+m/S7bV5C60kyTNBe6SdIukLzU7xyOS/m8L575N0hebxf4l6WglF0lanK/9qKS9O/oPIelbku6T1Duvf0HSLEmbluQ/TtLz+d/i6yXHbiTpTElPS3pJ0mRJ27Tx3RtiDdfaStKEfN4Fks5r6DKT9FlJf5N0oaRlkp6VNKrk2ttI+l3Oa5mkP5dsOyK3SpdL+oek93T038Wqg4uLdTUPAnvkX84HSdqi2fbVwBnAdsAHgIOB01o512vACUB/4HDgC5KOarbPgcC7gEOBK4HPNGyQ9F5gEHBLC+e+GjiuZN89gSF530OAA4DdgK2ATwIvtfmtW3YB8CbwHUlDgR8An4mIN0r2OQgYmq/5LUkfzfEvAUfl7/dOYBnw62bnL/3uzV0BrAJ2Bd6Xz//5ku3DgSdJ/zv8BJggSXnb74G+wF7AO4CLACS9D7gcOAXYFvgtMEXSJu3617DqEhH++NOlPsAIYDKwBHiD9Itui1b2/SpwQ8l6ALu2su/PgYvyck3ed+eS7ZuSfgkPzesXAr9p5VxbkorXkLx+PnB5Xv4I8FT+Hhut47veA6wElpd8zi3ZXgMsBR4Hvt0sHsAeJbGfABPy8uPAwSXbtgfeJnWVt/TdG2K9gYGkorZZyfbjgLvz8meB+pJtffOx/5GvswbYuoXveknpd8uxJ4EDi/7/nD8d/7jlYl1OREyPiE9GxADg/5BaAWcBSNpN0s2SFkl6mfTX/HYtnUfScEl3S1oiaQVwagv7ziu57hvAtcBnJG1E+oX6+1ZyfIXUSjk2h44DJuVtdwG/IrUUFksaL6lfG1/5yxHRv+Tz3ZLrPAfcTfrl37zl0SR/YA6plQKpFXVD7n5aTio2q0mFo6VjSw0BNgYWlhz/W1IrpMGikhxX5sUtgB2ApRGxrJXzfq3hnPm8O5TkbF2Ii4t1aRHxAPAnoGHM4hLgCVLroh/w/wC1cvhVwBRgh4jYCri0hX2bTxt+JTCG1N22MiL+2UZ6VwPHSfoAqdVzd0neF0fEfsCepO6xb7T1PVsj6XBS99+dpG6y5nYoWd4ReD4vzwNGNStam0bEgpL9W5syfR6p5bJdybH9ImKvdqQ8D9hGUv9Wtp3fLKe+EXF1O85rVcbFxboUSR+SdLKkd+T1PYAjgel5ly2Bl4FX87YvtHG6LUl/Rb8haRjw6XVdPxeTNcBPaaXVUuJW0l/j5wDXRsSanPP7c6tpY1LX2Rv5nB0iaTvgMtJYx1jg45IOa7bbdyX1lbQXcCKp5QWpkJ4vaUg+1wBJo9tz3YhYCNwB/FRSv3xzwC6SDmznsbcBv5G0taSNJR2QN/8PcGr+t5GkzZVuutiyPXlZdXFxsa5mOamYPCrp
VeB24AbSeALA10lF4hXSL6trWzpJdhpwjqRXgO+RxnHaYyLwbuAPbe0UEW+SWlUfJbWSGvTLuS0jdVW9RMutjga/UtPnXGbk+Hjgxoi4NSJeAk4CLpO0bcmxfwHqSS2bCyPijhz/BanVdkf+/tNJg/DtdQLQB3gsf4/rSeMp7XE8aXznCWAxaVyMiKgDTiZ1GS7LeX+2AzlZFVGEXxZm1hGSTgDGRcSHis6lNZJqgGeBjcPPqFgB3HIx6wBJfUktnvFF52JWzVxczNpJ0qGk259foGk3l5k1424xMzMrO7dczMys7DxxZbbddttFTU1N0WmYmXUpM2bMeDE/0NyEi0tWU1NDXV1d0WmYmXUpkua0FHe3mJmZlZ2Li5mZlZ2Li5mZlZ2Li5mZlZ2Li5mZlZ2Li5lZTzVpEtTUwEYbpZ+TJpXt1L4V2cysJ5o0CcaNg5X5XW5z5qR1gDFjNvj0brmYmfVEZ53VWFgarFyZ4mXg4mJm1hPNnduxeAe5uJiZ9UQ77tixeAe5uJiZ9UTnnw99+zaN9e2b4mXg4mJm1hONGQPjx8OQISCln+PHl2UwH3y3mJlZzzVmTNmKSXNuuZiZWdm5uJiZWdl1WnGRdLmkxZJmlsSOkTRL0hpJtc32/7akeklP5neVN8RH5li9pDNL4jtJui/Hr5XUJ8c3yev1eXtNZ31HMzNrWWe2XK4ARjaLzQSOBu4tDUraEzgW2Csf8xtJvST1An4NjAL2BI7L+wL8GLgoInYFlgEn5fhJwLIcvyjvZ2ZmFdRpxSUi7gWWNos9HhFPtrD7aOCaiHgzIp4F6oFh+VMfEc9ExFvANcBoSQI+Alyfj78SOKrkXFfm5euBg/P+ZmZWIdUy5jIImFeyPj/HWotvCyyPiFXN4k3OlbevyPuvRdI4SXWS6pYsWVKmr2JmZtVSXAoREeMjojYiagcMGFB0OmZm3Ua1FJcFwA4l64NzrLX4S0B/Sb2bxZucK2/fKu9vZmYVUi3FZQpwbL7TaydgKHA/8AAwNN8Z1oc06D8lIgK4G/hEPn4scGPJucbm5U8Ad+X9zcysQjrtCX1JVwMfBraTNB84mzTA/0tgAHCLpIcj4tCImCVpMvAYsAo4PSJW5/N8EZgK9AIuj4hZ+RLfAq6RdB7wEDAhxycAv5dUn693bGd9RzMza5n8R31SW1sbdXV1RadhZtalSJoREbXN49XSLWZmZt2Ii4uZmZWdi4uZmZWdi4uZmZWdi4uZmZWdi4uZmZWdi4uZmZWdi4uZmZWdi4uZmZWdi4uZmZWdi4uZmZWdi4uZmZWdi4uZmZWdi4uZWQ/1+OPwwx/CqlXr3rejOu19LmZmVp1eeglqauDVV9P6ccel9XLqtJaLpMslLZY0syS2jaRpkmbnn1vnuCRdLKle0iOS9i05Zmzef7aksSXx/SQ9mo+5WJLauoaZWU/39ttw0EGw3XaNheXPfy5/YYHO7Ra7AhjZLHYmcGdEDAXuzOsAo0ivNh4KjAMugVQoSG+wHA4MA84uKRaXACeXHDdyHdcwM+uxvvlN6NMH7rknrZ93HkTA6NGdc71OKy4RcS/pNcOlRgNX5uUrgaNK4hMjmQ70l7Q9cCgwLSKWRsQyYBowMm/rFxHTI71Kc2Kzc7V0DTOzHmfSJJDgggvS+lFHpTGWs87q3OtWesxlYEQszMuLgIF5eRAwr2S/+TnWVnx+C/G2rmFm1mPcfz8MH964vuOOMHMmbLllZa5f2IB+RISkKPIaksaRuuHYcccdOzMVM7OKWLAABg9uGnv6adh558rmUelbkV/IXVrkn4tzfAGwQ8l+g3OsrfjgFuJtXWMtETE+ImojonbAgAHr/aXMrIeaNCmNhm+0Ufo5aVJhqbz+Ouy9d9PCcvfdaVyl0oUFKl9cpgANd3yNBW4siZ+Q7xobAazIXVtTgUMkbZ0H8g8BpuZtL0sake8SO6HZuVq6hplZ+UyaBOPGwZw56Tf4nDlpvcIFJgI++1no
2xdmzUqxSy5J8Q9/uKKpNE8sOuUDXA0sBN4mjYmcBGxLuoNrNvC/wDZ5XwG/Bp4GHgVqS87zOaA+f04sidcCM/MxvwKU4y1eY12f/fbbL8zM2m3IkIj0O7zpZ8iQiqVw8cVNL33KKRFr1lTs8hERAdRFC79TG34h93i1tbVRV1dXdBpm1lVstFH6nd6cBGvWdOqlp02DQw5pXK+thb/+FTbdtFMv2yJJMyKitnncT+ibma2PHXdMXWEtxTvJU0/B7rs3jS1cCP/xH512yfXmucXMzNbH+eengY5SffumeJktXw4DBjQtLHV1qeFUjYUFXFzMzNbPmDEwfjwMGZK6woYMSetjxpTtEqtXw2GHwdZbw4svpti116aist9+ZbtMp3BxMTNbX2PGwHPPpTGW554ra2H53vegd2+47ba0/p3vpKLyyU+W7RKdymMuZmZVZPJk+NSnGtdHjYKbboJevYrLaX24uJiZVYEHH2za1bXddjB7NvTvX1xOG8LFxcysQIsWwfbbN409+STstlsx+ZSLx1zMrFhVNIVKJb35Zno+pbSwTJ2axlW6emEBFxczK1KVTKFSSRHwhS+kBx5nzEixX/wixUsfjOzqXFzMrDhnnQUrVzaNrVzZ+S8bKchvf5saaJdemtbHjk03mn35y8Xm1Rk85mJmxZk7t2PxLuovf2k6ieSee8IDD6z9DGZ34uJiZsUpYAqVSnrmGdhll6axefPWft9Kd+RuMTMrTgWnUGmXMt1c8MorqT6WFpbp09O4Sk8oLODiYmZFqsAUKu1WhpsL1qyB//xP6NcvtVAAJk5Mpyt95XBP4Cn3M0+5b9bD1dS03EU3ZEia2mUdfvCDpvchfO1rcOGFZcuuannKfTOztqznzQUnnQSXX964/uEPwx13wMYbly+1rqiQbjFJX5E0U9IsSV/NsW0kTZM0O//cOscl6WJJ9ZIekbRvyXnG5v1nSxpbEt9P0qP5mIvzq5DNzFrX2k0ErcSvuir15DUUlo03TjMX3323CwsUUFwk7Q2cDAwD3gscIWlX4EzgzogYSnpN8Zn5kFHA0PwZB1ySz7MNcDYwPJ/r7IaClPc5ueS4kZ3/zcysS2vnzQVPPJGKSumw0HXXwVtvwbbbViDPLqKIlsu7gPsiYmVErAL+AhwNjAauzPtcCRyVl0cDE/PrmqcD/SVtDxwKTIuIpRGxDJgGjMzb+kXE9Px+54kl5zIza9k6bi547bUUfte7Gg859dQ0WP+JTxSUcxUrYsxlJnC+pG2B14HDgDpgYEQszPssAgbm5UHAvJLj5+dYW/H5LcTXImkcqTXEjt3kvnoz2wBjxqx1p1pEujO51LbbNr68y1pW8ZZLRDwO/Bi4A7gdeBhY3WyfADr9NraIGB8RtRFRO2DAgM6+nJl1MQceuHZhWbXKhaU9ChnQj4gJEbFfRBwALAOeAl7IXVrkn4vz7guAHUoOH5xjbcUHtxA3M2uXCy9MXWD33tsYW7QotWK62ku7ilLU3WLvyD93JI23XAVMARru+BoL3JiXpwAn5LvGRgArcvfZVOAQSVvngfxDgKl528uSRuS7xE4oOZeZWatuuy0VlW98ozF2zz2pqAwc2Oph1oKinnP5Yx5zeRs4PSKWS/oRMFnSScAcoOFN0beSxmXqgZXAiQARsVTSucADeb9zImJpXj4NuALYDLgtf8zMWvT88zCo2cjsD38IZ57Z8v62bn5CP/MT+mY9z+rV0LvZn9ibbAJvvFFMPl2Rn9A3MyvR0qPVa9a0HLeO88SVZtajHHbY2gVk2bI0ruLCUj4uLmbWI1x2WSoet5WMwN53Xyoq/fsXl1d35eJiZt3arFmpqJx8cmPsJz9JRWXYsOLy6u485mJm3dLKlbD55k1j++0Hvm+nMlxczKzbaWnsxDfGVpa7xcys26ipWbuwvPmmC0sRXFzMrMv7zndSUSl9kWR9fSoqffoUl1dP5m4xM+uy7rkHDjqoaezqq+HYYwtJx0q4
uJhZl7NkCbzjHU1jn/40TJpUTD62NhcXM+sy1qxpeVZij6lUHxcXM+sSPF1L1+IBfTOrascdt3YBWbLE07VUOxcXM6tKV12Visc11zTGGt6tst12haVl7eTiYmZVZfbsVFRKX2X/3e+monLggcXlZR3jMRczqwpvvgmbbto0tssu6XkV63qKes3xGZJmSZop6WpJm0raSdJ9kuolXSupT953k7xen7fXlJzn2zn+pKRDS+Ijc6xekt8lZ1blBg1au7BEuLB0ZRUvLpIGAV8GaiNib6AXcCzwY+CiiNgVWAaclA85CViW4xfl/ZC0Zz5uL2Ak8BtJvST1An4NjAL2BI7L+5pZlTnvvNQF9vzzjbGVK31rcXdQ1JhLb2AzSb2BvsBC4CPA9Xn7lcBReXl0XidvP1iScvyaiHgzIp4F6oFh+VMfEc9ExFvANXlfM6sSt9+eisp3v9sYe/75VFQ226y4vKx8Kl5cImIBcCEwl1RUVgAzgOURsSrvNh8YlJcHAfPysavy/tuWxpsd01p8LZLGSaqTVLdkyZIN/3Jm1qZnn01FZdSoxtjf/paKyvbbF5eXlV8R3WJbk1oSOwHvBDYndWtVXESMj4jaiKgdMGBAESmY9QgrV6aisvPOjbFf/jIVlf33Ly4v6zztKi55HKNcPgo8GxFLIuJt4E/A/kD/3E0GMBhYkJcXADvkPHoDWwEvlcabHdNa3MwqLCJNg1/60q5jjknxL36xsLSsAtrbcpkt6YIyDYzPBUZI6pvHTg4GHgPuBj6R9xkL3JiXp+R18va7IiJy/Nh8N9lOwFDgfuABYGi++6wPadB/ShnyNrMOOO002Gijxmnw+/aF1ath8uRi87LKaO9zLu8l/ZK+TNJGwOWkwfSXO3rBiLhP0vXAg8Aq4CFgPHALcI2k83JsQj5kAvB7SfXA0pwHETFL0mRSYVoFnB4RqwEkfRGYSroT7fKImNXRPM1s/fzhD3D88U1jy5fDVlsVk48VQ9HBe/4kHQhcBfQn3b11bkR0+bvRa2tro84v1zZbbw8/DO97X9PYzJmw117F5GOVIWlGRNQ2j7d7zEXSkZJuAH4O/BTYGbgJuLWsmZpZl7J0aRqsLy0s116bxlVcWHqu9naLzSaNiVwQEf8oiV8v6YDyp2Vm1W71aujd7DfIGWfAz35WTD5WXdZZXPKdYldExDktbY+IL5c9KzOrakccAbfc0ri+556pC8xT4FuDdXaL5UHyIyqQi5lVuZ/9LBWQ0sLy+uswa5YLizXV3m6xv0v6FXAt8FpDMCIe7JSszKyq3HMPHHRQ09icObDjjoWkY11Ae4vLPvlnaddYkOYDM7Nuat68tQvInXfCR/xfvq1Du4pLRBy07r3MrLt44421J5D8yU/gG98oJh/retr9sjBJh5Omt//3WxdaG+Q3s67r3e9Og/MNRo6E224rLh/rmtr7nMulwKeALwECjgGGdGJeZlZh3/hGGpQvLSyrVrmw2Ppp79xiH4yIE0gv7fo+8AFgt85Ly8wq5frrU1G58MLG2Isvpocge5VzylrrUdrbLfZ6/rlS0jtJsxL77QtmXdhjj639BP2MGbDvvsXkY91Le1suN0vqD1xAmnDyOeDqzkrKzDrPihWppVJaWK64IrVUXFisXNp7t9i5efGPkm4GNo2IFZ2XlpmV25o1aWbiV19tjJ18MowfX1xO1n21WVwkHd3GNiLiT+VPyczK7bjj4JprGteHDGl85bBZZ1hXy+XjbWwL0lskzaxK/eY3cPrpTWOvvtr0zZBmnaHN4hIRJ1YqETMrn3/8Y+1309fXwy67FJOP9TztHdBH0uGSvinpew2f9bmgpN0lPVzyeVnSVyVtI2mapNn559Z5f0m6WFK9pEck7VtyrrF5/9mSxpbE95P0aD7m4vw6ZbNub9Gi1NVVWlhuuSUN1ruwWCVV/CHKiHgyIvaJiH2A/YCVwA3AmcCdETEUuDOvA4wChubPOOCSnNM2wNnAcGAY
cHZDQcr7nFxy3Mj1ydWsq3j77VRUti95QOC//zsVlcMOKywt68GKfojyYODpiJgDjAauzPErgaPy8mhgYiTTgf6StgcOBaZFxNKIWAZMA0bmbf0iYnqkdzhPLDmXWbez//7Qp0/j+oc+lIrK2WcXl5NZe4tL84coV1GehyiPpfF5mYERsTAvLwIG5uVBwLySY+bnWFvx+S3E1yJpnKQ6SXVLlizZkO9hVnHf/W5qrfyj5N2wb70Ff/1rcTmZNejoQ5Q/AWYAz7KBD1FK6gMcCVzXfFtuccSGnL89ImJ8RNRGRO2AAQM6+3JmZXHzzamonHdeY2zRotRa2Xjj4vIyK9VmcZH0fkn/ERHnRsRyYAvgUVJBuGgDrz0KeDAiXsjrL+QuLfLPxTm+ANih5LjBOdZWfHALcbMurb4+FZWPlzwg8M9/pqIycGDrx5kVYV0tl98CbwFIOgD4UY6tADb0ud7jaNr6mQI03PE1FrixJH5CvmtsBLAid59NBQ6RtHUeyD8EmJq3vSxpRL5L7ISSc5l1Oa+9lorK0KGNsUsvTUVlxIji8jJry7oeouwVEUvz8qeA8RHxR9I0MA+v70UlbQ58DDilJPwjYLKkk4A5wCdz/FbgMKCedGfZiQARsVTSucADeb9zSnI9DbgC2Ay4LX/MupQIeOc7U5dXg09/GiZNKi4ns/ZaZ3GR1DsiVpHu7BrXgWNbFRGvAds2i72Ur9F83wBObx7P2y4HLm8hXgfsvb75mRXt85+HCRMa1/v3h6VLPV2LdR3rKhBXA3+R9CLpjrG/AkjaldQ1ZmZldMUVcGKzeTFWrIB+/QpJx2y9rWv6l/Ml3Um67fiO3IqANFbzpc5OzqynuP9+GD68aezxx2GPPYrJx2xDrbNrKz+42Dz2VOekY9azPPss7Lxz09gf/whHtzofuVnXsN7jJma2/t56CzbZpGnsoIPgrruKyces3FxczCqspUH56PRHhs0qq92zIpvZhvnAB9YuLK++6sJi3ZOLi1knu+iiVFSml4xePvJIKip+aZd1V+4WM+skdXXw/vc3jV1yCZx6ajH5mFWSi4tZmb38Mmy1VdPYxz4Gd9xRTD5mRXBxMSuTCNiohY5mj6lYT+QxF7My6Ndv7cLy9tsuLNZzubiYbYAvfzkN1r/ySmNs3rxUVHq7X8B6MBcXs/Vwyy2pqPzyl42xm25KRWXw4NaPM+spXFzMOmDBglRUjjiiMXb66amolMY6ZNIkqKlJ/Wo1NZ5T37oFN9zN2mH16rW7ufr2TS/y2iCTJsG4cbByZVqfMyetA4wZs4EnNyuOWy5WvCr/y11au7CsWVOGwgJw1lmNhaXBypUpbtaFFVJcJPWXdL2kJyQ9LukDkraRNE3S7Pxz67yvJF0sqV7SI5L2LTnP2Lz/bEljS+L7SXo0H3Nxft2xVaOGv9znzEl9Sw1/uVdBgRk5cu3pWpYvT2mW7f9Rc+d2LG7WRRTVcvkFcHtE7AG8F3gcOBO4MyKGAnfmdYBRwND8GQdcAiBpG+BsYDgwDDi7oSDlfU4uOW5kBb6TrY8q/Mt9/PhUPKZObYzdf38qKs0fjtxgO+7YsbhZF1Hx4iJpK+AAYAJARLwVEcuB0cCVebcrgaPy8mhgYiTTgf6StgcOBaZFxNKIWAZMA0bmbf0iYnp+udnEknNZtamiv9xnzkxF5ZRTGmMXXpiKSvNpXMrm/PPT4E2pvn1T3KwLK6LlshOwBPidpIckXSZpc2BgRCzM+ywCBublQcC8kuPn51hb8fktxNciaZykOkl1S5Ys2cCvZeulCv5yX7kyFZV3v7sxNmxYKipf+1onX3zMmNRUGjIkJTFkSFr3YL51cUUUl97AvsAlEfE+4DUau8AAyC2OTn+2OSLGR0RtRNQOGDCgsy9nLSn4L3dp7ZmJI+C++ypy+WTMGHjuuXSXwHPPubBYt1BEcZkPzI+Ihv98rycVmxdylxb55+K8fQGwQ8nxg3OsrfjgFuJWjQr6
y/3MM9celH/zTU/XYlYuFS8uEbEImCdp9xw6GHgMmAI03PE1FrgxL08BTsh3jY0AVuTus6nAIZK2zgP5hwBT87aXJY3Id4mdUHIuq0YV/Mv9hhtSUfnxjxtj9fWpqPTp02mXNetxirpb7EvAJEmPAPsAPwB+BHxM0mzgo3kd4FbgGaAe+B/gNICIWAqcCzyQP+fkGHmfy/IxTwO3VeA7WRV74olUVI4+ujH2wDm3EUNq2GVodT5fY9aVKdwPAEBtbW3U1dUVnYaV2SuvpBmLS11+OZzYp9mT8ZDGejyYbtYhkmZERG3zuJ/Qt24pArbeumlh+dznUvzEE6nK52vMuhPPLWbdzvHHwx/+0Lg+cCAsXNhsAL+Knq8x647ccrFuo+HJ+tLC8sorsGhRC9O1VMHzNWbdmYuLdXn33bf2k/VPPZW6wLbYopWD/GS8WadycbEu64UXUlEZMaIxNmVKKipDh67jYD8Zb9apPOZiXc7bb6/9TMp3vgPnntvBE40Z42Ji1klcXKxLOeAA+OtfG9eHD4fp04vLx8xa5m4x6xLOOSf1XpUWljffdGExq1ZuuVhVu+02OOywprHnn4ftty8mHzNrHxcXq0pPPw277to09ve/wwc/WEw+ZtYx7hazqtLwbpXSwvKrX6U7wFxYzLoOt1ysKkSk5xfnl7zm7ZhjYPLk4nIys/Xn4mKFO+WU9IhJg803h5dfho3crjbrslxcrDATJ8LYsU1jy5fDVlsVk4+ZlY+Li1XcQw/Bvvs2jc2cCXvtVUw+ZlZ+hXQ8SHpO0qOSHpZUl2PbSJomaXb+uXWOS9LFkuolPSJp35LzjM37z5Y0tiS+Xz5/fT62+bSFVoCXXkqD9aWFZfLkNN7iwmLWvRTZq31QROxT8pKZM4E7I2IocGdeBxgFDM2fccAlkIoRcDYwHBgGnN1QkPI+J5ccN7Lzv461ZvXqVFS2264xdsYZqagcc0xxeZlZ56mmIdPRwJV5+UrgqJL4xEimA/0lbQ8cCkyLiKURsQyYBozM2/pFxPRIr9mcWHIuq7DDD4feJZ2ve++disrPflZcTmbW+YoqLgHcIWmGpHE5NjAiFublRcDAvDwImFdy7Pwcays+v4W4VdAFF6TWyq23NsZef2qFSjwAAA6dSURBVB0efbS4nMyscooa0P9QRCyQ9A5gmqQnSjdGREiKzk4iF7ZxADv6JVFlcdddcPDBTWNz58IOOxSTj5kVo5CWS0QsyD8XAzeQxkxeyF1a5J+L8+4LgNJfTYNzrK344BbiLeUxPiJqI6J2wIABG/q1erS5c1NLpbSw3Hln6gJzYTHreSpeXCRtLmnLhmXgEGAmMAVouONrLHBjXp4CnJDvGhsBrMjdZ1OBQyRtnQfyDwGm5m0vSxqR7xI7oeRcVmZvvNH4rq0GF1yQispHPlJcXmZWrCK6xQYCN+S7g3sDV0XE7ZIeACZLOgmYA3wy738rcBhQD6wETgSIiKWSzgUeyPudExFL8/JpwBXAZsBt+WNltvfeMGtW4/phh8EttxSXj5lVD6Ubqqy2tjbq6uqKTqNLOOMM+PnPm8ZWrYJevYrJx8yKI2lGySMl/+Yn9K3drrsOPvnJprEXX4Rtty0mHzOrXtX0nEv3NGkS1NSkWRhratJ6FzNrVhpXKS0sDz6YxlVcWMysJW65dKZJk2DcuPSSEoA5c9I6wJgxxeXVTitWQP/+TWMTJ8LxxxeTj5l1HW65dKazzmosLA1WrkzxKrZmDfTt27SwnHJKaqm4sJhZe7i4dKa5czsWrwLHHJMG5l9/Pa0PGZKKzaWXFpuXmXUtLi6dqbWn/qtwNoBf/zqNq1x/fWPstdfguedS3MysI1xcOtP556f+pVJ9+6Z4lfj731Px+OIXG2NPP526wJqnbmbWXi4unWnMmPT+3iFDGh9jHz++KgbzFy5MKX3oQ42xW29NRWXnnYvLy8y6BxeXzjZmTOpbWrMm/axUYWnlFui33kpF5Z3vbNz1
+99PRWXUqMqkZmbdn4tLd9RwC/ScOalq5FugR+z6Ipts0rjbgQemzd/7XnGpmln35OLSHTW7Bfo7nItWvsZ9Tze+CvKtt+CeewrIzcx6BD9E2R3lW51v4giO5KYmm154Ad7xjiKSMrOexC2XbuiVwe/ie3y/SWGZznBiSI0Li5lVhFsu3ciqVXDZZXD2ijoWsxkA/8Pn+TwT8i3Q4wvO0Mx6CrdcuoEIuPHG9H6VL3wBdn/vZkz/79uJITV8XpdX1S3QZtYzuOXSxT3wAHz963DvvbDbbvDnP8ORR4I0Es5+ruj0zKyHKqzlIqmXpIck3ZzXd5J0n6R6SddK6pPjm+T1+ry9puQc387xJyUdWhIfmWP1ks6s9HerhGefheOOg2HD4PHH0/QtM2fC6NGersXMildkt9hXgMdL1n8MXBQRuwLLgJNy/CRgWY5flPdD0p7AscBewEjgN7lg9QJ+DYwC9gSOy/t2C0uXwte+BnvskbrCzjoL6uvhtNNg442Lzs7MLCmkuEgaDBwOXJbXBXwEaJg28UrgqLw8Oq+Ttx+c9x8NXBMRb0bEs0A9MCx/6iPimYh4C7gm71t+FXwR2BtvwE9/CrvsAhddlIZPZs+G886Dfv067bJmZuulqJbLz4FvAmvy+rbA8ohYldfnA4Py8iBgHkDeviLv/+94s2Nai69F0jhJdZLqlixZ0rFv0MpT8OUuMGvWwFVXwbvelcZWhg+Hhx+Gyy+HQS1+KzOz4lW8uEg6AlgcETMqfe3mImJ8RNRGRO2AAQM6dnAFXgT2l7+kYjJmDGy1FdxxB9x+O7znPWW7hJlZpyjibrH9gSMlHQZsCvQDfgH0l9Q7t04GAwvy/guAHYD5knoDWwEvlcQblB7TWrx8OvFFYI8/Dt/6Ftx0EwweDFdcAZ/5THqJl5lZV1DxlktEfDsiBkdEDWlA/q6IGAPcDXwi7zYWuDEvT8nr5O13RUTk+LH5brKdgKHA/cADwNB891mffI0pZf8infAisEWL4NRT4d3vTvN+/eAH8NRTMHasC4uZdS3V9BDlt4D/klRPGlOZkOMTgG1z/L+AMwEiYhYwGXgMuB04PSJW55bPF4GppLvRJud9y6uMLwJ77TU45xzYdVeYMCE9CPn00/Dtb8Nmm5UpXzOzClJqBFhtbW3U1dV17KBJk9IYy9y5qcVy/vkdegp+9Wr43e/SlPcLF8LRR8MPf5gehjQz6wokzYiI2uZxP6G/IcaMWa8pVSLgttvgm9+EWbPgAx+A666D/ffvhBzNzApQTd1iPcKDD8JHPwqHH56eXbnuuvQeexcWM+tOXFwqZM4cOP542G8/+Ne/4Be/gMceg098wtO1mFn3426xTvbcc7DTTml5k03SLcZnngn9+xealplZp3Jx6SSvvgrvfS8880xj7KmnNuhOZTOzLsPdYmW2Zg186lOw5ZaNhWXChDSI78JiZj2Fi0sZ/eQn6WHHyZPT+pe/nIrN5z5XbF5mZpXmbrEyuPlm+PjHG9c/+EG4+27o06e4nMzMiuTisoF+/3s44YS03KcPzJ8PHZ0D08ysu3G32Abaffc0cP+vf8Gbb7qwmJmBWy4bbNiw9H4VMzNr5JaLmZmVnYuLmZmVnYuLmZmVnYuLmZmVnYuLmZmVnYuLmZmVnYuLmZmVnYuLmZmVnSKi6ByqgqQlwJx27r4d8GInplNOzrX8ukqe4Fw7i3NtNCQi1pqbxMVlPUiqi4jaovNoD+dafl0lT3CuncW5rpu7xczMrOxcXMzMrOxcXNbP+KIT6ADnWn5dJU9wrp3Fua6Dx1zMzKzs3HIxM7Oyc3ExM7Oyc3HpAEmXS1osaWbRubRF0g6S7pb0mKRZkr5SdE6tkbSppPsl/Svn+v2ic1oXSb0kPSTp5qJzaYuk5yQ9KulhSXVF59MWSf0lXS/pCUmPS/pA0Tm1RNLu+d+z4fOypK8WnVdLJJ2R/5uaKelqSZtW9Poec2k/SQcArwITI2LvovNp
jaTtge0j4kFJWwIzgKMi4rGCU1uLJAGbR8SrkjYG/gZ8JSKmF5xaqyT9F1AL9IuII4rOpzWSngNqI6LqH/aTdCXw14i4TFIfoG9ELC86r7ZI6gUsAIZHRHsfwK4ISYNI/y3tGRGvS5oM3BoRV1QqB7dcOiAi7gWWFp3HukTEwoh4MC+/AjwODCo2q5ZF8mpe3Th/qvYvHkmDgcOBy4rOpbuQtBVwADABICLeqvbCkh0MPF1thaVEb2AzSb2BvsDzlby4i0s3J6kGeB9wX7GZtC53Mz0MLAamRUTV5gr8HPgmsKboRNohgDskzZA0ruhk2rATsAT4Xe5uvEzS5kUn1Q7HAlcXnURLImIBcCEwF1gIrIiIOyqZg4tLNyZpC+CPwFcj4uWi82lNRKyOiH2AwcAwSVXZ5SjpCGBxRMwoOpd2+lBE7AuMAk7P3brVqDewL3BJRLwPeA04s9iU2pa77o4Eris6l5ZI2hoYTSrc7wQ2l/SZSubg4tJN5fGLPwKTIuJPRefTHrkr5G5gZNG5tGJ/4Mg8lnEN8BFJfyg2pdblv16JiMXADcCwYjNq1XxgfkmL9XpSsalmo4AHI+KFohNpxUeBZyNiSUS8DfwJ+GAlE3Bx6YbyIPkE4PGI+FnR+bRF0gBJ/fPyZsDHgCeKzaplEfHtiBgcETWkLpG7IqKifw22l6TN880c5C6mQ4CqvMsxIhYB8yTtnkMHA1V380kzx1GlXWLZXGCEpL7598HBpLHXinFx6QBJVwP/BHaXNF/SSUXn1Ir9geNJf1k33DJ5WNFJtWJ74G5JjwAPkMZcqvoW3y5iIPA3Sf8C7gduiYjbC86pLV8CJuX/H+wD/KDgfFqVi/XHSK2BqpRbgdcDDwKPkn7XV3QaGN+KbGZmZeeWi5mZlZ2Li5mZlZ2Li5mZlZ2Li5mZlZ2Li5mZlZ2Li/V4kr4i6ecl67+V9L8l61+SdHEHzneFpE+0Y59nS24V/8f6Zd9+lbiGWYPeRSdgVgX+DowpWX8v0EtSr4hYTXqy+cb2nChPEthe34iI6zuw/3qR1DsiVkVERZ/Qtp7NLRczeBjYTdJmeYbe13Ps3Xn7B4G/S9pH0nRJj0i6Ic/fhKR7JP08vzOlybtzJJ2bWym92pOIpF9I+l5ePlTSvZI2yue4VFKdpKfyPGcNk35eIOmBnNcpOf5hSX+VNIX8tLukV0uu842SY76fYzX5XSr/k98DckeeNQFJu0r6X6X37jwoaZfWzmMGLi5mRMQq4CHg/cAI0gzS04EP5vdiKCLmAROBb0XEe0hPPZ9dcpo+EVEbET9tCEi6ABgAnJhbQM1dUNItNinHvg18StJBwMX52IYZmGtI84MdDlyq9PKnk0gz3r4/53+ypJ3y/vuS3o2zW+lFJR0CDM3n2gfYr2RSy6HAryNiL2A58J85PinH30sqtgvXcR7r4dwtZpb8g/RLczPSFD+zgf9Hmgr+H7lF0z8i/pL3v5KmM+Je2+x83wXui4i2prpfq1ssIlZKOhm4FzgjIp4u2Tw5F5rZkp4B9iDNGfaekjGerUi/8N8C7o+IZ1u47iH581Be3yIfM5c02eHDOT4DqMlzlA2KiBtyjm/Av4tUS+e5t43vbD2Ei4tZ8nfgVGBT4NekorJn/tmegfDXmq0/QPpLfpuI6OgL5t4NvESaKr1U87maAhDwpYiYWrpB0odbyOnfm4EfRsRvmx1TA7xZElpNKratafE8ZuBuMbMG/yR1iQ2IiMWRJt1bQnonxt8jYgWwTNL/yfsfD/yl5VMBcDvwI+CWhtmJ20PSEOBrpBe8jZI0vGTzMXn8ZRdgZ+BJYCrwhfyKBSTtpnW/aGsq8Dml9/0gaZCkd7S2c36b6XxJR+X9N5HUt6PnsZ7FLRczICKWSVoCzCoJ/5M0w/S/8vpY0lhHX+AZ4MR1nPO6XFimSDosIl5vtssFkr5Tsj6c9KqEr0fE83nW7SskvT9vn0ua4bgfcGpEvCHpMtJYzIN5
avUlwFHryOsOSe8C/pkO4VXgM6SWSmuOB34r6RzgbeCYNs6zuK3rW8/gWZHNugBJVwA3V+LWZbNycLeYmZmVnVsuZmZWdm65mJlZ2bm4mJlZ2bm4mJlZ2bm4mJlZ2bm4mJlZ2f1/0MOQhFJdppcAAAAASUVORK5CYII=\n",
689 | "text/plain": [
690 | ""
691 | ]
692 | },
693 | "metadata": {
694 | "needs_background": "light"
695 | }
696 | }
697 | ]
698 | }
699 | ]
700 | }
--------------------------------------------------------------------------------
/7_1_5_Lin_Reg_implementation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "code",
20 | "metadata": {
21 | "id": "-y5S4EZyPYt3"
22 | },
23 | "source": [
24 | "import numpy as np\n",
25 | "import pandas as pd\n",
26 | "from sklearn.model_selection import train_test_split\n",
27 | "import matplotlib.pyplot as plt\n",
28 | "\n",
29 | "import Lin_Reg_model"
30 | ],
31 | "execution_count": null,
32 | "outputs": []
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "id": "1G3UFDztP9tF"
38 | },
39 | "source": [
40 | "Data Processing"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "metadata": {
46 | "id": "YQ4Zx9HbPy1-"
47 | },
48 | "source": [
49 | "# loading the data from csv file to pandas dataframe\n",
50 | "salary_data = pd.read_csv('/content/salary_data.csv')"
51 | ],
52 | "execution_count": null,
53 | "outputs": []
54 | },
55 | {
56 | "cell_type": "code",
57 | "metadata": {
58 | "colab": {
59 | "base_uri": "https://localhost:8080/",
60 | "height": 206
61 | },
62 | "id": "nPfG1QLoQOxU",
63 | "outputId": "e78f6224-777a-4d02-9d93-9a63e62dadb7"
64 | },
65 | "source": [
66 | "# printing the first 5 rows of the dataframe\n",
67 | "salary_data.head()"
68 | ],
69 | "execution_count": null,
70 | "outputs": [
71 | {
72 | "output_type": "execute_result",
73 | "data": {
74 | "text/html": [
75 | "\n",
76 | "\n",
89 | "
\n",
90 | " \n",
91 | " \n",
92 | " | \n",
93 | " YearsExperience | \n",
94 | " Salary | \n",
95 | "
\n",
96 | " \n",
97 | " \n",
98 | " \n",
99 | " 0 | \n",
100 | " 1.1 | \n",
101 | " 39343 | \n",
102 | "
\n",
103 | " \n",
104 | " 1 | \n",
105 | " 1.3 | \n",
106 | " 46205 | \n",
107 | "
\n",
108 | " \n",
109 | " 2 | \n",
110 | " 1.5 | \n",
111 | " 37731 | \n",
112 | "
\n",
113 | " \n",
114 | " 3 | \n",
115 | " 2.0 | \n",
116 | " 43525 | \n",
117 | "
\n",
118 | " \n",
119 | " 4 | \n",
120 | " 2.2 | \n",
121 | " 39891 | \n",
122 | "
\n",
123 | " \n",
124 | "
\n",
125 | "
"
126 | ],
127 | "text/plain": [
128 | " YearsExperience Salary\n",
129 | "0 1.1 39343\n",
130 | "1 1.3 46205\n",
131 | "2 1.5 37731\n",
132 | "3 2.0 43525\n",
133 | "4 2.2 39891"
134 | ]
135 | },
136 | "metadata": {},
137 | "execution_count": 3
138 | }
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "metadata": {
144 | "colab": {
145 | "base_uri": "https://localhost:8080/"
146 | },
147 | "id": "AUn-2j48QQoO",
148 | "outputId": "e2181bfd-8b1e-433a-e530-5e26b2573cd9"
149 | },
150 | "source": [
151 | "# number of rows & columns in the dataset\n",
152 | "salary_data.shape"
153 | ],
154 | "execution_count": null,
155 | "outputs": [
156 | {
157 | "output_type": "execute_result",
158 | "data": {
159 | "text/plain": [
160 | "(30, 2)"
161 | ]
162 | },
163 | "metadata": {},
164 | "execution_count": 4
165 | }
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "metadata": {
171 | "colab": {
172 | "base_uri": "https://localhost:8080/"
173 | },
174 | "id": "pvdhCYEAQjKR",
175 | "outputId": "c08c11fc-203b-481b-a760-7b38d47cca17"
176 | },
177 | "source": [
178 | "# checking for missing values\n",
179 | "salary_data.isnull().sum()"
180 | ],
181 | "execution_count": null,
182 | "outputs": [
183 | {
184 | "output_type": "execute_result",
185 | "data": {
186 | "text/plain": [
187 | "YearsExperience 0\n",
188 | "Salary 0\n",
189 | "dtype: int64"
190 | ]
191 | },
192 | "metadata": {},
193 | "execution_count": 5
194 | }
195 | ]
196 | },
197 | {
198 | "cell_type": "markdown",
199 | "metadata": {
200 | "id": "DFsbcUDLQ4j6"
201 | },
202 | "source": [
203 | "Splitting the feature & target"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "metadata": {
209 | "id": "HDVDx_chQy2J"
210 | },
211 | "source": [
212 | "X = salary_data.iloc[:,:-1].values\n",
213 | "Y = salary_data.iloc[:,1].values"
214 | ],
215 | "execution_count": null,
216 | "outputs": []
217 | },
218 | {
219 | "cell_type": "code",
220 | "metadata": {
221 | "colab": {
222 | "base_uri": "https://localhost:8080/"
223 | },
224 | "id": "FOYOS3iXRJaE",
225 | "outputId": "3816f5d2-efca-44b2-84e2-b3397d99d9e2"
226 | },
227 | "source": [
228 | "print(X)"
229 | ],
230 | "execution_count": null,
231 | "outputs": [
232 | {
233 | "output_type": "stream",
234 | "name": "stdout",
235 | "text": [
236 | "[[ 1.1]\n",
237 | " [ 1.3]\n",
238 | " [ 1.5]\n",
239 | " [ 2. ]\n",
240 | " [ 2.2]\n",
241 | " [ 2.9]\n",
242 | " [ 3. ]\n",
243 | " [ 3.2]\n",
244 | " [ 3.2]\n",
245 | " [ 3.7]\n",
246 | " [ 3.9]\n",
247 | " [ 4. ]\n",
248 | " [ 4. ]\n",
249 | " [ 4.1]\n",
250 | " [ 4.5]\n",
251 | " [ 4.9]\n",
252 | " [ 5.1]\n",
253 | " [ 5.3]\n",
254 | " [ 5.9]\n",
255 | " [ 6. ]\n",
256 | " [ 6.8]\n",
257 | " [ 7.1]\n",
258 | " [ 7.9]\n",
259 | " [ 8.2]\n",
260 | " [ 8.7]\n",
261 | " [ 9. ]\n",
262 | " [ 9.5]\n",
263 | " [ 9.6]\n",
264 | " [10.3]\n",
265 | " [10.5]]\n"
266 | ]
267 | }
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "metadata": {
273 | "colab": {
274 | "base_uri": "https://localhost:8080/"
275 | },
276 | "id": "eJmfuRiVRKrr",
277 | "outputId": "1d9289ce-ebf6-458c-b41f-858c7ef3464a"
278 | },
279 | "source": [
280 | "print(Y)"
281 | ],
282 | "execution_count": null,
283 | "outputs": [
284 | {
285 | "output_type": "stream",
286 | "name": "stdout",
287 | "text": [
288 | "[ 39343 46205 37731 43525 39891 56642 60150 54445 64445 57189\n",
289 | " 63218 55794 56957 57081 61111 67938 66029 83088 81363 93940\n",
290 | " 91738 98273 101302 113812 109431 105582 116969 112635 122391 121872]\n"
291 | ]
292 | }
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {
298 | "id": "RgdnZ2mdRRAX"
299 | },
300 | "source": [
301 | "Splitting the data into training data & test data"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "metadata": {
307 | "id": "5TG7PAEyRMfy"
308 | },
309 | "source": [
310 | " X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.33,\n",
311 | " random_state = 2 )"
312 | ],
313 | "execution_count": null,
314 | "outputs": []
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {
319 | "id": "nLw4fjT1RjbF"
320 | },
321 | "source": [
322 | "Train the Linear Regression Model"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "metadata": {
328 | "id": "xYPFclmRRiGW"
329 | },
330 | "source": [
331 | "model = Lin_Reg_model.Linear_Regression(learning_rate=0.01, no_of_iterations=1000) "
332 | ],
333 | "execution_count": null,
334 | "outputs": []
335 | },
336 | {
337 | "cell_type": "code",
338 | "metadata": {
339 | "id": "-_7i2ONcSBVq"
340 | },
341 | "source": [
342 | "model.fit(X_train, Y_train)"
343 | ],
344 | "execution_count": null,
345 | "outputs": []
346 | },
347 | {
348 | "cell_type": "code",
349 | "metadata": {
350 | "colab": {
351 | "base_uri": "https://localhost:8080/"
352 | },
353 | "id": "HPLLxjGkVDFa",
354 | "outputId": "5bf02edb-3346-42be-b543-651e501c8004"
355 | },
356 | "source": [
357 | "# print the parameters\n",
358 | "\n",
359 | "print('weight = ', model.w[0])\n",
360 | "print('bias = ', model.b)"
361 | ],
362 | "execution_count": null,
363 | "outputs": [
364 | {
365 | "output_type": "stream",
366 | "name": "stdout",
367 | "text": [
368 | "weight = 9580.301196603597\n",
369 | "bias = 23226.38946603212\n"
370 | ]
371 | }
372 | ]
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {
377 | "id": "xljntE2OUvRJ"
378 | },
379 | "source": [
380 | "Predict the salary value for test data"
381 | ]
382 | },
383 | {
384 | "cell_type": "code",
385 | "metadata": {
386 | "id": "ksCCTTUUUyKY"
387 | },
388 | "source": [
389 | "test_data_prediction = model.predict(X_test)"
390 | ],
391 | "execution_count": null,
392 | "outputs": []
393 | },
394 | {
395 | "cell_type": "code",
396 | "metadata": {
397 | "colab": {
398 | "base_uri": "https://localhost:8080/"
399 | },
400 | "id": "uFB2oF79VkcZ",
401 | "outputId": "b9773849-eca6-45ea-b83d-8a403c9779de"
402 | },
403 | "source": [
404 | "print(test_data_prediction)"
405 | ],
406 | "execution_count": null,
407 | "outputs": [
408 | {
409 | "output_type": "stream",
410 | "name": "stdout",
411 | "text": [
412 | "[ 35680.78102162 33764.7207823 66337.74485075 58673.50389347\n",
413 | " 91246.52796192 80708.19664565 101784.85927818 51967.29305584\n",
414 | " 42386.99185924 88372.43760294]\n"
415 | ]
416 | }
417 | ]
418 | },
419 | {
420 | "cell_type": "markdown",
421 | "metadata": {
422 | "id": "zvxHrfrMWUjq"
423 | },
424 | "source": [
425 | "Visualizing the predicted values and actual values"
426 | ]
427 | },
428 | {
429 | "cell_type": "code",
430 | "metadata": {
431 | "colab": {
432 | "base_uri": "https://localhost:8080/",
433 | "height": 295
434 | },
435 | "id": "ML7CrqCwVmCj",
436 | "outputId": "84c6ee1d-f946-4ba3-eb0e-bbdeca428135"
437 | },
438 | "source": [
439 | "plt.scatter( X_test, Y_test, color = 'red' )\n",
440 | "plt.plot( X_test, test_data_prediction, color = 'blue' ) \n",
441 | "plt.xlabel( 'Work Experience' )\n",
442 | "plt.ylabel( 'Salary' )\n",
443 | "plt.title( 'Salary vs Experience' )\n",
444 | "plt.show()"
445 | ],
446 | "execution_count": null,
447 | "outputs": [
448 | {
449 | "output_type": "display_data",
450 | "data": {
451 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEWCAYAAACqitpwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de5yVZb3//9dbEBETQWXzU1AxRdvqztOEZP7MJAXbFlZatinJLNpldthanr59dZu0NY+5U4vUQMVTmkpuE9loWZrKoKV4ylHk5AGUY6Io8vn+cV3TrBlmhgHWWveamffz8ViPue/PfbrWWPPhOtzXpYjAzMysnDYpugBmZtb1OLmYmVnZObmYmVnZObmYmVnZObmYmVnZObmYmVnZOblYtyDpJUkfL7ocnZGkv0t6f9HlsM7FycU6DUkHSXpI0jJJiyU9KOlDRZerEiRNlPRO/sPe+PlrEWWJiPdFxItFPNs6LycX6xQk9QXuAv4b2BoYBPwnsKrCz+1Zyfuvw0/yH/bGz97VfHjB3906OScX6yx2A4iIGyPivYh4KyLujYgnACTtIuk+SW9Iel3SZEn9WruRpGGS/ixpqaRXJP1MUq+S4yHpREnPA89LulzSRS3uMUXS91q595WSLmwRu1PSf+TtUyUtkLRC0nOSRqzvL0LS5yXNzgkXSUdIelXSgJLyf1vSi/l3cYGkTUqu/4qkZyQtkTRV0k5tffeS2K55ezNJF0qaK+k1ST+XtHk+doik+ZJOlrQw/26PL7n35pIukjQn1z7/VHLt8FwrXSrpr5IOWd/fi9WYiPDHn5r/AH2BN4BJwBFA/xbHdwUOAzYDBgAPAJeWHH8J+Hje3h8YDvQEhgDPAN8tOTeAaaQa0ubAMOBlYJN8fFtgJTCwlXIeDMwDlPf7A28B2wO752Pb52NDgF3a+L4TgXPb+X1Mzudsk8t2ZIvy35/LvyPwN+Cr+dhooAH45/z9/w/wUFvfvSS2a96+BJiSj28J/Bb4r3zsEGA1cA6wKfCJ/Hvqn49fDvyeVOvsARyY/3sNyv9tP0H6B+9heX9A0f+782cj/j9bdAH88aejn/wHcSIwP/8Rm9LaH/h87lHA4yX7/0gurZz7XeD2kv0ADm1xzjPAYXn7W8DdbdxLwFzg4Lz/NeC+vL0rsBD4OLDpOr7rROBtYGnJZ1LJ8X75OU8Cv2hxbQCjSva/CUzP278DTig5tklOADu1890jl13Am5QkRODDwOy8fQgpkfYsOb6QlMg3ycf2buW7ngpc1yI2FRhb9P/m/Nnwj5vFrNOIiGci4ssRMRjYi1QbuBRA0kBJN+Ump+XA9aQaxlok7SbprtyUtBz4cSvnzmuxPwn4Yt7+InBdG2UM4CbgCzn0b6RaBhHRQEpkZwMLc3m3b+crXxgR/Uo+Y0uesxT4df49XNTKtaXln0P6XQHsBPw0Nz8tBRaTksagNq4tNQDoA8wsuf6eHG/0RkSsLtlfCbyP9PvtDbzQyn13Ao5pvGe+70HAdm2UwzoBJxfrlCLiWdK/7vfKoR+T/oX9LxHRl5QA1MblVwLPAkPzuWe0cm7L6cKvB0ZL2ptUg7qjneLdCByd+zIOAG4rKfcNEXEQ6Q9qAOe3c582SdoH+Ep+1mWtnLJDyfaOpKYzSInj6y2S1uYR8VDJ+W1Nlf46qfaxZ8m1W0XE+zpQ5NdJNbFdWjk2j1RzKS3TFhFxXgfuazXKycU6BUkfyB3Fg/P+DqTawcP5lC2BvwPLJA0Cvt/O7bYElgN/l/QB4Bvren5EzAdmkGost0XEW+2c+zjpj+lVwNRcy0DS7pIOlbQZ6Q/tW8CadT27JUm9ScnuDOB4YJCkb7Y47fuS+uff03eAm3P858DpkvbM99pK0jEdeW5ErAF+CVwi6Z/y9YMkjezgtdcAF0vaXlIPSR/Ov4vrgU9KGpnjvfPggMEdKZfVJicX6yxW
kGoBj0h6k5RUZgEn5+P/CewHLAP+B/hNO/c6hdRctYL0x/Lmds4tNQn4F9poEmvhBlLfyg0lsc2A80iJ51Xgn4DT27nHD9T8PZfXc/y/gHkRcWVErCLV0s6VNLTk2juBmcBfSL+PqwEi4nZSbemm3CQ4izRAoqNOJQ0IeDhf/7+kgQodcQqpj2gGqTnufNIgiXmkgQZnAItINZnv479PnVrjiBYzWwdJB5P+lb1T1PD/cSQFqcmvoeiyWPflfxmYdYCkTUnNS1fVcmIxqxVOLmbrIOmfSUOBtyOPTjOz9rlZzMzMys41FzMzKztPTJdtu+22MWTIkKKLYWbWqcycOfP1iBjQMu7kkg0ZMoT6+vqii2Fm1qlImtNa3M1iZmZWdk4uZmZWdk4uZmZWdk4uZmZWdk4uZmZWdk4uZmbd1eTJMGQIbLJJ+jl5ctlu7aHIZmbd0eTJMG4crFyZ9ufMSfsAY8Zs9O1dczEz647OPLMpsTRauTLFy8DJxcysO5o7d/3i68nJxcysO9pxx/WLrycnFzOz7mj8eOjTp3msT58ULwMnFzOz7mjMGJgwAXbaCaT0c8KEsnTmg0eLmZl1X2PGlC2ZtOSai5mZlV3FkoukayQtlDSrJHaMpKckrZFU1+L80yU1SHpO0siS+Kgca5B0Wkl8Z0mP5PjNknrl+GZ5vyEfH1Kp72hmZq2rZM1lIjCqRWwW8BnggdKgpD2AY4E98zVXSOohqQdwOXAEsAfwhXwuwPnAJRGxK7AEOCHHTwCW5Pgl+TwzM6uiiiWXiHgAWNwi9kxEPNfK6aOBmyJiVUTMBhqAYfnTEBEvRsQ7wE3AaEkCDgVuzddPAo4qudekvH0rMCKfb2ZmVVIrfS6DgHkl+/NzrK34NsDSiFjdIt7sXvn4sny+mZlVSa0kl0JIGiepXlL9okWLii6OmVmXUSvJZQGwQ8n+4BxrK/4G0E9SzxbxZvfKx7fK568lIiZERF1E1A0YMKBMX8XMzGoluUwBjs0jvXYGhgKPAjOAoXlkWC9Sp/+UiAjgfuDofP1Y4M6Se43N20cD9+XzzcysSir2EqWkG4FDgG0lzQfOInXw/zcwAPgfSX+JiJER8ZSkW4CngdXAiRHxXr7Pt4CpQA/gmoh4Kj/iVOAmSecCjwNX5/jVwHWSGvLzjq3UdzQzs9bJ/6hP6urqor6+vuhimJl1KpJmRkRdy3itNIuZmVkX4uRiZmZl5+RiZmZl5+RiZmZl5+RiZmZl5+RiZmZl5+RiZmZl5+RiZmZl5+RiZmZl5+RiZmZl5+RiZmZl5+RiZmZl5+RiZmZl5+RiZmZl5+RiZtYNvfcejB4NffvCwoXlv7+Ti5lZN3PuudCzJ0yZAitWQO/e5X9GxZKLpGskLZQ0qyS2taRpkp7PP/vnuCRdJqlB0hOS9iu5Zmw+/3lJY0vi+0t6Ml9zmSS19wwzs+7ujjtAgh/+MO0feii8806qvZRbJWsuE4FRLWKnAdMjYigwPe8DHAEMzZ9xwJWQEgVpeeQDgGHAWSXJ4krgayXXjVrHM8zMuqUnn0xJ5dOfTvtbbglvvAHTp8Omm1bmmRVLLhHxAGkN+1KjgUl5exJwVEn82kgeBvpJ2g4YCUyLiMURsQSYBozKx/pGxMOR1mm+tsW9WnuGmVm38vrrsPnm8MEPNsVmzYLly2HrrSv77Gr3uQyMiFfy9qvAwLw9CJhXct78HGsvPr+VeHvPWIukcZLqJdUvWrRoA76OmVntefddOPhgGDAA3n47xX77W4iAPfesThkK69DPNY4o8hkRMSEi6iKibsCAAZUsiplZVZx8MvTqBX/8Y9r/r/9KSeXII6tbjmonl9dykxb5Z+MAuAXADiXnDc6x9uKDW4m39wwzs/KaPBmGDIFNNkk/J08urCjXX5/6VS6+OO0ffXQabnxaQb3O1U4uU4DGEV9jgTtL4sflUWPDgWW5aWsqcLik/rkj/3Bgaj62XNLwPErsuBb3au0ZZmbl
M3kyjBsHc+akqsGcOWm/ygnmkUdSUvnSl9L+jjumPpVf/zrlvKIotRxV4MbSjcAhwLbAa6RRX3cAtwA7AnOAz0XE4pwgfkYa8bUSOD4i6vN9vgKckW87PiJ+leN1pBFpmwO/A06KiJC0TWvPWFd56+rqor6+vgzf3My6hSFDUkJpaaed4KWXKv74BQtg8ODmsRdegPe/v+KPbkbSzIioWyteqeTS2Ti5mNl62WSTVGNpSYI1ayr22LfegmHD0qivRvfdBx/7WMUe2a62kovf0Dcz2xA77rh+8Y0UASecAH36NCWWK65I8aISS3ucXMzMNsT48ekvfak+fVK8zC6/PFWUrrkm7X/1q6ly9I1vlP1RZdOz6AKYmXVKY8akn2eeCXPnphrL+PFN8TK47z4YMaJpf5994M9/rsxcYOXm5GJmtqHGjClrMmn0wguw667NYwsWwPbbl/1RFeNmMTOzGrFiRRoBVppYHn009at0psQCTi5mZoVbswY+85k0O/GC/Dr49denpPKhDxVbtg3l5GJmxaqht9yL8OMfQ48ecPvtaf+UU1JSqUBrW1W5z8XMitP4lvvKlWm/8S136Px/XddhypS0EmSjQw6Be++t3BT41eaXKDO/RGlWgILfci/CU0/BXns17W+xRfoVbLNNcWXaGG29ROmai5kVZ+7c9Yt3Ym+8kUYrN1bSIC3iVZpouhL3uZhZcar8lnsR3n03NXltu21TYrnzztSv0lUTCzi5mFmRqviWexG+//20tsof/pD2x49PSeVTnyq2XNXg5GJmxRkzBiZMSH0sUvo5YUJxnfllGrk2eXL6OhdemPY//em0tsoZZ7R/XVfiPhczK1aF3nJfb2UYufboo3DAAU37gwenDvy+fctc1k7ANRczM0hzhJX2tkPaP/PMdV768supplKaWBoaYN687plYwMnFzCzZgJFrS5empDJoUFNs+vTUr7LLLmUuXydTSHKR9B1JsyQ9Jem7Oba1pGmSns8/++e4JF0mqUHSE5L2K7nP2Hz+85LGlsT3l/RkvuayvNKlmVnb1mPkWkRKKv37N8V++tMUP/TQCpWvk6l6cpG0F/A1YBiwN3CkpF2B04DpETEUmJ73AY4AhubPOODKfJ+tSUsnH5DvdVZjQsrnfK3kulGV/2Zm1ql1cOTa3nuvvTb9mjXw7W9XuHydTBE1l38GHomIlRGxGvgD8BlgNDApnzMJOCpvjwaujeRhoJ+k7YCRwLSIWBwRS4BpwKh8rG9EPBxp+oFrS+5lZta6dYxcO/vsFH7iiaZLFi9uqsVYc0WMFpsFjJe0DfAW8AmgHhgYEa/kc14FBubtQcC8kuvn51h78fmtxNciaRypNsSOXeilLTPbQK2MXGu5YBekUWGddbbiaql6zSUingHOB+4F7gH+ArzX4pwAKj7pWURMiIi6iKgbMGBApR9nZp3IK6+kGklpYrnsss49DX41FdKhHxFXR8T+EXEwsAT4G/BabtIi/1yYT18A7FBy+eAcay8+uJW4mdk6rV6dkkrp4lyHH56SykknFVeuzqao0WL/lH/uSOpvuQGYAjSO+BoL3Jm3pwDH5VFjw4FluflsKnC4pP65I/9wYGo+tlzS8DxK7LiSe5mZtUlae8r7CJg6tZjydGZFvaF/W+5zeRc4MSKWSjoPuEXSCcAc4HP53LtJ/TINwErgeICIWCzpR8CMfN45EbE4b38TmAhsDvwuf8zMWjViROpbKfXmm2sPHrOO83oumddzMet+rrwSvvnN5rHHHoN99y2mPJ1RW+u5+A19M+t2nnwyNYGVJpZLL01NYE4s5eGJK82s21i5Mq38WOrAA+HBB4spT1fm5GJm3UJrLzq6V6By3CxmZl3a+9+/dmJZtcqJpdKcXMysSzrrrJRUZs9uijU0pKTSq1dx5eou3CxmZl3KAw/ARz/aPHbjjXDsscWUp7tycjGzLmHxYthmm+axz38ebrqpmPJ0d04uZtapRaw9BX5j3Irj
5GJmnVZrI8Dee6/1ZGPV5f8EZtbpfPWrayeWV19tuxZj1ef/DGbWadxxR0oqV1/dFLv33pRUBg5s+zqrPjeLmVnNmzs3LQxZ6uST4cILiymPrZuTi5nVrNWr154Cf5tt4PXXiymPdZyTi5nVpNY669es8Xr1nYX7XMyspowcuXYCWbYs9as4sXQeTi5mVhMmTEjJ4957m2IzZqSk0rdvceWyDVPUMsffk/SUpFmSbpTUW9LOkh6R1CDpZkm98rmb5f2GfHxIyX1Oz/HnJI0siY/KsQZJp1X/G5pZR738ckoqX/96U+zCC1NSqVtrCSrrLKqeXCQNAr4N1EXEXkAP4FjgfOCSiNgVWAKckC85AViS45fk85C0R75uT2AUcIWkHpJ6AJcDRwB7AF/I55pZDXnnnZRUBg1qitXVpaRy8snFlcvKo6hmsZ7A5pJ6An2AV4BDgVvz8UnAUXl7dN4nHx8hSTl+U0SsiojZQAMwLH8aIuLFiHgHuCmfa2Y1Yvhw2Gyzpv1DD01JZcaM4spk5VX15BIRC4ALgbmkpLIMmAksjYjV+bT5QOO/ZwYB8/K1q/P525TGW1zTVnwtksZJqpdUv2jRoo3/cmbWrh/+MNVWHnmkKfbuuzB9enFlssooolmsP6kmsTOwPbAFqVmr6iJiQkTURUTdgAEDiiiCWbdw110pqZx7blPstddSbaWnX4jokjr0n1VSj4h4r0zP/DgwOyIW5Xv/BvgI0E9Sz1w7GQwsyOcvAHYA5udmtK2AN0rijUqvaStuZlXU0ABDhzaPPfwwHHBAMeWx6ulozeV5SReUqWN8LjBcUp/cdzICeBq4Hzg6nzMWuDNvT8n75OP3RUTk+LF5NNnOwFDgUWAGMDSPPutF6vSfUoZym1kHvflmqqmUJpaf/zzVVJxYuoeOVkj3Jv2RvkrSJsA1pM705ev7wIh4RNKtwGPAauBxYALwP8BNks7Nscap6a4GrpPUACzO5SAinpJ0CykxrQZObKxdSfoWMJU0Eu2aiHhqfctpZusvIs0BNq+k1/PYY9NKkNa9KNZzRR1JHwVuAPqRRm/9KCIaKlC2qqqrq4v6+vqii2HWaX3jG6l20mjLLWHpUk+B39VJmhkRa72R1OE+F+BfgeOBIcBFwGTg/wfuBnYrW0nNrFO5/nr40peax5Yuha22KqY8Vhs62iz2PKlP5IKIeKgkfqukg8tfLDOrdX/9K+yzT/PYrFmw557FlMdqyzqTS661TIyIc1o7HhHfLnupzKxmLVkCW2/dPHbzzfC5zxVTHqtN62wNzZ3kR1ahLGZWw9asSWurlCaW73wndeI7sVhLHW0We1DSz4CbgTcbgxHxWEVKZWY15bOfhd/8pml/t93g2Wc9Bb61raPJpbFltbRpLEjzgZlZF/Wzn8FJJzWPrVwJm29eTHms8+hQcomIj1W6IGZWOx58EA46qHnsxRdh552LKY91Ph2e1UfSv5Kmt+/dGGurk9/MOqdXXoHtt28eu+eetDqk2fro0OtNkn4OfB44CRBwDLBTBctlZlX07rup/6Q0sZxzTuqsd2KxDdHRd2cPjIjjSIt2/SfwYfzipFmX8JGPQK9eTfsf/WhKKj/8YXFlss6vo8nlrfxzpaTtgXeB7SpTJDOrhrPPTrWVh0pei37nHfj974sqkXUlHe1zuUtSP+AC0oSTAVxVsVKZWcX87nfwiU80j736KgwcWEx5rGvq6GixH+XN2yTdBfSOiGWVK5aZlduLL8IuuzSPPfQQfPjDxZTHurZ2k4ukz7RzjIj4TVvHzaw2rFwJW2zRPHbFFWkWY7NKWVfN5ZPtHAvAycWsRkWkmsrs2U2xY46BW24prkzWfbSbXCLi+HI/UNLupGlkGr0f+L/AtTk+BHgJ+FxELMmrVf4U+ASwEvhy47QzksYC/yff59yImJTj+wMTgc1JSwJ8J9Z34RqzTuxb34LLL2/a79MHVqzw2ipWPVV/iTIiniNPJ5NnXF4A3A6c
BkyPiPMknZb3TwWOIC1hPBQ4ALgSOEDS1sBZQB2pFjVT0pSIWJLP+RrwCCm5jAJ+t75lNetsbrgBxoxpHluyBPr1K6Y81n0V/RLlCOCFiJgDjAYm5fgk4Ki8PRq4NpKHgX6StgNGAtMiYnFOKNOAUflY34h4ONdWri25l1mX9OSTaVhxaWJ58snUNObEYkUo+iXKY4HG1bUHRsQreftVoHFg5CCgZEVu5udYe/H5rcTXImmcpHpJ9YsWLdqY72FWiKVLU1L54AebYjfckJLKXnsVVy6zDX2JcjUb+RKlpF7Ap4BftzyWaxwV7yOJiAkRURcRdQMGDKj048zKZs0a6N0b+vdvip14YkoqX/hCceUya9TR5NL4EuVPgJnAbJpqHBvqCOCxiHgt77+Wm7TIPxfm+AJgh5LrBudYe/HBrcTNuoRjjoEePWDVqrS/yy4p2fzsZ8WWy6xUu8lF0ock/X8R8aOIWAq8D3iSVNu4ZCOf/QWaJ6gpwNi8PRa4syR+nJLhwLLcfDYVOFxSf0n9gcOBqfnYcknD80iz40ruZdZpXXFFagK79dam2JtvQkODF+2y2rOumssvgHcAJB0MnJdjy4AJG/pQSVsAh9H8PZnzgMMkPQ98PO9DGu31ItAA/BL4JkBELAZ+BMzIn3NyjHzOVfmaF/BIMevE/vznlDxOPLEp9sILqQmsT5/iymXWHrX3+oekv0bE3nn7cmBRRJyd9/8SEfu0eXEnU1dXF/X19UUXw+wfXn0VtmvRs3n33XDEEcWUx6w1kmZGRF3L+LpqLj0kNb4LMwK4r+RYh9+RMbOOa1xbpTSxnH12qqk4sVhnsa4EcSPwB0mvk0aM/RFA0q6kpjEzK6ODD4Y//rFp/yMfgT/9qbjymG2odU3/Ml7SdNKw43tLplDZhPRCpZmVwXHHwXXXNY+98w5sumkx5THbWOts2spvxbeM/a0yxTHrXs4/H047rXns5ZfX7msx62zcb2JWgEcfhQMOaB674Qa/AGldh5OLWRUtW7b2XF9HHgm//W0x5TGrFCcXsyqIaH26ey8EYV2VV3cwq7C+fddOLKtXO7FY1+bkYlYh3/1uel9lxYqm2Lx5Kan06FFcucyqwcnFrMzuuScllZ/+tCk2ZUpKKoMHt32dWVfiPhezMnnlFdh+++axf/93uPLKYspjViQnF7ONtGbN2s1cPXumaVzMuisnF7ON0NpU92vWeAp8M/e5mG2Az3527QSyeHHqV1nvxDJ5MgwZkoaUDRmS9s06OScXs/Vw/fUpefymZCWiBx9MSaV0yeEOmzwZxo2DOXPSTebMSftOMNbJObmYdcDzz6ek8qUvNcXOOSflgwMP3Igbn3kmrFzZPLZyZYqbdWKFJBdJ/STdKulZSc9I+rCkrSVNk/R8/tk/nytJl0lqkPSEpP1K7jM2n/+8pLEl8f0lPZmvuSwvd2y1qoabhVatSkllt92aYrvvnpLKD39YhgfMnbt+cbNOoqiay0+BeyLiA8DewDPAacD0iBgKTM/7AEcAQ/NnHHAlgKStgbOAA4BhwFmNCSmf87WS60ZV4TvZhqjhZiEJevduHouAZ58t40N23HH94madRNWTi6StgIOBqwEi4p2IWAqMBibl0yYBR+Xt0cC1kTwM9JO0HTASmBYRiyNiCTANGJWP9Y2Ih/P6M9eW3MtqTQ02C+2//9qd8itXVmi6lvHjoU+f5rE+fVLcrBMrouayM7AI+JWkxyVdJWkLYGBEvJLPeRUYmLcHAfNKrp+fY+3F57cSX4ukcZLqJdUvWrRoI7+WbZAaaha68MKUVB57rCk2a1ZKKptvXqGHjhkDEybATjulh++0U9ofM6ZCDzSrjiKSS09gP+DKiNgXeJOmJjAAco2j4tP6RcSEiKiLiLoBAwZU+nHWmhpoFpoxI/1d//73m2ITJqSksueeVSjAmDHw0kvpBZmXXnJisS6hiOQyH5gfEY/k/VtJyea13KRF/rkwH18A7FBy/eAc
ay8+uJW41aICm4XeeisllWHDmmIjR6ak8rWvVfzxZl1a1ZNLRLwKzJO0ew6NAJ4GpgCNI77GAnfm7SnAcXnU2HBgWW4+mwocLql/7sg/HJiajy2XNDyPEjuu5F5WawpoFoqAD3xg7ZwWkSadNLONV9T0LycBkyX1Al4EjiclulsknQDMAT6Xz70b+ATQAKzM5xIRiyX9CJiRzzsnIhbn7W8CE4HNgd/lj9WqMWOq1hT0ve/BpZc2j73LpvTcaRBMHu8mKbMyUXjFIgDq6uqivr6+6GJYhfz61/C5zzWPvbH5YLZ+q6TFtE8fd6abrSdJMyOirmXcb+hbl/b006m1rTSxPP44xE5DmicWKHwItFlX4uRiXdLy5SmplI72uvba1K+yzz7U1BBos67IycW6lAjYaqv0aTRuXIqXzgtWC0OgzboyJxfrMr74xTQ92fLlaX+HHdKrI7/4RSsn+814s4pycrFOb8KE1ARWOh3Z3/+eWrjanLLUb8abVZRXorRO69FH4YADmsf+9jcYOrSDN6jiEGiz7sY1F+t0Fi5MlY3SxDJlSupX6XBiMbOKcnKxTmP16pRUBg5sip1xRkoqn/xkceUys7W5Wcw6hY9/HKZPb9qvq0sTTppZbXLNxWraeeel2kppYlm1yonFrNa55mI16X//Fw47rHls/nwY1OrKPGZWa5xcrKbMnZtGBZf6wx/g4IOLKY+ZbRg3i1lNePvtptdNGl18ceqsd2Ix63xcc7HC7bUXPPVU0/6RR8Jvf1tcecxs4zm5WGFOOQUuuqh5bPVq6NGjmPKYWfk4uVjV3XYbHH1089jrr8M22xRTHjMrv0L6XCS9JOlJSX+RVJ9jW0uaJun5/LN/jkvSZZIaJD0hab+S+4zN5z8vaWxJfP98/4Z8bVszTFkVPfNM6lcpTSwzZ6Z+FScWs66lyA79j0XEPiUrmJ0GTI+IocD0vA9wBDA0f8YBV0JKRsBZwAHAMOCsxoSUz/layXWjKv91rC0rVqSkssceTbGJE1NS2W+/Ni8zs06slkaLjQYm5e1JwFEl8WsjeRjoJ2k7YCQwLSIWR8QSYBowKh/rGxEPR1rD+dqSe1kVRcDWW0Pfvk2xE05I8bFj277OzDq/opJLAPdKmid6EoAAABA8SURBVClpXI4NjIhX8varQOMMUoOAeSXXzs+x9uLzW4mvRdI4SfWS6hctWrQx38daOO64tLbKkiVpf7vt0toqV11VbLnMrDqK6tA/KCIWSPonYJqkZ0sPRkRIikoXIiImABMA6urqKv687uDqq+GrX20eW7EC3ve+YspjZsUopOYSEQvyz4XA7aQ+k9dykxb558J8+gJgh5LLB+dYe/HBrcStgmbMSP0qpYnluedSE5gTi1n3U/XkImkLSVs2bgOHA7OAKUBjS/xY4M68PQU4Lo8aGw4sy81nU4HDJfXPHfmHA1PzseWShudRYseV3MvKbNGilFSGDWuK3XFHSiq77VZcucysWEU0iw0Ebs+jg3sCN0TEPZJmALdIOgGYA3wun3838AmgAVgJHA8QEYsl/QhonB/3nIhYnLe/CUwENgd+lz9WRu+9Bz1b/K/n1FPTLMZmZkoDqqyuri7q6+uLLkanMHIk3Htv0/6++8JjjxVXHjMrjqSZJa+U/EMtDUXumiZPhiFD0tCpIUPSfid1wQWpCaw0sbz9thOLma3N079U0uTJMG4crFyZ9ufMSfsAY8YUV671dN99MGJE89i8eTB4cOvnm5m55lJJZ57ZlFgarVyZ4p3AvHmpplKaWO6/P3XWO7GYWXucXCpp7tz1i9eIVatSUtlxx6bYBRekpHLIIYUVy8w6ESeXSir969yReA3Ye2/o3btp/4gjUlI55ZTiymRmnY+TSyWNHw99+jSP9emT4jXmBz9ItZUnnmiKrV4Nd99dXJnMrPNycqmkMWNgwoS0dm/jGr4TJtRUZ/7tt6eiXXBBU2zRolRb8aJdZrahnFwqbcwYeOmlNGvj
Sy9VL7GsYwj0c8+lpPKZzzTF6utTUtl22+oU0cy6Lg9F7oraGQK94lNjmk2BD2myya98pcplNLMuzcmlK2plCHSsXMnA40ayaE1T7Mtfhl/9qrpFM7PuwcmlK2ox1PkrXM2v+ArkxLLttrBwYWoWMzOrBPe5dEV5qPNExiIiJZZs+fKmmYzNzCrFyaULWnLGBZzS81KOZ+I/Ys/03pe4fjJbbllcucys+3By6UJWrYKLL4ZdTjuGi9/7Nntv+jRT+BSx0xA+cNUpNTUE2sy6Nve5dAERcMstcPrpMHs2HH44/OQnYu+99yCttWZmVl2F1Vwk9ZD0uKS78v7Okh6R1CDpZkm9cnyzvN+Qjw8pucfpOf6cpJEl8VE51iDptGp/t2r64x9h+HA49ljYckuYOjV99t676JKZWXdWZLPYd4BnSvbPBy6JiF2BJcAJOX4CsCTHL8nnIWkP4FhgT2AUcEVOWD2Ay4EjgD2AL+Rzu5TnnoOjjoKDD4b589OQ4sceS7UWM7OiFZJcJA0G/hW4Ku8LOBS4NZ8yCTgqb4/O++TjI/L5o4GbImJVRMwmLYM8LH8aIuLFiHgHuCmfW34FLAS2cCGceCLsuSdMnw7nngvPP5/eWfF0LWZWK4rqc7kU+AHQOHZpG2BpRKzO+/OBQXl7EDAPICJWS1qWzx8EPFxyz9Jr5rWIH1DuL1DthcBWroRLL01r1K9cmR511lkwcGDZH2VmttGqXnORdCSwMCJmVvvZrZRlnKR6SfWLFi1av4urtBDYe+/BxImw227p1oceCrNmwRVXOLGYWe0qolnsI8CnJL1EarI6FPgp0E9SY01qMLAgby8AdgDIx7cC3iiNt7imrfhaImJCRNRFRN2AAQPW71tUYSGwadNg//3h+ONh++3hD3+AO+6AD3ygbI8wM6uIqieXiDg9IgZHxBBSh/x9ETEGuB84Op82Frgzb0/J++Tj90VE5PixeTTZzsBQ4FFgBjA0jz7rlZ9R/vG4FVwI7IknYNSo1Dm/bBnceCM8/HDqvDcz6wxq6SXKU4H/kNRA6lO5OsevBrbJ8f8ATgOIiKeAW4CngXuAEyPivdxv8y1gKmk02i353PKqwEJgCxak2Yn32QceeQQuvBCefTYNM96klv5LmZmtg1IlwOrq6qK+vn79Lpo8OXWEzJ2baizjx29QZ/6KFfCTn8BFF6U+lm99K912663X+1ZmZlUlaWZE1LWM+w39jTFmzEaNDFu9Gn75Szj77DTE+POfhx//GN7//vIV0cysCE4uBYiA3/4WTj01NXsddBBMmQIHlH/AtJlZIdySX2UzZsAhh8Do0Wnl49tvhwcecGIxs67FyaVKXnoJ/u3fYNgweOYZuPzy9L7KUUd5bRUz63rcLFZhs2c39aH07g1nnJGaw1quY29m1pU4uVTIihWw117N36l8/nkYPLi4MpmZVYubxcpszRo4+uhUM2lMLJMmpU58JxYz6y6cXMrovPPSzMS33Zb2v/vdlGyOO67YcpmZVZubxcrgrrvgk59s2j/ooDQdfq9exZXJzKxITi4b6brrmmomvXqlhbvWdw5MM7Ouxs1iG2n33dOSwn/9K6xa5cRiZgauuWy0YcPgL38puhRmZrXFNRczMys7JxczMys7JxczMys7JxczMyu7qicXSb0lPSrpr5KekvSfOb6zpEckNUi6OS9RTF7G+OYcf0TSkJJ7nZ7jz0kaWRIflWMNkk6r9nc0M+vuiqi5rAIOjYi9gX2AUZKGA+cDl0TErsAS4IR8/gnAkhy/JJ+HpD2AY4E9gVHAFZJ6SOoBXA4cAewBfCGfa2ZmVVL15BLJ3/PupvkTwKHArTk+CTgqb4/O++TjIyQpx2+KiFURMRtoAIblT0NEvBgR7wA35XPNzKxKCulzyTWMvwALgWnAC8DSiFidT5kPDMrbg4B5APn4MmCb0niLa9qKt1aOcZLqJdUvWrSoHF/NzMwo6CXKiHgP2EdSP+B24AMFlWMC
MAFA0iJJczp46bbA6xUrWHm5rOXXWcoJLmuluKxNdmotWOgb+hGxVNL9wIeBfpJ65trJYGBBPm0BsAMwX1JPYCvgjZJ4o9Jr2oq3V5YOT9wiqT4i6jp6fpFc1vLrLOUEl7VSXNZ1K2K02IBcY0HS5sBhwDPA/cDR+bSxwJ15e0reJx+/LyIix4/No8l2BoYCjwIzgKF59FkvUqf/lMp/MzMza1REzWU7YFIe1bUJcEtE3CXpaeAmSecCjwNX5/OvBq6T1AAsJiULIuIpSbcATwOrgRNzcxuSvgVMBXoA10TEU9X7emZmVvXkEhFPAPu2En+RNNKrZfxt4Jg27jUeGN9K/G7g7o0ubNsmVPDe5eayll9nKSe4rJXisq6DUguTmZlZ+Xj6FzMzKzsnFzMzKzsnl/Ug6RpJCyXNKros7ZG0g6T7JT2d52/7TtFlaktbc83VsvwS8OOS7iq6LO2R9JKkJyX9RVJ90eVpj6R+km6V9KykZyR9uOgytUbS7vn32fhZLum7RZerNZK+l/8/NUvSjZJ6V/X57nPpOEkHA38Hro2IvYouT1skbQdsFxGPSdoSmAkcFRFPF1y0teSpfLaIiL9L2hT4E/CdiHi44KK1SdJ/AHVA34g4sujytEXSS0BdRNT8y36SJgF/jIir8isEfSJiadHlak8e8boAOCAiOvoCdlVIGkT6/9IeEfFWHll7d0RMrFYZXHNZDxHxAGk4dE2LiFci4rG8vYL0HlGrU+AUrZ255mqSpMHAvwJXFV2WrkLSVsDB5NcPIuKdWk8s2QjghVpLLCV6Apvnl8/7AC9X8+FOLl1cXqJgX+CRYkvStpZzzUVEzZYVuBT4AbCm6IJ0QAD3SpopaVzRhWnHzsAi4Fe5ufEqSVsUXagOOBa4sehCtCYiFgAXAnOBV4BlEXFvNcvg5NKFSXofcBvw3YhYXnR52hIR70XEPqSpeoZJqskmR0lHAgsjYmbRZemggyJiP9LyEyfmZt1a1BPYD7gyIvYF3gRqeh2m3HT3KeDXRZelNZL6k2aD3xnYHthC0herWQYnly4q91/cBkyOiN8UXZ6OyE0h95PW56lFHwE+lfsybgIOlXR9sUVqW/7XKxGxkDRB7FovKdeI+cD8khrrraRkU8uOAB6LiNeKLkgbPg7MjohFEfEu8BvgwGoWwMmlC8qd5FcDz0TExUWXpz1tzDX3bLGlal1EnB4RgyNiCKlJ5L6IqOq/BjtK0hZ5MAe5ielwoCZHOUbEq8A8Sbvn0AjStE617AvUaJNYNhcYLqlP/nswgtT3WjVOLutB0o3An4HdJc2XdMK6rinIR4Avkf5l3Thk8hNFF6oN2wH3S3qCNOnotIio6SG+ncRA4E+S/kqa0PV/IuKegsvUnpOAyfl/B/sAPy64PG3KyfowUm2gJuVa4K3AY8CTpL/1VZ0GxkORzcys7FxzMTOzsnNyMTOzsnNyMTOzsnNyMTOzsnNyMTOzsnNysW5L0iWlM9pKmirpqpL9i/IklR293+8l1XXgnOdKhojfumGl73CZtq/0M8xa4+Ri3dmD5LeWJW0CbAvsWXL8QOChjtwoz5DbUWMiYp/8OXo9rlsvknpGxMuVfIZZW5xcrDt7CGhcN2RP0hvsKyT1l7QZ8M/AY5JG5AkVn8xr+mwG/1gv5XxJjwHHNN5U0iaSJko6t6MFkXSnpOPy9tclTc7bv5f001zLmSVpWI5vkcvyaC7b6Bz/sqQpku4Dpksaorz+UJ4g9AJJMyQ9IenrOX5Ifk7jeiqT81vdSPqQpIeU1tt5VNKWbd3HrFTPogtgVpSIeFnSakk7kmopfyYtTfBhYBlNbzZPBEZExN8kXQt8gzQ7MsAbeXJIJP076f9Tk4FZETG+jUdPlvRW3p4WEd8HxgEPSpoNnAwMLzm/T0TskyeevAbYCziTNP3MV/L0OY9K+t98/n7AByNicZ4Vu9EJpNlxP5QT5IOSGmfK3ZeUYF8m1eg+IulR4Gbg8xEx
Q1Jf4K227hMRs9v/jVt34uRi3d1DpMRyIHAxKbkcSEouDwK7kyYA/Fs+fxJwIk3J5eYW9/sFcEs7iQVSs1izlSEj4jVJ/5c0ceenI6J03aAb8zkPSOqbk8nhpEk0T8nn9AZ2zNvTWlzf6HDgg5Iam8m2AoYC7wCPRsR8AKXlD4bk38ErETEjP395Pt7WfZxc7B+cXKy7a+x3+RdSs9g8Us1hOfCrDlz/Zov9h4CPSbooIt5ez7L8C/AGaYr0Ui3naApAwGcj4rnSA5IOaKVM/zgMnBQRU1tccwiwqiT0Hu3/bWj1Pmal3Odi3d1DwJHA4ryuzGKgH6lp7CHgOWCIpF3z+V8C/tDO/a4G7gZuUVoBsENyX8oRpOapUyTtXHL48/mcg0jNUcuAqcBJJX0j+3bgMVOBbygtx4Ck3dT+olzPAdtJ+lA+f8v8ndb3PtYNueZi3d2TpFFiN7SIva9x7XlJxwO/zn9YZwA/b++GEXGx0tK910kaExEtV60s7XN5nbRs8i+B43M/0MnANZIOzee8Lelx0hLQX8mxH5Ga5p7II91mk5Jke64iNXc9lpPSIuCodr7HO5I+D/y30nIIb5HWCVmv+1j35FmRzWqYpN8Dp7TsozGrdW4WMzOzsnPNxczMys41FzMzKzsnFzMzKzsnFzMzKzsnFzMzKzsnFzMzK7v/B05vbmlz8HU9AAAAAElFTkSuQmCC\n",
452 | "text/plain": [
453 | ""
454 | ]
455 | },
456 | "metadata": {
457 | "needs_background": "light"
458 | }
459 | }
460 | ]
461 | },
462 | {
463 | "cell_type": "code",
464 | "metadata": {
465 | "id": "ZEgDB1-sV5hr"
466 | },
467 | "source": [],
468 | "execution_count": null,
469 | "outputs": []
470 | }
471 | ]
472 | }
--------------------------------------------------------------------------------
/7_2_5_Building_Logistic_Regression_from_scratch_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "f5BLpBB_cv8y"
22 | },
23 | "source": [
24 | "**Logistic Regression:**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "SQA2GUq7cPXd"
31 | },
32 | "source": [
33 | "$$\\hat{Y} = \\frac{1}{1 + e^{-Z}}, \\qquad Z = wX + b$$"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {
39 | "id": "W0dxwE-Ubsdf"
40 | },
41 | "source": [
42 | "Y_hat --> predicted value\n",
43 | "\n",
44 | "X --> Input Variable\n",
45 | "\n",
46 | "w --> weight\n",
47 | "\n",
48 | "b --> bias"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {
54 | "id": "X8OJXGuPvDt2"
55 | },
56 | "source": [
57 | "**Gradient Descent:**\n",
58 | "\n",
59 | "Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n",
60 | "\n",
61 | "w = w - α*dw\n",
62 | "\n",
63 | "b = b - α*db"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {
69 | "id": "WSAfYP7WmECB"
70 | },
71 | "source": [
72 | "**Learning Rate:**\n",
73 | "\n",
74 | "Learning rate is a tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function."
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {
80 | "id": "s8AfHQz9cXRF"
81 | },
82 | "source": [
83 | "**Derivatives:**"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {
89 | "id": "kLUw3M-WcCwv"
90 | },
91 | "source": [
92 | "$$dw = \\frac{1}{m} X^{T} (\\hat{Y} - Y), \\qquad db = \\frac{1}{m} \\sum (\\hat{Y} - Y)$$"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {
98 | "id": "WxOpuBj_SqV-"
99 | },
100 | "source": [
101 | "Importing the Dependencies"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "metadata": {
107 | "id": "fdjRDi8wlgX6"
108 | },
109 | "source": [
110 | "# importing numpy library\n",
111 | "import numpy as np"
112 | ],
113 | "execution_count": null,
114 | "outputs": []
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {
119 | "id": "eTA-BwebLjdc"
120 | },
121 | "source": [
122 | "**Logistic Regression**"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "metadata": {
128 | "id": "YcqEjNwtKHPq"
129 | },
130 | "source": [
131 | "class Logistic_Regression():\n",
132 | "\n",
133 | "\n",
134 | " # declaring learning rate & number of iterations (Hyperparameters)\n",
135 | " def __init__(self, learning_rate, no_of_iterations):\n",
136 | "\n",
137 | " self.learning_rate = learning_rate\n",
138 | " self.no_of_iterations = no_of_iterations\n",
139 | "\n",
140 | "\n",
141 | "\n",
142 | " # fit function to train the model with dataset\n",
143 | " def fit(self, X, Y):\n",
144 | "\n",
145 | " # number of data points in the dataset (number of rows) --> m\n",
146 | " # number of input features in the dataset (number of columns) --> n\n",
147 | " self.m, self.n = X.shape\n",
148 | "\n",
149 | "\n",
150 | " # initializing the weight & bias values\n",
151 | "\n",
152 | " self.w = np.zeros(self.n)\n",
153 | " \n",
154 | " self.b = 0\n",
155 | "\n",
156 | " self.X = X\n",
157 | "\n",
158 | " self.Y = Y\n",
159 | "\n",
160 | "\n",
161 | " # implementing Gradient Descent for Optimization\n",
162 | "\n",
163 | " for i in range(self.no_of_iterations):\n",
164 | " self.update_weights()\n",
165 | "\n",
166 | "\n",
167 | "\n",
168 | " def update_weights(self):\n",
169 | "\n",
170 | " # Y_hat formula (sigmoid function)\n",
171 | "\n",
172 | " Y_hat = 1 / (1 + np.exp( - (self.X.dot(self.w) + self.b ) )) \n",
173 | "\n",
174 | "\n",
175 | " # derivatives\n",
176 | "\n",
177 | " dw = (1/self.m)*np.dot(self.X.T, (Y_hat - self.Y))\n",
178 | "\n",
179 | " db = (1/self.m)*np.sum(Y_hat - self.Y)\n",
180 | "\n",
181 | "\n",
182 | " # updating the weights & bias using gradient descent\n",
183 | "\n",
184 | " self.w = self.w - self.learning_rate * dw\n",
185 | "\n",
186 | " self.b = self.b - self.learning_rate * db\n",
187 | "\n",
188 | "\n",
189 | " # Sigmoid Equation & Decision Boundary\n",
190 | "\n",
191 | " def predict(self, X):\n",
192 | "\n",
193 | " Y_pred = 1 / (1 + np.exp( - (X.dot(self.w) + self.b ) )) \n",
194 | " Y_pred = np.where( Y_pred > 0.5, 1, 0)\n",
195 | " return Y_pred"
196 | ],
197 | "execution_count": null,
198 | "outputs": []
199 | },
200 | {
201 | "cell_type": "code",
202 | "metadata": {
203 | "id": "htiH07T_WL-Y"
204 | },
205 | "source": [],
206 | "execution_count": null,
207 | "outputs": []
208 | }
209 | ]
210 | }
--------------------------------------------------------------------------------
/7_2_6_Logistic_Regression_Implementation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "LnPbntVRnfvV"
22 | },
23 | "source": [
24 | "Importing the Dependencies"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "metadata": {
30 | "id": "-71UtHzNVWjB"
31 | },
32 | "source": [
33 | "import numpy as np\n",
34 | "import pandas as pd\n",
35 | "from sklearn.preprocessing import StandardScaler\n",
36 | "from sklearn.model_selection import train_test_split\n",
37 | "from sklearn.metrics import accuracy_score\n",
38 | "import Log_Reg"
39 | ],
40 | "execution_count": null,
41 | "outputs": []
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {
46 | "id": "bmfOfG8joBBy"
47 | },
48 | "source": [
49 | "Data Collection and Analysis\n",
50 | "\n",
51 | "PIMA Diabetes Dataset"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "metadata": {
57 | "id": "Xpw6Mj_pn_TL"
58 | },
59 | "source": [
60 | "# loading the diabetes dataset to a pandas DataFrame\n",
61 | "diabetes_dataset = pd.read_csv('/content/diabetes.csv') "
62 | ],
63 | "execution_count": null,
64 | "outputs": []
65 | },
66 | {
67 | "cell_type": "code",
68 | "metadata": {
69 | "colab": {
70 | "base_uri": "https://localhost:8080/",
71 | "height": 203
72 | },
73 | "id": "-tjO09ncovoh",
74 | "outputId": "669a7da3-b683-46d4-f419-31b97847248c"
75 | },
76 | "source": [
77 | "# printing the first 5 rows of the dataset\n",
78 | "diabetes_dataset.head()"
79 | ],
80 | "execution_count": null,
81 | "outputs": [
82 | {
83 | "output_type": "execute_result",
84 | "data": {
85 | "text/html": [
86 | "\n",
87 | "\n",
100 | "
\n",
101 | " \n",
102 | " \n",
103 | " | \n",
104 | " Pregnancies | \n",
105 | " Glucose | \n",
106 | " BloodPressure | \n",
107 | " SkinThickness | \n",
108 | " Insulin | \n",
109 | " BMI | \n",
110 | " DiabetesPedigreeFunction | \n",
111 | " Age | \n",
112 | " Outcome | \n",
113 | "
\n",
114 | " \n",
115 | " \n",
116 | " \n",
117 | " 0 | \n",
118 | " 6 | \n",
119 | " 148 | \n",
120 | " 72 | \n",
121 | " 35 | \n",
122 | " 0 | \n",
123 | " 33.6 | \n",
124 | " 0.627 | \n",
125 | " 50 | \n",
126 | " 1 | \n",
127 | "
\n",
128 | " \n",
129 | " 1 | \n",
130 | " 1 | \n",
131 | " 85 | \n",
132 | " 66 | \n",
133 | " 29 | \n",
134 | " 0 | \n",
135 | " 26.6 | \n",
136 | " 0.351 | \n",
137 | " 31 | \n",
138 | " 0 | \n",
139 | "
\n",
140 | " \n",
141 | " 2 | \n",
142 | " 8 | \n",
143 | " 183 | \n",
144 | " 64 | \n",
145 | " 0 | \n",
146 | " 0 | \n",
147 | " 23.3 | \n",
148 | " 0.672 | \n",
149 | " 32 | \n",
150 | " 1 | \n",
151 | "
\n",
152 | " \n",
153 | " 3 | \n",
154 | " 1 | \n",
155 | " 89 | \n",
156 | " 66 | \n",
157 | " 23 | \n",
158 | " 94 | \n",
159 | " 28.1 | \n",
160 | " 0.167 | \n",
161 | " 21 | \n",
162 | " 0 | \n",
163 | "
\n",
164 | " \n",
165 | " 4 | \n",
166 | " 0 | \n",
167 | " 137 | \n",
168 | " 40 | \n",
169 | " 35 | \n",
170 | " 168 | \n",
171 | " 43.1 | \n",
172 | " 2.288 | \n",
173 | " 33 | \n",
174 | " 1 | \n",
175 | "
\n",
176 | " \n",
177 | "
\n",
178 | "
"
179 | ],
180 | "text/plain": [
181 | " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n",
182 | "0 6 148 72 ... 0.627 50 1\n",
183 | "1 1 85 66 ... 0.351 31 0\n",
184 | "2 8 183 64 ... 0.672 32 1\n",
185 | "3 1 89 66 ... 0.167 21 0\n",
186 | "4 0 137 40 ... 2.288 33 1\n",
187 | "\n",
188 | "[5 rows x 9 columns]"
189 | ]
190 | },
191 | "metadata": {},
192 | "execution_count": 4
193 | }
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "metadata": {
199 | "colab": {
200 | "base_uri": "https://localhost:8080/"
201 | },
202 | "id": "lynParo6pEMB",
203 | "outputId": "0d3deba0-3071-4206-c2e9-fd94c075ab1c"
204 | },
205 | "source": [
206 | "# number of rows and Columns in this dataset\n",
207 | "diabetes_dataset.shape"
208 | ],
209 | "execution_count": null,
210 | "outputs": [
211 | {
212 | "output_type": "execute_result",
213 | "data": {
214 | "text/plain": [
215 | "(768, 9)"
216 | ]
217 | },
218 | "metadata": {},
219 | "execution_count": 5
220 | }
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "metadata": {
226 | "colab": {
227 | "base_uri": "https://localhost:8080/",
228 | "height": 295
229 | },
230 | "id": "3NDJOlrEpmoL",
231 | "outputId": "18599c20-1e7c-424f-ece5-c4d17bc9a343"
232 | },
233 | "source": [
234 | "# getting the statistical measures of the data\n",
235 | "diabetes_dataset.describe()"
236 | ],
237 | "execution_count": null,
238 | "outputs": [
239 | {
240 | "output_type": "execute_result",
241 | "data": {
242 | "text/html": [
243 | "\n",
244 | "\n",
257 | "
\n",
258 | " \n",
259 | " \n",
260 | " | \n",
261 | " Pregnancies | \n",
262 | " Glucose | \n",
263 | " BloodPressure | \n",
264 | " SkinThickness | \n",
265 | " Insulin | \n",
266 | " BMI | \n",
267 | " DiabetesPedigreeFunction | \n",
268 | " Age | \n",
269 | " Outcome | \n",
270 | "
\n",
271 | " \n",
272 | " \n",
273 | " \n",
274 | " count | \n",
275 | " 768.000000 | \n",
276 | " 768.000000 | \n",
277 | " 768.000000 | \n",
278 | " 768.000000 | \n",
279 | " 768.000000 | \n",
280 | " 768.000000 | \n",
281 | " 768.000000 | \n",
282 | " 768.000000 | \n",
283 | " 768.000000 | \n",
284 | "
\n",
285 | " \n",
286 | " mean | \n",
287 | " 3.845052 | \n",
288 | " 120.894531 | \n",
289 | " 69.105469 | \n",
290 | " 20.536458 | \n",
291 | " 79.799479 | \n",
292 | " 31.992578 | \n",
293 | " 0.471876 | \n",
294 | " 33.240885 | \n",
295 | " 0.348958 | \n",
296 | "
\n",
297 | " \n",
298 | " std | \n",
299 | " 3.369578 | \n",
300 | " 31.972618 | \n",
301 | " 19.355807 | \n",
302 | " 15.952218 | \n",
303 | " 115.244002 | \n",
304 | " 7.884160 | \n",
305 | " 0.331329 | \n",
306 | " 11.760232 | \n",
307 | " 0.476951 | \n",
308 | "
\n",
309 | " \n",
310 | " min | \n",
311 | " 0.000000 | \n",
312 | " 0.000000 | \n",
313 | " 0.000000 | \n",
314 | " 0.000000 | \n",
315 | " 0.000000 | \n",
316 | " 0.000000 | \n",
317 | " 0.078000 | \n",
318 | " 21.000000 | \n",
319 | " 0.000000 | \n",
320 | "
\n",
321 | " \n",
322 | " 25% | \n",
323 | " 1.000000 | \n",
324 | " 99.000000 | \n",
325 | " 62.000000 | \n",
326 | " 0.000000 | \n",
327 | " 0.000000 | \n",
328 | " 27.300000 | \n",
329 | " 0.243750 | \n",
330 | " 24.000000 | \n",
331 | " 0.000000 | \n",
332 | "
\n",
333 | " \n",
334 | " 50% | \n",
335 | " 3.000000 | \n",
336 | " 117.000000 | \n",
337 | " 72.000000 | \n",
338 | " 23.000000 | \n",
339 | " 30.500000 | \n",
340 | " 32.000000 | \n",
341 | " 0.372500 | \n",
342 | " 29.000000 | \n",
343 | " 0.000000 | \n",
344 | "
\n",
345 | " \n",
346 | " 75% | \n",
347 | " 6.000000 | \n",
348 | " 140.250000 | \n",
349 | " 80.000000 | \n",
350 | " 32.000000 | \n",
351 | " 127.250000 | \n",
352 | " 36.600000 | \n",
353 | " 0.626250 | \n",
354 | " 41.000000 | \n",
355 | " 1.000000 | \n",
356 | "
\n",
357 | " \n",
358 | " max | \n",
359 | " 17.000000 | \n",
360 | " 199.000000 | \n",
361 | " 122.000000 | \n",
362 | " 99.000000 | \n",
363 | " 846.000000 | \n",
364 | " 67.100000 | \n",
365 | " 2.420000 | \n",
366 | " 81.000000 | \n",
367 | " 1.000000 | \n",
368 | "
\n",
369 | " \n",
370 | "
\n",
371 | "
"
372 | ],
373 | "text/plain": [
374 | " Pregnancies Glucose ... Age Outcome\n",
375 | "count 768.000000 768.000000 ... 768.000000 768.000000\n",
376 | "mean 3.845052 120.894531 ... 33.240885 0.348958\n",
377 | "std 3.369578 31.972618 ... 11.760232 0.476951\n",
378 | "min 0.000000 0.000000 ... 21.000000 0.000000\n",
379 | "25% 1.000000 99.000000 ... 24.000000 0.000000\n",
380 | "50% 3.000000 117.000000 ... 29.000000 0.000000\n",
381 | "75% 6.000000 140.250000 ... 41.000000 1.000000\n",
382 | "max 17.000000 199.000000 ... 81.000000 1.000000\n",
383 | "\n",
384 | "[8 rows x 9 columns]"
385 | ]
386 | },
387 | "metadata": {},
388 | "execution_count": 6
389 | }
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "metadata": {
395 | "colab": {
396 | "base_uri": "https://localhost:8080/"
397 | },
398 | "id": "LrpHzaGpp5dQ",
399 | "outputId": "7405a173-2a57-46f0-da2a-6f0cd9699a28"
400 | },
401 | "source": [
402 | "diabetes_dataset['Outcome'].value_counts()"
403 | ],
404 | "execution_count": null,
405 | "outputs": [
406 | {
407 | "output_type": "execute_result",
408 | "data": {
409 | "text/plain": [
410 | "0 500\n",
411 | "1 268\n",
412 | "Name: Outcome, dtype: int64"
413 | ]
414 | },
415 | "metadata": {},
416 | "execution_count": 7
417 | }
418 | ]
419 | },
420 | {
421 | "cell_type": "markdown",
422 | "metadata": {
423 | "id": "cB1qRaNcqeh5"
424 | },
425 | "source": [
426 | "0 --> Non-Diabetic\n",
427 | "\n",
428 | "1 --> Diabetic"
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "metadata": {
434 | "colab": {
435 | "base_uri": "https://localhost:8080/",
436 | "height": 142
437 | },
438 | "id": "I6MWR0k_qSCK",
439 | "outputId": "4b8c3c9e-452f-4772-83b4-dd17563df908"
440 | },
441 | "source": [
442 | "diabetes_dataset.groupby('Outcome').mean()"
443 | ],
444 | "execution_count": null,
445 | "outputs": [
446 | {
447 | "output_type": "execute_result",
448 | "data": {
449 | "text/html": [
450 | "\n",
451 | "\n",
464 | "
\n",
465 | " \n",
466 | " \n",
467 | " | \n",
468 | " Pregnancies | \n",
469 | " Glucose | \n",
470 | " BloodPressure | \n",
471 | " SkinThickness | \n",
472 | " Insulin | \n",
473 | " BMI | \n",
474 | " DiabetesPedigreeFunction | \n",
475 | " Age | \n",
476 | "
\n",
477 | " \n",
478 | " Outcome | \n",
479 | " | \n",
480 | " | \n",
481 | " | \n",
482 | " | \n",
483 | " | \n",
484 | " | \n",
485 | " | \n",
486 | " | \n",
487 | "
\n",
488 | " \n",
489 | " \n",
490 | " \n",
491 | " 0 | \n",
492 | " 3.298000 | \n",
493 | " 109.980000 | \n",
494 | " 68.184000 | \n",
495 | " 19.664000 | \n",
496 | " 68.792000 | \n",
497 | " 30.304200 | \n",
498 | " 0.429734 | \n",
499 | " 31.190000 | \n",
500 | "
\n",
501 | " \n",
502 | " 1 | \n",
503 | " 4.865672 | \n",
504 | " 141.257463 | \n",
505 | " 70.824627 | \n",
506 | " 22.164179 | \n",
507 | " 100.335821 | \n",
508 | " 35.142537 | \n",
509 | " 0.550500 | \n",
510 | " 37.067164 | \n",
511 | "
\n",
512 | " \n",
513 | "
\n",
514 | "
"
515 | ],
516 | "text/plain": [
517 | " Pregnancies Glucose ... DiabetesPedigreeFunction Age\n",
518 | "Outcome ... \n",
519 | "0 3.298000 109.980000 ... 0.429734 31.190000\n",
520 | "1 4.865672 141.257463 ... 0.550500 37.067164\n",
521 | "\n",
522 | "[2 rows x 8 columns]"
523 | ]
524 | },
525 | "metadata": {},
526 | "execution_count": 8
527 | }
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "metadata": {
533 | "id": "RoDW7l9mqqHZ"
534 | },
535 | "source": [
536 | "# separating the data and labels\n",
537 | "features = diabetes_dataset.drop(columns = 'Outcome', axis=1)\n",
538 | "target = diabetes_dataset['Outcome']"
539 | ],
540 | "execution_count": null,
541 | "outputs": []
542 | },
543 | {
544 | "cell_type": "code",
545 | "metadata": {
546 | "colab": {
547 | "base_uri": "https://localhost:8080/"
548 | },
549 | "id": "3eiRW9M9raMm",
550 | "outputId": "28d35af9-2175-4da0-9f1b-b5afd1eb6361"
551 | },
552 | "source": [
553 | "print(features)"
554 | ],
555 | "execution_count": null,
556 | "outputs": [
557 | {
558 | "output_type": "stream",
559 | "name": "stdout",
560 | "text": [
561 | " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n",
562 | "0 6 148 72 ... 33.6 0.627 50\n",
563 | "1 1 85 66 ... 26.6 0.351 31\n",
564 | "2 8 183 64 ... 23.3 0.672 32\n",
565 | "3 1 89 66 ... 28.1 0.167 21\n",
566 | "4 0 137 40 ... 43.1 2.288 33\n",
567 | ".. ... ... ... ... ... ... ...\n",
568 | "763 10 101 76 ... 32.9 0.171 63\n",
569 | "764 2 122 70 ... 36.8 0.340 27\n",
570 | "765 5 121 72 ... 26.2 0.245 30\n",
571 | "766 1 126 60 ... 30.1 0.349 47\n",
572 | "767 1 93 70 ... 30.4 0.315 23\n",
573 | "\n",
574 | "[768 rows x 8 columns]\n"
575 | ]
576 | }
577 | ]
578 | },
579 | {
580 | "cell_type": "code",
581 | "metadata": {
582 | "colab": {
583 | "base_uri": "https://localhost:8080/"
584 | },
585 | "id": "AoxgTJAMrcCl",
586 | "outputId": "adbfa8fc-dab2-4bf7-9db5-a0fbe99d958c"
587 | },
588 | "source": [
589 | "print(target)"
590 | ],
591 | "execution_count": null,
592 | "outputs": [
593 | {
594 | "output_type": "stream",
595 | "name": "stdout",
596 | "text": [
597 | "0 1\n",
598 | "1 0\n",
599 | "2 1\n",
600 | "3 0\n",
601 | "4 1\n",
602 | " ..\n",
603 | "763 0\n",
604 | "764 0\n",
605 | "765 0\n",
606 | "766 1\n",
607 | "767 0\n",
608 | "Name: Outcome, Length: 768, dtype: int64\n"
609 | ]
610 | }
611 | ]
612 | },
613 | {
614 | "cell_type": "markdown",
615 | "metadata": {
616 | "id": "umAbo_kqrlzI"
617 | },
618 | "source": [
619 | "Data Standardization"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "metadata": {
625 | "id": "njfM5X60rgnc"
626 | },
627 | "source": [
628 | "scaler = StandardScaler()"
629 | ],
630 | "execution_count": null,
631 | "outputs": []
632 | },
633 | {
634 | "cell_type": "code",
635 | "metadata": {
636 | "colab": {
637 | "base_uri": "https://localhost:8080/"
638 | },
639 | "id": "g0ai5ARbr53p",
640 | "outputId": "29354d62-9f72-4371-b3e6-c63dc88f4b42"
641 | },
642 | "source": [
643 | "scaler.fit(features)"
644 | ],
645 | "execution_count": null,
646 | "outputs": [
647 | {
648 | "output_type": "execute_result",
649 | "data": {
650 | "text/plain": [
651 | "StandardScaler(copy=True, with_mean=True, with_std=True)"
652 | ]
653 | },
654 | "metadata": {},
655 | "execution_count": 13
656 | }
657 | ]
658 | },
659 | {
660 | "cell_type": "code",
661 | "metadata": {
662 | "id": "FHxNwPuZr-kD"
663 | },
664 | "source": [
665 | "standardized_data = scaler.transform(features)"
666 | ],
667 | "execution_count": null,
668 | "outputs": []
669 | },
670 | {
671 | "cell_type": "code",
672 | "metadata": {
673 | "colab": {
674 | "base_uri": "https://localhost:8080/"
675 | },
676 | "id": "fjMwZ5x6sPUJ",
677 | "outputId": "b667e6d2-0e13-4247-a381-565f74273c0f"
678 | },
679 | "source": [
680 | "print(standardized_data)"
681 | ],
682 | "execution_count": null,
683 | "outputs": [
684 | {
685 | "output_type": "stream",
686 | "name": "stdout",
687 | "text": [
688 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
689 | " 1.4259954 ]\n",
690 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
691 | " -0.19067191]\n",
692 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
693 | " -0.10558415]\n",
694 | " ...\n",
695 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
696 | " -0.27575966]\n",
697 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
698 | " 1.17073215]\n",
699 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
700 | " -0.87137393]]\n"
701 | ]
702 | }
703 | ]
704 | },
705 | {
706 | "cell_type": "code",
707 | "metadata": {
708 | "id": "ZxWSl4SGsRjE"
709 | },
710 | "source": [
711 | "features = standardized_data\n",
712 | "target = diabetes_dataset['Outcome']"
713 | ],
714 | "execution_count": null,
715 | "outputs": []
716 | },
717 | {
718 | "cell_type": "code",
719 | "metadata": {
720 | "colab": {
721 | "base_uri": "https://localhost:8080/"
722 | },
723 | "id": "lhJF_7QjsjmP",
724 | "outputId": "0cf50bcb-c105-455d-8067-1102b261f1a6"
725 | },
726 | "source": [
727 | "print(features)\n",
728 | "print(target)"
729 | ],
730 | "execution_count": null,
731 | "outputs": [
732 | {
733 | "output_type": "stream",
734 | "name": "stdout",
735 | "text": [
736 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
737 | " 1.4259954 ]\n",
738 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
739 | " -0.19067191]\n",
740 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
741 | " -0.10558415]\n",
742 | " ...\n",
743 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
744 | " -0.27575966]\n",
745 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
746 | " 1.17073215]\n",
747 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
748 | " -0.87137393]]\n",
749 | "0 1\n",
750 | "1 0\n",
751 | "2 1\n",
752 | "3 0\n",
753 | "4 1\n",
754 | " ..\n",
755 | "763 0\n",
756 | "764 0\n",
757 | "765 0\n",
758 | "766 1\n",
759 | "767 0\n",
760 | "Name: Outcome, Length: 768, dtype: int64\n"
761 | ]
762 | }
763 | ]
764 | },
765 | {
766 | "cell_type": "markdown",
767 | "metadata": {
768 | "id": "gHciEFkxsoQP"
769 | },
770 | "source": [
771 | "Train Test Split"
772 | ]
773 | },
774 | {
775 | "cell_type": "code",
776 | "metadata": {
777 | "id": "AEfKGj_yslvD"
778 | },
779 | "source": [
780 | "X_train, X_test, Y_train, Y_test = train_test_split(features,target, test_size = 0.2, random_state=2)"
781 | ],
782 | "execution_count": null,
783 | "outputs": []
784 | },
785 | {
786 | "cell_type": "code",
787 | "metadata": {
788 | "colab": {
789 | "base_uri": "https://localhost:8080/"
790 | },
791 | "id": "DR05T-o0t3FQ",
792 | "outputId": "8ded781c-969b-46c4-9da6-7755b35490ca"
793 | },
794 | "source": [
795 | "print(features.shape, X_train.shape, X_test.shape)"
796 | ],
797 | "execution_count": null,
798 | "outputs": [
799 | {
800 | "output_type": "stream",
801 | "name": "stdout",
802 | "text": [
803 | "(768, 8) (614, 8) (154, 8)\n"
804 | ]
805 | }
806 | ]
807 | },
808 | {
809 | "cell_type": "markdown",
810 | "metadata": {
811 | "id": "ElJ3tkOtuC_n"
812 | },
813 | "source": [
814 | "Training the Model"
815 | ]
816 | },
817 | {
818 | "cell_type": "code",
819 | "metadata": {
820 | "id": "5szLWHlNt9xc"
821 | },
822 | "source": [
823 | "classifier = Log_Reg.Logistic_Regression(learning_rate=0.01, no_of_iterations=1000)"
824 | ],
825 | "execution_count": null,
826 | "outputs": []
827 | },
828 | {
829 | "cell_type": "code",
830 | "metadata": {
831 | "id": "ncJWY_7suPAb"
832 | },
833 | "source": [
834 | "# training the Logistic Regression classifier\n",
835 | "classifier.fit(X_train, Y_train)"
836 | ],
837 | "execution_count": null,
838 | "outputs": []
839 | },
840 | {
841 | "cell_type": "markdown",
842 | "metadata": {
843 | "id": "UV4-CAfquiyP"
844 | },
845 | "source": [
846 | "Model Evaluation"
847 | ]
848 | },
849 | {
850 | "cell_type": "markdown",
851 | "metadata": {
852 | "id": "yhAjGPJWunXa"
853 | },
854 | "source": [
855 | "Accuracy Score"
856 | ]
857 | },
858 | {
859 | "cell_type": "code",
860 | "metadata": {
861 | "id": "fJLEPQK7ueXp"
862 | },
863 | "source": [
864 | "# accuracy score on the training data\n",
865 | "X_train_prediction = classifier.predict(X_train)\n",
866 | "training_data_accuracy = accuracy_score( Y_train, X_train_prediction)"
867 | ],
868 | "execution_count": null,
869 | "outputs": []
870 | },
871 | {
872 | "cell_type": "code",
873 | "metadata": {
874 | "colab": {
875 | "base_uri": "https://localhost:8080/"
876 | },
877 | "id": "mmJ22qhVvNwj",
878 | "outputId": "98f8fd1e-aa21-4942-a658-42dfecc35e15"
879 | },
880 | "source": [
881 | "print('Accuracy score of the training data : ', training_data_accuracy)"
882 | ],
883 | "execution_count": null,
884 | "outputs": [
885 | {
886 | "output_type": "stream",
887 | "name": "stdout",
888 | "text": [
889 | "Accuracy score of the training data : 0.7768729641693811\n"
890 | ]
891 | }
892 | ]
893 | },
894 | {
895 | "cell_type": "code",
896 | "metadata": {
897 | "id": "G2CICFMEvcCl"
898 | },
899 | "source": [
900 | "# accuracy score on the test data\n",
901 | "X_test_prediction = classifier.predict(X_test)\n",
902 | "test_data_accuracy = accuracy_score( Y_test, X_test_prediction)"
903 | ],
904 | "execution_count": null,
905 | "outputs": []
906 | },
907 | {
908 | "cell_type": "code",
909 | "metadata": {
910 | "colab": {
911 | "base_uri": "https://localhost:8080/"
912 | },
913 | "id": "i2GcW_t_vz7C",
914 | "outputId": "c359e2fc-f81c-48a7-e1f6-9d424412725e"
915 | },
916 | "source": [
917 | "print('Accuracy score of the test data : ', test_data_accuracy)"
918 | ],
919 | "execution_count": null,
920 | "outputs": [
921 | {
922 | "output_type": "stream",
923 | "name": "stdout",
924 | "text": [
925 | "Accuracy score of the test data : 0.7662337662337663\n"
926 | ]
927 | }
928 | ]
929 | },
930 | {
931 | "cell_type": "markdown",
932 | "metadata": {
933 | "id": "gq8ZX1xpwPF5"
934 | },
935 | "source": [
936 | "Making a Predictive System"
937 | ]
938 | },
939 | {
940 | "cell_type": "code",
941 | "metadata": {
942 | "colab": {
943 | "base_uri": "https://localhost:8080/"
944 | },
945 | "id": "U-ULRe4yv5tH",
946 | "outputId": "6a63ae70-016b-45d0-bc4f-6fba7952359e"
947 | },
948 | "source": [
949 | "input_data = (5,166,72,19,175,25.8,0.587,51)\n",
950 | "\n",
951 | "# changing the input_data to numpy array\n",
952 | "input_data_as_numpy_array = np.asarray(input_data)\n",
953 | "\n",
954 | "# reshape the array as we are predicting for one instance\n",
955 | "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n",
956 | "\n",
957 | "# standardize the input data\n",
958 | "std_data = scaler.transform(input_data_reshaped)\n",
959 | "print(std_data)\n",
960 | "\n",
961 | "prediction = classifier.predict(std_data)\n",
962 | "print(prediction)\n",
963 | "\n",
964 | "if (prediction[0] == 0):\n",
965 | " print('The person is not diabetic')\n",
966 | "else:\n",
967 | " print('The person is diabetic')"
968 | ],
969 | "execution_count": null,
970 | "outputs": [
971 | {
972 | "output_type": "stream",
973 | "name": "stdout",
974 | "text": [
975 | "[[ 0.3429808 1.41167241 0.14964075 -0.09637905 0.82661621 -0.78595734\n",
976 | " 0.34768723 1.51108316]]\n",
977 | "[1]\n",
978 | "The person is diabetic\n"
979 | ]
980 | }
981 | ]
982 | }
983 | ]
984 | }
--------------------------------------------------------------------------------
/7_3_6_Building_Support_Vector_Machine_Classifier_from_Scratch_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "OoQnMmCCBfTF"
22 | },
23 | "source": [
24 | "**SVM Classifier**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "VgNJ4FGKBkIu"
31 | },
32 | "source": [
33 | "Equation of the Hyperplane:\n",
34 | "\n",
35 | "**y = wx - b**"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {
41 | "id": "X8OJXGuPvDt2"
42 | },
43 | "source": [
44 | "**Gradient Descent:**\n",
45 | "\n",
46 | "Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n",
47 | "\n",
48 | "w = w - α*dw\n",
49 | "\n",
50 | "b = b - α*db"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {
56 | "id": "WSAfYP7WmECB"
57 | },
58 | "source": [
59 | "**Learning Rate:**\n",
60 | "\n",
61 | "Learning rate is a tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function."
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {
67 | "id": "E9nX8-OIPWnM"
68 | },
69 | "source": [
70 | "Importing the Dependencies"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "metadata": {
76 | "id": "tzuAfRbBGmTH"
77 | },
78 | "source": [
79 | "# importing numpy library\n",
80 | "import numpy as np"
81 | ],
82 | "execution_count": null,
83 | "outputs": []
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {
88 | "id": "p29I221pPhkY"
89 | },
90 | "source": [
91 | "Support Vector Machine Classifier"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "metadata": {
97 | "id": "x7sKawRDPd4m"
98 | },
99 | "source": [
100 | "class SVM_classifier():\n",
101 | "\n",
102 | "\n",
103 | " # initializing the hyperparameters\n",
104 | " def __init__(self, learning_rate, no_of_iterations, lambda_parameter):\n",
105 | "\n",
106 | " self.learning_rate = learning_rate\n",
107 | " self.no_of_iterations = no_of_iterations\n",
108 | " self.lambda_parameter = lambda_parameter\n",
109 | "\n",
110 | "\n",
111 | " \n",
112 | " # fitting the dataset to SVM Classifier\n",
113 | " def fit(self, X, Y):\n",
114 | "\n",
115 | " # m --> number of Data points --> number of rows\n",
116 | " # n --> number of input features --> number of columns\n",
117 | " self.m, self.n = X.shape\n",
118 | "\n",
119 | " # initializing the weight value and bias value\n",
120 | "\n",
121 | " self.w = np.zeros(self.n)\n",
122 | "\n",
123 | " self.b = 0\n",
124 | "\n",
125 | " self.X = X\n",
126 | "\n",
127 | " self.Y = Y\n",
128 | "\n",
129 | " # implementing Gradient Descent algorithm for Optimization\n",
130 | "\n",
131 | " for i in range(self.no_of_iterations):\n",
132 | " self.update_weights()\n",
133 | "\n",
134 | "\n",
135 | "\n",
136 | " # function for updating the weight and bias value\n",
137 | " def update_weights(self):\n",
138 | "\n",
139 | " # label encoding\n",
140 | " y_label = np.where(self.Y <= 0, -1, 1)\n",
141 | "\n",
142 | "\n",
143 | "\n",
144 | " # gradients ( dw, db)\n",
145 | " for index, x_i in enumerate(self.X):\n",
146 | "\n",
147 | " condition = y_label[index] * (np.dot(x_i, self.w) - self.b) >= 1\n",
148 | "\n",
149 | " if (condition == True):\n",
150 | "\n",
151 | " dw = 2 * self.lambda_parameter * self.w\n",
152 | " db = 0\n",
153 | "\n",
154 | " else:\n",
155 | "\n",
156 | " dw = 2 * self.lambda_parameter * self.w - np.dot(x_i, y_label[index])\n",
157 | " db = y_label[index]\n",
158 | "\n",
159 | "\n",
160 | " self.w = self.w - self.learning_rate * dw\n",
161 | "\n",
162 | " self.b = self.b - self.learning_rate * db\n",
163 | "\n",
164 | "\n",
165 | "\n",
166 | " # predict the label for a given input value\n",
167 | " def predict(self, X):\n",
168 | "\n",
169 | " output = np.dot(X, self.w) - self.b\n",
170 | " \n",
171 | " predicted_labels = np.sign(output)\n",
172 | "\n",
173 | " y_hat = np.where(predicted_labels <= -1, 0, 1)\n",
174 | "\n",
175 | " return y_hat\n",
176 | "\n",
177 | " \n",
178 | "\n"
179 | ],
180 | "execution_count": null,
181 | "outputs": []
182 | }
183 | ]
184 | }
--------------------------------------------------------------------------------
/7_3_7_Implementing_SVM_Classifier_from_Scratch_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "OoQnMmCCBfTF"
22 | },
23 | "source": [
24 | "**SVM Classifier**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "VgNJ4FGKBkIu"
31 | },
32 | "source": [
33 | "Equation of the Hyperplane:\n",
34 | "\n",
35 | "**y = wx - b**"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {
41 | "id": "X8OJXGuPvDt2"
42 | },
43 | "source": [
44 | "**Gradient Descent:**\n",
45 | "\n",
46 | "Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n",
47 | "\n",
48 | "w = w - α*dw\n",
49 | "\n",
50 | "b = b - α*db"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {
56 | "id": "WSAfYP7WmECB"
57 | },
58 | "source": [
59 | "**Learning Rate:**\n",
60 | "\n",
61 | "Learning rate is a tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function."
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {
67 | "id": "E9nX8-OIPWnM"
68 | },
69 | "source": [
70 | "Importing the Dependencies"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "metadata": {
76 | "id": "tzuAfRbBGmTH"
77 | },
78 | "source": [
79 | "# importing numpy library\n",
80 | "import numpy as np"
81 | ],
82 | "execution_count": null,
83 | "outputs": []
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {
88 | "id": "p29I221pPhkY"
89 | },
90 | "source": [
91 | "Support Vector Machine Classifier"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "metadata": {
97 | "id": "x7sKawRDPd4m"
98 | },
99 | "source": [
100 | "class SVM_classifier():\n",
101 | "\n",
102 | "\n",
103 | " # initializing the hyperparameters\n",
104 | " def __init__(self, learning_rate, no_of_iterations, lambda_parameter):\n",
105 | "\n",
106 | " self.learning_rate = learning_rate\n",
107 | " self.no_of_iterations = no_of_iterations\n",
108 | " self.lambda_parameter = lambda_parameter\n",
109 | "\n",
110 | "\n",
111 | " \n",
112 | " # fitting the dataset to SVM Classifier\n",
113 | " def fit(self, X, Y):\n",
114 | "\n",
115 | " # m --> number of Data points --> number of rows\n",
116 | " # n --> number of input features --> number of columns\n",
117 | " self.m, self.n = X.shape\n",
118 | "\n",
119 | " # initializing the weight value and bias value\n",
120 | "\n",
121 | " self.w = np.zeros(self.n)\n",
122 | "\n",
123 | " self.b = 0\n",
124 | "\n",
125 | " self.X = X\n",
126 | "\n",
127 | " self.Y = Y\n",
128 | "\n",
129 | " # implementing Gradient Descent algorithm for Optimization\n",
130 | "\n",
131 | " for i in range(self.no_of_iterations):\n",
132 | " self.update_weights()\n",
133 | "\n",
134 | "\n",
135 | "\n",
136 | " # function for updating the weight and bias value\n",
137 | " def update_weights(self):\n",
138 | "\n",
139 | " # label encoding\n",
140 | " y_label = np.where(self.Y <= 0, -1, 1)\n",
141 | "\n",
142 | "\n",
143 | "\n",
144 | " # gradients ( dw, db)\n",
145 | " for index, x_i in enumerate(self.X):\n",
146 | "\n",
147 | " condition = y_label[index] * (np.dot(x_i, self.w) - self.b) >= 1\n",
148 | "\n",
149 | " if (condition == True):\n",
150 | "\n",
151 | " dw = 2 * self.lambda_parameter * self.w\n",
152 | " db = 0\n",
153 | "\n",
154 | " else:\n",
155 | "\n",
156 | " dw = 2 * self.lambda_parameter * self.w - np.dot(x_i, y_label[index])\n",
157 | " db = y_label[index]\n",
158 | "\n",
159 | "\n",
160 | " self.w = self.w - self.learning_rate * dw\n",
161 | "\n",
162 | " self.b = self.b - self.learning_rate * db\n",
163 | "\n",
164 | "\n",
165 | "\n",
166 | " # predict the label for a given input value\n",
167 | " def predict(self, X):\n",
168 | "\n",
169 | " output = np.dot(X, self.w) - self.b\n",
170 | " \n",
171 | " predicted_labels = np.sign(output)\n",
172 | "\n",
173 | " y_hat = np.where(predicted_labels <= -1, 0, 1)\n",
174 | "\n",
175 | " return y_hat \n",
176 | "\n"
177 | ],
178 | "execution_count": null,
179 | "outputs": []
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {
184 | "id": "gJJikdNRaiRc"
185 | },
186 | "source": [
187 | "Importing the Dependencies"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "metadata": {
193 | "id": "jMfoWjDkUH_T"
194 | },
195 | "source": [
196 | "import pandas as pd\n",
197 | "from sklearn.preprocessing import StandardScaler\n",
198 | "from sklearn.model_selection import train_test_split\n",
199 | "from sklearn.metrics import accuracy_score"
200 | ],
201 | "execution_count": null,
202 | "outputs": []
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {
207 | "id": "kPKQ-j-gbFt6"
208 | },
209 | "source": [
210 | "Data Collection & Processing"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "metadata": {
216 | "id": "FNg5ycvObEhX"
217 | },
218 | "source": [
219 | "# loading the data from csv file to pandas dataframe\n",
220 | "diabetes_data = pd.read_csv('/content/diabetes.csv')"
221 | ],
222 | "execution_count": null,
223 | "outputs": []
224 | },
225 | {
226 | "cell_type": "code",
227 | "metadata": {
228 | "colab": {
229 | "base_uri": "https://localhost:8080/",
230 | "height": 202
231 | },
232 | "id": "qZAcUKKpbc-Z",
233 | "outputId": "b8cb5c83-2876-432f-b957-5bbd6f692990"
234 | },
235 | "source": [
236 | "# print the first 5 rows of the dataframe\n",
237 | "diabetes_data.head()"
238 | ],
239 | "execution_count": null,
240 | "outputs": [
241 | {
242 | "output_type": "execute_result",
243 | "data": {
244 | "text/html": [
245 | "\n",
246 | "\n",
259 | "
\n",
260 | " \n",
261 | " \n",
262 | " | \n",
263 | " Pregnancies | \n",
264 | " Glucose | \n",
265 | " BloodPressure | \n",
266 | " SkinThickness | \n",
267 | " Insulin | \n",
268 | " BMI | \n",
269 | " DiabetesPedigreeFunction | \n",
270 | " Age | \n",
271 | " Outcome | \n",
272 | "
\n",
273 | " \n",
274 | " \n",
275 | " \n",
276 | " 0 | \n",
277 | " 6 | \n",
278 | " 148 | \n",
279 | " 72 | \n",
280 | " 35 | \n",
281 | " 0 | \n",
282 | " 33.6 | \n",
283 | " 0.627 | \n",
284 | " 50 | \n",
285 | " 1 | \n",
286 | "
\n",
287 | " \n",
288 | " 1 | \n",
289 | " 1 | \n",
290 | " 85 | \n",
291 | " 66 | \n",
292 | " 29 | \n",
293 | " 0 | \n",
294 | " 26.6 | \n",
295 | " 0.351 | \n",
296 | " 31 | \n",
297 | " 0 | \n",
298 | "
\n",
299 | " \n",
300 | " 2 | \n",
301 | " 8 | \n",
302 | " 183 | \n",
303 | " 64 | \n",
304 | " 0 | \n",
305 | " 0 | \n",
306 | " 23.3 | \n",
307 | " 0.672 | \n",
308 | " 32 | \n",
309 | " 1 | \n",
310 | "
\n",
311 | " \n",
312 | " 3 | \n",
313 | " 1 | \n",
314 | " 89 | \n",
315 | " 66 | \n",
316 | " 23 | \n",
317 | " 94 | \n",
318 | " 28.1 | \n",
319 | " 0.167 | \n",
320 | " 21 | \n",
321 | " 0 | \n",
322 | "
\n",
323 | " \n",
324 | " 4 | \n",
325 | " 0 | \n",
326 | " 137 | \n",
327 | " 40 | \n",
328 | " 35 | \n",
329 | " 168 | \n",
330 | " 43.1 | \n",
331 | " 2.288 | \n",
332 | " 33 | \n",
333 | " 1 | \n",
334 | "
\n",
335 | " \n",
336 | "
\n",
337 | "
"
338 | ],
339 | "text/plain": [
340 | " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n",
341 | "0 6 148 72 ... 0.627 50 1\n",
342 | "1 1 85 66 ... 0.351 31 0\n",
343 | "2 8 183 64 ... 0.672 32 1\n",
344 | "3 1 89 66 ... 0.167 21 0\n",
345 | "4 0 137 40 ... 2.288 33 1\n",
346 | "\n",
347 | "[5 rows x 9 columns]"
348 | ]
349 | },
350 | "metadata": {},
351 | "execution_count": 5
352 | }
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "metadata": {
358 | "colab": {
359 | "base_uri": "https://localhost:8080/"
360 | },
361 | "id": "BZRfzPWqboAU",
362 | "outputId": "14fa6746-8444-4a12-842e-faf53cc74494"
363 | },
364 | "source": [
365 | "# number of rows and columns in the dataset\n",
366 | "diabetes_data.shape"
367 | ],
368 | "execution_count": null,
369 | "outputs": [
370 | {
371 | "output_type": "execute_result",
372 | "data": {
373 | "text/plain": [
374 | "(768, 9)"
375 | ]
376 | },
377 | "metadata": {},
378 | "execution_count": 6
379 | }
380 | ]
381 | },
382 | {
383 | "cell_type": "code",
384 | "metadata": {
385 | "colab": {
386 | "base_uri": "https://localhost:8080/",
387 | "height": 294
388 | },
389 | "id": "E_-QBPhgb2dT",
390 | "outputId": "1ca1eeee-98a7-4d22-8584-c501d632f774"
391 | },
392 | "source": [
393 | "# getting the statistical measures of the dataset\n",
394 | "diabetes_data.describe()"
395 | ],
396 | "execution_count": null,
397 | "outputs": [
398 | {
399 | "output_type": "execute_result",
400 | "data": {
401 | "text/html": [
402 | "\n",
403 | "\n",
416 | "
\n",
417 | " \n",
418 | " \n",
419 | " | \n",
420 | " Pregnancies | \n",
421 | " Glucose | \n",
422 | " BloodPressure | \n",
423 | " SkinThickness | \n",
424 | " Insulin | \n",
425 | " BMI | \n",
426 | " DiabetesPedigreeFunction | \n",
427 | " Age | \n",
428 | " Outcome | \n",
429 | "
\n",
430 | " \n",
431 | " \n",
432 | " \n",
433 | " count | \n",
434 | " 768.000000 | \n",
435 | " 768.000000 | \n",
436 | " 768.000000 | \n",
437 | " 768.000000 | \n",
438 | " 768.000000 | \n",
439 | " 768.000000 | \n",
440 | " 768.000000 | \n",
441 | " 768.000000 | \n",
442 | " 768.000000 | \n",
443 | "
\n",
444 | " \n",
445 | " mean | \n",
446 | " 3.845052 | \n",
447 | " 120.894531 | \n",
448 | " 69.105469 | \n",
449 | " 20.536458 | \n",
450 | " 79.799479 | \n",
451 | " 31.992578 | \n",
452 | " 0.471876 | \n",
453 | " 33.240885 | \n",
454 | " 0.348958 | \n",
455 | "
\n",
456 | " \n",
457 | " std | \n",
458 | " 3.369578 | \n",
459 | " 31.972618 | \n",
460 | " 19.355807 | \n",
461 | " 15.952218 | \n",
462 | " 115.244002 | \n",
463 | " 7.884160 | \n",
464 | " 0.331329 | \n",
465 | " 11.760232 | \n",
466 | " 0.476951 | \n",
467 | "
\n",
468 | " \n",
469 | " min | \n",
470 | " 0.000000 | \n",
471 | " 0.000000 | \n",
472 | " 0.000000 | \n",
473 | " 0.000000 | \n",
474 | " 0.000000 | \n",
475 | " 0.000000 | \n",
476 | " 0.078000 | \n",
477 | " 21.000000 | \n",
478 | " 0.000000 | \n",
479 | "
\n",
480 | " \n",
481 | " 25% | \n",
482 | " 1.000000 | \n",
483 | " 99.000000 | \n",
484 | " 62.000000 | \n",
485 | " 0.000000 | \n",
486 | " 0.000000 | \n",
487 | " 27.300000 | \n",
488 | " 0.243750 | \n",
489 | " 24.000000 | \n",
490 | " 0.000000 | \n",
491 | "
\n",
492 | " \n",
493 | " 50% | \n",
494 | " 3.000000 | \n",
495 | " 117.000000 | \n",
496 | " 72.000000 | \n",
497 | " 23.000000 | \n",
498 | " 30.500000 | \n",
499 | " 32.000000 | \n",
500 | " 0.372500 | \n",
501 | " 29.000000 | \n",
502 | " 0.000000 | \n",
503 | "
\n",
504 | " \n",
505 | " 75% | \n",
506 | " 6.000000 | \n",
507 | " 140.250000 | \n",
508 | " 80.000000 | \n",
509 | " 32.000000 | \n",
510 | " 127.250000 | \n",
511 | " 36.600000 | \n",
512 | " 0.626250 | \n",
513 | " 41.000000 | \n",
514 | " 1.000000 | \n",
515 | "
\n",
516 | " \n",
517 | " max | \n",
518 | " 17.000000 | \n",
519 | " 199.000000 | \n",
520 | " 122.000000 | \n",
521 | " 99.000000 | \n",
522 | " 846.000000 | \n",
523 | " 67.100000 | \n",
524 | " 2.420000 | \n",
525 | " 81.000000 | \n",
526 | " 1.000000 | \n",
527 | "
\n",
528 | " \n",
529 | "
\n",
530 | "
"
531 | ],
532 | "text/plain": [
533 | " Pregnancies Glucose ... Age Outcome\n",
534 | "count 768.000000 768.000000 ... 768.000000 768.000000\n",
535 | "mean 3.845052 120.894531 ... 33.240885 0.348958\n",
536 | "std 3.369578 31.972618 ... 11.760232 0.476951\n",
537 | "min 0.000000 0.000000 ... 21.000000 0.000000\n",
538 | "25% 1.000000 99.000000 ... 24.000000 0.000000\n",
539 | "50% 3.000000 117.000000 ... 29.000000 0.000000\n",
540 | "75% 6.000000 140.250000 ... 41.000000 1.000000\n",
541 | "max 17.000000 199.000000 ... 81.000000 1.000000\n",
542 | "\n",
543 | "[8 rows x 9 columns]"
544 | ]
545 | },
546 | "metadata": {},
547 | "execution_count": 7
548 | }
549 | ]
550 | },
551 | {
552 | "cell_type": "code",
553 | "metadata": {
554 | "colab": {
555 | "base_uri": "https://localhost:8080/"
556 | },
557 | "id": "1nv8PkGFcEUA",
558 | "outputId": "e23d9b70-ffd8-4e8d-8a3a-606290be791c"
559 | },
560 | "source": [
561 | "diabetes_data['Outcome'].value_counts()"
562 | ],
563 | "execution_count": null,
564 | "outputs": [
565 | {
566 | "output_type": "execute_result",
567 | "data": {
568 | "text/plain": [
569 | "0 500\n",
570 | "1 268\n",
571 | "Name: Outcome, dtype: int64"
572 | ]
573 | },
574 | "metadata": {},
575 | "execution_count": 8
576 | }
577 | ]
578 | },
579 | {
580 | "cell_type": "markdown",
581 | "metadata": {
582 | "id": "kbIx4JRwcVy8"
583 | },
584 | "source": [
585 | "0 --> Non-diabetic\n",
586 | "\n",
587 | "1 --> Diabetic"
588 | ]
589 | },
590 | {
591 | "cell_type": "code",
592 | "metadata": {
593 | "id": "It7yuRMZcQ2-"
594 | },
595 | "source": [
596 | "# separating the features and target\n",
597 | "\n",
598 | "features = diabetes_data.drop(columns='Outcome', axis=1)\n",
599 | "\n",
600 | "target = diabetes_data['Outcome']\n"
601 | ],
602 | "execution_count": null,
603 | "outputs": []
604 | },
605 | {
606 | "cell_type": "code",
607 | "metadata": {
608 | "colab": {
609 | "base_uri": "https://localhost:8080/"
610 | },
611 | "id": "Jnv9kg01c-Mp",
612 | "outputId": "6fab0f65-e442-4c9c-cc96-679afd8aed19"
613 | },
614 | "source": [
615 | "print(features)"
616 | ],
617 | "execution_count": null,
618 | "outputs": [
619 | {
620 | "output_type": "stream",
621 | "name": "stdout",
622 | "text": [
623 | " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n",
624 | "0 6 148 72 ... 33.6 0.627 50\n",
625 | "1 1 85 66 ... 26.6 0.351 31\n",
626 | "2 8 183 64 ... 23.3 0.672 32\n",
627 | "3 1 89 66 ... 28.1 0.167 21\n",
628 | "4 0 137 40 ... 43.1 2.288 33\n",
629 | ".. ... ... ... ... ... ... ...\n",
630 | "763 10 101 76 ... 32.9 0.171 63\n",
631 | "764 2 122 70 ... 36.8 0.340 27\n",
632 | "765 5 121 72 ... 26.2 0.245 30\n",
633 | "766 1 126 60 ... 30.1 0.349 47\n",
634 | "767 1 93 70 ... 30.4 0.315 23\n",
635 | "\n",
636 | "[768 rows x 8 columns]\n"
637 | ]
638 | }
639 | ]
640 | },
641 | {
642 | "cell_type": "code",
643 | "metadata": {
644 | "colab": {
645 | "base_uri": "https://localhost:8080/"
646 | },
647 | "id": "_5bphlQ7dBZF",
648 | "outputId": "eded8bf0-e36a-4124-874a-f222c9f866f2"
649 | },
650 | "source": [
651 | "print(target)"
652 | ],
653 | "execution_count": null,
654 | "outputs": [
655 | {
656 | "output_type": "stream",
657 | "name": "stdout",
658 | "text": [
659 | "0 1\n",
660 | "1 0\n",
661 | "2 1\n",
662 | "3 0\n",
663 | "4 1\n",
664 | " ..\n",
665 | "763 0\n",
666 | "764 0\n",
667 | "765 0\n",
668 | "766 1\n",
669 | "767 0\n",
670 | "Name: Outcome, Length: 768, dtype: int64\n"
671 | ]
672 | }
673 | ]
674 | },
675 | {
676 | "cell_type": "markdown",
677 | "metadata": {
678 | "id": "BkChVOzRdNUZ"
679 | },
680 | "source": [
681 | "Data Standardization"
682 | ]
683 | },
684 | {
685 | "cell_type": "code",
686 | "metadata": {
687 | "id": "6NmIffqFdEQJ"
688 | },
689 | "source": [
690 | "scaler = StandardScaler()"
691 | ],
692 | "execution_count": null,
693 | "outputs": []
694 | },
695 | {
696 | "cell_type": "code",
697 | "metadata": {
698 | "colab": {
699 | "base_uri": "https://localhost:8080/"
700 | },
701 | "id": "vMslWjlxdW6g",
702 | "outputId": "05d11c42-cd55-4b76-d2a5-91b8ae1e7512"
703 | },
704 | "source": [
705 | "scaler.fit(features)  # NOTE(review): fitted on the full dataset before the train/test split, so test rows influence the scaling; consider fitting on the training split only"
706 | ],
707 | "execution_count": null,
708 | "outputs": [
709 | {
710 | "output_type": "execute_result",
711 | "data": {
712 | "text/plain": [
713 | "StandardScaler(copy=True, with_mean=True, with_std=True)"
714 | ]
715 | },
716 | "metadata": {},
717 | "execution_count": 13
718 | }
719 | ]
720 | },
721 | {
722 | "cell_type": "code",
723 | "metadata": {
724 | "id": "ow-jh7Kkdfby"
725 | },
726 | "source": [
727 | "standardized_data = scaler.transform(features)"
728 | ],
729 | "execution_count": null,
730 | "outputs": []
731 | },
732 | {
733 | "cell_type": "code",
734 | "metadata": {
735 | "colab": {
736 | "base_uri": "https://localhost:8080/"
737 | },
738 | "id": "ar7sa82gdnmI",
739 | "outputId": "22d09ca7-fb21-4414-da4f-a6e7644ef94f"
740 | },
741 | "source": [
742 | "print(standardized_data)"
743 | ],
744 | "execution_count": null,
745 | "outputs": [
746 | {
747 | "output_type": "stream",
748 | "name": "stdout",
749 | "text": [
750 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
751 | " 1.4259954 ]\n",
752 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
753 | " -0.19067191]\n",
754 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
755 | " -0.10558415]\n",
756 | " ...\n",
757 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
758 | " -0.27575966]\n",
759 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
760 | " 1.17073215]\n",
761 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
762 | " -0.87137393]]\n"
763 | ]
764 | }
765 | ]
766 | },
767 | {
768 | "cell_type": "code",
769 | "metadata": {
770 | "id": "aMEkKDOodrUv"
771 | },
772 | "source": [
773 | "features = standardized_data\n",
774 | "target = diabetes_data['Outcome']"
775 | ],
776 | "execution_count": null,
777 | "outputs": []
778 | },
779 | {
780 | "cell_type": "code",
781 | "metadata": {
782 | "colab": {
783 | "base_uri": "https://localhost:8080/"
784 | },
785 | "id": "-xtmt3Ihd73k",
786 | "outputId": "34b67924-486d-4636-9c9d-3c253416d0eb"
787 | },
788 | "source": [
789 | "print(features)\n",
790 | "print(target)"
791 | ],
792 | "execution_count": null,
793 | "outputs": [
794 | {
795 | "output_type": "stream",
796 | "name": "stdout",
797 | "text": [
798 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
799 | " 1.4259954 ]\n",
800 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
801 | " -0.19067191]\n",
802 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
803 | " -0.10558415]\n",
804 | " ...\n",
805 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
806 | " -0.27575966]\n",
807 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
808 | " 1.17073215]\n",
809 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
810 | " -0.87137393]]\n",
811 | "0 1\n",
812 | "1 0\n",
813 | "2 1\n",
814 | "3 0\n",
815 | "4 1\n",
816 | " ..\n",
817 | "763 0\n",
818 | "764 0\n",
819 | "765 0\n",
820 | "766 1\n",
821 | "767 0\n",
822 | "Name: Outcome, Length: 768, dtype: int64\n"
823 | ]
824 | }
825 | ]
826 | },
827 | {
828 | "cell_type": "markdown",
829 | "metadata": {
830 | "id": "gh4qURDYeHmn"
831 | },
832 | "source": [
833 | "Train Test Split"
834 | ]
835 | },
836 | {
837 | "cell_type": "code",
838 | "metadata": {
839 | "id": "tj046cqTeA51"
840 | },
841 | "source": [
842 | "X_train, X_test, Y_train, Y_test = train_test_split(features, target, test_size=0.2, random_state = 2)"
843 | ],
844 | "execution_count": null,
845 | "outputs": []
846 | },
847 | {
848 | "cell_type": "code",
849 | "metadata": {
850 | "colab": {
851 | "base_uri": "https://localhost:8080/"
852 | },
853 | "id": "loJeizDZe6MH",
854 | "outputId": "ed98b559-99e7-401f-9b47-d80424c03a68"
855 | },
856 | "source": [
857 | "print(features.shape, X_train.shape, X_test.shape)"
858 | ],
859 | "execution_count": null,
860 | "outputs": [
861 | {
862 | "output_type": "stream",
863 | "name": "stdout",
864 | "text": [
865 | "(768, 8) (614, 8) (154, 8)\n"
866 | ]
867 | }
868 | ]
869 | },
870 | {
871 | "cell_type": "markdown",
872 | "metadata": {
873 | "id": "KDcTszgpfGtp"
874 | },
875 | "source": [
876 | "Training the Model"
877 | ]
878 | },
879 | {
880 | "cell_type": "markdown",
881 | "metadata": {
882 | "id": "uzteSNBnfImg"
883 | },
884 | "source": [
885 | "Support Vector Machine Classifier"
886 | ]
887 | },
888 | {
889 | "cell_type": "code",
890 | "metadata": {
891 | "id": "xfJmnwXdfAKP"
892 | },
893 | "source": [
894 | "classifier = SVM_classifier(learning_rate=0.001, no_of_iterations=1000, lambda_parameter=0.01)"
895 | ],
896 | "execution_count": null,
897 | "outputs": []
898 | },
899 | {
900 | "cell_type": "code",
901 | "metadata": {
902 | "id": "V5KLhNF5fhXT"
903 | },
904 | "source": [
905 | "# training the SVM classifier with training data\n",
906 | "classifier.fit(X_train, Y_train)"
907 | ],
908 | "execution_count": null,
909 | "outputs": []
910 | },
911 | {
912 | "cell_type": "markdown",
913 | "metadata": {
914 | "id": "YC0v_ZCYf6gV"
915 | },
916 | "source": [
917 | "Model Evaluation"
918 | ]
919 | },
920 | {
921 | "cell_type": "markdown",
922 | "metadata": {
923 | "id": "-IO-FIPqf8Vm"
924 | },
925 | "source": [
926 | "Accuracy Score"
927 | ]
928 | },
929 | {
930 | "cell_type": "code",
931 | "metadata": {
932 | "id": "lLt2so1Hf0Ua"
933 | },
934 | "source": [
935 | "# accuracy on training data\n",
936 | "X_train_prediction = classifier.predict(X_train)\n",
937 | "training_data_accuracy = accuracy_score(Y_train, X_train_prediction)"
938 | ],
939 | "execution_count": null,
940 | "outputs": []
941 | },
942 | {
943 | "cell_type": "code",
944 | "metadata": {
945 | "colab": {
946 | "base_uri": "https://localhost:8080/"
947 | },
948 | "id": "KULlET0dglkG",
949 | "outputId": "bd094ab4-0e3b-41ec-f1d4-6fb3afad5374"
950 | },
951 | "source": [
952 | "print('Accuracy score on training data = ', training_data_accuracy)"
953 | ],
954 | "execution_count": null,
955 | "outputs": [
956 | {
957 | "output_type": "stream",
958 | "name": "stdout",
959 | "text": [
960 | "Accuracy score on training data = 0.7768729641693811\n"
961 | ]
962 | }
963 | ]
964 | },
965 | {
966 | "cell_type": "code",
967 | "metadata": {
968 | "id": "QjRNsLYmg1oU"
969 | },
970 | "source": [
971 | "# accuracy on test data\n",
972 | "X_test_prediction = classifier.predict(X_test)\n",
973 | "test_data_accuracy = accuracy_score(Y_test, X_test_prediction)"
974 | ],
975 | "execution_count": null,
976 | "outputs": []
977 | },
978 | {
979 | "cell_type": "code",
980 | "metadata": {
981 | "colab": {
982 | "base_uri": "https://localhost:8080/"
983 | },
984 | "id": "jQfednYShTg9",
985 | "outputId": "973d29b6-9b43-4c0a-ba8c-83f28cb70822"
986 | },
987 | "source": [
988 | "print('Accuracy score on test data = ', test_data_accuracy)"
989 | ],
990 | "execution_count": null,
991 | "outputs": [
992 | {
993 | "output_type": "stream",
994 | "name": "stdout",
995 | "text": [
996 | "Accuracy score on test data = 0.7532467532467533\n"
997 | ]
998 | }
999 | ]
1000 | },
1001 | {
1002 | "cell_type": "markdown",
1003 | "metadata": {
1004 | "id": "Dia3-X4ih2KP"
1005 | },
1006 | "source": [
1007 | "Building a Predictive System"
1008 | ]
1009 | },
1010 | {
1011 | "cell_type": "code",
1012 | "metadata": {
1013 | "colab": {
1014 | "base_uri": "https://localhost:8080/"
1015 | },
1016 | "id": "Pw9LkxoKhY5A",
1017 | "outputId": "004813a2-b242-4e9b-cb6a-b4652f929b3c"
1018 | },
1019 | "source": [
1020 | "# one person's 8 feature values (assumed to follow the dataset's column order)\n",
1021 | "input_data = (5,166,72,19,175,25.8,0.587,51)\n",
1022 | "\n",
1023 | "# shape the single sample as one row: (1, number of features)\n",
1024 | "sample_row = np.asarray(input_data).reshape(1, -1)\n",
1025 | "\n",
1026 | "# standardizing the input data with the scaler fitted earlier in the notebook\n",
1027 | "std_data = scaler.transform(sample_row)\n",
1028 | "print(std_data)\n",
1029 | "\n",
1030 | "# predicted label: 0 --> non-diabetic, otherwise --> diabetic\n",
1031 | "prediction = classifier.predict(std_data)\n",
1032 | "print(prediction)\n",
1033 | "\n",
1034 | "is_diabetic = prediction[0] != 0\n",
1035 | "\n",
1036 | "if is_diabetic:\n",
1037 | "    print('The Person is diabetic')\n",
1038 | "else:\n",
1039 | "    print('The person is not diabetic')"
1040 | ],
1041 | "execution_count": null,
1042 | "outputs": [
1043 | {
1044 | "output_type": "stream",
1045 | "name": "stdout",
1046 | "text": [
1047 | "[[ 0.3429808 1.41167241 0.14964075 -0.09637905 0.82661621 -0.78595734\n",
1048 | " 0.34768723 1.51108316]]\n",
1049 | "[1]\n",
1050 | "The Person is diabetic\n"
1051 | ]
1052 | }
1053 | ]
1054 | },
1055 | {
1056 | "cell_type": "code",
1057 | "metadata": {
1058 | "id": "u6YcK7aEjHen"
1059 | },
1060 | "source": [],
1061 | "execution_count": null,
1062 | "outputs": []
1063 | }
1064 | ]
1065 | }
--------------------------------------------------------------------------------
/7_3_7_Implementing_SVM_from_Scratch.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "LnPbntVRnfvV"
22 | },
23 | "source": [
24 | "Importing the Dependencies"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "metadata": {
30 | "id": "-71UtHzNVWjB"
31 | },
32 | "source": [
33 | "import numpy as np\n",
34 | "import pandas as pd\n",
35 | "from sklearn.preprocessing import StandardScaler\n",
36 | "from sklearn.model_selection import train_test_split\n",
37 | "from sklearn.metrics import accuracy_score\n",
38 | "\n",
39 | "from SVM import SVM_classifier"
40 | ],
41 | "execution_count": null,
42 | "outputs": []
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {
47 | "id": "bmfOfG8joBBy"
48 | },
49 | "source": [
50 | "Data Collection and Analysis\n",
51 | "\n",
52 | "PIMA Diabetes Dataset"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "metadata": {
58 | "id": "Xpw6Mj_pn_TL"
59 | },
60 | "source": [
61 | "# loading the diabetes dataset to a pandas DataFrame\n",
62 | "diabetes_dataset = pd.read_csv('/content/diabetes.csv') "
63 | ],
64 | "execution_count": null,
65 | "outputs": []
66 | },
67 | {
68 | "cell_type": "code",
69 | "metadata": {
70 | "id": "-tjO09ncovoh",
71 | "colab": {
72 | "base_uri": "https://localhost:8080/",
73 | "height": 202
74 | },
75 | "outputId": "557afe56-5e13-42e4-a92b-0acf884e497f"
76 | },
77 | "source": [
78 | "# printing the first 5 rows of the dataset\n",
79 | "diabetes_dataset.head()"
80 | ],
81 | "execution_count": null,
82 | "outputs": [
83 | {
84 | "output_type": "execute_result",
85 | "data": {
86 | "text/html": [
87 | "\n",
88 | "\n",
101 | "
\n",
102 | " \n",
103 | " \n",
104 | " | \n",
105 | " Pregnancies | \n",
106 | " Glucose | \n",
107 | " BloodPressure | \n",
108 | " SkinThickness | \n",
109 | " Insulin | \n",
110 | " BMI | \n",
111 | " DiabetesPedigreeFunction | \n",
112 | " Age | \n",
113 | " Outcome | \n",
114 | "
\n",
115 | " \n",
116 | " \n",
117 | " \n",
118 | " 0 | \n",
119 | " 6 | \n",
120 | " 148 | \n",
121 | " 72 | \n",
122 | " 35 | \n",
123 | " 0 | \n",
124 | " 33.6 | \n",
125 | " 0.627 | \n",
126 | " 50 | \n",
127 | " 1 | \n",
128 | "
\n",
129 | " \n",
130 | " 1 | \n",
131 | " 1 | \n",
132 | " 85 | \n",
133 | " 66 | \n",
134 | " 29 | \n",
135 | " 0 | \n",
136 | " 26.6 | \n",
137 | " 0.351 | \n",
138 | " 31 | \n",
139 | " 0 | \n",
140 | "
\n",
141 | " \n",
142 | " 2 | \n",
143 | " 8 | \n",
144 | " 183 | \n",
145 | " 64 | \n",
146 | " 0 | \n",
147 | " 0 | \n",
148 | " 23.3 | \n",
149 | " 0.672 | \n",
150 | " 32 | \n",
151 | " 1 | \n",
152 | "
\n",
153 | " \n",
154 | " 3 | \n",
155 | " 1 | \n",
156 | " 89 | \n",
157 | " 66 | \n",
158 | " 23 | \n",
159 | " 94 | \n",
160 | " 28.1 | \n",
161 | " 0.167 | \n",
162 | " 21 | \n",
163 | " 0 | \n",
164 | "
\n",
165 | " \n",
166 | " 4 | \n",
167 | " 0 | \n",
168 | " 137 | \n",
169 | " 40 | \n",
170 | " 35 | \n",
171 | " 168 | \n",
172 | " 43.1 | \n",
173 | " 2.288 | \n",
174 | " 33 | \n",
175 | " 1 | \n",
176 | "
\n",
177 | " \n",
178 | "
\n",
179 | "
"
180 | ],
181 | "text/plain": [
182 | " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n",
183 | "0 6 148 72 ... 0.627 50 1\n",
184 | "1 1 85 66 ... 0.351 31 0\n",
185 | "2 8 183 64 ... 0.672 32 1\n",
186 | "3 1 89 66 ... 0.167 21 0\n",
187 | "4 0 137 40 ... 2.288 33 1\n",
188 | "\n",
189 | "[5 rows x 9 columns]"
190 | ]
191 | },
192 | "metadata": {},
193 | "execution_count": 3
194 | }
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "metadata": {
200 | "id": "lynParo6pEMB",
201 | "colab": {
202 | "base_uri": "https://localhost:8080/"
203 | },
204 | "outputId": "f1b034b0-3e4d-4960-bf05-dc86fc4f4536"
205 | },
206 | "source": [
207 | "# number of rows and Columns in this dataset\n",
208 | "diabetes_dataset.shape"
209 | ],
210 | "execution_count": null,
211 | "outputs": [
212 | {
213 | "output_type": "execute_result",
214 | "data": {
215 | "text/plain": [
216 | "(768, 9)"
217 | ]
218 | },
219 | "metadata": {},
220 | "execution_count": 4
221 | }
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "metadata": {
227 | "id": "3NDJOlrEpmoL",
228 | "colab": {
229 | "base_uri": "https://localhost:8080/",
230 | "height": 294
231 | },
232 | "outputId": "dfc38bb3-ca13-4794-b240-399899f2c7b5"
233 | },
234 | "source": [
235 | "# getting the statistical measures of the data\n",
236 | "diabetes_dataset.describe()"
237 | ],
238 | "execution_count": null,
239 | "outputs": [
240 | {
241 | "output_type": "execute_result",
242 | "data": {
243 | "text/html": [
244 | "\n",
245 | "\n",
258 | "
\n",
259 | " \n",
260 | " \n",
261 | " | \n",
262 | " Pregnancies | \n",
263 | " Glucose | \n",
264 | " BloodPressure | \n",
265 | " SkinThickness | \n",
266 | " Insulin | \n",
267 | " BMI | \n",
268 | " DiabetesPedigreeFunction | \n",
269 | " Age | \n",
270 | " Outcome | \n",
271 | "
\n",
272 | " \n",
273 | " \n",
274 | " \n",
275 | " count | \n",
276 | " 768.000000 | \n",
277 | " 768.000000 | \n",
278 | " 768.000000 | \n",
279 | " 768.000000 | \n",
280 | " 768.000000 | \n",
281 | " 768.000000 | \n",
282 | " 768.000000 | \n",
283 | " 768.000000 | \n",
284 | " 768.000000 | \n",
285 | "
\n",
286 | " \n",
287 | " mean | \n",
288 | " 3.845052 | \n",
289 | " 120.894531 | \n",
290 | " 69.105469 | \n",
291 | " 20.536458 | \n",
292 | " 79.799479 | \n",
293 | " 31.992578 | \n",
294 | " 0.471876 | \n",
295 | " 33.240885 | \n",
296 | " 0.348958 | \n",
297 | "
\n",
298 | " \n",
299 | " std | \n",
300 | " 3.369578 | \n",
301 | " 31.972618 | \n",
302 | " 19.355807 | \n",
303 | " 15.952218 | \n",
304 | " 115.244002 | \n",
305 | " 7.884160 | \n",
306 | " 0.331329 | \n",
307 | " 11.760232 | \n",
308 | " 0.476951 | \n",
309 | "
\n",
310 | " \n",
311 | " min | \n",
312 | " 0.000000 | \n",
313 | " 0.000000 | \n",
314 | " 0.000000 | \n",
315 | " 0.000000 | \n",
316 | " 0.000000 | \n",
317 | " 0.000000 | \n",
318 | " 0.078000 | \n",
319 | " 21.000000 | \n",
320 | " 0.000000 | \n",
321 | "
\n",
322 | " \n",
323 | " 25% | \n",
324 | " 1.000000 | \n",
325 | " 99.000000 | \n",
326 | " 62.000000 | \n",
327 | " 0.000000 | \n",
328 | " 0.000000 | \n",
329 | " 27.300000 | \n",
330 | " 0.243750 | \n",
331 | " 24.000000 | \n",
332 | " 0.000000 | \n",
333 | "
\n",
334 | " \n",
335 | " 50% | \n",
336 | " 3.000000 | \n",
337 | " 117.000000 | \n",
338 | " 72.000000 | \n",
339 | " 23.000000 | \n",
340 | " 30.500000 | \n",
341 | " 32.000000 | \n",
342 | " 0.372500 | \n",
343 | " 29.000000 | \n",
344 | " 0.000000 | \n",
345 | "
\n",
346 | " \n",
347 | " 75% | \n",
348 | " 6.000000 | \n",
349 | " 140.250000 | \n",
350 | " 80.000000 | \n",
351 | " 32.000000 | \n",
352 | " 127.250000 | \n",
353 | " 36.600000 | \n",
354 | " 0.626250 | \n",
355 | " 41.000000 | \n",
356 | " 1.000000 | \n",
357 | "
\n",
358 | " \n",
359 | " max | \n",
360 | " 17.000000 | \n",
361 | " 199.000000 | \n",
362 | " 122.000000 | \n",
363 | " 99.000000 | \n",
364 | " 846.000000 | \n",
365 | " 67.100000 | \n",
366 | " 2.420000 | \n",
367 | " 81.000000 | \n",
368 | " 1.000000 | \n",
369 | "
\n",
370 | " \n",
371 | "
\n",
372 | "
"
373 | ],
374 | "text/plain": [
375 | " Pregnancies Glucose ... Age Outcome\n",
376 | "count 768.000000 768.000000 ... 768.000000 768.000000\n",
377 | "mean 3.845052 120.894531 ... 33.240885 0.348958\n",
378 | "std 3.369578 31.972618 ... 11.760232 0.476951\n",
379 | "min 0.000000 0.000000 ... 21.000000 0.000000\n",
380 | "25% 1.000000 99.000000 ... 24.000000 0.000000\n",
381 | "50% 3.000000 117.000000 ... 29.000000 0.000000\n",
382 | "75% 6.000000 140.250000 ... 41.000000 1.000000\n",
383 | "max 17.000000 199.000000 ... 81.000000 1.000000\n",
384 | "\n",
385 | "[8 rows x 9 columns]"
386 | ]
387 | },
388 | "metadata": {},
389 | "execution_count": 5
390 | }
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "metadata": {
396 | "id": "LrpHzaGpp5dQ",
397 | "colab": {
398 | "base_uri": "https://localhost:8080/"
399 | },
400 | "outputId": "c5b6c6ec-fdc6-44dc-e12b-29e180a5caae"
401 | },
402 | "source": [
403 | "diabetes_dataset['Outcome'].value_counts()"
404 | ],
405 | "execution_count": null,
406 | "outputs": [
407 | {
408 | "output_type": "execute_result",
409 | "data": {
410 | "text/plain": [
411 | "0 500\n",
412 | "1 268\n",
413 | "Name: Outcome, dtype: int64"
414 | ]
415 | },
416 | "metadata": {},
417 | "execution_count": 6
418 | }
419 | ]
420 | },
421 | {
422 | "cell_type": "markdown",
423 | "metadata": {
424 | "id": "cB1qRaNcqeh5"
425 | },
426 | "source": [
427 | "0 --> Non-Diabetic\n",
428 | "\n",
429 | "1 --> Diabetic"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "metadata": {
435 | "id": "I6MWR0k_qSCK",
436 | "colab": {
437 | "base_uri": "https://localhost:8080/",
438 | "height": 141
439 | },
440 | "outputId": "15501b67-7e5c-4309-d83a-3f88c4c815d2"
441 | },
442 | "source": [
443 | "diabetes_dataset.groupby('Outcome').mean()"
444 | ],
445 | "execution_count": null,
446 | "outputs": [
447 | {
448 | "output_type": "execute_result",
449 | "data": {
450 | "text/html": [
451 | "\n",
452 | "\n",
465 | "
\n",
466 | " \n",
467 | " \n",
468 | " | \n",
469 | " Pregnancies | \n",
470 | " Glucose | \n",
471 | " BloodPressure | \n",
472 | " SkinThickness | \n",
473 | " Insulin | \n",
474 | " BMI | \n",
475 | " DiabetesPedigreeFunction | \n",
476 | " Age | \n",
477 | "
\n",
478 | " \n",
479 | " Outcome | \n",
480 | " | \n",
481 | " | \n",
482 | " | \n",
483 | " | \n",
484 | " | \n",
485 | " | \n",
486 | " | \n",
487 | " | \n",
488 | "
\n",
489 | " \n",
490 | " \n",
491 | " \n",
492 | " 0 | \n",
493 | " 3.298000 | \n",
494 | " 109.980000 | \n",
495 | " 68.184000 | \n",
496 | " 19.664000 | \n",
497 | " 68.792000 | \n",
498 | " 30.304200 | \n",
499 | " 0.429734 | \n",
500 | " 31.190000 | \n",
501 | "
\n",
502 | " \n",
503 | " 1 | \n",
504 | " 4.865672 | \n",
505 | " 141.257463 | \n",
506 | " 70.824627 | \n",
507 | " 22.164179 | \n",
508 | " 100.335821 | \n",
509 | " 35.142537 | \n",
510 | " 0.550500 | \n",
511 | " 37.067164 | \n",
512 | "
\n",
513 | " \n",
514 | "
\n",
515 | "
"
516 | ],
517 | "text/plain": [
518 | " Pregnancies Glucose ... DiabetesPedigreeFunction Age\n",
519 | "Outcome ... \n",
520 | "0 3.298000 109.980000 ... 0.429734 31.190000\n",
521 | "1 4.865672 141.257463 ... 0.550500 37.067164\n",
522 | "\n",
523 | "[2 rows x 8 columns]"
524 | ]
525 | },
526 | "metadata": {},
527 | "execution_count": 7
528 | }
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "metadata": {
534 | "id": "RoDW7l9mqqHZ"
535 | },
536 | "source": [
537 | "# separating the data and labels\n",
538 | "features = diabetes_dataset.drop(columns = 'Outcome', axis=1)\n",
539 | "target = diabetes_dataset['Outcome']"
540 | ],
541 | "execution_count": null,
542 | "outputs": []
543 | },
544 | {
545 | "cell_type": "code",
546 | "metadata": {
547 | "id": "3eiRW9M9raMm",
548 | "colab": {
549 | "base_uri": "https://localhost:8080/"
550 | },
551 | "outputId": "095bd8a7-0215-4b50-a1ca-564eded2ae94"
552 | },
553 | "source": [
554 | "print(features)"
555 | ],
556 | "execution_count": null,
557 | "outputs": [
558 | {
559 | "output_type": "stream",
560 | "name": "stdout",
561 | "text": [
562 | " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n",
563 | "0 6 148 72 ... 33.6 0.627 50\n",
564 | "1 1 85 66 ... 26.6 0.351 31\n",
565 | "2 8 183 64 ... 23.3 0.672 32\n",
566 | "3 1 89 66 ... 28.1 0.167 21\n",
567 | "4 0 137 40 ... 43.1 2.288 33\n",
568 | ".. ... ... ... ... ... ... ...\n",
569 | "763 10 101 76 ... 32.9 0.171 63\n",
570 | "764 2 122 70 ... 36.8 0.340 27\n",
571 | "765 5 121 72 ... 26.2 0.245 30\n",
572 | "766 1 126 60 ... 30.1 0.349 47\n",
573 | "767 1 93 70 ... 30.4 0.315 23\n",
574 | "\n",
575 | "[768 rows x 8 columns]\n"
576 | ]
577 | }
578 | ]
579 | },
580 | {
581 | "cell_type": "code",
582 | "metadata": {
583 | "id": "AoxgTJAMrcCl",
584 | "colab": {
585 | "base_uri": "https://localhost:8080/"
586 | },
587 | "outputId": "dfa0fc42-fee6-4731-8e97-03c28f2fe598"
588 | },
589 | "source": [
590 | "print(target)"
591 | ],
592 | "execution_count": null,
593 | "outputs": [
594 | {
595 | "output_type": "stream",
596 | "name": "stdout",
597 | "text": [
598 | "0 1\n",
599 | "1 0\n",
600 | "2 1\n",
601 | "3 0\n",
602 | "4 1\n",
603 | " ..\n",
604 | "763 0\n",
605 | "764 0\n",
606 | "765 0\n",
607 | "766 1\n",
608 | "767 0\n",
609 | "Name: Outcome, Length: 768, dtype: int64\n"
610 | ]
611 | }
612 | ]
613 | },
614 | {
615 | "cell_type": "markdown",
616 | "metadata": {
617 | "id": "umAbo_kqrlzI"
618 | },
619 | "source": [
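620 | "Data Standardization (note: the scaler is fitted on all rows before the train/test split, so the test rows also contribute to the scaling statistics)"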
621 | ]
622 | },
623 | {
624 | "cell_type": "code",
625 | "metadata": {
626 | "id": "njfM5X60rgnc"
627 | },
628 | "source": [
629 | "scaler = StandardScaler()"
630 | ],
631 | "execution_count": null,
632 | "outputs": []
633 | },
634 | {
635 | "cell_type": "code",
636 | "metadata": {
637 | "id": "g0ai5ARbr53p",
638 | "colab": {
639 | "base_uri": "https://localhost:8080/"
640 | },
641 | "outputId": "886259ac-12c9-48a4-ae49-22f446bf95a5"
642 | },
643 | "source": [
644 | "scaler.fit(features)"
645 | ],
646 | "execution_count": null,
647 | "outputs": [
648 | {
649 | "output_type": "execute_result",
650 | "data": {
651 | "text/plain": [
652 | "StandardScaler(copy=True, with_mean=True, with_std=True)"
653 | ]
654 | },
655 | "metadata": {},
656 | "execution_count": 12
657 | }
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "metadata": {
663 | "id": "FHxNwPuZr-kD"
664 | },
665 | "source": [
666 | "standardized_data = scaler.transform(features)"
667 | ],
668 | "execution_count": null,
669 | "outputs": []
670 | },
671 | {
672 | "cell_type": "code",
673 | "metadata": {
674 | "id": "fjMwZ5x6sPUJ",
675 | "colab": {
676 | "base_uri": "https://localhost:8080/"
677 | },
678 | "outputId": "1f44da9d-aa05-4a27-a046-8879ac6ddcbf"
679 | },
680 | "source": [
681 | "print(standardized_data)"
682 | ],
683 | "execution_count": null,
684 | "outputs": [
685 | {
686 | "output_type": "stream",
687 | "name": "stdout",
688 | "text": [
689 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
690 | " 1.4259954 ]\n",
691 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
692 | " -0.19067191]\n",
693 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
694 | " -0.10558415]\n",
695 | " ...\n",
696 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
697 | " -0.27575966]\n",
698 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
699 | " 1.17073215]\n",
700 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
701 | " -0.87137393]]\n"
702 | ]
703 | }
704 | ]
705 | },
706 | {
707 | "cell_type": "code",
708 | "metadata": {
709 | "id": "ZxWSl4SGsRjE"
710 | },
711 | "source": [
712 | "features = standardized_data\n",
713 | "target = diabetes_dataset['Outcome']"
714 | ],
715 | "execution_count": null,
716 | "outputs": []
717 | },
718 | {
719 | "cell_type": "code",
720 | "metadata": {
721 | "id": "lhJF_7QjsjmP",
722 | "colab": {
723 | "base_uri": "https://localhost:8080/"
724 | },
725 | "outputId": "2ee5a7f5-55ee-42b9-ccdc-1a5452cb06b0"
726 | },
727 | "source": [
728 | "print(features)\n",
729 | "print(target)"
730 | ],
731 | "execution_count": null,
732 | "outputs": [
733 | {
734 | "output_type": "stream",
735 | "name": "stdout",
736 | "text": [
737 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
738 | " 1.4259954 ]\n",
739 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
740 | " -0.19067191]\n",
741 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
742 | " -0.10558415]\n",
743 | " ...\n",
744 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
745 | " -0.27575966]\n",
746 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
747 | " 1.17073215]\n",
748 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
749 | " -0.87137393]]\n",
750 | "0 1\n",
751 | "1 0\n",
752 | "2 1\n",
753 | "3 0\n",
754 | "4 1\n",
755 | " ..\n",
756 | "763 0\n",
757 | "764 0\n",
758 | "765 0\n",
759 | "766 1\n",
760 | "767 0\n",
761 | "Name: Outcome, Length: 768, dtype: int64\n"
762 | ]
763 | }
764 | ]
765 | },
766 | {
767 | "cell_type": "markdown",
768 | "metadata": {
769 | "id": "gHciEFkxsoQP"
770 | },
771 | "source": [
772 | "Train Test Split"
773 | ]
774 | },
775 | {
776 | "cell_type": "code",
777 | "metadata": {
778 | "id": "AEfKGj_yslvD"
779 | },
780 | "source": [
781 | "X_train, X_test, Y_train, Y_test = train_test_split(features,target, test_size = 0.2, random_state=2)"
782 | ],
783 | "execution_count": null,
784 | "outputs": []
785 | },
786 | {
787 | "cell_type": "code",
788 | "metadata": {
789 | "id": "DR05T-o0t3FQ",
790 | "colab": {
791 | "base_uri": "https://localhost:8080/"
792 | },
793 | "outputId": "83023886-4372-49fa-a0c9-b0cb15e4c460"
794 | },
795 | "source": [
796 | "print(features.shape, X_train.shape, X_test.shape)"
797 | ],
798 | "execution_count": null,
799 | "outputs": [
800 | {
801 | "output_type": "stream",
802 | "name": "stdout",
803 | "text": [
804 | "(768, 8) (614, 8) (154, 8)\n"
805 | ]
806 | }
807 | ]
808 | },
809 | {
810 | "cell_type": "markdown",
811 | "metadata": {
812 | "id": "ElJ3tkOtuC_n"
813 | },
814 | "source": [
815 | "Training the Model"
816 | ]
817 | },
818 | {
819 | "cell_type": "code",
820 | "metadata": {
821 | "id": "5szLWHlNt9xc"
822 | },
823 | "source": [
824 | "classifier = SVM_classifier(learning_rate=0.001, no_of_iterations=1000, lambda_parameter = 0.01)"
825 | ],
826 | "execution_count": null,
827 | "outputs": []
828 | },
829 | {
830 | "cell_type": "code",
831 | "metadata": {
832 | "id": "ncJWY_7suPAb"
833 | },
834 | "source": [
835 | "#training the support vector Machine Classifier\n",
836 | "classifier.fit(X_train, Y_train)"
837 | ],
838 | "execution_count": null,
839 | "outputs": []
840 | },
841 | {
842 | "cell_type": "markdown",
843 | "metadata": {
844 | "id": "UV4-CAfquiyP"
845 | },
846 | "source": [
847 | "Model Evaluation"
848 | ]
849 | },
850 | {
851 | "cell_type": "markdown",
852 | "metadata": {
853 | "id": "yhAjGPJWunXa"
854 | },
855 | "source": [
856 | "Accuracy Score"
857 | ]
858 | },
859 | {
860 | "cell_type": "code",
861 | "metadata": {
862 | "id": "fJLEPQK7ueXp"
863 | },
864 | "source": [
865 | "# score the fitted classifier on the rows it was trained on\n",
866 | "train_predictions = classifier.predict(X_train)\n",
867 | "training_data_accuracy = accuracy_score(Y_train, train_predictions)"
868 | ],
869 | "execution_count": null,
870 | "outputs": []
871 | },
872 | {
873 | "cell_type": "code",
874 | "metadata": {
875 | "id": "mmJ22qhVvNwj",
876 | "colab": {
877 | "base_uri": "https://localhost:8080/"
878 | },
879 | "outputId": "8742258c-ade3-4419-fe8d-3b66c0457990"
880 | },
881 | "source": [
882 | "print('Accuracy score of the training data : ', training_data_accuracy)"
883 | ],
884 | "execution_count": null,
885 | "outputs": [
886 | {
887 | "output_type": "stream",
888 | "name": "stdout",
889 | "text": [
890 | "Accuracy score of the training data : 0.7768729641693811\n"
891 | ]
892 | }
893 | ]
894 | },
895 | {
896 | "cell_type": "code",
897 | "metadata": {
898 | "id": "G2CICFMEvcCl"
899 | },
900 | "source": [
901 | "# score the fitted classifier on the held-out test rows\n",
902 | "test_predictions = classifier.predict(X_test)\n",
903 | "test_data_accuracy = accuracy_score(Y_test, test_predictions)"
904 | ],
905 | "execution_count": null,
906 | "outputs": []
907 | },
908 | {
909 | "cell_type": "code",
910 | "metadata": {
911 | "id": "i2GcW_t_vz7C",
912 | "colab": {
913 | "base_uri": "https://localhost:8080/"
914 | },
915 | "outputId": "6da095a3-30c1-492e-b410-cb8f4ed21cc4"
916 | },
917 | "source": [
918 | "print('Accuracy score of the test data : ', test_data_accuracy)"
919 | ],
920 | "execution_count": null,
921 | "outputs": [
922 | {
923 | "output_type": "stream",
924 | "name": "stdout",
925 | "text": [
926 | "Accuracy score of the test data : 0.7532467532467533\n"
927 | ]
928 | }
929 | ]
930 | },
931 | {
932 | "cell_type": "markdown",
933 | "metadata": {
934 | "id": "3XdcQCMcVwjM"
935 | },
936 | "source": [
937 | "Building a Predictive System"
938 | ]
939 | },
940 | {
941 | "cell_type": "code",
942 | "metadata": {
943 | "colab": {
944 | "base_uri": "https://localhost:8080/"
945 | },
946 | "id": "EJmRBcutVvzA",
947 | "outputId": "e0858c91-9db1-4460-d293-826216171734"
948 | },
949 | "source": [
950 | "# predictive system: classify one hand-entered record\n",
951 | "input_data = (5,166,72,19,175,25.8,0.587,51)\n",
952 | "\n",
953 | "# turn the tuple into a NumPy array shaped as one row (a single instance)\n",
954 | "single_instance = np.asarray(input_data).reshape(1,-1)\n",
955 | "\n",
956 | "# standardize the row with the scaler fitted earlier in the notebook\n",
957 | "std_data = scaler.transform(single_instance)\n",
958 | "print(std_data)\n",
959 | "\n",
960 | "# run the trained SVM classifier on the standardized row\n",
961 | "prediction = classifier.predict(std_data)\n",
962 | "print(prediction)\n",
963 | "\n",
964 | "# label 0 is reported as not diabetic, anything else as diabetic\n",
965 | "is_diabetic = prediction[0] != 0\n",
966 | "verdict = 'The person is diabetic' if is_diabetic else 'The person is not diabetic'\n",
967 | "\n",
968 | "print(verdict)"
969 | ],
970 | "execution_count": null,
971 | "outputs": [
972 | {
973 | "output_type": "stream",
974 | "name": "stdout",
975 | "text": [
976 | "[[ 0.3429808 1.41167241 0.14964075 -0.09637905 0.82661621 -0.78595734\n",
977 | " 0.34768723 1.51108316]]\n",
978 | "[1]\n",
979 | "The person is diabetic\n"
980 | ]
981 | }
982 | ]
983 | }
984 | ]
985 | }
--------------------------------------------------------------------------------