├── 0. datasets link.txt
├── 7_1_4_Building_Linear_Regression_from_scratch_in_Python.ipynb
├── 7_1_5_Implementing_Linear_Regression_from_scratch_in_Python.ipynb
├── 7_1_5_Lin_Reg_implementation.ipynb
├── 7_2_5_Building_Logistic_Regression_from_scratch_in_Python.ipynb
├── 7_2_6_Implementing_Logistic_Regression_from_scratch_in_Python.ipynb
├── 7_2_6_Logistic_Regression_Implementation.ipynb
├── 7_3_6_Building_Support_Vector_Machine_Classifier_from_Scratch_in_Python.ipynb
├── 7_3_7_Implementing_SVM_Classifier_from_Scratch_in_Python.ipynb
├── 7_3_7_Implementing_SVM_from_Scratch.ipynb
├── 7_4_4_Building_Lasso_Regression_from_Scratch_in_Python.ipynb
└── 7_4_5_Implementing_Lasso_Regression_from_Scratch.ipynb
/0. datasets link.txt:
--------------------------------------------------------------------------------
1 | All Datasets link: https://drive.google.com/drive/folders/1BJLh_8Kx88V6ItrdLA5CQ63RIFcjKkFV?usp=sharing
2 |
--------------------------------------------------------------------------------
/7_1_4_Building_Linear_Regression_from_scratch_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "GmDgN3q0rw6y"
22 | },
23 | "source": [
24 | "Linear Regression:\n",
25 | "\n",
26 | "**Y = wX + b**\n",
27 | "\n",
28 | "Y --> Dependent Variable\n",
29 | "\n",
30 | "X --> Independent Variable\n",
31 | "\n",
32 | "w --> weight\n",
33 | "\n",
34 | "b --> bias"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {
40 | "id": "X8OJXGuPvDt2"
41 | },
42 | "source": [
43 | "**Gradient Descent:**\n",
44 | "\n",
45 | "Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n",
46 | "\n",
47 | "w = w - α*dw\n",
48 | "\n",
49 | "b = b - α*db"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {
55 | "id": "WSAfYP7WmECB"
56 | },
57 | "source": [
58 | "**Learning Rate:**\n",
59 | "\n",
60 | "Learning rate is a tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function."
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {
66 | "id": "rkCM1toLm7oz"
67 | },
68 | "source": [
69 | ""
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {
75 | "id": "eSdmQl4Sm_ft"
76 | },
77 | "source": [
78 | ""
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "metadata": {
84 | "id": "cNxq7tuqllsx"
85 | },
86 | "source": [
87 | "# importing numpy library\n",
88 | "import numpy as np"
89 | ],
90 | "execution_count": null,
91 | "outputs": []
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {
96 | "id": "VrbQ5F8etU0G"
97 | },
98 | "source": [
99 | "**Linear Regression**"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "metadata": {
105 | "id": "d4zf9fMJtTeW"
106 | },
107 | "source": [
108 | "class Linear_Regression():\n",
109 | "\n",
110 | " # initiating the parameters (learning rate & no. of iterations)\n",
111 | " def __init__(self, learning_rate, no_of_iterations):\n",
112 | "\n",
113 | " self.learning_rate = learning_rate\n",
114 | " self.no_of_iterations = no_of_iterations\n",
115 | "\n",
116 | "\n",
117 | " def fit(self, X, Y ):\n",
118 | "\n",
119 | " # number of training examples & number of features\n",
120 | "\n",
121 | " self.m, self.n = X.shape # number of rows & columns\n",
122 | "\n",
123 | " # initiating the weight and bias \n",
124 | "\n",
125 | " self.w = np.zeros(self.n)\n",
126 | " self.b = 0\n",
127 | " self.X = X\n",
128 | " self.Y = Y\n",
129 | "\n",
130 | " # implementing Gradient Descent\n",
131 | " \n",
132 | " for i in range(self.no_of_iterations):\n",
133 | " self.update_weights()\n",
134 | "\n",
135 | "\n",
136 | " def update_weights(self):\n",
137 | "\n",
138 | " Y_prediction = self.predict(self.X)\n",
139 | "\n",
140 | " # calculate gradients\n",
141 | "\n",
142 | " dw = - (2 * (self.X.T).dot(self.Y - Y_prediction)) / self.m\n",
143 | "\n",
144 | " db = - 2 * np.sum(self.Y - Y_prediction)/self.m\n",
145 | "\n",
146 | " # updating the weights\n",
147 | " \n",
148 | " self.w = self.w - self.learning_rate*dw\n",
149 | " self.b = self.b - self.learning_rate*db\n",
150 | " \n",
151 | "\n",
152 | " def predict(self, X):\n",
153 | "\n",
154 | " return X.dot(self.w) + self.b\n",
155 | "\n"
156 | ],
157 | "execution_count": null,
158 | "outputs": []
159 | }
160 | ]
161 | }
--------------------------------------------------------------------------------
/7_1_5_Implementing_Linear_Regression_from_scratch_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "GmDgN3q0rw6y"
21 | },
22 | "source": [
23 | "Linear Regression:\n",
24 | "\n",
25 | "**Y = wX + b**\n",
26 | "\n",
27 | "Y --> Dependent Variable\n",
28 | "\n",
29 | "X --> Independent Variable\n",
30 | "\n",
31 | "w --> weight\n",
32 | "\n",
33 | "b --> bias"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {
39 | "id": "X8OJXGuPvDt2"
40 | },
41 | "source": [
42 | "**Gradient Descent:**\n",
43 | "\n",
44 | "Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n",
45 | "\n",
46 | "w = w - α*dw\n",
47 | "\n",
48 | "b = b - α*db"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {
54 | "id": "m4IWNi9WwNI5"
55 | },
56 | "source": [
57 | "Importing the Dependencies"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "metadata": {
63 | "id": "WtObJGwFnINT"
64 | },
65 | "source": [
66 | "# Importing numpy library\n",
67 | "import numpy as np"
68 | ],
69 | "execution_count": null,
70 | "outputs": []
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {
75 | "id": "POvc3KnRNboc"
76 | },
77 | "source": [
78 | "**Linear Regression**"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "metadata": {
84 | "id": "BuflF8sGNQXF"
85 | },
86 | "source": [
87 | "class Linear_Regression():\n",
88 | "\n",
89 | " def __init__( self, learning_rate, no_of_iterations ) :\n",
90 | " \n",
91 | " self.learning_rate = learning_rate\n",
92 | " \n",
93 | " self.no_of_iterations = no_of_iterations\n",
94 | "\n",
95 | " # fit function to train the model\n",
96 | "\n",
97 | " def fit( self, X, Y ) :\n",
98 | " \n",
99 | " # no_of_training_examples, no_of_features\n",
100 | " \n",
101 | " self.m, self.n = X.shape\n",
102 | " \n",
103 | " # initiating the weight and bias\n",
104 | " \n",
105 | " self.w = np.zeros( self.n )\n",
106 | " \n",
107 | " self.b = 0\n",
108 | " \n",
109 | " self.X = X\n",
110 | " \n",
111 | " self.Y = Y\n",
112 | "\n",
113 | "\n",
114 | " # implementing Gradient Descent for Optimization\n",
115 | " \n",
116 | " for i in range( self.no_of_iterations ) :\n",
117 | " \n",
118 | " self.update_weights()\n",
119 | " \n",
120 | " \n",
121 | " \n",
122 | " # function to update weights in gradient descent\n",
123 | " \n",
124 | " def update_weights( self ) :\n",
125 | " \n",
126 | " Y_prediction = self.predict( self.X )\n",
127 | " \n",
128 | " # calculate gradients \n",
129 | " \n",
130 | " dw = - ( 2 * ( self.X.T ).dot( self.Y - Y_prediction ) ) / self.m\n",
131 | " \n",
132 | " db = - 2 * np.sum( self.Y - Y_prediction ) / self.m \n",
133 | " \n",
134 | " # updating the weights\n",
135 | " \n",
136 | " self.w = self.w - self.learning_rate * dw\n",
137 | " \n",
138 | " self.b = self.b - self.learning_rate * db\n",
139 | " \n",
140 | " \n",
141 | " # Line function for prediction:\n",
142 | " \n",
143 | " def predict( self, X ) :\n",
144 | " \n",
145 | " return X.dot( self.w ) + self.b\n"
146 | ],
147 | "execution_count": null,
148 | "outputs": []
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {
153 | "id": "099oR2Ip8qgA"
154 | },
155 | "source": [
156 | "Using Linear Regression model for Prediction"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "metadata": {
162 | "id": "JWDqO3zw8t6P"
163 | },
164 | "source": [
165 | "# importing the dependencies\n",
166 | "import pandas as pd\n",
167 | "from sklearn.model_selection import train_test_split\n",
168 | "import matplotlib.pyplot as plt"
169 | ],
170 | "execution_count": null,
171 | "outputs": []
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "metadata": {
176 | "id": "iPX7bNko_QJO"
177 | },
178 | "source": [
179 | "Data Pre-Processing"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "metadata": {
185 | "id": "_xcrhXnm_K-m"
186 | },
187 | "source": [
188 | "# loading the data from csv file to a pandas dataframe\n",
189 | "\n",
190 | "salary_data = pd.read_csv('/content/salary_data.csv')"
191 | ],
192 | "execution_count": null,
193 | "outputs": []
194 | },
195 | {
196 | "cell_type": "code",
197 | "metadata": {
198 | "colab": {
199 | "base_uri": "https://localhost:8080/",
200 | "height": 206
201 | },
202 | "id": "gkF0Gvxm_sU6",
203 | "outputId": "d105a461-612e-4852-b8f2-f31d0ba79fb0"
204 | },
205 | "source": [
206 | "# printing the first 5 rows of the dataframe\n",
207 | "salary_data.head()"
208 | ],
209 | "execution_count": null,
210 | "outputs": [
211 | {
212 | "output_type": "execute_result",
213 | "data": {
214 | "text/html": [
215 | "
\n",
216 | "\n",
229 | "
\n",
230 | " \n",
231 | " \n",
232 | " | \n",
233 | " YearsExperience | \n",
234 | " Salary | \n",
235 | "
\n",
236 | " \n",
237 | " \n",
238 | " \n",
239 | " 0 | \n",
240 | " 1.1 | \n",
241 | " 39343 | \n",
242 | "
\n",
243 | " \n",
244 | " 1 | \n",
245 | " 1.3 | \n",
246 | " 46205 | \n",
247 | "
\n",
248 | " \n",
249 | " 2 | \n",
250 | " 1.5 | \n",
251 | " 37731 | \n",
252 | "
\n",
253 | " \n",
254 | " 3 | \n",
255 | " 2.0 | \n",
256 | " 43525 | \n",
257 | "
\n",
258 | " \n",
259 | " 4 | \n",
260 | " 2.2 | \n",
261 | " 39891 | \n",
262 | "
\n",
263 | " \n",
264 | "
\n",
265 | "
"
266 | ],
267 | "text/plain": [
268 | " YearsExperience Salary\n",
269 | "0 1.1 39343\n",
270 | "1 1.3 46205\n",
271 | "2 1.5 37731\n",
272 | "3 2.0 43525\n",
273 | "4 2.2 39891"
274 | ]
275 | },
276 | "metadata": {},
277 | "execution_count": 26
278 | }
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "metadata": {
284 | "colab": {
285 | "base_uri": "https://localhost:8080/",
286 | "height": 206
287 | },
288 | "id": "TZnQ8eyk_0yJ",
289 | "outputId": "2374a90b-6245-4a2f-8aa7-2d29d783c1e9"
290 | },
291 | "source": [
292 | "# last 5 rows of the dataframe\n",
293 | "salary_data.tail()"
294 | ],
295 | "execution_count": null,
296 | "outputs": [
297 | {
298 | "output_type": "execute_result",
299 | "data": {
300 | "text/html": [
301 | "\n",
302 | "\n",
315 | "
\n",
316 | " \n",
317 | " \n",
318 | " | \n",
319 | " YearsExperience | \n",
320 | " Salary | \n",
321 | "
\n",
322 | " \n",
323 | " \n",
324 | " \n",
325 | " 25 | \n",
326 | " 9.0 | \n",
327 | " 105582 | \n",
328 | "
\n",
329 | " \n",
330 | " 26 | \n",
331 | " 9.5 | \n",
332 | " 116969 | \n",
333 | "
\n",
334 | " \n",
335 | " 27 | \n",
336 | " 9.6 | \n",
337 | " 112635 | \n",
338 | "
\n",
339 | " \n",
340 | " 28 | \n",
341 | " 10.3 | \n",
342 | " 122391 | \n",
343 | "
\n",
344 | " \n",
345 | " 29 | \n",
346 | " 10.5 | \n",
347 | " 121872 | \n",
348 | "
\n",
349 | " \n",
350 | "
\n",
351 | "
"
352 | ],
353 | "text/plain": [
354 | " YearsExperience Salary\n",
355 | "25 9.0 105582\n",
356 | "26 9.5 116969\n",
357 | "27 9.6 112635\n",
358 | "28 10.3 122391\n",
359 | "29 10.5 121872"
360 | ]
361 | },
362 | "metadata": {},
363 | "execution_count": 27
364 | }
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "metadata": {
370 | "colab": {
371 | "base_uri": "https://localhost:8080/"
372 | },
373 | "id": "TUfpF8gp_5ly",
374 | "outputId": "6e4fe4e5-f2ee-481f-dd0c-0a234556ad4a"
375 | },
376 | "source": [
377 | "# number of rows & columns in the dataframe\n",
378 | "salary_data.shape"
379 | ],
380 | "execution_count": null,
381 | "outputs": [
382 | {
383 | "output_type": "execute_result",
384 | "data": {
385 | "text/plain": [
386 | "(30, 2)"
387 | ]
388 | },
389 | "metadata": {},
390 | "execution_count": 28
391 | }
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "metadata": {
397 | "colab": {
398 | "base_uri": "https://localhost:8080/"
399 | },
400 | "id": "XvMCoPwIAO1u",
401 | "outputId": "2285d932-f61e-468b-997a-1c32b4659c92"
402 | },
403 | "source": [
404 | "# checking for missing values\n",
405 | "salary_data.isnull().sum()"
406 | ],
407 | "execution_count": null,
408 | "outputs": [
409 | {
410 | "output_type": "execute_result",
411 | "data": {
412 | "text/plain": [
413 | "YearsExperience 0\n",
414 | "Salary 0\n",
415 | "dtype: int64"
416 | ]
417 | },
418 | "metadata": {},
419 | "execution_count": 29
420 | }
421 | ]
422 | },
423 | {
424 | "cell_type": "markdown",
425 | "metadata": {
426 | "id": "ZwI59efnA5a1"
427 | },
428 | "source": [
429 | "Splitting the feature & target"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "metadata": {
435 | "id": "bV06CwPpAk-d"
436 | },
437 | "source": [
438 | "X = salary_data.iloc[:,:-1].values \n",
439 | "Y = salary_data.iloc[:,1].values"
440 | ],
441 | "execution_count": null,
442 | "outputs": []
443 | },
444 | {
445 | "cell_type": "code",
446 | "metadata": {
447 | "colab": {
448 | "base_uri": "https://localhost:8080/"
449 | },
450 | "id": "myjQFw1aB2EE",
451 | "outputId": "f8bcbf47-fe6a-4b63-8673-32105ed38948"
452 | },
453 | "source": [
454 | "print(X)"
455 | ],
456 | "execution_count": null,
457 | "outputs": [
458 | {
459 | "output_type": "stream",
460 | "name": "stdout",
461 | "text": [
462 | "[[ 1.1]\n",
463 | " [ 1.3]\n",
464 | " [ 1.5]\n",
465 | " [ 2. ]\n",
466 | " [ 2.2]\n",
467 | " [ 2.9]\n",
468 | " [ 3. ]\n",
469 | " [ 3.2]\n",
470 | " [ 3.2]\n",
471 | " [ 3.7]\n",
472 | " [ 3.9]\n",
473 | " [ 4. ]\n",
474 | " [ 4. ]\n",
475 | " [ 4.1]\n",
476 | " [ 4.5]\n",
477 | " [ 4.9]\n",
478 | " [ 5.1]\n",
479 | " [ 5.3]\n",
480 | " [ 5.9]\n",
481 | " [ 6. ]\n",
482 | " [ 6.8]\n",
483 | " [ 7.1]\n",
484 | " [ 7.9]\n",
485 | " [ 8.2]\n",
486 | " [ 8.7]\n",
487 | " [ 9. ]\n",
488 | " [ 9.5]\n",
489 | " [ 9.6]\n",
490 | " [10.3]\n",
491 | " [10.5]]\n"
492 | ]
493 | }
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "metadata": {
499 | "colab": {
500 | "base_uri": "https://localhost:8080/"
501 | },
502 | "id": "tt4B70LiB34L",
503 | "outputId": "91e8e8b4-ace1-494b-d660-4558ac06c461"
504 | },
505 | "source": [
506 | "print(Y)"
507 | ],
508 | "execution_count": null,
509 | "outputs": [
510 | {
511 | "output_type": "stream",
512 | "name": "stdout",
513 | "text": [
514 | "[ 39343 46205 37731 43525 39891 56642 60150 54445 64445 57189\n",
515 | " 63218 55794 56957 57081 61111 67938 66029 83088 81363 93940\n",
516 | " 91738 98273 101302 113812 109431 105582 116969 112635 122391 121872]\n"
517 | ]
518 | }
519 | ]
520 | },
521 | {
522 | "cell_type": "markdown",
523 | "metadata": {
524 | "id": "JEJ1qsdlCBdu"
525 | },
526 | "source": [
527 | "Splitting the dataset into training & test data"
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "metadata": {
533 | "id": "xAeWEMPjB6Id"
534 | },
535 | "source": [
536 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state = 2)"
537 | ],
538 | "execution_count": null,
539 | "outputs": []
540 | },
541 | {
542 | "cell_type": "markdown",
543 | "metadata": {
544 | "id": "KFIKZOGcDEN5"
545 | },
546 | "source": [
547 | "Training the Linear Regression model"
548 | ]
549 | },
550 | {
551 | "cell_type": "code",
552 | "metadata": {
553 | "id": "4ONMkF5ZC8wv"
554 | },
555 | "source": [
556 | "model = Linear_Regression(learning_rate = 0.02, no_of_iterations=1000)"
557 | ],
558 | "execution_count": null,
559 | "outputs": []
560 | },
561 | {
562 | "cell_type": "code",
563 | "metadata": {
564 | "id": "tCrA-LIlDu1Q"
565 | },
566 | "source": [
567 | "model.fit(X_train, Y_train)"
568 | ],
569 | "execution_count": null,
570 | "outputs": []
571 | },
572 | {
573 | "cell_type": "code",
574 | "metadata": {
575 | "colab": {
576 | "base_uri": "https://localhost:8080/"
577 | },
578 | "id": "RrK2D8R2EJz9",
579 | "outputId": "f14ce5a6-aad8-4a73-b933-3fdb9388a991"
580 | },
581 | "source": [
582 | "# printing the parameter values ( weights & bias)\n",
583 | "\n",
584 | "print('weight = ', model.w[0])\n",
585 | "print('bias = ', model.b)"
586 | ],
587 | "execution_count": null,
588 | "outputs": [
589 | {
590 | "output_type": "stream",
591 | "name": "stdout",
592 | "text": [
593 | "weight = 9514.400999035135\n",
594 | "bias = 23697.406507136307\n"
595 | ]
596 | }
597 | ]
598 | },
599 | {
600 | "cell_type": "markdown",
601 | "metadata": {
602 | "id": "69iNWWvkE9MF"
603 | },
604 | "source": [
605 | "y = 9514(x) + 23697\n",
606 | "\n",
607 | "\n",
608 | "salary = 9514(experience) + 23697"
609 | ]
610 | },
611 | {
612 | "cell_type": "markdown",
613 | "metadata": {
614 | "id": "WddqeITjFy_9"
615 | },
616 | "source": [
617 | "Predict the salary value for test data"
618 | ]
619 | },
620 | {
621 | "cell_type": "code",
622 | "metadata": {
623 | "id": "XWbdku1HE6b8"
624 | },
625 | "source": [
626 | "test_data_prediction = model.predict(X_test)"
627 | ],
628 | "execution_count": null,
629 | "outputs": []
630 | },
631 | {
632 | "cell_type": "code",
633 | "metadata": {
634 | "colab": {
635 | "base_uri": "https://localhost:8080/"
636 | },
637 | "id": "1xkqFPZPGTjm",
638 | "outputId": "c517f060-2dc5-409b-fda6-f904e18a0f1e"
639 | },
640 | "source": [
641 | "print(test_data_prediction)"
642 | ],
643 | "execution_count": null,
644 | "outputs": [
645 | {
646 | "output_type": "stream",
647 | "name": "stdout",
648 | "text": [
649 | "[ 36066.12780588 34163.24760607 66512.21100279 58900.69020357\n",
650 | " 91249.65360029 80783.81250135 101715.49469922 52240.60950424\n",
651 | " 42726.20850521 88395.33330058]\n"
652 | ]
653 | }
654 | ]
655 | },
656 | {
657 | "cell_type": "markdown",
658 | "metadata": {
659 | "id": "LnxLJFR1GZDT"
660 | },
661 | "source": [
662 | "Visualizing the predicted values & actual Values"
663 | ]
664 | },
665 | {
666 | "cell_type": "code",
667 | "metadata": {
668 | "colab": {
669 | "base_uri": "https://localhost:8080/",
670 | "height": 295
671 | },
672 | "id": "jDNBDfASGWCY",
673 | "outputId": "0d5c45ab-fd5c-4a10-aded-e23f967cb9a0"
674 | },
675 | "source": [
676 | "plt.scatter(X_test, Y_test, color = 'red')\n",
677 | "plt.plot(X_test, test_data_prediction, color='blue')\n",
678 | "plt.xlabel(' Work Experience')\n",
679 | "plt.ylabel('Salary')\n",
680 | "plt.title(' Salary vs Experience')\n",
681 | "plt.show()"
682 | ],
683 | "execution_count": null,
684 | "outputs": [
685 | {
686 | "output_type": "display_data",
687 | "data": {
688 | "image/png": "\n",
689 | "text/plain": [
690 | ""
691 | ]
692 | },
693 | "metadata": {
694 | "needs_background": "light"
695 | }
696 | }
697 | ]
698 | }
699 | ]
700 | }
--------------------------------------------------------------------------------
/7_1_5_Lin_Reg_implementation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "code",
20 | "metadata": {
21 | "id": "-y5S4EZyPYt3"
22 | },
23 | "source": [
24 | "import numpy as np\n",
25 | "import pandas as pd\n",
26 | "from sklearn.model_selection import train_test_split\n",
27 | "import matplotlib.pyplot as plt\n",
28 | "\n",
29 | "import Lin_Reg_model"
30 | ],
31 | "execution_count": null,
32 | "outputs": []
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "id": "1G3UFDztP9tF"
38 | },
39 | "source": [
40 | "Data Processing"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "metadata": {
46 | "id": "YQ4Zx9HbPy1-"
47 | },
48 | "source": [
49 | "# loading the data from csv file to pandas dataframe\n",
50 | "salary_data = pd.read_csv('/content/salary_data.csv')"
51 | ],
52 | "execution_count": null,
53 | "outputs": []
54 | },
55 | {
56 | "cell_type": "code",
57 | "metadata": {
58 | "colab": {
59 | "base_uri": "https://localhost:8080/",
60 | "height": 206
61 | },
62 | "id": "nPfG1QLoQOxU",
63 | "outputId": "e78f6224-777a-4d02-9d93-9a63e62dadb7"
64 | },
65 | "source": [
66 | "# printing the first 5 rows of the dataframe\n",
67 | "salary_data.head()"
68 | ],
69 | "execution_count": null,
70 | "outputs": [
71 | {
72 | "output_type": "execute_result",
73 | "data": {
74 | "text/html": [
75 | "\n",
76 | "\n",
89 | "
\n",
90 | " \n",
91 | " \n",
92 | " | \n",
93 | " YearsExperience | \n",
94 | " Salary | \n",
95 | "
\n",
96 | " \n",
97 | " \n",
98 | " \n",
99 | " 0 | \n",
100 | " 1.1 | \n",
101 | " 39343 | \n",
102 | "
\n",
103 | " \n",
104 | " 1 | \n",
105 | " 1.3 | \n",
106 | " 46205 | \n",
107 | "
\n",
108 | " \n",
109 | " 2 | \n",
110 | " 1.5 | \n",
111 | " 37731 | \n",
112 | "
\n",
113 | " \n",
114 | " 3 | \n",
115 | " 2.0 | \n",
116 | " 43525 | \n",
117 | "
\n",
118 | " \n",
119 | " 4 | \n",
120 | " 2.2 | \n",
121 | " 39891 | \n",
122 | "
\n",
123 | " \n",
124 | "
\n",
125 | "
"
126 | ],
127 | "text/plain": [
128 | " YearsExperience Salary\n",
129 | "0 1.1 39343\n",
130 | "1 1.3 46205\n",
131 | "2 1.5 37731\n",
132 | "3 2.0 43525\n",
133 | "4 2.2 39891"
134 | ]
135 | },
136 | "metadata": {},
137 | "execution_count": 3
138 | }
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "metadata": {
144 | "colab": {
145 | "base_uri": "https://localhost:8080/"
146 | },
147 | "id": "AUn-2j48QQoO",
148 | "outputId": "e2181bfd-8b1e-433a-e530-5e26b2573cd9"
149 | },
150 | "source": [
151 | "# number of rows & columns in the dataset\n",
152 | "salary_data.shape"
153 | ],
154 | "execution_count": null,
155 | "outputs": [
156 | {
157 | "output_type": "execute_result",
158 | "data": {
159 | "text/plain": [
160 | "(30, 2)"
161 | ]
162 | },
163 | "metadata": {},
164 | "execution_count": 4
165 | }
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "metadata": {
171 | "colab": {
172 | "base_uri": "https://localhost:8080/"
173 | },
174 | "id": "pvdhCYEAQjKR",
175 | "outputId": "c08c11fc-203b-481b-a760-7b38d47cca17"
176 | },
177 | "source": [
178 | "# checking for missing values\n",
179 | "salary_data.isnull().sum()"
180 | ],
181 | "execution_count": null,
182 | "outputs": [
183 | {
184 | "output_type": "execute_result",
185 | "data": {
186 | "text/plain": [
187 | "YearsExperience 0\n",
188 | "Salary 0\n",
189 | "dtype: int64"
190 | ]
191 | },
192 | "metadata": {},
193 | "execution_count": 5
194 | }
195 | ]
196 | },
197 | {
198 | "cell_type": "markdown",
199 | "metadata": {
200 | "id": "DFsbcUDLQ4j6"
201 | },
202 | "source": [
203 | "Splitting the feature & target"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "metadata": {
209 | "id": "HDVDx_chQy2J"
210 | },
211 | "source": [
212 | "X = salary_data.iloc[:,:-1].values\n",
213 | "Y = salary_data.iloc[:,1].values"
214 | ],
215 | "execution_count": null,
216 | "outputs": []
217 | },
218 | {
219 | "cell_type": "code",
220 | "metadata": {
221 | "colab": {
222 | "base_uri": "https://localhost:8080/"
223 | },
224 | "id": "FOYOS3iXRJaE",
225 | "outputId": "3816f5d2-efca-44b2-84e2-b3397d99d9e2"
226 | },
227 | "source": [
228 | "print(X)"
229 | ],
230 | "execution_count": null,
231 | "outputs": [
232 | {
233 | "output_type": "stream",
234 | "name": "stdout",
235 | "text": [
236 | "[[ 1.1]\n",
237 | " [ 1.3]\n",
238 | " [ 1.5]\n",
239 | " [ 2. ]\n",
240 | " [ 2.2]\n",
241 | " [ 2.9]\n",
242 | " [ 3. ]\n",
243 | " [ 3.2]\n",
244 | " [ 3.2]\n",
245 | " [ 3.7]\n",
246 | " [ 3.9]\n",
247 | " [ 4. ]\n",
248 | " [ 4. ]\n",
249 | " [ 4.1]\n",
250 | " [ 4.5]\n",
251 | " [ 4.9]\n",
252 | " [ 5.1]\n",
253 | " [ 5.3]\n",
254 | " [ 5.9]\n",
255 | " [ 6. ]\n",
256 | " [ 6.8]\n",
257 | " [ 7.1]\n",
258 | " [ 7.9]\n",
259 | " [ 8.2]\n",
260 | " [ 8.7]\n",
261 | " [ 9. ]\n",
262 | " [ 9.5]\n",
263 | " [ 9.6]\n",
264 | " [10.3]\n",
265 | " [10.5]]\n"
266 | ]
267 | }
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "metadata": {
273 | "colab": {
274 | "base_uri": "https://localhost:8080/"
275 | },
276 | "id": "eJmfuRiVRKrr",
277 | "outputId": "1d9289ce-ebf6-458c-b41f-858c7ef3464a"
278 | },
279 | "source": [
280 | "print(Y)"
281 | ],
282 | "execution_count": null,
283 | "outputs": [
284 | {
285 | "output_type": "stream",
286 | "name": "stdout",
287 | "text": [
288 | "[ 39343 46205 37731 43525 39891 56642 60150 54445 64445 57189\n",
289 | " 63218 55794 56957 57081 61111 67938 66029 83088 81363 93940\n",
290 | " 91738 98273 101302 113812 109431 105582 116969 112635 122391 121872]\n"
291 | ]
292 | }
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {
298 | "id": "RgdnZ2mdRRAX"
299 | },
300 | "source": [
301 | "Splitting the data to training data & Test data"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "metadata": {
307 | "id": "5TG7PAEyRMfy"
308 | },
309 | "source": [
310 | " X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.33,\n",
311 | " random_state = 2 )"
312 | ],
313 | "execution_count": null,
314 | "outputs": []
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {
319 | "id": "nLw4fjT1RjbF"
320 | },
321 | "source": [
322 | "Train the Linear Regression Model"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "metadata": {
328 | "id": "xYPFclmRRiGW"
329 | },
330 | "source": [
331 | "model = Lin_Reg_model.Linear_Regression(learning_rate=0.01, no_of_iterations=1000) "
332 | ],
333 | "execution_count": null,
334 | "outputs": []
335 | },
336 | {
337 | "cell_type": "code",
338 | "metadata": {
339 | "id": "-_7i2ONcSBVq"
340 | },
341 | "source": [
342 | "model.fit(X_train, Y_train)"
343 | ],
344 | "execution_count": null,
345 | "outputs": []
346 | },
347 | {
348 | "cell_type": "code",
349 | "metadata": {
350 | "colab": {
351 | "base_uri": "https://localhost:8080/"
352 | },
353 | "id": "HPLLxjGkVDFa",
354 | "outputId": "5bf02edb-3346-42be-b543-651e501c8004"
355 | },
356 | "source": [
357 | "# print the parameters\n",
358 | "\n",
359 | "print('weight = ', model.w[0])\n",
360 | "print('bias = ', model.b)"
361 | ],
362 | "execution_count": null,
363 | "outputs": [
364 | {
365 | "output_type": "stream",
366 | "name": "stdout",
367 | "text": [
368 | "weight = 9580.301196603597\n",
369 | "bias = 23226.38946603212\n"
370 | ]
371 | }
372 | ]
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {
377 | "id": "xljntE2OUvRJ"
378 | },
379 | "source": [
380 | "Predict the salary value for test data"
381 | ]
382 | },
383 | {
384 | "cell_type": "code",
385 | "metadata": {
386 | "id": "ksCCTTUUUyKY"
387 | },
388 | "source": [
389 | "test_data_prediction = model.predict(X_test)"
390 | ],
391 | "execution_count": null,
392 | "outputs": []
393 | },
394 | {
395 | "cell_type": "code",
396 | "metadata": {
397 | "colab": {
398 | "base_uri": "https://localhost:8080/"
399 | },
400 | "id": "uFB2oF79VkcZ",
401 | "outputId": "b9773849-eca6-45ea-b83d-8a403c9779de"
402 | },
403 | "source": [
404 | "print(test_data_prediction)"
405 | ],
406 | "execution_count": null,
407 | "outputs": [
408 | {
409 | "output_type": "stream",
410 | "name": "stdout",
411 | "text": [
412 | "[ 35680.78102162 33764.7207823 66337.74485075 58673.50389347\n",
413 | " 91246.52796192 80708.19664565 101784.85927818 51967.29305584\n",
414 | " 42386.99185924 88372.43760294]\n"
415 | ]
416 | }
417 | ]
418 | },
419 | {
420 | "cell_type": "markdown",
421 | "metadata": {
422 | "id": "zvxHrfrMWUjq"
423 | },
424 | "source": [
425 | "Visualizing the predicted values and actual values"
426 | ]
427 | },
428 | {
429 | "cell_type": "code",
430 | "metadata": {
431 | "colab": {
432 | "base_uri": "https://localhost:8080/",
433 | "height": 295
434 | },
435 | "id": "ML7CrqCwVmCj",
436 | "outputId": "84c6ee1d-f946-4ba3-eb0e-bbdeca428135"
437 | },
438 | "source": [
439 | "plt.scatter( X_test, Y_test, color = 'red' )\n",
440 | "plt.plot( X_test, test_data_prediction, color = 'blue' ) \n",
441 | "plt.xlabel( 'Work Experience' )\n",
442 | "plt.ylabel( 'Salary' )\n",
443 | "plt.title( 'Salary vs Experience' )\n",
444 | "plt.show()"
445 | ],
446 | "execution_count": null,
447 | "outputs": [
448 | {
449 | "output_type": "display_data",
450 | "data": {
451 | "image/png": "\n",
452 | "text/plain": [
453 | ""
454 | ]
455 | },
456 | "metadata": {
457 | "needs_background": "light"
458 | }
459 | }
460 | ]
461 | },
462 | {
463 | "cell_type": "code",
464 | "metadata": {
465 | "id": "ZEgDB1-sV5hr"
466 | },
467 | "source": [],
468 | "execution_count": null,
469 | "outputs": []
470 | }
471 | ]
472 | }
--------------------------------------------------------------------------------
/7_2_5_Building_Logistic_Regression_from_scratch_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "f5BLpBB_cv8y"
22 | },
23 | "source": [
24 | "**Logistic Regression:**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "SQA2GUq7cPXd"
31 | },
32 | "source": [
33 | "$$\\hat{Y} = \\frac{1}{1 + e^{-(wX + b)}}$$"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {
39 | "id": "W0dxwE-Ubsdf"
40 | },
41 | "source": [
42 | "Y_hat --> predicted value\n",
43 | "\n",
44 | "X --> Input Variable\n",
45 | "\n",
46 | "w --> weight\n",
47 | "\n",
48 | "b --> bias"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {
54 | "id": "X8OJXGuPvDt2"
55 | },
56 | "source": [
57 | "**Gradient Descent:**\n",
58 | "\n",
59 | "Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n",
60 | "\n",
61 | "w = w - α*dw\n",
62 | "\n",
63 | "b = b - α*db"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {
69 | "id": "WSAfYP7WmECB"
70 | },
71 | "source": [
72 | "**Learning Rate:**\n",
73 | "\n",
74 | "Learning rate is a tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function."
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {
80 | "id": "s8AfHQz9cXRF"
81 | },
82 | "source": [
83 | "**Derivatives:**"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {
89 | "id": "kLUw3M-WcCwv"
90 | },
91 | "source": [
92 | "$$dw = \\frac{1}{m} X^T (\\hat{Y} - Y), \\qquad db = \\frac{1}{m} \\sum (\\hat{Y} - Y)$$"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {
98 | "id": "WxOpuBj_SqV-"
99 | },
100 | "source": [
101 | "Importing the Dependencies"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "metadata": {
107 | "id": "fdjRDi8wlgX6"
108 | },
109 | "source": [
110 | "# importing numpy library\n",
111 | "import numpy as np"
112 | ],
113 | "execution_count": null,
114 | "outputs": []
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {
119 | "id": "eTA-BwebLjdc"
120 | },
121 | "source": [
122 | "**Logistic Regression**"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "metadata": {
128 | "id": "YcqEjNwtKHPq"
129 | },
130 | "source": [
131 | "class Logistic_Regression():\n",
132 | "\n",
133 | "\n",
134 | " # declaring learning rate & number of iterations (Hyperparameters)\n",
135 | " def __init__(self, learning_rate, no_of_iterations):\n",
136 | "\n",
137 | " self.learning_rate = learning_rate\n",
138 | " self.no_of_iterations = no_of_iterations\n",
139 | "\n",
140 | "\n",
141 | "\n",
142 | " # fit function to train the model with dataset\n",
143 | " def fit(self, X, Y):\n",
144 | "\n",
145 | " # number of data points in the dataset (number of rows) --> m\n",
146 | " # number of input features in the dataset (number of columns) --> n\n",
147 | " self.m, self.n = X.shape\n",
148 | "\n",
149 | "\n",
150 | " # initializing weight & bias values\n",
151 | "\n",
152 | " self.w = np.zeros(self.n)\n",
153 | " \n",
154 | " self.b = 0\n",
155 | "\n",
156 | " self.X = X\n",
157 | "\n",
158 | " self.Y = Y\n",
159 | "\n",
160 | "\n",
161 | " # implementing Gradient Descent for Optimization\n",
162 | "\n",
163 | " for i in range(self.no_of_iterations):\n",
164 | " self.update_weights()\n",
165 | "\n",
166 | "\n",
167 | "\n",
168 | " def update_weights(self):\n",
169 | "\n",
170 | " # Y_hat formula (sigmoid function)\n",
171 | "\n",
172 | " Y_hat = 1 / (1 + np.exp( - (self.X.dot(self.w) + self.b ) )) \n",
173 | "\n",
174 | "\n",
175 | " # derivatives\n",
176 | "\n",
177 | " dw = (1/self.m)*np.dot(self.X.T, (Y_hat - self.Y))\n",
178 | "\n",
179 | " db = (1/self.m)*np.sum(Y_hat - self.Y)\n",
180 | "\n",
181 | "\n",
182 | " # updating the weights & bias using gradient descent\n",
183 | "\n",
184 | " self.w = self.w - self.learning_rate * dw\n",
185 | "\n",
186 | " self.b = self.b - self.learning_rate * db\n",
187 | "\n",
188 | "\n",
189 | " # Sigmoid Equation & Decision Boundary\n",
190 | "\n",
191 | " def predict(self, X):\n",
192 | "\n",
193 | " Y_pred = 1 / (1 + np.exp( - (X.dot(self.w) + self.b ) )) \n",
194 | " Y_pred = np.where( Y_pred > 0.5, 1, 0)\n",
195 | " return Y_pred"
196 | ],
197 | "execution_count": null,
198 | "outputs": []
199 | },
200 | {
201 | "cell_type": "code",
202 | "metadata": {
203 | "id": "htiH07T_WL-Y"
204 | },
205 | "source": [],
206 | "execution_count": null,
207 | "outputs": []
208 | }
209 | ]
210 | }
--------------------------------------------------------------------------------
/7_2_6_Logistic_Regression_Implementation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "LnPbntVRnfvV"
22 | },
23 | "source": [
24 | "Importing the Dependencies"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "metadata": {
30 | "id": "-71UtHzNVWjB"
31 | },
32 | "source": [
33 | "import numpy as np\n",
34 | "import pandas as pd\n",
35 | "from sklearn.preprocessing import StandardScaler\n",
36 | "from sklearn.model_selection import train_test_split\n",
37 | "from sklearn.metrics import accuracy_score\n",
38 | "import Log_Reg"
39 | ],
40 | "execution_count": null,
41 | "outputs": []
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {
46 | "id": "bmfOfG8joBBy"
47 | },
48 | "source": [
49 | "Data Collection and Analysis\n",
50 | "\n",
51 | "PIMA Diabetes Dataset"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "metadata": {
57 | "id": "Xpw6Mj_pn_TL"
58 | },
59 | "source": [
60 | "# loading the diabetes dataset to a pandas DataFrame\n",
61 | "diabetes_dataset = pd.read_csv('/content/diabetes.csv') "
62 | ],
63 | "execution_count": null,
64 | "outputs": []
65 | },
66 | {
67 | "cell_type": "code",
68 | "metadata": {
69 | "colab": {
70 | "base_uri": "https://localhost:8080/",
71 | "height": 203
72 | },
73 | "id": "-tjO09ncovoh",
74 | "outputId": "669a7da3-b683-46d4-f419-31b97847248c"
75 | },
76 | "source": [
77 | "# printing the first 5 rows of the dataset\n",
78 | "diabetes_dataset.head()"
79 | ],
80 | "execution_count": null,
81 | "outputs": [
82 | {
83 | "output_type": "execute_result",
84 | "data": {
85 | "text/html": [
86 | "\n",
87 | "\n",
100 | "
\n",
101 | " \n",
102 | " \n",
103 | " | \n",
104 | " Pregnancies | \n",
105 | " Glucose | \n",
106 | " BloodPressure | \n",
107 | " SkinThickness | \n",
108 | " Insulin | \n",
109 | " BMI | \n",
110 | " DiabetesPedigreeFunction | \n",
111 | " Age | \n",
112 | " Outcome | \n",
113 | "
\n",
114 | " \n",
115 | " \n",
116 | " \n",
117 | " 0 | \n",
118 | " 6 | \n",
119 | " 148 | \n",
120 | " 72 | \n",
121 | " 35 | \n",
122 | " 0 | \n",
123 | " 33.6 | \n",
124 | " 0.627 | \n",
125 | " 50 | \n",
126 | " 1 | \n",
127 | "
\n",
128 | " \n",
129 | " 1 | \n",
130 | " 1 | \n",
131 | " 85 | \n",
132 | " 66 | \n",
133 | " 29 | \n",
134 | " 0 | \n",
135 | " 26.6 | \n",
136 | " 0.351 | \n",
137 | " 31 | \n",
138 | " 0 | \n",
139 | "
\n",
140 | " \n",
141 | " 2 | \n",
142 | " 8 | \n",
143 | " 183 | \n",
144 | " 64 | \n",
145 | " 0 | \n",
146 | " 0 | \n",
147 | " 23.3 | \n",
148 | " 0.672 | \n",
149 | " 32 | \n",
150 | " 1 | \n",
151 | "
\n",
152 | " \n",
153 | " 3 | \n",
154 | " 1 | \n",
155 | " 89 | \n",
156 | " 66 | \n",
157 | " 23 | \n",
158 | " 94 | \n",
159 | " 28.1 | \n",
160 | " 0.167 | \n",
161 | " 21 | \n",
162 | " 0 | \n",
163 | "
\n",
164 | " \n",
165 | " 4 | \n",
166 | " 0 | \n",
167 | " 137 | \n",
168 | " 40 | \n",
169 | " 35 | \n",
170 | " 168 | \n",
171 | " 43.1 | \n",
172 | " 2.288 | \n",
173 | " 33 | \n",
174 | " 1 | \n",
175 | "
\n",
176 | " \n",
177 | "
\n",
178 | "
"
179 | ],
180 | "text/plain": [
181 | " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n",
182 | "0 6 148 72 ... 0.627 50 1\n",
183 | "1 1 85 66 ... 0.351 31 0\n",
184 | "2 8 183 64 ... 0.672 32 1\n",
185 | "3 1 89 66 ... 0.167 21 0\n",
186 | "4 0 137 40 ... 2.288 33 1\n",
187 | "\n",
188 | "[5 rows x 9 columns]"
189 | ]
190 | },
191 | "metadata": {},
192 | "execution_count": 4
193 | }
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "metadata": {
199 | "colab": {
200 | "base_uri": "https://localhost:8080/"
201 | },
202 | "id": "lynParo6pEMB",
203 | "outputId": "0d3deba0-3071-4206-c2e9-fd94c075ab1c"
204 | },
205 | "source": [
206 | "# number of rows and Columns in this dataset\n",
207 | "diabetes_dataset.shape"
208 | ],
209 | "execution_count": null,
210 | "outputs": [
211 | {
212 | "output_type": "execute_result",
213 | "data": {
214 | "text/plain": [
215 | "(768, 9)"
216 | ]
217 | },
218 | "metadata": {},
219 | "execution_count": 5
220 | }
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "metadata": {
226 | "colab": {
227 | "base_uri": "https://localhost:8080/",
228 | "height": 295
229 | },
230 | "id": "3NDJOlrEpmoL",
231 | "outputId": "18599c20-1e7c-424f-ece5-c4d17bc9a343"
232 | },
233 | "source": [
234 | "# getting the statistical measures of the data\n",
235 | "diabetes_dataset.describe()"
236 | ],
237 | "execution_count": null,
238 | "outputs": [
239 | {
240 | "output_type": "execute_result",
241 | "data": {
242 | "text/html": [
243 | "\n",
244 | "\n",
257 | "
\n",
258 | " \n",
259 | " \n",
260 | " | \n",
261 | " Pregnancies | \n",
262 | " Glucose | \n",
263 | " BloodPressure | \n",
264 | " SkinThickness | \n",
265 | " Insulin | \n",
266 | " BMI | \n",
267 | " DiabetesPedigreeFunction | \n",
268 | " Age | \n",
269 | " Outcome | \n",
270 | "
\n",
271 | " \n",
272 | " \n",
273 | " \n",
274 | " count | \n",
275 | " 768.000000 | \n",
276 | " 768.000000 | \n",
277 | " 768.000000 | \n",
278 | " 768.000000 | \n",
279 | " 768.000000 | \n",
280 | " 768.000000 | \n",
281 | " 768.000000 | \n",
282 | " 768.000000 | \n",
283 | " 768.000000 | \n",
284 | "
\n",
285 | " \n",
286 | " mean | \n",
287 | " 3.845052 | \n",
288 | " 120.894531 | \n",
289 | " 69.105469 | \n",
290 | " 20.536458 | \n",
291 | " 79.799479 | \n",
292 | " 31.992578 | \n",
293 | " 0.471876 | \n",
294 | " 33.240885 | \n",
295 | " 0.348958 | \n",
296 | "
\n",
297 | " \n",
298 | " std | \n",
299 | " 3.369578 | \n",
300 | " 31.972618 | \n",
301 | " 19.355807 | \n",
302 | " 15.952218 | \n",
303 | " 115.244002 | \n",
304 | " 7.884160 | \n",
305 | " 0.331329 | \n",
306 | " 11.760232 | \n",
307 | " 0.476951 | \n",
308 | "
\n",
309 | " \n",
310 | " min | \n",
311 | " 0.000000 | \n",
312 | " 0.000000 | \n",
313 | " 0.000000 | \n",
314 | " 0.000000 | \n",
315 | " 0.000000 | \n",
316 | " 0.000000 | \n",
317 | " 0.078000 | \n",
318 | " 21.000000 | \n",
319 | " 0.000000 | \n",
320 | "
\n",
321 | " \n",
322 | " 25% | \n",
323 | " 1.000000 | \n",
324 | " 99.000000 | \n",
325 | " 62.000000 | \n",
326 | " 0.000000 | \n",
327 | " 0.000000 | \n",
328 | " 27.300000 | \n",
329 | " 0.243750 | \n",
330 | " 24.000000 | \n",
331 | " 0.000000 | \n",
332 | "
\n",
333 | " \n",
334 | " 50% | \n",
335 | " 3.000000 | \n",
336 | " 117.000000 | \n",
337 | " 72.000000 | \n",
338 | " 23.000000 | \n",
339 | " 30.500000 | \n",
340 | " 32.000000 | \n",
341 | " 0.372500 | \n",
342 | " 29.000000 | \n",
343 | " 0.000000 | \n",
344 | "
\n",
345 | " \n",
346 | " 75% | \n",
347 | " 6.000000 | \n",
348 | " 140.250000 | \n",
349 | " 80.000000 | \n",
350 | " 32.000000 | \n",
351 | " 127.250000 | \n",
352 | " 36.600000 | \n",
353 | " 0.626250 | \n",
354 | " 41.000000 | \n",
355 | " 1.000000 | \n",
356 | "
\n",
357 | " \n",
358 | " max | \n",
359 | " 17.000000 | \n",
360 | " 199.000000 | \n",
361 | " 122.000000 | \n",
362 | " 99.000000 | \n",
363 | " 846.000000 | \n",
364 | " 67.100000 | \n",
365 | " 2.420000 | \n",
366 | " 81.000000 | \n",
367 | " 1.000000 | \n",
368 | "
\n",
369 | " \n",
370 | "
\n",
371 | "
"
372 | ],
373 | "text/plain": [
374 | " Pregnancies Glucose ... Age Outcome\n",
375 | "count 768.000000 768.000000 ... 768.000000 768.000000\n",
376 | "mean 3.845052 120.894531 ... 33.240885 0.348958\n",
377 | "std 3.369578 31.972618 ... 11.760232 0.476951\n",
378 | "min 0.000000 0.000000 ... 21.000000 0.000000\n",
379 | "25% 1.000000 99.000000 ... 24.000000 0.000000\n",
380 | "50% 3.000000 117.000000 ... 29.000000 0.000000\n",
381 | "75% 6.000000 140.250000 ... 41.000000 1.000000\n",
382 | "max 17.000000 199.000000 ... 81.000000 1.000000\n",
383 | "\n",
384 | "[8 rows x 9 columns]"
385 | ]
386 | },
387 | "metadata": {},
388 | "execution_count": 6
389 | }
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "metadata": {
395 | "colab": {
396 | "base_uri": "https://localhost:8080/"
397 | },
398 | "id": "LrpHzaGpp5dQ",
399 | "outputId": "7405a173-2a57-46f0-da2a-6f0cd9699a28"
400 | },
401 | "source": [
402 | "diabetes_dataset['Outcome'].value_counts()"
403 | ],
404 | "execution_count": null,
405 | "outputs": [
406 | {
407 | "output_type": "execute_result",
408 | "data": {
409 | "text/plain": [
410 | "0 500\n",
411 | "1 268\n",
412 | "Name: Outcome, dtype: int64"
413 | ]
414 | },
415 | "metadata": {},
416 | "execution_count": 7
417 | }
418 | ]
419 | },
420 | {
421 | "cell_type": "markdown",
422 | "metadata": {
423 | "id": "cB1qRaNcqeh5"
424 | },
425 | "source": [
426 | "0 --> Non-Diabetic\n",
427 | "\n",
428 | "1 --> Diabetic"
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "metadata": {
434 | "colab": {
435 | "base_uri": "https://localhost:8080/",
436 | "height": 142
437 | },
438 | "id": "I6MWR0k_qSCK",
439 | "outputId": "4b8c3c9e-452f-4772-83b4-dd17563df908"
440 | },
441 | "source": [
442 | "diabetes_dataset.groupby('Outcome').mean()"
443 | ],
444 | "execution_count": null,
445 | "outputs": [
446 | {
447 | "output_type": "execute_result",
448 | "data": {
449 | "text/html": [
450 | "\n",
451 | "\n",
464 | "
\n",
465 | " \n",
466 | " \n",
467 | " | \n",
468 | " Pregnancies | \n",
469 | " Glucose | \n",
470 | " BloodPressure | \n",
471 | " SkinThickness | \n",
472 | " Insulin | \n",
473 | " BMI | \n",
474 | " DiabetesPedigreeFunction | \n",
475 | " Age | \n",
476 | "
\n",
477 | " \n",
478 | " Outcome | \n",
479 | " | \n",
480 | " | \n",
481 | " | \n",
482 | " | \n",
483 | " | \n",
484 | " | \n",
485 | " | \n",
486 | " | \n",
487 | "
\n",
488 | " \n",
489 | " \n",
490 | " \n",
491 | " 0 | \n",
492 | " 3.298000 | \n",
493 | " 109.980000 | \n",
494 | " 68.184000 | \n",
495 | " 19.664000 | \n",
496 | " 68.792000 | \n",
497 | " 30.304200 | \n",
498 | " 0.429734 | \n",
499 | " 31.190000 | \n",
500 | "
\n",
501 | " \n",
502 | " 1 | \n",
503 | " 4.865672 | \n",
504 | " 141.257463 | \n",
505 | " 70.824627 | \n",
506 | " 22.164179 | \n",
507 | " 100.335821 | \n",
508 | " 35.142537 | \n",
509 | " 0.550500 | \n",
510 | " 37.067164 | \n",
511 | "
\n",
512 | " \n",
513 | "
\n",
514 | "
"
515 | ],
516 | "text/plain": [
517 | " Pregnancies Glucose ... DiabetesPedigreeFunction Age\n",
518 | "Outcome ... \n",
519 | "0 3.298000 109.980000 ... 0.429734 31.190000\n",
520 | "1 4.865672 141.257463 ... 0.550500 37.067164\n",
521 | "\n",
522 | "[2 rows x 8 columns]"
523 | ]
524 | },
525 | "metadata": {},
526 | "execution_count": 8
527 | }
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "metadata": {
533 | "id": "RoDW7l9mqqHZ"
534 | },
535 | "source": [
536 | "# separating the data and labels\n",
537 | "features = diabetes_dataset.drop(columns = 'Outcome', axis=1)\n",
538 | "target = diabetes_dataset['Outcome']"
539 | ],
540 | "execution_count": null,
541 | "outputs": []
542 | },
543 | {
544 | "cell_type": "code",
545 | "metadata": {
546 | "colab": {
547 | "base_uri": "https://localhost:8080/"
548 | },
549 | "id": "3eiRW9M9raMm",
550 | "outputId": "28d35af9-2175-4da0-9f1b-b5afd1eb6361"
551 | },
552 | "source": [
553 | "print(features)"
554 | ],
555 | "execution_count": null,
556 | "outputs": [
557 | {
558 | "output_type": "stream",
559 | "name": "stdout",
560 | "text": [
561 | " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n",
562 | "0 6 148 72 ... 33.6 0.627 50\n",
563 | "1 1 85 66 ... 26.6 0.351 31\n",
564 | "2 8 183 64 ... 23.3 0.672 32\n",
565 | "3 1 89 66 ... 28.1 0.167 21\n",
566 | "4 0 137 40 ... 43.1 2.288 33\n",
567 | ".. ... ... ... ... ... ... ...\n",
568 | "763 10 101 76 ... 32.9 0.171 63\n",
569 | "764 2 122 70 ... 36.8 0.340 27\n",
570 | "765 5 121 72 ... 26.2 0.245 30\n",
571 | "766 1 126 60 ... 30.1 0.349 47\n",
572 | "767 1 93 70 ... 30.4 0.315 23\n",
573 | "\n",
574 | "[768 rows x 8 columns]\n"
575 | ]
576 | }
577 | ]
578 | },
579 | {
580 | "cell_type": "code",
581 | "metadata": {
582 | "colab": {
583 | "base_uri": "https://localhost:8080/"
584 | },
585 | "id": "AoxgTJAMrcCl",
586 | "outputId": "adbfa8fc-dab2-4bf7-9db5-a0fbe99d958c"
587 | },
588 | "source": [
589 | "print(target)"
590 | ],
591 | "execution_count": null,
592 | "outputs": [
593 | {
594 | "output_type": "stream",
595 | "name": "stdout",
596 | "text": [
597 | "0 1\n",
598 | "1 0\n",
599 | "2 1\n",
600 | "3 0\n",
601 | "4 1\n",
602 | " ..\n",
603 | "763 0\n",
604 | "764 0\n",
605 | "765 0\n",
606 | "766 1\n",
607 | "767 0\n",
608 | "Name: Outcome, Length: 768, dtype: int64\n"
609 | ]
610 | }
611 | ]
612 | },
613 | {
614 | "cell_type": "markdown",
615 | "metadata": {
616 | "id": "umAbo_kqrlzI"
617 | },
618 | "source": [
619 | "Data Standardization"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "metadata": {
625 | "id": "njfM5X60rgnc"
626 | },
627 | "source": [
628 | "scaler = StandardScaler()"
629 | ],
630 | "execution_count": null,
631 | "outputs": []
632 | },
633 | {
634 | "cell_type": "code",
635 | "metadata": {
636 | "colab": {
637 | "base_uri": "https://localhost:8080/"
638 | },
639 | "id": "g0ai5ARbr53p",
640 | "outputId": "29354d62-9f72-4371-b3e6-c63dc88f4b42"
641 | },
642 | "source": [
643 | "scaler.fit(features)"
644 | ],
645 | "execution_count": null,
646 | "outputs": [
647 | {
648 | "output_type": "execute_result",
649 | "data": {
650 | "text/plain": [
651 | "StandardScaler(copy=True, with_mean=True, with_std=True)"
652 | ]
653 | },
654 | "metadata": {},
655 | "execution_count": 13
656 | }
657 | ]
658 | },
659 | {
660 | "cell_type": "code",
661 | "metadata": {
662 | "id": "FHxNwPuZr-kD"
663 | },
664 | "source": [
665 | "standardized_data = scaler.transform(features)"
666 | ],
667 | "execution_count": null,
668 | "outputs": []
669 | },
670 | {
671 | "cell_type": "code",
672 | "metadata": {
673 | "colab": {
674 | "base_uri": "https://localhost:8080/"
675 | },
676 | "id": "fjMwZ5x6sPUJ",
677 | "outputId": "b667e6d2-0e13-4247-a381-565f74273c0f"
678 | },
679 | "source": [
680 | "print(standardized_data)"
681 | ],
682 | "execution_count": null,
683 | "outputs": [
684 | {
685 | "output_type": "stream",
686 | "name": "stdout",
687 | "text": [
688 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
689 | " 1.4259954 ]\n",
690 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
691 | " -0.19067191]\n",
692 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
693 | " -0.10558415]\n",
694 | " ...\n",
695 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
696 | " -0.27575966]\n",
697 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
698 | " 1.17073215]\n",
699 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
700 | " -0.87137393]]\n"
701 | ]
702 | }
703 | ]
704 | },
705 | {
706 | "cell_type": "code",
707 | "metadata": {
708 | "id": "ZxWSl4SGsRjE"
709 | },
710 | "source": [
711 | "features = standardized_data\n",
712 | "target = diabetes_dataset['Outcome']"
713 | ],
714 | "execution_count": null,
715 | "outputs": []
716 | },
717 | {
718 | "cell_type": "code",
719 | "metadata": {
720 | "colab": {
721 | "base_uri": "https://localhost:8080/"
722 | },
723 | "id": "lhJF_7QjsjmP",
724 | "outputId": "0cf50bcb-c105-455d-8067-1102b261f1a6"
725 | },
726 | "source": [
727 | "print(features)\n",
728 | "print(target)"
729 | ],
730 | "execution_count": null,
731 | "outputs": [
732 | {
733 | "output_type": "stream",
734 | "name": "stdout",
735 | "text": [
736 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
737 | " 1.4259954 ]\n",
738 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
739 | " -0.19067191]\n",
740 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
741 | " -0.10558415]\n",
742 | " ...\n",
743 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
744 | " -0.27575966]\n",
745 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
746 | " 1.17073215]\n",
747 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
748 | " -0.87137393]]\n",
749 | "0 1\n",
750 | "1 0\n",
751 | "2 1\n",
752 | "3 0\n",
753 | "4 1\n",
754 | " ..\n",
755 | "763 0\n",
756 | "764 0\n",
757 | "765 0\n",
758 | "766 1\n",
759 | "767 0\n",
760 | "Name: Outcome, Length: 768, dtype: int64\n"
761 | ]
762 | }
763 | ]
764 | },
765 | {
766 | "cell_type": "markdown",
767 | "metadata": {
768 | "id": "gHciEFkxsoQP"
769 | },
770 | "source": [
771 | "Train Test Split"
772 | ]
773 | },
774 | {
775 | "cell_type": "code",
776 | "metadata": {
777 | "id": "AEfKGj_yslvD"
778 | },
779 | "source": [
780 | "X_train, X_test, Y_train, Y_test = train_test_split(features,target, test_size = 0.2, random_state=2)"
781 | ],
782 | "execution_count": null,
783 | "outputs": []
784 | },
785 | {
786 | "cell_type": "code",
787 | "metadata": {
788 | "colab": {
789 | "base_uri": "https://localhost:8080/"
790 | },
791 | "id": "DR05T-o0t3FQ",
792 | "outputId": "8ded781c-969b-46c4-9da6-7755b35490ca"
793 | },
794 | "source": [
795 | "print(features.shape, X_train.shape, X_test.shape)"
796 | ],
797 | "execution_count": null,
798 | "outputs": [
799 | {
800 | "output_type": "stream",
801 | "name": "stdout",
802 | "text": [
803 | "(768, 8) (614, 8) (154, 8)\n"
804 | ]
805 | }
806 | ]
807 | },
808 | {
809 | "cell_type": "markdown",
810 | "metadata": {
811 | "id": "ElJ3tkOtuC_n"
812 | },
813 | "source": [
814 | "Training the Model"
815 | ]
816 | },
817 | {
818 | "cell_type": "code",
819 | "metadata": {
820 | "id": "5szLWHlNt9xc"
821 | },
822 | "source": [
823 | "classifier = Log_Reg.Logistic_Regression(learning_rate=0.01, no_of_iterations=1000)"
824 | ],
825 | "execution_count": null,
826 | "outputs": []
827 | },
828 | {
829 | "cell_type": "code",
830 | "metadata": {
831 | "id": "ncJWY_7suPAb"
832 | },
833 | "source": [
834 | "# training the Logistic Regression classifier\n",
835 | "classifier.fit(X_train, Y_train)"
836 | ],
837 | "execution_count": null,
838 | "outputs": []
839 | },
840 | {
841 | "cell_type": "markdown",
842 | "metadata": {
843 | "id": "UV4-CAfquiyP"
844 | },
845 | "source": [
846 | "Model Evaluation"
847 | ]
848 | },
849 | {
850 | "cell_type": "markdown",
851 | "metadata": {
852 | "id": "yhAjGPJWunXa"
853 | },
854 | "source": [
855 | "Accuracy Score"
856 | ]
857 | },
858 | {
859 | "cell_type": "code",
860 | "metadata": {
861 | "id": "fJLEPQK7ueXp"
862 | },
863 | "source": [
864 | "# accuracy score on the training data\n",
865 | "X_train_prediction = classifier.predict(X_train)\n",
866 | "training_data_accuracy = accuracy_score( Y_train, X_train_prediction)"
867 | ],
868 | "execution_count": null,
869 | "outputs": []
870 | },
871 | {
872 | "cell_type": "code",
873 | "metadata": {
874 | "colab": {
875 | "base_uri": "https://localhost:8080/"
876 | },
877 | "id": "mmJ22qhVvNwj",
878 | "outputId": "98f8fd1e-aa21-4942-a658-42dfecc35e15"
879 | },
880 | "source": [
881 | "print('Accuracy score of the training data : ', training_data_accuracy)"
882 | ],
883 | "execution_count": null,
884 | "outputs": [
885 | {
886 | "output_type": "stream",
887 | "name": "stdout",
888 | "text": [
889 | "Accuracy score of the training data : 0.7768729641693811\n"
890 | ]
891 | }
892 | ]
893 | },
894 | {
895 | "cell_type": "code",
896 | "metadata": {
897 | "id": "G2CICFMEvcCl"
898 | },
899 | "source": [
900 | "# accuracy score on the test data\n",
901 | "X_test_prediction = classifier.predict(X_test)\n",
902 | "test_data_accuracy = accuracy_score( Y_test, X_test_prediction)"
903 | ],
904 | "execution_count": null,
905 | "outputs": []
906 | },
907 | {
908 | "cell_type": "code",
909 | "metadata": {
910 | "colab": {
911 | "base_uri": "https://localhost:8080/"
912 | },
913 | "id": "i2GcW_t_vz7C",
914 | "outputId": "c359e2fc-f81c-48a7-e1f6-9d424412725e"
915 | },
916 | "source": [
917 | "print('Accuracy score of the test data : ', test_data_accuracy)"
918 | ],
919 | "execution_count": null,
920 | "outputs": [
921 | {
922 | "output_type": "stream",
923 | "name": "stdout",
924 | "text": [
925 | "Accuracy score of the test data : 0.7662337662337663\n"
926 | ]
927 | }
928 | ]
929 | },
930 | {
931 | "cell_type": "markdown",
932 | "metadata": {
933 | "id": "gq8ZX1xpwPF5"
934 | },
935 | "source": [
936 | "Making a Predictive System"
937 | ]
938 | },
939 | {
940 | "cell_type": "code",
941 | "metadata": {
942 | "colab": {
943 | "base_uri": "https://localhost:8080/"
944 | },
945 | "id": "U-ULRe4yv5tH",
946 | "outputId": "6a63ae70-016b-45d0-bc4f-6fba7952359e"
947 | },
948 | "source": [
949 | "input_data = (5,166,72,19,175,25.8,0.587,51)\n",
950 | "\n",
951 | "# changing the input_data to numpy array\n",
952 | "input_data_as_numpy_array = np.asarray(input_data)\n",
953 | "\n",
954 | "# reshape the array as we are predicting for one instance\n",
955 | "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n",
956 | "\n",
957 | "# standardize the input data\n",
958 | "std_data = scaler.transform(input_data_reshaped)\n",
959 | "print(std_data)\n",
960 | "\n",
961 | "prediction = classifier.predict(std_data)\n",
962 | "print(prediction)\n",
963 | "\n",
964 | "if (prediction[0] == 0):\n",
965 | " print('The person is not diabetic')\n",
966 | "else:\n",
967 | " print('The person is diabetic')"
968 | ],
969 | "execution_count": null,
970 | "outputs": [
971 | {
972 | "output_type": "stream",
973 | "name": "stdout",
974 | "text": [
975 | "[[ 0.3429808 1.41167241 0.14964075 -0.09637905 0.82661621 -0.78595734\n",
976 | " 0.34768723 1.51108316]]\n",
977 | "[1]\n",
978 | "The person is diabetic\n"
979 | ]
980 | }
981 | ]
982 | }
983 | ]
984 | }
--------------------------------------------------------------------------------
/7_3_6_Building_Support_Vector_Machine_Classifier_from_Scratch_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "OoQnMmCCBfTF"
22 | },
23 | "source": [
24 | "**SVM Classifier**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "VgNJ4FGKBkIu"
31 | },
32 | "source": [
33 | "Equation of the Hyperplane:\n",
34 | "\n",
35 | "**y = wx - b**"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {
41 | "id": "X8OJXGuPvDt2"
42 | },
43 | "source": [
44 | "**Gradient Descent:**\n",
45 | "\n",
46 | "Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n",
47 | "\n",
48 | "w = w - α*dw\n",
49 | "\n",
50 | "b = b - α*db"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {
56 | "id": "WSAfYP7WmECB"
57 | },
58 | "source": [
59 | "**Learning Rate:**\n",
60 | "\n",
61 | "Learning rate is a tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function."
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {
67 | "id": "E9nX8-OIPWnM"
68 | },
69 | "source": [
70 | "Importing the Dependencies"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "metadata": {
76 | "id": "tzuAfRbBGmTH"
77 | },
78 | "source": [
79 | "# importing numpy library\n",
80 | "import numpy as np"
81 | ],
82 | "execution_count": null,
83 | "outputs": []
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {
88 | "id": "p29I221pPhkY"
89 | },
90 | "source": [
91 | "Support Vector Machine Classifier"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "metadata": {
97 | "id": "x7sKawRDPd4m"
98 | },
99 | "source": [
100 | "class SVM_classifier():\n",
101 | "\n",
102 | "\n",
103 | " # initializing the hyperparameters\n",
104 | " def __init__(self, learning_rate, no_of_iterations, lambda_parameter):\n",
105 | "\n",
106 | " self.learning_rate = learning_rate\n",
107 | " self.no_of_iterations = no_of_iterations\n",
108 | " self.lambda_parameter = lambda_parameter\n",
109 | "\n",
110 | "\n",
111 | " \n",
112 | " # fitting the dataset to SVM Classifier\n",
113 | " def fit(self, X, Y):\n",
114 | "\n",
115 | " # m --> number of Data points --> number of rows\n",
116 | " # n --> number of input features --> number of columns\n",
117 | " self.m, self.n = X.shape\n",
118 | "\n",
119 | " # initializing the weight value and bias value\n",
120 | "\n",
121 | " self.w = np.zeros(self.n)\n",
122 | "\n",
123 | " self.b = 0\n",
124 | "\n",
125 | " self.X = X\n",
126 | "\n",
127 | " self.Y = Y\n",
128 | "\n",
129 | " # implementing Gradient Descent algorithm for Optimization\n",
130 | "\n",
131 | " for i in range(self.no_of_iterations):\n",
132 | " self.update_weights()\n",
133 | "\n",
134 | "\n",
135 | "\n",
136 | " # function for updating the weight and bias value\n",
137 | " def update_weights(self):\n",
138 | "\n",
139 | " # label encoding\n",
140 | " y_label = np.where(self.Y <= 0, -1, 1)\n",
141 | "\n",
142 | "\n",
143 | "\n",
144 | " # gradients ( dw, db)\n",
145 | " for index, x_i in enumerate(self.X):\n",
146 | "\n",
147 | " condition = y_label[index] * (np.dot(x_i, self.w) - self.b) >= 1\n",
148 | "\n",
149 | " if (condition == True):\n",
150 | "\n",
151 | " dw = 2 * self.lambda_parameter * self.w\n",
152 | " db = 0\n",
153 | "\n",
154 | " else:\n",
155 | "\n",
156 | " dw = 2 * self.lambda_parameter * self.w - np.dot(x_i, y_label[index])\n",
157 | " db = y_label[index]\n",
158 | "\n",
159 | "\n",
160 | " self.w = self.w - self.learning_rate * dw\n",
161 | "\n",
162 | " self.b = self.b - self.learning_rate * db\n",
163 | "\n",
164 | "\n",
165 | "\n",
166 | " # predict the label for a given input value\n",
167 | " def predict(self, X):\n",
168 | "\n",
169 | " output = np.dot(X, self.w) - self.b\n",
170 | " \n",
171 | " predicted_labels = np.sign(output)\n",
172 | "\n",
173 | " y_hat = np.where(predicted_labels <= -1, 0, 1)\n",
174 | "\n",
175 | " return y_hat\n",
176 | "\n",
177 | " \n",
178 | "\n"
179 | ],
180 | "execution_count": null,
181 | "outputs": []
182 | }
183 | ]
184 | }
--------------------------------------------------------------------------------
/7_3_7_Implementing_SVM_Classifier_from_Scratch_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "OoQnMmCCBfTF"
22 | },
23 | "source": [
24 | "**SVM Classifier**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "VgNJ4FGKBkIu"
31 | },
32 | "source": [
33 | "Equation of the Hyperplane:\n",
34 | "\n",
35 | "**y = wx - b**"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {
41 | "id": "X8OJXGuPvDt2"
42 | },
43 | "source": [
44 | "**Gradient Descent:**\n",
45 | "\n",
46 | "Gradient Descent is an optimization algorithm used for minimizing the loss function in various machine learning algorithms. It is used for updating the parameters of the learning model.\n",
47 | "\n",
48 | "w = w - α*dw\n",
49 | "\n",
50 | "b = b - α*db"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {
56 | "id": "WSAfYP7WmECB"
57 | },
58 | "source": [
59 | "**Learning Rate:**\n",
60 | "\n",
61 | "Learning rate is a tuning parameter in an optimization algorithm that determines the step size at each iteration while moving toward a minimum of a loss function."
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {
67 | "id": "E9nX8-OIPWnM"
68 | },
69 | "source": [
70 | "Importing the Dependencies"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "metadata": {
76 | "id": "tzuAfRbBGmTH"
77 | },
78 | "source": [
79 | "# importing numpy library\n",
80 | "import numpy as np"
81 | ],
82 | "execution_count": null,
83 | "outputs": []
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {
88 | "id": "p29I221pPhkY"
89 | },
90 | "source": [
91 | "Support Vector Machine Classifier"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "metadata": {
97 | "id": "x7sKawRDPd4m"
98 | },
99 | "source": [
100 | "class SVM_classifier():\n",
101 | "\n",
102 | "\n",
103 | " # initializing the hyperparameters\n",
104 | " def __init__(self, learning_rate, no_of_iterations, lambda_parameter):\n",
105 | "\n",
106 | " self.learning_rate = learning_rate\n",
107 | " self.no_of_iterations = no_of_iterations\n",
108 | " self.lambda_parameter = lambda_parameter\n",
109 | "\n",
110 | "\n",
111 | " \n",
112 | " # fitting the dataset to SVM Classifier\n",
113 | " def fit(self, X, Y):\n",
114 | "\n",
115 | " # m --> number of Data points --> number of rows\n",
116 | " # n --> number of input features --> number of columns\n",
117 | " self.m, self.n = X.shape\n",
118 | "\n",
119 | " # initializing the weight value and bias value\n",
120 | "\n",
121 | " self.w = np.zeros(self.n)\n",
122 | "\n",
123 | " self.b = 0\n",
124 | "\n",
125 | " self.X = X\n",
126 | "\n",
127 | " self.Y = Y\n",
128 | "\n",
129 | " # implementing Gradient Descent algorithm for Optimization\n",
130 | "\n",
131 | " for i in range(self.no_of_iterations):\n",
132 | " self.update_weights()\n",
133 | "\n",
134 | "\n",
135 | "\n",
136 | " # function for updating the weight and bias value\n",
137 | " def update_weights(self):\n",
138 | "\n",
139 | " # label encoding: map labels <= 0 to -1 and all other labels to +1\n",
140 | " y_label = np.where(self.Y <= 0, -1, 1)\n",
141 | "\n",
142 | "\n",
143 | "\n",
144 | " # gradients ( dw, db)\n",
145 | " for index, x_i in enumerate(self.X):\n",
146 | "\n",
147 | " condition = y_label[index] * (np.dot(x_i, self.w) - self.b) >= 1\n",
148 | "\n",
149 | " if (condition == True):\n",
150 | "\n",
151 | " dw = 2 * self.lambda_parameter * self.w\n",
152 | " db = 0\n",
153 | "\n",
154 | " else:\n",
155 | "\n",
156 | " dw = 2 * self.lambda_parameter * self.w - np.dot(x_i, y_label[index])\n",
157 | " db = y_label[index]\n",
158 | "\n",
159 | "\n",
160 | " self.w = self.w - self.learning_rate * dw\n",
161 | "\n",
162 | " self.b = self.b - self.learning_rate * db\n",
163 | "\n",
164 | "\n",
165 | "\n",
166 | " # predict the label for a given input value\n",
167 | " def predict(self, X):\n",
168 | "\n",
169 | " output = np.dot(X, self.w) - self.b\n",
170 | " \n",
171 | " predicted_labels = np.sign(output)\n",
172 | "\n",
173 | " y_hat = np.where(predicted_labels <= -1, 0, 1)\n",
174 | "\n",
175 | " return y_hat \n",
176 | "\n"
177 | ],
178 | "execution_count": null,
179 | "outputs": []
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {
184 | "id": "gJJikdNRaiRc"
185 | },
186 | "source": [
187 | "Importing the Dependencies"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "metadata": {
193 | "id": "jMfoWjDkUH_T"
194 | },
195 | "source": [
196 | "import pandas as pd\n",
197 | "from sklearn.preprocessing import StandardScaler\n",
198 | "from sklearn.model_selection import train_test_split\n",
199 | "from sklearn.metrics import accuracy_score"
200 | ],
201 | "execution_count": null,
202 | "outputs": []
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {
207 | "id": "kPKQ-j-gbFt6"
208 | },
209 | "source": [
210 | "Data Collection & Processing"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "metadata": {
216 | "id": "FNg5ycvObEhX"
217 | },
218 | "source": [
219 | "# loading the data from csv file to pandas dataframe\n",
220 | "diabetes_data = pd.read_csv('/content/diabetes.csv')"
221 | ],
222 | "execution_count": null,
223 | "outputs": []
224 | },
225 | {
226 | "cell_type": "code",
227 | "metadata": {
228 | "colab": {
229 | "base_uri": "https://localhost:8080/",
230 | "height": 202
231 | },
232 | "id": "qZAcUKKpbc-Z",
233 | "outputId": "b8cb5c83-2876-432f-b957-5bbd6f692990"
234 | },
235 | "source": [
236 | "# print the first 5 rows of the dataframe\n",
237 | "diabetes_data.head()"
238 | ],
239 | "execution_count": null,
240 | "outputs": [
241 | {
242 | "output_type": "execute_result",
243 | "data": {
244 | "text/html": [
245 | "\n",
246 | "\n",
259 | "
\n",
260 | " \n",
261 | " \n",
262 | " | \n",
263 | " Pregnancies | \n",
264 | " Glucose | \n",
265 | " BloodPressure | \n",
266 | " SkinThickness | \n",
267 | " Insulin | \n",
268 | " BMI | \n",
269 | " DiabetesPedigreeFunction | \n",
270 | " Age | \n",
271 | " Outcome | \n",
272 | "
\n",
273 | " \n",
274 | " \n",
275 | " \n",
276 | " 0 | \n",
277 | " 6 | \n",
278 | " 148 | \n",
279 | " 72 | \n",
280 | " 35 | \n",
281 | " 0 | \n",
282 | " 33.6 | \n",
283 | " 0.627 | \n",
284 | " 50 | \n",
285 | " 1 | \n",
286 | "
\n",
287 | " \n",
288 | " 1 | \n",
289 | " 1 | \n",
290 | " 85 | \n",
291 | " 66 | \n",
292 | " 29 | \n",
293 | " 0 | \n",
294 | " 26.6 | \n",
295 | " 0.351 | \n",
296 | " 31 | \n",
297 | " 0 | \n",
298 | "
\n",
299 | " \n",
300 | " 2 | \n",
301 | " 8 | \n",
302 | " 183 | \n",
303 | " 64 | \n",
304 | " 0 | \n",
305 | " 0 | \n",
306 | " 23.3 | \n",
307 | " 0.672 | \n",
308 | " 32 | \n",
309 | " 1 | \n",
310 | "
\n",
311 | " \n",
312 | " 3 | \n",
313 | " 1 | \n",
314 | " 89 | \n",
315 | " 66 | \n",
316 | " 23 | \n",
317 | " 94 | \n",
318 | " 28.1 | \n",
319 | " 0.167 | \n",
320 | " 21 | \n",
321 | " 0 | \n",
322 | "
\n",
323 | " \n",
324 | " 4 | \n",
325 | " 0 | \n",
326 | " 137 | \n",
327 | " 40 | \n",
328 | " 35 | \n",
329 | " 168 | \n",
330 | " 43.1 | \n",
331 | " 2.288 | \n",
332 | " 33 | \n",
333 | " 1 | \n",
334 | "
\n",
335 | " \n",
336 | "
\n",
337 | "
"
338 | ],
339 | "text/plain": [
340 | " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n",
341 | "0 6 148 72 ... 0.627 50 1\n",
342 | "1 1 85 66 ... 0.351 31 0\n",
343 | "2 8 183 64 ... 0.672 32 1\n",
344 | "3 1 89 66 ... 0.167 21 0\n",
345 | "4 0 137 40 ... 2.288 33 1\n",
346 | "\n",
347 | "[5 rows x 9 columns]"
348 | ]
349 | },
350 | "metadata": {},
351 | "execution_count": 5
352 | }
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "metadata": {
358 | "colab": {
359 | "base_uri": "https://localhost:8080/"
360 | },
361 | "id": "BZRfzPWqboAU",
362 | "outputId": "14fa6746-8444-4a12-842e-faf53cc74494"
363 | },
364 | "source": [
365 | "# number of rows and columns in the dataset\n",
366 | "diabetes_data.shape"
367 | ],
368 | "execution_count": null,
369 | "outputs": [
370 | {
371 | "output_type": "execute_result",
372 | "data": {
373 | "text/plain": [
374 | "(768, 9)"
375 | ]
376 | },
377 | "metadata": {},
378 | "execution_count": 6
379 | }
380 | ]
381 | },
382 | {
383 | "cell_type": "code",
384 | "metadata": {
385 | "colab": {
386 | "base_uri": "https://localhost:8080/",
387 | "height": 294
388 | },
389 | "id": "E_-QBPhgb2dT",
390 | "outputId": "1ca1eeee-98a7-4d22-8584-c501d632f774"
391 | },
392 | "source": [
393 | "# getting the statistical measures of the dataset\n",
394 | "diabetes_data.describe()"
395 | ],
396 | "execution_count": null,
397 | "outputs": [
398 | {
399 | "output_type": "execute_result",
400 | "data": {
401 | "text/html": [
402 | "\n",
403 | "\n",
416 | "
\n",
417 | " \n",
418 | " \n",
419 | " | \n",
420 | " Pregnancies | \n",
421 | " Glucose | \n",
422 | " BloodPressure | \n",
423 | " SkinThickness | \n",
424 | " Insulin | \n",
425 | " BMI | \n",
426 | " DiabetesPedigreeFunction | \n",
427 | " Age | \n",
428 | " Outcome | \n",
429 | "
\n",
430 | " \n",
431 | " \n",
432 | " \n",
433 | " count | \n",
434 | " 768.000000 | \n",
435 | " 768.000000 | \n",
436 | " 768.000000 | \n",
437 | " 768.000000 | \n",
438 | " 768.000000 | \n",
439 | " 768.000000 | \n",
440 | " 768.000000 | \n",
441 | " 768.000000 | \n",
442 | " 768.000000 | \n",
443 | "
\n",
444 | " \n",
445 | " mean | \n",
446 | " 3.845052 | \n",
447 | " 120.894531 | \n",
448 | " 69.105469 | \n",
449 | " 20.536458 | \n",
450 | " 79.799479 | \n",
451 | " 31.992578 | \n",
452 | " 0.471876 | \n",
453 | " 33.240885 | \n",
454 | " 0.348958 | \n",
455 | "
\n",
456 | " \n",
457 | " std | \n",
458 | " 3.369578 | \n",
459 | " 31.972618 | \n",
460 | " 19.355807 | \n",
461 | " 15.952218 | \n",
462 | " 115.244002 | \n",
463 | " 7.884160 | \n",
464 | " 0.331329 | \n",
465 | " 11.760232 | \n",
466 | " 0.476951 | \n",
467 | "
\n",
468 | " \n",
469 | " min | \n",
470 | " 0.000000 | \n",
471 | " 0.000000 | \n",
472 | " 0.000000 | \n",
473 | " 0.000000 | \n",
474 | " 0.000000 | \n",
475 | " 0.000000 | \n",
476 | " 0.078000 | \n",
477 | " 21.000000 | \n",
478 | " 0.000000 | \n",
479 | "
\n",
480 | " \n",
481 | " 25% | \n",
482 | " 1.000000 | \n",
483 | " 99.000000 | \n",
484 | " 62.000000 | \n",
485 | " 0.000000 | \n",
486 | " 0.000000 | \n",
487 | " 27.300000 | \n",
488 | " 0.243750 | \n",
489 | " 24.000000 | \n",
490 | " 0.000000 | \n",
491 | "
\n",
492 | " \n",
493 | " 50% | \n",
494 | " 3.000000 | \n",
495 | " 117.000000 | \n",
496 | " 72.000000 | \n",
497 | " 23.000000 | \n",
498 | " 30.500000 | \n",
499 | " 32.000000 | \n",
500 | " 0.372500 | \n",
501 | " 29.000000 | \n",
502 | " 0.000000 | \n",
503 | "
\n",
504 | " \n",
505 | " 75% | \n",
506 | " 6.000000 | \n",
507 | " 140.250000 | \n",
508 | " 80.000000 | \n",
509 | " 32.000000 | \n",
510 | " 127.250000 | \n",
511 | " 36.600000 | \n",
512 | " 0.626250 | \n",
513 | " 41.000000 | \n",
514 | " 1.000000 | \n",
515 | "
\n",
516 | " \n",
517 | " max | \n",
518 | " 17.000000 | \n",
519 | " 199.000000 | \n",
520 | " 122.000000 | \n",
521 | " 99.000000 | \n",
522 | " 846.000000 | \n",
523 | " 67.100000 | \n",
524 | " 2.420000 | \n",
525 | " 81.000000 | \n",
526 | " 1.000000 | \n",
527 | "
\n",
528 | " \n",
529 | "
\n",
530 | "
"
531 | ],
532 | "text/plain": [
533 | " Pregnancies Glucose ... Age Outcome\n",
534 | "count 768.000000 768.000000 ... 768.000000 768.000000\n",
535 | "mean 3.845052 120.894531 ... 33.240885 0.348958\n",
536 | "std 3.369578 31.972618 ... 11.760232 0.476951\n",
537 | "min 0.000000 0.000000 ... 21.000000 0.000000\n",
538 | "25% 1.000000 99.000000 ... 24.000000 0.000000\n",
539 | "50% 3.000000 117.000000 ... 29.000000 0.000000\n",
540 | "75% 6.000000 140.250000 ... 41.000000 1.000000\n",
541 | "max 17.000000 199.000000 ... 81.000000 1.000000\n",
542 | "\n",
543 | "[8 rows x 9 columns]"
544 | ]
545 | },
546 | "metadata": {},
547 | "execution_count": 7
548 | }
549 | ]
550 | },
551 | {
552 | "cell_type": "code",
553 | "metadata": {
554 | "colab": {
555 | "base_uri": "https://localhost:8080/"
556 | },
557 | "id": "1nv8PkGFcEUA",
558 | "outputId": "e23d9b70-ffd8-4e8d-8a3a-606290be791c"
559 | },
560 | "source": [
561 | "diabetes_data['Outcome'].value_counts()"
562 | ],
563 | "execution_count": null,
564 | "outputs": [
565 | {
566 | "output_type": "execute_result",
567 | "data": {
568 | "text/plain": [
569 | "0 500\n",
570 | "1 268\n",
571 | "Name: Outcome, dtype: int64"
572 | ]
573 | },
574 | "metadata": {},
575 | "execution_count": 8
576 | }
577 | ]
578 | },
579 | {
580 | "cell_type": "markdown",
581 | "metadata": {
582 | "id": "kbIx4JRwcVy8"
583 | },
584 | "source": [
585 | "0 --> Non-diabetic\n",
586 | "\n",
587 | "1 --> Diabetic"
588 | ]
589 | },
590 | {
591 | "cell_type": "code",
592 | "metadata": {
593 | "id": "It7yuRMZcQ2-"
594 | },
595 | "source": [
596 | "# separating the features and target\n",
597 | "\n",
598 | "features = diabetes_data.drop(columns='Outcome', axis=1)\n",
599 | "\n",
600 | "target = diabetes_data['Outcome']\n"
601 | ],
602 | "execution_count": null,
603 | "outputs": []
604 | },
605 | {
606 | "cell_type": "code",
607 | "metadata": {
608 | "colab": {
609 | "base_uri": "https://localhost:8080/"
610 | },
611 | "id": "Jnv9kg01c-Mp",
612 | "outputId": "6fab0f65-e442-4c9c-cc96-679afd8aed19"
613 | },
614 | "source": [
615 | "print(features)"
616 | ],
617 | "execution_count": null,
618 | "outputs": [
619 | {
620 | "output_type": "stream",
621 | "name": "stdout",
622 | "text": [
623 | " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n",
624 | "0 6 148 72 ... 33.6 0.627 50\n",
625 | "1 1 85 66 ... 26.6 0.351 31\n",
626 | "2 8 183 64 ... 23.3 0.672 32\n",
627 | "3 1 89 66 ... 28.1 0.167 21\n",
628 | "4 0 137 40 ... 43.1 2.288 33\n",
629 | ".. ... ... ... ... ... ... ...\n",
630 | "763 10 101 76 ... 32.9 0.171 63\n",
631 | "764 2 122 70 ... 36.8 0.340 27\n",
632 | "765 5 121 72 ... 26.2 0.245 30\n",
633 | "766 1 126 60 ... 30.1 0.349 47\n",
634 | "767 1 93 70 ... 30.4 0.315 23\n",
635 | "\n",
636 | "[768 rows x 8 columns]\n"
637 | ]
638 | }
639 | ]
640 | },
641 | {
642 | "cell_type": "code",
643 | "metadata": {
644 | "colab": {
645 | "base_uri": "https://localhost:8080/"
646 | },
647 | "id": "_5bphlQ7dBZF",
648 | "outputId": "eded8bf0-e36a-4124-874a-f222c9f866f2"
649 | },
650 | "source": [
651 | "print(target)"
652 | ],
653 | "execution_count": null,
654 | "outputs": [
655 | {
656 | "output_type": "stream",
657 | "name": "stdout",
658 | "text": [
659 | "0 1\n",
660 | "1 0\n",
661 | "2 1\n",
662 | "3 0\n",
663 | "4 1\n",
664 | " ..\n",
665 | "763 0\n",
666 | "764 0\n",
667 | "765 0\n",
668 | "766 1\n",
669 | "767 0\n",
670 | "Name: Outcome, Length: 768, dtype: int64\n"
671 | ]
672 | }
673 | ]
674 | },
675 | {
676 | "cell_type": "markdown",
677 | "metadata": {
678 | "id": "BkChVOzRdNUZ"
679 | },
680 | "source": [
681 | "Data Standardization"
682 | ]
683 | },
684 | {
685 | "cell_type": "code",
686 | "metadata": {
687 | "id": "6NmIffqFdEQJ"
688 | },
689 | "source": [
690 | "scaler = StandardScaler()"
691 | ],
692 | "execution_count": null,
693 | "outputs": []
694 | },
695 | {
696 | "cell_type": "code",
697 | "metadata": {
698 | "colab": {
699 | "base_uri": "https://localhost:8080/"
700 | },
701 | "id": "vMslWjlxdW6g",
702 | "outputId": "05d11c42-cd55-4b76-d2a5-91b8ae1e7512"
703 | },
704 | "source": [
705 | "scaler.fit(features)"
706 | ],
707 | "execution_count": null,
708 | "outputs": [
709 | {
710 | "output_type": "execute_result",
711 | "data": {
712 | "text/plain": [
713 | "StandardScaler(copy=True, with_mean=True, with_std=True)"
714 | ]
715 | },
716 | "metadata": {},
717 | "execution_count": 13
718 | }
719 | ]
720 | },
721 | {
722 | "cell_type": "code",
723 | "metadata": {
724 | "id": "ow-jh7Kkdfby"
725 | },
726 | "source": [
727 | "standardized_data = scaler.transform(features)"
728 | ],
729 | "execution_count": null,
730 | "outputs": []
731 | },
732 | {
733 | "cell_type": "code",
734 | "metadata": {
735 | "colab": {
736 | "base_uri": "https://localhost:8080/"
737 | },
738 | "id": "ar7sa82gdnmI",
739 | "outputId": "22d09ca7-fb21-4414-da4f-a6e7644ef94f"
740 | },
741 | "source": [
742 | "print(standardized_data)"
743 | ],
744 | "execution_count": null,
745 | "outputs": [
746 | {
747 | "output_type": "stream",
748 | "name": "stdout",
749 | "text": [
750 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
751 | " 1.4259954 ]\n",
752 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
753 | " -0.19067191]\n",
754 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
755 | " -0.10558415]\n",
756 | " ...\n",
757 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
758 | " -0.27575966]\n",
759 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
760 | " 1.17073215]\n",
761 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
762 | " -0.87137393]]\n"
763 | ]
764 | }
765 | ]
766 | },
767 | {
768 | "cell_type": "code",
769 | "metadata": {
770 | "id": "aMEkKDOodrUv"
771 | },
772 | "source": [
773 | "features = standardized_data\n",
774 | "target = diabetes_data['Outcome']"
775 | ],
776 | "execution_count": null,
777 | "outputs": []
778 | },
779 | {
780 | "cell_type": "code",
781 | "metadata": {
782 | "colab": {
783 | "base_uri": "https://localhost:8080/"
784 | },
785 | "id": "-xtmt3Ihd73k",
786 | "outputId": "34b67924-486d-4636-9c9d-3c253416d0eb"
787 | },
788 | "source": [
789 | "print(features)\n",
790 | "print(target)"
791 | ],
792 | "execution_count": null,
793 | "outputs": [
794 | {
795 | "output_type": "stream",
796 | "name": "stdout",
797 | "text": [
798 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
799 | " 1.4259954 ]\n",
800 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
801 | " -0.19067191]\n",
802 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
803 | " -0.10558415]\n",
804 | " ...\n",
805 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
806 | " -0.27575966]\n",
807 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
808 | " 1.17073215]\n",
809 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
810 | " -0.87137393]]\n",
811 | "0 1\n",
812 | "1 0\n",
813 | "2 1\n",
814 | "3 0\n",
815 | "4 1\n",
816 | " ..\n",
817 | "763 0\n",
818 | "764 0\n",
819 | "765 0\n",
820 | "766 1\n",
821 | "767 0\n",
822 | "Name: Outcome, Length: 768, dtype: int64\n"
823 | ]
824 | }
825 | ]
826 | },
827 | {
828 | "cell_type": "markdown",
829 | "metadata": {
830 | "id": "gh4qURDYeHmn"
831 | },
832 | "source": [
833 | "Train Test Split"
834 | ]
835 | },
836 | {
837 | "cell_type": "code",
838 | "metadata": {
839 | "id": "tj046cqTeA51"
840 | },
841 | "source": [
842 | "X_train, X_test, Y_train, Y_test = train_test_split(features, target, test_size=0.2, random_state = 2)"
843 | ],
844 | "execution_count": null,
845 | "outputs": []
846 | },
847 | {
848 | "cell_type": "code",
849 | "metadata": {
850 | "colab": {
851 | "base_uri": "https://localhost:8080/"
852 | },
853 | "id": "loJeizDZe6MH",
854 | "outputId": "ed98b559-99e7-401f-9b47-d80424c03a68"
855 | },
856 | "source": [
857 | "print(features.shape, X_train.shape, X_test.shape)"
858 | ],
859 | "execution_count": null,
860 | "outputs": [
861 | {
862 | "output_type": "stream",
863 | "name": "stdout",
864 | "text": [
865 | "(768, 8) (614, 8) (154, 8)\n"
866 | ]
867 | }
868 | ]
869 | },
870 | {
871 | "cell_type": "markdown",
872 | "metadata": {
873 | "id": "KDcTszgpfGtp"
874 | },
875 | "source": [
876 | "Training the Model"
877 | ]
878 | },
879 | {
880 | "cell_type": "markdown",
881 | "metadata": {
882 | "id": "uzteSNBnfImg"
883 | },
884 | "source": [
885 | "Support Vector Machine Classifier"
886 | ]
887 | },
888 | {
889 | "cell_type": "code",
890 | "metadata": {
891 | "id": "xfJmnwXdfAKP"
892 | },
893 | "source": [
894 | "classifier = SVM_classifier(learning_rate=0.001, no_of_iterations=1000, lambda_parameter=0.01)"
895 | ],
896 | "execution_count": null,
897 | "outputs": []
898 | },
899 | {
900 | "cell_type": "code",
901 | "metadata": {
902 | "id": "V5KLhNF5fhXT"
903 | },
904 | "source": [
905 | "# training the SVM classifier with training data\n",
906 | "classifier.fit(X_train, Y_train)"
907 | ],
908 | "execution_count": null,
909 | "outputs": []
910 | },
911 | {
912 | "cell_type": "markdown",
913 | "metadata": {
914 | "id": "YC0v_ZCYf6gV"
915 | },
916 | "source": [
917 | "Model Evaluation"
918 | ]
919 | },
920 | {
921 | "cell_type": "markdown",
922 | "metadata": {
923 | "id": "-IO-FIPqf8Vm"
924 | },
925 | "source": [
926 | "Accuracy Score"
927 | ]
928 | },
929 | {
930 | "cell_type": "code",
931 | "metadata": {
932 | "id": "lLt2so1Hf0Ua"
933 | },
934 | "source": [
935 | "# accuracy on training data\n",
936 | "X_train_prediction = classifier.predict(X_train)\n",
937 | "training_data_accuracy = accuracy_score(Y_train, X_train_prediction)"
938 | ],
939 | "execution_count": null,
940 | "outputs": []
941 | },
942 | {
943 | "cell_type": "code",
944 | "metadata": {
945 | "colab": {
946 | "base_uri": "https://localhost:8080/"
947 | },
948 | "id": "KULlET0dglkG",
949 | "outputId": "bd094ab4-0e3b-41ec-f1d4-6fb3afad5374"
950 | },
951 | "source": [
952 | "print('Accuracy score on training data = ', training_data_accuracy)"
953 | ],
954 | "execution_count": null,
955 | "outputs": [
956 | {
957 | "output_type": "stream",
958 | "name": "stdout",
959 | "text": [
960 | "Accuracy score on training data = 0.7768729641693811\n"
961 | ]
962 | }
963 | ]
964 | },
965 | {
966 | "cell_type": "code",
967 | "metadata": {
968 | "id": "QjRNsLYmg1oU"
969 | },
970 | "source": [
971 | "# accuracy on test data\n",
972 | "X_test_prediction = classifier.predict(X_test)\n",
973 | "test_data_accuracy = accuracy_score(Y_test, X_test_prediction)"
974 | ],
975 | "execution_count": null,
976 | "outputs": []
977 | },
978 | {
979 | "cell_type": "code",
980 | "metadata": {
981 | "colab": {
982 | "base_uri": "https://localhost:8080/"
983 | },
984 | "id": "jQfednYShTg9",
985 | "outputId": "973d29b6-9b43-4c0a-ba8c-83f28cb70822"
986 | },
987 | "source": [
988 | "print('Accuracy score on test data = ', test_data_accuracy)"
989 | ],
990 | "execution_count": null,
991 | "outputs": [
992 | {
993 | "output_type": "stream",
994 | "name": "stdout",
995 | "text": [
996 | "Accuracy score on test data = 0.7532467532467533\n"
997 | ]
998 | }
999 | ]
1000 | },
1001 | {
1002 | "cell_type": "markdown",
1003 | "metadata": {
1004 | "id": "Dia3-X4ih2KP"
1005 | },
1006 | "source": [
1007 | "Building a Predictive System"
1008 | ]
1009 | },
1010 | {
1011 | "cell_type": "code",
1012 | "metadata": {
1013 | "colab": {
1014 | "base_uri": "https://localhost:8080/"
1015 | },
1016 | "id": "Pw9LkxoKhY5A",
1017 | "outputId": "004813a2-b242-4e9b-cb6a-b4652f929b3c"
1018 | },
1019 | "source": [
1020 | "input_data = (5,166,72,19,175,25.8,0.587,51)\n",
1021 | "\n",
1022 | "# change the input data to numpy array\n",
1023 | "input_data_as_numpy_array = np.asarray(input_data)\n",
1024 | "\n",
1025 | "# reshape the array\n",
1026 | "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n",
1027 | "\n",
1028 | "# standardizing the input data\n",
1029 | "std_data = scaler.transform(input_data_reshaped)\n",
1030 | "print(std_data)\n",
1031 | "\n",
1032 | "prediction = classifier.predict(std_data)\n",
1033 | "print(prediction)\n",
1034 | "\n",
1035 | "if (prediction[0] == 0):\n",
1036 | " print('The person is not diabetic')\n",
1037 | "\n",
1038 | "else:\n",
1039 | " print('The Person is diabetic')"
1040 | ],
1041 | "execution_count": null,
1042 | "outputs": [
1043 | {
1044 | "output_type": "stream",
1045 | "name": "stdout",
1046 | "text": [
1047 | "[[ 0.3429808 1.41167241 0.14964075 -0.09637905 0.82661621 -0.78595734\n",
1048 | " 0.34768723 1.51108316]]\n",
1049 | "[1]\n",
1050 | "The Person is diabetic\n"
1051 | ]
1052 | }
1053 | ]
1054 | },
1055 | {
1056 | "cell_type": "code",
1057 | "metadata": {
1058 | "id": "u6YcK7aEjHen"
1059 | },
1060 | "source": [],
1061 | "execution_count": null,
1062 | "outputs": []
1063 | }
1064 | ]
1065 | }
--------------------------------------------------------------------------------
/7_3_7_Implementing_SVM_from_Scratch.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "LnPbntVRnfvV"
22 | },
23 | "source": [
24 | "Importing the Dependencies"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "metadata": {
30 | "id": "-71UtHzNVWjB"
31 | },
32 | "source": [
33 | "import numpy as np\n",
34 | "import pandas as pd\n",
35 | "from sklearn.preprocessing import StandardScaler\n",
36 | "from sklearn.model_selection import train_test_split\n",
37 | "from sklearn.metrics import accuracy_score\n",
38 | "\n",
39 | "from SVM import SVM_classifier"
40 | ],
41 | "execution_count": null,
42 | "outputs": []
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {
47 | "id": "bmfOfG8joBBy"
48 | },
49 | "source": [
50 | "Data Collection and Analysis\n",
51 | "\n",
52 | "PIMA Diabetes Dataset"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "metadata": {
58 | "id": "Xpw6Mj_pn_TL"
59 | },
60 | "source": [
61 | "# loading the diabetes dataset to a pandas DataFrame\n",
62 | "diabetes_dataset = pd.read_csv('/content/diabetes.csv') "
63 | ],
64 | "execution_count": null,
65 | "outputs": []
66 | },
67 | {
68 | "cell_type": "code",
69 | "metadata": {
70 | "id": "-tjO09ncovoh",
71 | "colab": {
72 | "base_uri": "https://localhost:8080/",
73 | "height": 202
74 | },
75 | "outputId": "557afe56-5e13-42e4-a92b-0acf884e497f"
76 | },
77 | "source": [
78 | "# printing the first 5 rows of the dataset\n",
79 | "diabetes_dataset.head()"
80 | ],
81 | "execution_count": null,
82 | "outputs": [
83 | {
84 | "output_type": "execute_result",
85 | "data": {
86 | "text/html": [
87 | "\n",
88 | "\n",
101 | "
\n",
102 | " \n",
103 | " \n",
104 | " | \n",
105 | " Pregnancies | \n",
106 | " Glucose | \n",
107 | " BloodPressure | \n",
108 | " SkinThickness | \n",
109 | " Insulin | \n",
110 | " BMI | \n",
111 | " DiabetesPedigreeFunction | \n",
112 | " Age | \n",
113 | " Outcome | \n",
114 | "
\n",
115 | " \n",
116 | " \n",
117 | " \n",
118 | " 0 | \n",
119 | " 6 | \n",
120 | " 148 | \n",
121 | " 72 | \n",
122 | " 35 | \n",
123 | " 0 | \n",
124 | " 33.6 | \n",
125 | " 0.627 | \n",
126 | " 50 | \n",
127 | " 1 | \n",
128 | "
\n",
129 | " \n",
130 | " 1 | \n",
131 | " 1 | \n",
132 | " 85 | \n",
133 | " 66 | \n",
134 | " 29 | \n",
135 | " 0 | \n",
136 | " 26.6 | \n",
137 | " 0.351 | \n",
138 | " 31 | \n",
139 | " 0 | \n",
140 | "
\n",
141 | " \n",
142 | " 2 | \n",
143 | " 8 | \n",
144 | " 183 | \n",
145 | " 64 | \n",
146 | " 0 | \n",
147 | " 0 | \n",
148 | " 23.3 | \n",
149 | " 0.672 | \n",
150 | " 32 | \n",
151 | " 1 | \n",
152 | "
\n",
153 | " \n",
154 | " 3 | \n",
155 | " 1 | \n",
156 | " 89 | \n",
157 | " 66 | \n",
158 | " 23 | \n",
159 | " 94 | \n",
160 | " 28.1 | \n",
161 | " 0.167 | \n",
162 | " 21 | \n",
163 | " 0 | \n",
164 | "
\n",
165 | " \n",
166 | " 4 | \n",
167 | " 0 | \n",
168 | " 137 | \n",
169 | " 40 | \n",
170 | " 35 | \n",
171 | " 168 | \n",
172 | " 43.1 | \n",
173 | " 2.288 | \n",
174 | " 33 | \n",
175 | " 1 | \n",
176 | "
\n",
177 | " \n",
178 | "
\n",
179 | "
"
180 | ],
181 | "text/plain": [
182 | " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n",
183 | "0 6 148 72 ... 0.627 50 1\n",
184 | "1 1 85 66 ... 0.351 31 0\n",
185 | "2 8 183 64 ... 0.672 32 1\n",
186 | "3 1 89 66 ... 0.167 21 0\n",
187 | "4 0 137 40 ... 2.288 33 1\n",
188 | "\n",
189 | "[5 rows x 9 columns]"
190 | ]
191 | },
192 | "metadata": {},
193 | "execution_count": 3
194 | }
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "metadata": {
200 | "id": "lynParo6pEMB",
201 | "colab": {
202 | "base_uri": "https://localhost:8080/"
203 | },
204 | "outputId": "f1b034b0-3e4d-4960-bf05-dc86fc4f4536"
205 | },
206 | "source": [
207 | "# number of rows and Columns in this dataset\n",
208 | "diabetes_dataset.shape"
209 | ],
210 | "execution_count": null,
211 | "outputs": [
212 | {
213 | "output_type": "execute_result",
214 | "data": {
215 | "text/plain": [
216 | "(768, 9)"
217 | ]
218 | },
219 | "metadata": {},
220 | "execution_count": 4
221 | }
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "metadata": {
227 | "id": "3NDJOlrEpmoL",
228 | "colab": {
229 | "base_uri": "https://localhost:8080/",
230 | "height": 294
231 | },
232 | "outputId": "dfc38bb3-ca13-4794-b240-399899f2c7b5"
233 | },
234 | "source": [
235 | "# getting the statistical measures of the data\n",
236 | "diabetes_dataset.describe()"
237 | ],
238 | "execution_count": null,
239 | "outputs": [
240 | {
241 | "output_type": "execute_result",
242 | "data": {
243 | "text/html": [
244 | "\n",
245 | "\n",
258 | "
\n",
259 | " \n",
260 | " \n",
261 | " | \n",
262 | " Pregnancies | \n",
263 | " Glucose | \n",
264 | " BloodPressure | \n",
265 | " SkinThickness | \n",
266 | " Insulin | \n",
267 | " BMI | \n",
268 | " DiabetesPedigreeFunction | \n",
269 | " Age | \n",
270 | " Outcome | \n",
271 | "
\n",
272 | " \n",
273 | " \n",
274 | " \n",
275 | " count | \n",
276 | " 768.000000 | \n",
277 | " 768.000000 | \n",
278 | " 768.000000 | \n",
279 | " 768.000000 | \n",
280 | " 768.000000 | \n",
281 | " 768.000000 | \n",
282 | " 768.000000 | \n",
283 | " 768.000000 | \n",
284 | " 768.000000 | \n",
285 | "
\n",
286 | " \n",
287 | " mean | \n",
288 | " 3.845052 | \n",
289 | " 120.894531 | \n",
290 | " 69.105469 | \n",
291 | " 20.536458 | \n",
292 | " 79.799479 | \n",
293 | " 31.992578 | \n",
294 | " 0.471876 | \n",
295 | " 33.240885 | \n",
296 | " 0.348958 | \n",
297 | "
\n",
298 | " \n",
299 | " std | \n",
300 | " 3.369578 | \n",
301 | " 31.972618 | \n",
302 | " 19.355807 | \n",
303 | " 15.952218 | \n",
304 | " 115.244002 | \n",
305 | " 7.884160 | \n",
306 | " 0.331329 | \n",
307 | " 11.760232 | \n",
308 | " 0.476951 | \n",
309 | "
\n",
310 | " \n",
311 | " min | \n",
312 | " 0.000000 | \n",
313 | " 0.000000 | \n",
314 | " 0.000000 | \n",
315 | " 0.000000 | \n",
316 | " 0.000000 | \n",
317 | " 0.000000 | \n",
318 | " 0.078000 | \n",
319 | " 21.000000 | \n",
320 | " 0.000000 | \n",
321 | "
\n",
322 | " \n",
323 | " 25% | \n",
324 | " 1.000000 | \n",
325 | " 99.000000 | \n",
326 | " 62.000000 | \n",
327 | " 0.000000 | \n",
328 | " 0.000000 | \n",
329 | " 27.300000 | \n",
330 | " 0.243750 | \n",
331 | " 24.000000 | \n",
332 | " 0.000000 | \n",
333 | "
\n",
334 | " \n",
335 | " 50% | \n",
336 | " 3.000000 | \n",
337 | " 117.000000 | \n",
338 | " 72.000000 | \n",
339 | " 23.000000 | \n",
340 | " 30.500000 | \n",
341 | " 32.000000 | \n",
342 | " 0.372500 | \n",
343 | " 29.000000 | \n",
344 | " 0.000000 | \n",
345 | "
\n",
346 | " \n",
347 | " 75% | \n",
348 | " 6.000000 | \n",
349 | " 140.250000 | \n",
350 | " 80.000000 | \n",
351 | " 32.000000 | \n",
352 | " 127.250000 | \n",
353 | " 36.600000 | \n",
354 | " 0.626250 | \n",
355 | " 41.000000 | \n",
356 | " 1.000000 | \n",
357 | "
\n",
358 | " \n",
359 | " max | \n",
360 | " 17.000000 | \n",
361 | " 199.000000 | \n",
362 | " 122.000000 | \n",
363 | " 99.000000 | \n",
364 | " 846.000000 | \n",
365 | " 67.100000 | \n",
366 | " 2.420000 | \n",
367 | " 81.000000 | \n",
368 | " 1.000000 | \n",
369 | "
\n",
370 | " \n",
371 | "
\n",
372 | "
"
373 | ],
374 | "text/plain": [
375 | " Pregnancies Glucose ... Age Outcome\n",
376 | "count 768.000000 768.000000 ... 768.000000 768.000000\n",
377 | "mean 3.845052 120.894531 ... 33.240885 0.348958\n",
378 | "std 3.369578 31.972618 ... 11.760232 0.476951\n",
379 | "min 0.000000 0.000000 ... 21.000000 0.000000\n",
380 | "25% 1.000000 99.000000 ... 24.000000 0.000000\n",
381 | "50% 3.000000 117.000000 ... 29.000000 0.000000\n",
382 | "75% 6.000000 140.250000 ... 41.000000 1.000000\n",
383 | "max 17.000000 199.000000 ... 81.000000 1.000000\n",
384 | "\n",
385 | "[8 rows x 9 columns]"
386 | ]
387 | },
388 | "metadata": {},
389 | "execution_count": 5
390 | }
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "metadata": {
396 | "id": "LrpHzaGpp5dQ",
397 | "colab": {
398 | "base_uri": "https://localhost:8080/"
399 | },
400 | "outputId": "c5b6c6ec-fdc6-44dc-e12b-29e180a5caae"
401 | },
402 | "source": [
403 | "diabetes_dataset['Outcome'].value_counts()"
404 | ],
405 | "execution_count": null,
406 | "outputs": [
407 | {
408 | "output_type": "execute_result",
409 | "data": {
410 | "text/plain": [
411 | "0 500\n",
412 | "1 268\n",
413 | "Name: Outcome, dtype: int64"
414 | ]
415 | },
416 | "metadata": {},
417 | "execution_count": 6
418 | }
419 | ]
420 | },
421 | {
422 | "cell_type": "markdown",
423 | "metadata": {
424 | "id": "cB1qRaNcqeh5"
425 | },
426 | "source": [
427 | "0 --> Non-Diabetic\n",
428 | "\n",
429 | "1 --> Diabetic"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "metadata": {
435 | "id": "I6MWR0k_qSCK",
436 | "colab": {
437 | "base_uri": "https://localhost:8080/",
438 | "height": 141
439 | },
440 | "outputId": "15501b67-7e5c-4309-d83a-3f88c4c815d2"
441 | },
442 | "source": [
443 | "diabetes_dataset.groupby('Outcome').mean()"
444 | ],
445 | "execution_count": null,
446 | "outputs": [
447 | {
448 | "output_type": "execute_result",
449 | "data": {
450 | "text/html": [
451 | "\n",
452 | "\n",
465 | "
\n",
466 | " \n",
467 | " \n",
468 | " | \n",
469 | " Pregnancies | \n",
470 | " Glucose | \n",
471 | " BloodPressure | \n",
472 | " SkinThickness | \n",
473 | " Insulin | \n",
474 | " BMI | \n",
475 | " DiabetesPedigreeFunction | \n",
476 | " Age | \n",
477 | "
\n",
478 | " \n",
479 | " Outcome | \n",
480 | " | \n",
481 | " | \n",
482 | " | \n",
483 | " | \n",
484 | " | \n",
485 | " | \n",
486 | " | \n",
487 | " | \n",
488 | "
\n",
489 | " \n",
490 | " \n",
491 | " \n",
492 | " 0 | \n",
493 | " 3.298000 | \n",
494 | " 109.980000 | \n",
495 | " 68.184000 | \n",
496 | " 19.664000 | \n",
497 | " 68.792000 | \n",
498 | " 30.304200 | \n",
499 | " 0.429734 | \n",
500 | " 31.190000 | \n",
501 | "
\n",
502 | " \n",
503 | " 1 | \n",
504 | " 4.865672 | \n",
505 | " 141.257463 | \n",
506 | " 70.824627 | \n",
507 | " 22.164179 | \n",
508 | " 100.335821 | \n",
509 | " 35.142537 | \n",
510 | " 0.550500 | \n",
511 | " 37.067164 | \n",
512 | "
\n",
513 | " \n",
514 | "
\n",
515 | "
"
516 | ],
517 | "text/plain": [
518 | " Pregnancies Glucose ... DiabetesPedigreeFunction Age\n",
519 | "Outcome ... \n",
520 | "0 3.298000 109.980000 ... 0.429734 31.190000\n",
521 | "1 4.865672 141.257463 ... 0.550500 37.067164\n",
522 | "\n",
523 | "[2 rows x 8 columns]"
524 | ]
525 | },
526 | "metadata": {},
527 | "execution_count": 7
528 | }
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "metadata": {
534 | "id": "RoDW7l9mqqHZ"
535 | },
536 | "source": [
537 | "# split the frame into model inputs (every column except 'Outcome') and the label column\n",
538 | "features = diabetes_dataset.drop(columns='Outcome')\n",
539 | "target = diabetes_dataset['Outcome']"
540 | ],
541 | "execution_count": null,
542 | "outputs": []
543 | },
544 | {
545 | "cell_type": "code",
546 | "metadata": {
547 | "id": "3eiRW9M9raMm",
548 | "colab": {
549 | "base_uri": "https://localhost:8080/"
550 | },
551 | "outputId": "095bd8a7-0215-4b50-a1ca-564eded2ae94"
552 | },
553 | "source": [
554 | "print(features)"
555 | ],
556 | "execution_count": null,
557 | "outputs": [
558 | {
559 | "output_type": "stream",
560 | "name": "stdout",
561 | "text": [
562 | " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n",
563 | "0 6 148 72 ... 33.6 0.627 50\n",
564 | "1 1 85 66 ... 26.6 0.351 31\n",
565 | "2 8 183 64 ... 23.3 0.672 32\n",
566 | "3 1 89 66 ... 28.1 0.167 21\n",
567 | "4 0 137 40 ... 43.1 2.288 33\n",
568 | ".. ... ... ... ... ... ... ...\n",
569 | "763 10 101 76 ... 32.9 0.171 63\n",
570 | "764 2 122 70 ... 36.8 0.340 27\n",
571 | "765 5 121 72 ... 26.2 0.245 30\n",
572 | "766 1 126 60 ... 30.1 0.349 47\n",
573 | "767 1 93 70 ... 30.4 0.315 23\n",
574 | "\n",
575 | "[768 rows x 8 columns]\n"
576 | ]
577 | }
578 | ]
579 | },
580 | {
581 | "cell_type": "code",
582 | "metadata": {
583 | "id": "AoxgTJAMrcCl",
584 | "colab": {
585 | "base_uri": "https://localhost:8080/"
586 | },
587 | "outputId": "dfa0fc42-fee6-4731-8e97-03c28f2fe598"
588 | },
589 | "source": [
590 | "print(target)"
591 | ],
592 | "execution_count": null,
593 | "outputs": [
594 | {
595 | "output_type": "stream",
596 | "name": "stdout",
597 | "text": [
598 | "0 1\n",
599 | "1 0\n",
600 | "2 1\n",
601 | "3 0\n",
602 | "4 1\n",
603 | " ..\n",
604 | "763 0\n",
605 | "764 0\n",
606 | "765 0\n",
607 | "766 1\n",
608 | "767 0\n",
609 | "Name: Outcome, Length: 768, dtype: int64\n"
610 | ]
611 | }
612 | ]
613 | },
614 | {
615 | "cell_type": "markdown",
616 | "metadata": {
617 | "id": "umAbo_kqrlzI"
618 | },
619 | "source": [
620 | "Data Standardization"
621 | ]
622 | },
623 | {
624 | "cell_type": "code",
625 | "metadata": {
626 | "id": "njfM5X60rgnc"
627 | },
628 | "source": [
629 | "scaler = StandardScaler()"
630 | ],
631 | "execution_count": null,
632 | "outputs": []
633 | },
634 | {
635 | "cell_type": "code",
636 | "metadata": {
637 | "id": "g0ai5ARbr53p",
638 | "colab": {
639 | "base_uri": "https://localhost:8080/"
640 | },
641 | "outputId": "886259ac-12c9-48a4-ae49-22f446bf95a5"
642 | },
643 | "source": [
644 | "scaler.fit(features)"
645 | ],
646 | "execution_count": null,
647 | "outputs": [
648 | {
649 | "output_type": "execute_result",
650 | "data": {
651 | "text/plain": [
652 | "StandardScaler(copy=True, with_mean=True, with_std=True)"
653 | ]
654 | },
655 | "metadata": {},
656 | "execution_count": 12
657 | }
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "metadata": {
663 | "id": "FHxNwPuZr-kD"
664 | },
665 | "source": [
666 | "standardized_data = scaler.transform(features)"
667 | ],
668 | "execution_count": null,
669 | "outputs": []
670 | },
671 | {
672 | "cell_type": "code",
673 | "metadata": {
674 | "id": "fjMwZ5x6sPUJ",
675 | "colab": {
676 | "base_uri": "https://localhost:8080/"
677 | },
678 | "outputId": "1f44da9d-aa05-4a27-a046-8879ac6ddcbf"
679 | },
680 | "source": [
681 | "print(standardized_data)"
682 | ],
683 | "execution_count": null,
684 | "outputs": [
685 | {
686 | "output_type": "stream",
687 | "name": "stdout",
688 | "text": [
689 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
690 | " 1.4259954 ]\n",
691 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
692 | " -0.19067191]\n",
693 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
694 | " -0.10558415]\n",
695 | " ...\n",
696 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
697 | " -0.27575966]\n",
698 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
699 | " 1.17073215]\n",
700 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
701 | " -0.87137393]]\n"
702 | ]
703 | }
704 | ]
705 | },
706 | {
707 | "cell_type": "code",
708 | "metadata": {
709 | "id": "ZxWSl4SGsRjE"
710 | },
711 | "source": [
712 | "# rebinds `features` to the scaled array (labels in `target` are unchanged): do not re-run the scaler.fit / scaler.transform cells above after this one\n",
713 | "features = standardized_data"
714 | ],
715 | "execution_count": null,
716 | "outputs": []
717 | },
718 | {
719 | "cell_type": "code",
720 | "metadata": {
721 | "id": "lhJF_7QjsjmP",
722 | "colab": {
723 | "base_uri": "https://localhost:8080/"
724 | },
725 | "outputId": "2ee5a7f5-55ee-42b9-ccdc-1a5452cb06b0"
726 | },
727 | "source": [
728 | "print(features)\n",
729 | "print(target)"
730 | ],
731 | "execution_count": null,
732 | "outputs": [
733 | {
734 | "output_type": "stream",
735 | "name": "stdout",
736 | "text": [
737 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
738 | " 1.4259954 ]\n",
739 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
740 | " -0.19067191]\n",
741 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
742 | " -0.10558415]\n",
743 | " ...\n",
744 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
745 | " -0.27575966]\n",
746 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
747 | " 1.17073215]\n",
748 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
749 | " -0.87137393]]\n",
750 | "0 1\n",
751 | "1 0\n",
752 | "2 1\n",
753 | "3 0\n",
754 | "4 1\n",
755 | " ..\n",
756 | "763 0\n",
757 | "764 0\n",
758 | "765 0\n",
759 | "766 1\n",
760 | "767 0\n",
761 | "Name: Outcome, Length: 768, dtype: int64\n"
762 | ]
763 | }
764 | ]
765 | },
766 | {
767 | "cell_type": "markdown",
768 | "metadata": {
769 | "id": "gHciEFkxsoQP"
770 | },
771 | "source": [
772 | "Train Test Split"
773 | ]
774 | },
775 | {
776 | "cell_type": "code",
777 | "metadata": {
778 | "id": "AEfKGj_yslvD"
779 | },
780 | "source": [
781 | "X_train, X_test, Y_train, Y_test = train_test_split(features,target, test_size = 0.2, random_state=2)"
782 | ],
783 | "execution_count": null,
784 | "outputs": []
785 | },
786 | {
787 | "cell_type": "code",
788 | "metadata": {
789 | "id": "DR05T-o0t3FQ",
790 | "colab": {
791 | "base_uri": "https://localhost:8080/"
792 | },
793 | "outputId": "83023886-4372-49fa-a0c9-b0cb15e4c460"
794 | },
795 | "source": [
796 | "print(features.shape, X_train.shape, X_test.shape)"
797 | ],
798 | "execution_count": null,
799 | "outputs": [
800 | {
801 | "output_type": "stream",
802 | "name": "stdout",
803 | "text": [
804 | "(768, 8) (614, 8) (154, 8)\n"
805 | ]
806 | }
807 | ]
808 | },
809 | {
810 | "cell_type": "markdown",
811 | "metadata": {
812 | "id": "ElJ3tkOtuC_n"
813 | },
814 | "source": [
815 | "Training the Model"
816 | ]
817 | },
818 | {
819 | "cell_type": "code",
820 | "metadata": {
821 | "id": "5szLWHlNt9xc"
822 | },
823 | "source": [
824 | "classifier = SVM_classifier(learning_rate=0.001, no_of_iterations=1000, lambda_parameter = 0.01)"
825 | ],
826 | "execution_count": null,
827 | "outputs": []
828 | },
829 | {
830 | "cell_type": "code",
831 | "metadata": {
832 | "id": "ncJWY_7suPAb"
833 | },
834 | "source": [
835 | "# train the Support Vector Machine classifier on the training split\n",
836 | "classifier.fit(X_train, Y_train)"
837 | ],
838 | "execution_count": null,
839 | "outputs": []
840 | },
841 | {
842 | "cell_type": "markdown",
843 | "metadata": {
844 | "id": "UV4-CAfquiyP"
845 | },
846 | "source": [
847 | "Model Evaluation"
848 | ]
849 | },
850 | {
851 | "cell_type": "markdown",
852 | "metadata": {
853 | "id": "yhAjGPJWunXa"
854 | },
855 | "source": [
856 | "Accuracy Score"
857 | ]
858 | },
859 | {
860 | "cell_type": "code",
861 | "metadata": {
862 | "id": "fJLEPQK7ueXp"
863 | },
864 | "source": [
865 | "# predict on the samples the model was fitted on, then score against their true labels\n",
866 | "X_train_prediction = classifier.predict(X_train)\n",
867 | "training_data_accuracy = accuracy_score(Y_train, X_train_prediction)"
868 | ],
869 | "execution_count": null,
870 | "outputs": []
871 | },
872 | {
873 | "cell_type": "code",
874 | "metadata": {
875 | "id": "mmJ22qhVvNwj",
876 | "colab": {
877 | "base_uri": "https://localhost:8080/"
878 | },
879 | "outputId": "8742258c-ade3-4419-fe8d-3b66c0457990"
880 | },
881 | "source": [
882 | "print('Accuracy score of the training data : ', training_data_accuracy)"
883 | ],
884 | "execution_count": null,
885 | "outputs": [
886 | {
887 | "output_type": "stream",
888 | "name": "stdout",
889 | "text": [
890 | "Accuracy score of the training data : 0.7768729641693811\n"
891 | ]
892 | }
893 | ]
894 | },
895 | {
896 | "cell_type": "code",
897 | "metadata": {
898 | "id": "G2CICFMEvcCl"
899 | },
900 | "source": [
901 | "# predict on the held-out test samples, then score against their true labels\n",
902 | "X_test_prediction = classifier.predict(X_test)\n",
903 | "test_data_accuracy = accuracy_score(Y_test, X_test_prediction)"
904 | ],
905 | "execution_count": null,
906 | "outputs": []
907 | },
908 | {
909 | "cell_type": "code",
910 | "metadata": {
911 | "id": "i2GcW_t_vz7C",
912 | "colab": {
913 | "base_uri": "https://localhost:8080/"
914 | },
915 | "outputId": "6da095a3-30c1-492e-b410-cb8f4ed21cc4"
916 | },
917 | "source": [
918 | "print('Accuracy score of the test data : ', test_data_accuracy)"
919 | ],
920 | "execution_count": null,
921 | "outputs": [
922 | {
923 | "output_type": "stream",
924 | "name": "stdout",
925 | "text": [
926 | "Accuracy score of the test data : 0.7532467532467533\n"
927 | ]
928 | }
929 | ]
930 | },
931 | {
932 | "cell_type": "markdown",
933 | "metadata": {
934 | "id": "3XdcQCMcVwjM"
935 | },
936 | "source": [
937 | "Building a Predictive System"
938 | ]
939 | },
940 | {
941 | "cell_type": "code",
942 | "metadata": {
943 | "colab": {
944 | "base_uri": "https://localhost:8080/"
945 | },
946 | "id": "EJmRBcutVvzA",
947 | "outputId": "e0858c91-9db1-4460-d293-826216171734"
948 | },
949 | "source": [
950 | "# eight raw feature values for one patient (assumed to follow the dataset's column order)\n",
951 | "input_data = (5, 166, 72, 19, 175, 25.8, 0.587, 51)\n",
952 | "\n",
953 | "# a single sample is passed as a 2-D array with one row\n",
954 | "input_data_reshaped = np.asarray(input_data).reshape(1, -1)\n",
955 | "\n",
956 | "# scale the sample with the scaler fitted earlier, then show the scaled values\n",
957 | "std_data = scaler.transform(input_data_reshaped)\n",
958 | "print(std_data)\n",
959 | "\n",
960 | "# classify the scaled sample\n",
961 | "prediction = classifier.predict(std_data)\n",
962 | "print(prediction)\n",
963 | "\n",
964 | "# label 0 means non-diabetic; any other label is reported as diabetic\n",
965 | "if prediction[0] != 0:\n",
966 | "    print('The person is diabetic')\n",
967 | "else:\n",
968 | "    print('The person is not diabetic')"
969 | ],
970 | "execution_count": null,
971 | "outputs": [
972 | {
973 | "output_type": "stream",
974 | "name": "stdout",
975 | "text": [
976 | "[[ 0.3429808 1.41167241 0.14964075 -0.09637905 0.82661621 -0.78595734\n",
977 | " 0.34768723 1.51108316]]\n",
978 | "[1]\n",
979 | "The person is diabetic\n"
980 | ]
981 | }
982 | ]
983 | }
984 | ]
985 | }
--------------------------------------------------------------------------------