└── LR_Heart_Disease_Prediction.ipynb
/LR_Heart_Disease_Prediction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "include_colab_link": true
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "view-in-github",
22 | "colab_type": "text"
23 | },
24 | "source": [
25 | "
"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "id": "aTb-9TFFqprC"
32 | },
33 | "source": [
34 | "Importing the Dependencies"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "metadata": {
40 | "id": "3q9U3S_whh3-"
41 | },
42 | "source": [
43 | "import numpy as np\n",
44 | "import pandas as pd\n",
45 | "from sklearn.model_selection import train_test_split\n",
46 | "from sklearn.linear_model import LogisticRegression\n",
47 | "from sklearn.metrics import accuracy_score"
48 | ],
49 | "execution_count": null,
50 | "outputs": []
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {
55 | "id": "egMd5zeurTMR"
56 | },
57 | "source": [
58 | "Data Collection and Processing"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "metadata": {
64 | "id": "0q-3-LkQrREV"
65 | },
66 | "source": [
67 | "# loading the csv data to a Pandas DataFrame\n",
68 | "heart_data = pd.read_csv('/content/data.csv')"
69 | ],
70 | "execution_count": null,
71 | "outputs": []
72 | },
73 | {
74 | "cell_type": "code",
75 | "metadata": {
76 | "colab": {
77 | "base_uri": "https://localhost:8080/",
78 | "height": 198
79 | },
80 | "id": "M8dQxSTqriWD",
81 | "outputId": "ea695a74-7589-47fe-e400-2dd925f7b5bb"
82 | },
83 | "source": [
84 | "# print first 5 rows of the dataset\n",
85 | "heart_data.head()"
86 | ],
87 | "execution_count": null,
88 | "outputs": [
89 | {
90 | "output_type": "execute_result",
91 | "data": {
92 | "text/html": [
93 | "
\n",
94 | "\n",
107 | "
\n",
108 | " \n",
109 | " \n",
110 | " | \n",
111 | " age | \n",
112 | " sex | \n",
113 | " cp | \n",
114 | " trestbps | \n",
115 | " chol | \n",
116 | " fbs | \n",
117 | " restecg | \n",
118 | " thalach | \n",
119 | " exang | \n",
120 | " oldpeak | \n",
121 | " slope | \n",
122 | " ca | \n",
123 | " thal | \n",
124 | " target | \n",
125 | "
\n",
126 | " \n",
127 | " \n",
128 | " \n",
129 | " | 0 | \n",
130 | " 63 | \n",
131 | " 1 | \n",
132 | " 3 | \n",
133 | " 145 | \n",
134 | " 233 | \n",
135 | " 1 | \n",
136 | " 0 | \n",
137 | " 150 | \n",
138 | " 0 | \n",
139 | " 2.3 | \n",
140 | " 0 | \n",
141 | " 0 | \n",
142 | " 1 | \n",
143 | " 1 | \n",
144 | "
\n",
145 | " \n",
146 | " | 1 | \n",
147 | " 37 | \n",
148 | " 1 | \n",
149 | " 2 | \n",
150 | " 130 | \n",
151 | " 250 | \n",
152 | " 0 | \n",
153 | " 1 | \n",
154 | " 187 | \n",
155 | " 0 | \n",
156 | " 3.5 | \n",
157 | " 0 | \n",
158 | " 0 | \n",
159 | " 2 | \n",
160 | " 1 | \n",
161 | "
\n",
162 | " \n",
163 | " | 2 | \n",
164 | " 41 | \n",
165 | " 0 | \n",
166 | " 1 | \n",
167 | " 130 | \n",
168 | " 204 | \n",
169 | " 0 | \n",
170 | " 0 | \n",
171 | " 172 | \n",
172 | " 0 | \n",
173 | " 1.4 | \n",
174 | " 2 | \n",
175 | " 0 | \n",
176 | " 2 | \n",
177 | " 1 | \n",
178 | "
\n",
179 | " \n",
180 | " | 3 | \n",
181 | " 56 | \n",
182 | " 1 | \n",
183 | " 1 | \n",
184 | " 120 | \n",
185 | " 236 | \n",
186 | " 0 | \n",
187 | " 1 | \n",
188 | " 178 | \n",
189 | " 0 | \n",
190 | " 0.8 | \n",
191 | " 2 | \n",
192 | " 0 | \n",
193 | " 2 | \n",
194 | " 1 | \n",
195 | "
\n",
196 | " \n",
197 | " | 4 | \n",
198 | " 57 | \n",
199 | " 0 | \n",
200 | " 0 | \n",
201 | " 120 | \n",
202 | " 354 | \n",
203 | " 0 | \n",
204 | " 1 | \n",
205 | " 163 | \n",
206 | " 1 | \n",
207 | " 0.6 | \n",
208 | " 2 | \n",
209 | " 0 | \n",
210 | " 2 | \n",
211 | " 1 | \n",
212 | "
\n",
213 | " \n",
214 | "
\n",
215 | "
"
216 | ],
217 | "text/plain": [
218 | " age sex cp trestbps chol fbs ... exang oldpeak slope ca thal target\n",
219 | "0 63 1 3 145 233 1 ... 0 2.3 0 0 1 1\n",
220 | "1 37 1 2 130 250 0 ... 0 3.5 0 0 2 1\n",
221 | "2 41 0 1 130 204 0 ... 0 1.4 2 0 2 1\n",
222 | "3 56 1 1 120 236 0 ... 0 0.8 2 0 2 1\n",
223 | "4 57 0 0 120 354 0 ... 1 0.6 2 0 2 1\n",
224 | "\n",
225 | "[5 rows x 14 columns]"
226 | ]
227 | },
228 | "metadata": {
229 | "tags": []
230 | },
231 | "execution_count": 3
232 | }
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "metadata": {
238 | "colab": {
239 | "base_uri": "https://localhost:8080/",
240 | "height": 198
241 | },
242 | "id": "Fx_aCZDgrqdR",
243 | "outputId": "770eb646-bdff-45da-ac1c-06aae06f7446"
244 | },
245 | "source": [
246 | "# print last 5 rows of the dataset\n",
247 | "heart_data.tail()"
248 | ],
249 | "execution_count": null,
250 | "outputs": [
251 | {
252 | "output_type": "execute_result",
253 | "data": {
254 | "text/html": [
255 | "\n",
256 | "\n",
269 | "
\n",
270 | " \n",
271 | " \n",
272 | " | \n",
273 | " age | \n",
274 | " sex | \n",
275 | " cp | \n",
276 | " trestbps | \n",
277 | " chol | \n",
278 | " fbs | \n",
279 | " restecg | \n",
280 | " thalach | \n",
281 | " exang | \n",
282 | " oldpeak | \n",
283 | " slope | \n",
284 | " ca | \n",
285 | " thal | \n",
286 | " target | \n",
287 | "
\n",
288 | " \n",
289 | " \n",
290 | " \n",
291 | " | 298 | \n",
292 | " 57 | \n",
293 | " 0 | \n",
294 | " 0 | \n",
295 | " 140 | \n",
296 | " 241 | \n",
297 | " 0 | \n",
298 | " 1 | \n",
299 | " 123 | \n",
300 | " 1 | \n",
301 | " 0.2 | \n",
302 | " 1 | \n",
303 | " 0 | \n",
304 | " 3 | \n",
305 | " 0 | \n",
306 | "
\n",
307 | " \n",
308 | " | 299 | \n",
309 | " 45 | \n",
310 | " 1 | \n",
311 | " 3 | \n",
312 | " 110 | \n",
313 | " 264 | \n",
314 | " 0 | \n",
315 | " 1 | \n",
316 | " 132 | \n",
317 | " 0 | \n",
318 | " 1.2 | \n",
319 | " 1 | \n",
320 | " 0 | \n",
321 | " 3 | \n",
322 | " 0 | \n",
323 | "
\n",
324 | " \n",
325 | " | 300 | \n",
326 | " 68 | \n",
327 | " 1 | \n",
328 | " 0 | \n",
329 | " 144 | \n",
330 | " 193 | \n",
331 | " 1 | \n",
332 | " 1 | \n",
333 | " 141 | \n",
334 | " 0 | \n",
335 | " 3.4 | \n",
336 | " 1 | \n",
337 | " 2 | \n",
338 | " 3 | \n",
339 | " 0 | \n",
340 | "
\n",
341 | " \n",
342 | " | 301 | \n",
343 | " 57 | \n",
344 | " 1 | \n",
345 | " 0 | \n",
346 | " 130 | \n",
347 | " 131 | \n",
348 | " 0 | \n",
349 | " 1 | \n",
350 | " 115 | \n",
351 | " 1 | \n",
352 | " 1.2 | \n",
353 | " 1 | \n",
354 | " 1 | \n",
355 | " 3 | \n",
356 | " 0 | \n",
357 | "
\n",
358 | " \n",
359 | " | 302 | \n",
360 | " 57 | \n",
361 | " 0 | \n",
362 | " 1 | \n",
363 | " 130 | \n",
364 | " 236 | \n",
365 | " 0 | \n",
366 | " 0 | \n",
367 | " 174 | \n",
368 | " 0 | \n",
369 | " 0.0 | \n",
370 | " 1 | \n",
371 | " 1 | \n",
372 | " 2 | \n",
373 | " 0 | \n",
374 | "
\n",
375 | " \n",
376 | "
\n",
377 | "
"
378 | ],
379 | "text/plain": [
380 | " age sex cp trestbps chol fbs ... exang oldpeak slope ca thal target\n",
381 | "298 57 0 0 140 241 0 ... 1 0.2 1 0 3 0\n",
382 | "299 45 1 3 110 264 0 ... 0 1.2 1 0 3 0\n",
383 | "300 68 1 0 144 193 1 ... 0 3.4 1 2 3 0\n",
384 | "301 57 1 0 130 131 0 ... 1 1.2 1 1 3 0\n",
385 | "302 57 0 1 130 236 0 ... 0 0.0 1 1 2 0\n",
386 | "\n",
387 | "[5 rows x 14 columns]"
388 | ]
389 | },
390 | "metadata": {
391 | "tags": []
392 | },
393 | "execution_count": 4
394 | }
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "metadata": {
400 | "colab": {
401 | "base_uri": "https://localhost:8080/"
402 | },
403 | "id": "8nX1tIzbrz0u",
404 | "outputId": "6f650e4c-22b6-4750-ca57-ba6758d1c3a2"
405 | },
406 | "source": [
407 | "# number of rows and columns in the dataset\n",
408 | "heart_data.shape"
409 | ],
410 | "execution_count": null,
411 | "outputs": [
412 | {
413 | "output_type": "execute_result",
414 | "data": {
415 | "text/plain": [
416 | "(303, 14)"
417 | ]
418 | },
419 | "metadata": {
420 | "tags": []
421 | },
422 | "execution_count": 5
423 | }
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "metadata": {
429 | "colab": {
430 | "base_uri": "https://localhost:8080/"
431 | },
432 | "id": "7_xTcw1Sr6aJ",
433 | "outputId": "1948e00e-0656-43ef-c4c4-740ee51a6381"
434 | },
435 | "source": [
436 | "# getting some info about the data\n",
437 | "heart_data.info()"
438 | ],
439 | "execution_count": null,
440 | "outputs": [
441 | {
442 | "output_type": "stream",
443 | "text": [
444 | "\n",
445 | "RangeIndex: 303 entries, 0 to 302\n",
446 | "Data columns (total 14 columns):\n",
447 | " # Column Non-Null Count Dtype \n",
448 | "--- ------ -------------- ----- \n",
449 | " 0 age 303 non-null int64 \n",
450 | " 1 sex 303 non-null int64 \n",
451 | " 2 cp 303 non-null int64 \n",
452 | " 3 trestbps 303 non-null int64 \n",
453 | " 4 chol 303 non-null int64 \n",
454 | " 5 fbs 303 non-null int64 \n",
455 | " 6 restecg 303 non-null int64 \n",
456 | " 7 thalach 303 non-null int64 \n",
457 | " 8 exang 303 non-null int64 \n",
458 | " 9 oldpeak 303 non-null float64\n",
459 | " 10 slope 303 non-null int64 \n",
460 | " 11 ca 303 non-null int64 \n",
461 | " 12 thal 303 non-null int64 \n",
462 | " 13 target 303 non-null int64 \n",
463 | "dtypes: float64(1), int64(13)\n",
464 | "memory usage: 33.3 KB\n"
465 | ],
466 | "name": "stdout"
467 | }
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "metadata": {
473 | "colab": {
474 | "base_uri": "https://localhost:8080/"
475 | },
476 | "id": "GjHtW31rsGlb",
477 | "outputId": "8c1c23ce-b5b4-4872-a579-9b4185d12522"
478 | },
479 | "source": [
480 | "# checking for missing values\n",
481 | "heart_data.isnull().sum()"
482 | ],
483 | "execution_count": null,
484 | "outputs": [
485 | {
486 | "output_type": "execute_result",
487 | "data": {
488 | "text/plain": [
489 | "age 0\n",
490 | "sex 0\n",
491 | "cp 0\n",
492 | "trestbps 0\n",
493 | "chol 0\n",
494 | "fbs 0\n",
495 | "restecg 0\n",
496 | "thalach 0\n",
497 | "exang 0\n",
498 | "oldpeak 0\n",
499 | "slope 0\n",
500 | "ca 0\n",
501 | "thal 0\n",
502 | "target 0\n",
503 | "dtype: int64"
504 | ]
505 | },
506 | "metadata": {
507 | "tags": []
508 | },
509 | "execution_count": 7
510 | }
511 | ]
512 | },
513 | {
514 | "cell_type": "code",
515 | "metadata": {
516 | "colab": {
517 | "base_uri": "https://localhost:8080/",
518 | "height": 308
519 | },
520 | "id": "OHmcP7DJsSEP",
521 | "outputId": "400a121e-dbd2-4e77-8c72-021c12af5927"
522 | },
523 | "source": [
524 | "# statistical measures about the data\n",
525 | "heart_data.describe()"
526 | ],
527 | "execution_count": null,
528 | "outputs": [
529 | {
530 | "output_type": "execute_result",
531 | "data": {
532 | "text/html": [
533 | "\n",
534 | "\n",
547 | "
\n",
548 | " \n",
549 | " \n",
550 | " | \n",
551 | " age | \n",
552 | " sex | \n",
553 | " cp | \n",
554 | " trestbps | \n",
555 | " chol | \n",
556 | " fbs | \n",
557 | " restecg | \n",
558 | " thalach | \n",
559 | " exang | \n",
560 | " oldpeak | \n",
561 | " slope | \n",
562 | " ca | \n",
563 | " thal | \n",
564 | " target | \n",
565 | "
\n",
566 | " \n",
567 | " \n",
568 | " \n",
569 | " | count | \n",
570 | " 303.000000 | \n",
571 | " 303.000000 | \n",
572 | " 303.000000 | \n",
573 | " 303.000000 | \n",
574 | " 303.000000 | \n",
575 | " 303.000000 | \n",
576 | " 303.000000 | \n",
577 | " 303.000000 | \n",
578 | " 303.000000 | \n",
579 | " 303.000000 | \n",
580 | " 303.000000 | \n",
581 | " 303.000000 | \n",
582 | " 303.000000 | \n",
583 | " 303.000000 | \n",
584 | "
\n",
585 | " \n",
586 | " | mean | \n",
587 | " 54.366337 | \n",
588 | " 0.683168 | \n",
589 | " 0.966997 | \n",
590 | " 131.623762 | \n",
591 | " 246.264026 | \n",
592 | " 0.148515 | \n",
593 | " 0.528053 | \n",
594 | " 149.646865 | \n",
595 | " 0.326733 | \n",
596 | " 1.039604 | \n",
597 | " 1.399340 | \n",
598 | " 0.729373 | \n",
599 | " 2.313531 | \n",
600 | " 0.544554 | \n",
601 | "
\n",
602 | " \n",
603 | " | std | \n",
604 | " 9.082101 | \n",
605 | " 0.466011 | \n",
606 | " 1.032052 | \n",
607 | " 17.538143 | \n",
608 | " 51.830751 | \n",
609 | " 0.356198 | \n",
610 | " 0.525860 | \n",
611 | " 22.905161 | \n",
612 | " 0.469794 | \n",
613 | " 1.161075 | \n",
614 | " 0.616226 | \n",
615 | " 1.022606 | \n",
616 | " 0.612277 | \n",
617 | " 0.498835 | \n",
618 | "
\n",
619 | " \n",
620 | " | min | \n",
621 | " 29.000000 | \n",
622 | " 0.000000 | \n",
623 | " 0.000000 | \n",
624 | " 94.000000 | \n",
625 | " 126.000000 | \n",
626 | " 0.000000 | \n",
627 | " 0.000000 | \n",
628 | " 71.000000 | \n",
629 | " 0.000000 | \n",
630 | " 0.000000 | \n",
631 | " 0.000000 | \n",
632 | " 0.000000 | \n",
633 | " 0.000000 | \n",
634 | " 0.000000 | \n",
635 | "
\n",
636 | " \n",
637 | " | 25% | \n",
638 | " 47.500000 | \n",
639 | " 0.000000 | \n",
640 | " 0.000000 | \n",
641 | " 120.000000 | \n",
642 | " 211.000000 | \n",
643 | " 0.000000 | \n",
644 | " 0.000000 | \n",
645 | " 133.500000 | \n",
646 | " 0.000000 | \n",
647 | " 0.000000 | \n",
648 | " 1.000000 | \n",
649 | " 0.000000 | \n",
650 | " 2.000000 | \n",
651 | " 0.000000 | \n",
652 | "
\n",
653 | " \n",
654 | " | 50% | \n",
655 | " 55.000000 | \n",
656 | " 1.000000 | \n",
657 | " 1.000000 | \n",
658 | " 130.000000 | \n",
659 | " 240.000000 | \n",
660 | " 0.000000 | \n",
661 | " 1.000000 | \n",
662 | " 153.000000 | \n",
663 | " 0.000000 | \n",
664 | " 0.800000 | \n",
665 | " 1.000000 | \n",
666 | " 0.000000 | \n",
667 | " 2.000000 | \n",
668 | " 1.000000 | \n",
669 | "
\n",
670 | " \n",
671 | " | 75% | \n",
672 | " 61.000000 | \n",
673 | " 1.000000 | \n",
674 | " 2.000000 | \n",
675 | " 140.000000 | \n",
676 | " 274.500000 | \n",
677 | " 0.000000 | \n",
678 | " 1.000000 | \n",
679 | " 166.000000 | \n",
680 | " 1.000000 | \n",
681 | " 1.600000 | \n",
682 | " 2.000000 | \n",
683 | " 1.000000 | \n",
684 | " 3.000000 | \n",
685 | " 1.000000 | \n",
686 | "
\n",
687 | " \n",
688 | " | max | \n",
689 | " 77.000000 | \n",
690 | " 1.000000 | \n",
691 | " 3.000000 | \n",
692 | " 200.000000 | \n",
693 | " 564.000000 | \n",
694 | " 1.000000 | \n",
695 | " 2.000000 | \n",
696 | " 202.000000 | \n",
697 | " 1.000000 | \n",
698 | " 6.200000 | \n",
699 | " 2.000000 | \n",
700 | " 4.000000 | \n",
701 | " 3.000000 | \n",
702 | " 1.000000 | \n",
703 | "
\n",
704 | " \n",
705 | "
\n",
706 | "
"
707 | ],
708 | "text/plain": [
709 | " age sex cp ... ca thal target\n",
710 | "count 303.000000 303.000000 303.000000 ... 303.000000 303.000000 303.000000\n",
711 | "mean 54.366337 0.683168 0.966997 ... 0.729373 2.313531 0.544554\n",
712 | "std 9.082101 0.466011 1.032052 ... 1.022606 0.612277 0.498835\n",
713 | "min 29.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000\n",
714 | "25% 47.500000 0.000000 0.000000 ... 0.000000 2.000000 0.000000\n",
715 | "50% 55.000000 1.000000 1.000000 ... 0.000000 2.000000 1.000000\n",
716 | "75% 61.000000 1.000000 2.000000 ... 1.000000 3.000000 1.000000\n",
717 | "max 77.000000 1.000000 3.000000 ... 4.000000 3.000000 1.000000\n",
718 | "\n",
719 | "[8 rows x 14 columns]"
720 | ]
721 | },
722 | "metadata": {
723 | "tags": []
724 | },
725 | "execution_count": 8
726 | }
727 | ]
728 | },
729 | {
730 | "cell_type": "code",
731 | "metadata": {
732 | "colab": {
733 | "base_uri": "https://localhost:8080/"
734 | },
735 | "id": "4InaOSIUsfWP",
736 | "outputId": "6c38694f-7445-47b3-e235-cdd0157a4ec6"
737 | },
738 | "source": [
739 | "# checking the distribution of Target Variable\n",
740 | "heart_data['target'].value_counts()"
741 | ],
742 | "execution_count": null,
743 | "outputs": [
744 | {
745 | "output_type": "execute_result",
746 | "data": {
747 | "text/plain": [
748 | "1 165\n",
749 | "0 138\n",
750 | "Name: target, dtype: int64"
751 | ]
752 | },
753 | "metadata": {
754 | "tags": []
755 | },
756 | "execution_count": 10
757 | }
758 | ]
759 | },
760 | {
761 | "cell_type": "markdown",
762 | "metadata": {
763 | "id": "aSOBu4qDtJy5"
764 | },
765 | "source": [
766 | "1 --> Defective Heart\n",
767 | "\n",
768 | "0 --> Healthy Heart"
769 | ]
770 | },
771 | {
772 | "cell_type": "markdown",
773 | "metadata": {
774 | "id": "tW8i4igjtPRC"
775 | },
776 | "source": [
777 | "Splitting the Features and Target"
778 | ]
779 | },
780 | {
781 | "cell_type": "code",
782 | "metadata": {
783 | "id": "Q6yfbswrs7m3"
784 | },
785 | "source": [
786 | "X = heart_data.drop(columns='target')  # features: every column except the label\n",
787 | "Y = heart_data['target']  # label: 1 = defective heart, 0 = healthy heart"
788 | ],
789 | "execution_count": null,
790 | "outputs": []
791 | },
792 | {
793 | "cell_type": "code",
794 | "metadata": {
795 | "colab": {
796 | "base_uri": "https://localhost:8080/"
797 | },
798 | "id": "XJoCp4ZKtpZy",
799 | "outputId": "549bc077-393d-4763-f64f-3d2e5faa0c7f"
800 | },
801 | "source": [
802 | "print(X)"
803 | ],
804 | "execution_count": null,
805 | "outputs": [
806 | {
807 | "output_type": "stream",
808 | "text": [
809 | " age sex cp trestbps chol ... exang oldpeak slope ca thal\n",
810 | "0 63 1 3 145 233 ... 0 2.3 0 0 1\n",
811 | "1 37 1 2 130 250 ... 0 3.5 0 0 2\n",
812 | "2 41 0 1 130 204 ... 0 1.4 2 0 2\n",
813 | "3 56 1 1 120 236 ... 0 0.8 2 0 2\n",
814 | "4 57 0 0 120 354 ... 1 0.6 2 0 2\n",
815 | ".. ... ... .. ... ... ... ... ... ... .. ...\n",
816 | "298 57 0 0 140 241 ... 1 0.2 1 0 3\n",
817 | "299 45 1 3 110 264 ... 0 1.2 1 0 3\n",
818 | "300 68 1 0 144 193 ... 0 3.4 1 2 3\n",
819 | "301 57 1 0 130 131 ... 1 1.2 1 1 3\n",
820 | "302 57 0 1 130 236 ... 0 0.0 1 1 2\n",
821 | "\n",
822 | "[303 rows x 13 columns]\n"
823 | ],
824 | "name": "stdout"
825 | }
826 | ]
827 | },
828 | {
829 | "cell_type": "code",
830 | "metadata": {
831 | "colab": {
832 | "base_uri": "https://localhost:8080/"
833 | },
834 | "id": "nukuj-YItq1w",
835 | "outputId": "7c604a47-1690-4db4-fec7-bed3e9497428"
836 | },
837 | "source": [
838 | "print(Y)"
839 | ],
840 | "execution_count": null,
841 | "outputs": [
842 | {
843 | "output_type": "stream",
844 | "text": [
845 | "0 1\n",
846 | "1 1\n",
847 | "2 1\n",
848 | "3 1\n",
849 | "4 1\n",
850 | " ..\n",
851 | "298 0\n",
852 | "299 0\n",
853 | "300 0\n",
854 | "301 0\n",
855 | "302 0\n",
856 | "Name: target, Length: 303, dtype: int64\n"
857 | ],
858 | "name": "stdout"
859 | }
860 | ]
861 | },
862 | {
863 | "cell_type": "markdown",
864 | "metadata": {
865 | "id": "_EcjSE3Et18n"
866 | },
867 | "source": [
868 | "Splitting the Data into Training data & Test Data"
869 | ]
870 | },
871 | {
872 | "cell_type": "code",
873 | "metadata": {
874 | "id": "a-UUfRUxtuga"
875 | },
876 | "source": [
877 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)"
878 | ],
879 | "execution_count": null,
880 | "outputs": []
881 | },
882 | {
883 | "cell_type": "code",
884 | "metadata": {
885 | "colab": {
886 | "base_uri": "https://localhost:8080/"
887 | },
888 | "id": "x7PrjC6zuf6X",
889 | "outputId": "f2d66421-d671-4475-a51c-b37de3a2edac"
890 | },
891 | "source": [
892 | "print(X.shape, X_train.shape, X_test.shape)"
893 | ],
894 | "execution_count": null,
895 | "outputs": [
896 | {
897 | "output_type": "stream",
898 | "text": [
899 | "(303, 13) (242, 13) (61, 13)\n"
900 | ],
901 | "name": "stdout"
902 | }
903 | ]
904 | },
905 | {
906 | "cell_type": "markdown",
907 | "metadata": {
908 | "id": "beSkZmpVuvn9"
909 | },
910 | "source": [
911 | "Model Training"
912 | ]
913 | },
914 | {
915 | "cell_type": "markdown",
916 | "metadata": {
917 | "id": "gi2NOWZjuxzw"
918 | },
919 | "source": [
920 | "Logistic Regression"
921 | ]
922 | },
923 | {
924 | "cell_type": "code",
925 | "metadata": {
926 | "id": "4-Md74FYuqNL"
927 | },
928 | "source": [
929 | "model = LogisticRegression(max_iter=1000)  # default max_iter=100 made lbfgs stop before converging on these unscaled features"
930 | ],
931 | "execution_count": null,
932 | "outputs": []
933 | },
934 | {
935 | "cell_type": "code",
936 | "metadata": {
937 | "colab": {
938 | "base_uri": "https://localhost:8080/"
939 | },
940 | "id": "kCdHYxGUu7XD",
941 | "outputId": "ff7185b7-1dd2-418d-c22f-778005b4655b"
942 | },
943 | "source": [
944 | "# training the LogisticRegression model with Training data\n",
945 | "model.fit(X_train, Y_train)"
946 | ],
947 | "execution_count": null,
948 | "outputs": [
949 | {
950 | "output_type": "stream",
951 | "text": [
952 | "/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
953 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
954 | "\n",
955 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
956 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
957 | "Please also refer to the documentation for alternative solver options:\n",
958 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
959 | " extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"
960 | ],
961 | "name": "stderr"
962 | },
963 | {
964 | "output_type": "execute_result",
965 | "data": {
966 | "text/plain": [
967 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
968 | " intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
969 | " multi_class='auto', n_jobs=None, penalty='l2',\n",
970 | " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
971 | " warm_start=False)"
972 | ]
973 | },
974 | "metadata": {
975 | "tags": []
976 | },
977 | "execution_count": 17
978 | }
979 | ]
980 | },
981 | {
982 | "cell_type": "markdown",
983 | "metadata": {
984 | "id": "ZYIw8Gi9vXfU"
985 | },
986 | "source": [
987 | "Model Evaluation"
988 | ]
989 | },
990 | {
991 | "cell_type": "markdown",
992 | "metadata": {
993 | "id": "wmxAekfZvZa9"
994 | },
995 | "source": [
996 | "Accuracy Score"
997 | ]
998 | },
999 | {
1000 | "cell_type": "code",
1001 | "metadata": {
1002 | "id": "g19JaUTMvPKy"
1003 | },
1004 | "source": [
1005 | "# accuracy on training data (accuracy_score takes y_true first, then y_pred)\n",
1006 | "X_train_prediction = model.predict(X_train)\n",
1007 | "training_data_accuracy = accuracy_score(Y_train, X_train_prediction)"
1008 | ],
1009 | "execution_count": null,
1010 | "outputs": []
1011 | },
1012 | {
1013 | "cell_type": "code",
1014 | "metadata": {
1015 | "colab": {
1016 | "base_uri": "https://localhost:8080/"
1017 | },
1018 | "id": "uQBZvBh8v7R_",
1019 | "outputId": "e798e765-9f84-43e2-d3a0-f0fea3192ac2"
1020 | },
1021 | "source": [
1022 | "print('Accuracy on Training data : ', training_data_accuracy)"
1023 | ],
1024 | "execution_count": null,
1025 | "outputs": [
1026 | {
1027 | "output_type": "stream",
1028 | "text": [
1029 | "Accuracy on Training data : 0.8512396694214877\n"
1030 | ],
1031 | "name": "stdout"
1032 | }
1033 | ]
1034 | },
1035 | {
1036 | "cell_type": "code",
1037 | "metadata": {
1038 | "id": "mDONDJdlwBIO"
1039 | },
1040 | "source": [
1041 | "# accuracy on test data (accuracy_score takes y_true first, then y_pred)\n",
1042 | "X_test_prediction = model.predict(X_test)\n",
1043 | "test_data_accuracy = accuracy_score(Y_test, X_test_prediction)"
1044 | ],
1045 | "execution_count": null,
1046 | "outputs": []
1047 | },
1048 | {
1049 | "cell_type": "code",
1050 | "metadata": {
1051 | "colab": {
1052 | "base_uri": "https://localhost:8080/"
1053 | },
1054 | "id": "_MBS-OqdwYpf",
1055 | "outputId": "a2f5bd57-7135-47c2-c8f2-01f1823ad66a"
1056 | },
1057 | "source": [
1058 | "print('Accuracy on Test data : ', test_data_accuracy)"
1059 | ],
1060 | "execution_count": null,
1061 | "outputs": [
1062 | {
1063 | "output_type": "stream",
1064 | "text": [
1065 | "Accuracy on Test data : 0.819672131147541\n"
1066 | ],
1067 | "name": "stdout"
1068 | }
1069 | ]
1070 | },
1071 | {
1072 | "cell_type": "markdown",
1073 | "metadata": {
1074 | "id": "jIruVh3Qwq0e"
1075 | },
1076 | "source": [
1077 | "Building a Predictive System"
1078 | ]
1079 | },
1080 | {
1081 | "cell_type": "code",
1082 | "metadata": {
1083 | "colab": {
1084 | "base_uri": "https://localhost:8080/"
1085 | },
1086 | "id": "9ercruC9wb4C",
1087 | "outputId": "6a7f8964-d7c5-4a54-bb76-ef18f2add04a"
1088 | },
1089 | "source": [
1090 | "input_data = (62,0,0,140,268,0,0,160,0,3.6,0,2,2)  # one value per feature, in the column order of X\n",
1091 | "\n",
1092 | "# change the input data to a numpy array\n",
1093 | "input_data_as_numpy_array = np.asarray(input_data)\n",
1094 | "\n",
1095 | "# reshape to one row (a single instance) and label it with the columns the model was fitted on\n",
1096 | "input_data_reshaped = pd.DataFrame(input_data_as_numpy_array.reshape(1, -1), columns=X.columns)\n",
1097 | "\n",
1098 | "prediction = model.predict(input_data_reshaped)\n",
1099 | "print(prediction)\n",
1100 | "\n",
1101 | "if prediction[0] == 0:\n",
1102 | " print('The Person does not have a Heart Disease')\n",
1103 | "else:\n",
1104 | " print('The Person has Heart Disease')"
1105 | ],
1106 | "execution_count": null,
1107 | "outputs": [
1108 | {
1109 | "output_type": "stream",
1110 | "text": [
1111 | "[0]\n",
1112 | "The Person does not have a Heart Disease\n"
1113 | ],
1114 | "name": "stdout"
1115 | }
1116 | ]
1117 | }
1118 | ]
1119 | }
1120 |
--------------------------------------------------------------------------------