└── Heart_Disease_Prediction (1).ipynb
/Heart_Disease_Prediction (1).ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "aTb-9TFFqprC"
21 | },
22 | "source": [
23 | "Importing the Dependencies"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "metadata": {
29 | "id": "3q9U3S_whh3-"
30 | },
31 | "source": [
32 | "import numpy as np\n",
33 | "import pandas as pd\n",
34 | "from sklearn.model_selection import train_test_split\n",
35 | "from sklearn.linear_model import LogisticRegression\n",
36 | "from sklearn.metrics import accuracy_score"
37 | ],
38 | "execution_count": null,
39 | "outputs": []
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {
44 | "id": "egMd5zeurTMR"
45 | },
46 | "source": [
47 | "Data Collection and Processing"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "metadata": {
53 | "id": "0q-3-LkQrREV"
54 | },
55 | "source": [
56 | "# load the heart-disease CSV into a pandas DataFrame (the absolute /content path is presumably the Colab upload folder - adjust it when running elsewhere)\n",
57 | "heart_data = pd.read_csv('/content/data.csv')"
58 | ],
59 | "execution_count": null,
60 | "outputs": []
61 | },
62 | {
63 | "cell_type": "code",
64 | "metadata": {
65 | "id": "M8dQxSTqriWD",
66 | "colab": {
67 | "base_uri": "https://localhost:8080/",
68 | "height": 206
69 | },
70 | "outputId": "0652f598-ce83-45d2-957d-fc07698432d7"
71 | },
72 | "source": [
73 | "# preview the first 5 rows of the dataset (displayed as the cell result)\n",
74 | "heart_data.head()"
75 | ],
76 | "execution_count": null,
77 | "outputs": [
78 | {
79 | "output_type": "execute_result",
80 | "data": {
81 | "text/plain": [
82 | " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n",
83 | "0 63 1 3 145 233 1 0 150 0 2.3 0 \n",
84 | "1 37 1 2 130 250 0 1 187 0 3.5 0 \n",
85 | "2 41 0 1 130 204 0 0 172 0 1.4 2 \n",
86 | "3 56 1 1 120 236 0 1 178 0 0.8 2 \n",
87 | "4 57 0 0 120 354 0 1 163 1 0.6 2 \n",
88 | "\n",
89 | " ca thal target \n",
90 | "0 0 1 1 \n",
91 | "1 0 2 1 \n",
92 | "2 0 2 1 \n",
93 | "3 0 2 1 \n",
94 | "4 0 2 1 "
95 | ],
96 | "text/html": [
97 | "\n",
98 | "\n",
99 | "
\n",
100 | "
\n",
101 | "
\n",
102 | "\n",
115 | "
\n",
116 | " \n",
117 | " \n",
118 | " \n",
119 | " age \n",
120 | " sex \n",
121 | " cp \n",
122 | " trestbps \n",
123 | " chol \n",
124 | " fbs \n",
125 | " restecg \n",
126 | " thalach \n",
127 | " exang \n",
128 | " oldpeak \n",
129 | " slope \n",
130 | " ca \n",
131 | " thal \n",
132 | " target \n",
133 | " \n",
134 | " \n",
135 | " \n",
136 | " \n",
137 | " 0 \n",
138 | " 63 \n",
139 | " 1 \n",
140 | " 3 \n",
141 | " 145 \n",
142 | " 233 \n",
143 | " 1 \n",
144 | " 0 \n",
145 | " 150 \n",
146 | " 0 \n",
147 | " 2.3 \n",
148 | " 0 \n",
149 | " 0 \n",
150 | " 1 \n",
151 | " 1 \n",
152 | " \n",
153 | " \n",
154 | " 1 \n",
155 | " 37 \n",
156 | " 1 \n",
157 | " 2 \n",
158 | " 130 \n",
159 | " 250 \n",
160 | " 0 \n",
161 | " 1 \n",
162 | " 187 \n",
163 | " 0 \n",
164 | " 3.5 \n",
165 | " 0 \n",
166 | " 0 \n",
167 | " 2 \n",
168 | " 1 \n",
169 | " \n",
170 | " \n",
171 | " 2 \n",
172 | " 41 \n",
173 | " 0 \n",
174 | " 1 \n",
175 | " 130 \n",
176 | " 204 \n",
177 | " 0 \n",
178 | " 0 \n",
179 | " 172 \n",
180 | " 0 \n",
181 | " 1.4 \n",
182 | " 2 \n",
183 | " 0 \n",
184 | " 2 \n",
185 | " 1 \n",
186 | " \n",
187 | " \n",
188 | " 3 \n",
189 | " 56 \n",
190 | " 1 \n",
191 | " 1 \n",
192 | " 120 \n",
193 | " 236 \n",
194 | " 0 \n",
195 | " 1 \n",
196 | " 178 \n",
197 | " 0 \n",
198 | " 0.8 \n",
199 | " 2 \n",
200 | " 0 \n",
201 | " 2 \n",
202 | " 1 \n",
203 | " \n",
204 | " \n",
205 | " 4 \n",
206 | " 57 \n",
207 | " 0 \n",
208 | " 0 \n",
209 | " 120 \n",
210 | " 354 \n",
211 | " 0 \n",
212 | " 1 \n",
213 | " 163 \n",
214 | " 1 \n",
215 | " 0.6 \n",
216 | " 2 \n",
217 | " 0 \n",
218 | " 2 \n",
219 | " 1 \n",
220 | " \n",
221 | " \n",
222 | "
\n",
223 | "
\n",
224 | "
\n",
227 | "\n",
228 | " \n",
230 | " \n",
231 | " \n",
232 | " \n",
233 | " \n",
234 | "\n",
235 | "\n",
236 | "\n",
237 | "
\n",
238 | "
\n",
241 | "\n",
242 | "\n",
244 | " \n",
245 | " \n",
246 | " \n",
247 | " \n",
248 | " \n",
249 | "
\n",
250 | "\n",
251 | "\n",
282 | "\n",
283 | " \n",
290 | "\n",
291 | " \n",
302 | " \n",
339 | "\n",
340 | " \n",
364 | "
\n",
365 | "
\n"
366 | ]
367 | },
368 | "metadata": {},
369 | "execution_count": 26
370 | }
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "metadata": {
376 | "id": "Fx_aCZDgrqdR",
377 | "colab": {
378 | "base_uri": "https://localhost:8080/",
379 | "height": 206
380 | },
381 | "outputId": "628aa677-b738-401d-8831-ae29463eaf0b"
382 | },
383 | "source": [
384 | "# preview the last 5 rows of the dataset (displayed as the cell result)\n",
385 | "heart_data.tail()"
386 | ],
387 | "execution_count": null,
388 | "outputs": [
389 | {
390 | "output_type": "execute_result",
391 | "data": {
392 | "text/plain": [
393 | " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n",
394 | "298 57 0 0 140 241 0 1 123 1 0.2 \n",
395 | "299 45 1 3 110 264 0 1 132 0 1.2 \n",
396 | "300 68 1 0 144 193 1 1 141 0 3.4 \n",
397 | "301 57 1 0 130 131 0 1 115 1 1.2 \n",
398 | "302 57 0 1 130 236 0 0 174 0 0.0 \n",
399 | "\n",
400 | " slope ca thal target \n",
401 | "298 1 0 3 0 \n",
402 | "299 1 0 3 0 \n",
403 | "300 1 2 3 0 \n",
404 | "301 1 1 3 0 \n",
405 | "302 1 1 2 0 "
406 | ],
407 | "text/html": [
408 | "\n",
409 | "\n",
410 | " \n",
411 | "
\n",
412 | "
\n",
413 | "\n",
426 | "
\n",
427 | " \n",
428 | " \n",
429 | " \n",
430 | " age \n",
431 | " sex \n",
432 | " cp \n",
433 | " trestbps \n",
434 | " chol \n",
435 | " fbs \n",
436 | " restecg \n",
437 | " thalach \n",
438 | " exang \n",
439 | " oldpeak \n",
440 | " slope \n",
441 | " ca \n",
442 | " thal \n",
443 | " target \n",
444 | " \n",
445 | " \n",
446 | " \n",
447 | " \n",
448 | " 298 \n",
449 | " 57 \n",
450 | " 0 \n",
451 | " 0 \n",
452 | " 140 \n",
453 | " 241 \n",
454 | " 0 \n",
455 | " 1 \n",
456 | " 123 \n",
457 | " 1 \n",
458 | " 0.2 \n",
459 | " 1 \n",
460 | " 0 \n",
461 | " 3 \n",
462 | " 0 \n",
463 | " \n",
464 | " \n",
465 | " 299 \n",
466 | " 45 \n",
467 | " 1 \n",
468 | " 3 \n",
469 | " 110 \n",
470 | " 264 \n",
471 | " 0 \n",
472 | " 1 \n",
473 | " 132 \n",
474 | " 0 \n",
475 | " 1.2 \n",
476 | " 1 \n",
477 | " 0 \n",
478 | " 3 \n",
479 | " 0 \n",
480 | " \n",
481 | " \n",
482 | " 300 \n",
483 | " 68 \n",
484 | " 1 \n",
485 | " 0 \n",
486 | " 144 \n",
487 | " 193 \n",
488 | " 1 \n",
489 | " 1 \n",
490 | " 141 \n",
491 | " 0 \n",
492 | " 3.4 \n",
493 | " 1 \n",
494 | " 2 \n",
495 | " 3 \n",
496 | " 0 \n",
497 | " \n",
498 | " \n",
499 | " 301 \n",
500 | " 57 \n",
501 | " 1 \n",
502 | " 0 \n",
503 | " 130 \n",
504 | " 131 \n",
505 | " 0 \n",
506 | " 1 \n",
507 | " 115 \n",
508 | " 1 \n",
509 | " 1.2 \n",
510 | " 1 \n",
511 | " 1 \n",
512 | " 3 \n",
513 | " 0 \n",
514 | " \n",
515 | " \n",
516 | " 302 \n",
517 | " 57 \n",
518 | " 0 \n",
519 | " 1 \n",
520 | " 130 \n",
521 | " 236 \n",
522 | " 0 \n",
523 | " 0 \n",
524 | " 174 \n",
525 | " 0 \n",
526 | " 0.0 \n",
527 | " 1 \n",
528 | " 1 \n",
529 | " 2 \n",
530 | " 0 \n",
531 | " \n",
532 | " \n",
533 | "
\n",
534 | "
\n",
535 | "
\n",
538 | "\n",
539 | " \n",
541 | " \n",
542 | " \n",
543 | " \n",
544 | " \n",
545 | "\n",
546 | "\n",
547 | "\n",
548 | "
\n",
549 | "
\n",
552 | "\n",
553 | "\n",
555 | " \n",
556 | " \n",
557 | " \n",
558 | " \n",
559 | " \n",
560 | "
\n",
561 | "\n",
562 | "\n",
593 | "\n",
594 | " \n",
601 | "\n",
602 | " \n",
613 | " \n",
650 | "\n",
651 | " \n",
675 | "
\n",
676 | "
\n"
677 | ]
678 | },
679 | "metadata": {},
680 | "execution_count": 27
681 | }
682 | ]
683 | },
684 | {
685 | "cell_type": "code",
686 | "metadata": {
687 | "id": "8nX1tIzbrz0u",
688 | "colab": {
689 | "base_uri": "https://localhost:8080/"
690 | },
691 | "outputId": "a489a6bb-c67b-49a3-e579-2112481230be"
692 | },
693 | "source": [
694 | "# dataset dimensions as a (rows, columns) tuple\n",
695 | "heart_data.shape"
696 | ],
697 | "execution_count": null,
698 | "outputs": [
699 | {
700 | "output_type": "execute_result",
701 | "data": {
702 | "text/plain": [
703 | "(303, 14)"
704 | ]
705 | },
706 | "metadata": {},
707 | "execution_count": 28
708 | }
709 | ]
710 | },
711 | {
712 | "cell_type": "code",
713 | "metadata": {
714 | "id": "7_xTcw1Sr6aJ",
715 | "colab": {
716 | "base_uri": "https://localhost:8080/"
717 | },
718 | "outputId": "bf8a473c-7315-4f79-a33c-a314f2f86e34"
719 | },
720 | "source": [
721 | "# per-column dtype and non-null count, plus total memory usage\n",
722 | "heart_data.info()"
723 | ],
724 | "execution_count": null,
725 | "outputs": [
726 | {
727 | "output_type": "stream",
728 | "name": "stdout",
729 | "text": [
730 | "\n",
731 | "RangeIndex: 303 entries, 0 to 302\n",
732 | "Data columns (total 14 columns):\n",
733 | " # Column Non-Null Count Dtype \n",
734 | "--- ------ -------------- ----- \n",
735 | " 0 age 303 non-null int64 \n",
736 | " 1 sex 303 non-null int64 \n",
737 | " 2 cp 303 non-null int64 \n",
738 | " 3 trestbps 303 non-null int64 \n",
739 | " 4 chol 303 non-null int64 \n",
740 | " 5 fbs 303 non-null int64 \n",
741 | " 6 restecg 303 non-null int64 \n",
742 | " 7 thalach 303 non-null int64 \n",
743 | " 8 exang 303 non-null int64 \n",
744 | " 9 oldpeak 303 non-null float64\n",
745 | " 10 slope 303 non-null int64 \n",
746 | " 11 ca 303 non-null int64 \n",
747 | " 12 thal 303 non-null int64 \n",
748 | " 13 target 303 non-null int64 \n",
749 | "dtypes: float64(1), int64(13)\n",
750 | "memory usage: 33.3 KB\n"
751 | ]
752 | }
753 | ]
754 | },
755 | {
756 | "cell_type": "code",
757 | "metadata": {
758 | "id": "GjHtW31rsGlb",
759 | "colab": {
760 | "base_uri": "https://localhost:8080/"
761 | },
762 | "outputId": "5eac0aea-4005-4dfb-cf01-8094d11c46cd"
763 | },
764 | "source": [
765 | "# number of missing (null) values in each column\n",
766 | "heart_data.isnull().sum()"
767 | ],
768 | "execution_count": null,
769 | "outputs": [
770 | {
771 | "output_type": "execute_result",
772 | "data": {
773 | "text/plain": [
774 | "age 0\n",
775 | "sex 0\n",
776 | "cp 0\n",
777 | "trestbps 0\n",
778 | "chol 0\n",
779 | "fbs 0\n",
780 | "restecg 0\n",
781 | "thalach 0\n",
782 | "exang 0\n",
783 | "oldpeak 0\n",
784 | "slope 0\n",
785 | "ca 0\n",
786 | "thal 0\n",
787 | "target 0\n",
788 | "dtype: int64"
789 | ]
790 | },
791 | "metadata": {},
792 | "execution_count": 30
793 | }
794 | ]
795 | },
796 | {
797 | "cell_type": "code",
798 | "metadata": {
799 | "id": "OHmcP7DJsSEP",
800 | "colab": {
801 | "base_uri": "https://localhost:8080/",
802 | "height": 300
803 | },
804 | "outputId": "dcb3f424-1e0b-45a2-9a40-98e5097ec628"
805 | },
806 | "source": [
807 | "# summary statistics per column: count, mean, std, min, quartiles, max\n",
808 | "heart_data.describe()"
809 | ],
810 | "execution_count": null,
811 | "outputs": [
812 | {
813 | "output_type": "execute_result",
814 | "data": {
815 | "text/plain": [
816 | " age sex cp trestbps chol fbs \\\n",
817 | "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n",
818 | "mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 \n",
819 | "std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 \n",
820 | "min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 \n",
821 | "25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 \n",
822 | "50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 \n",
823 | "75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 \n",
824 | "max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 \n",
825 | "\n",
826 | " restecg thalach exang oldpeak slope ca \\\n",
827 | "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n",
828 | "mean 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 \n",
829 | "std 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 \n",
830 | "min 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 \n",
831 | "25% 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 \n",
832 | "50% 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 \n",
833 | "75% 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 \n",
834 | "max 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 \n",
835 | "\n",
836 | " thal target \n",
837 | "count 303.000000 303.000000 \n",
838 | "mean 2.313531 0.544554 \n",
839 | "std 0.612277 0.498835 \n",
840 | "min 0.000000 0.000000 \n",
841 | "25% 2.000000 0.000000 \n",
842 | "50% 2.000000 1.000000 \n",
843 | "75% 3.000000 1.000000 \n",
844 | "max 3.000000 1.000000 "
845 | ],
846 | "text/html": [
847 | "\n",
848 | "\n",
849 | " \n",
850 | "
\n",
851 | "
\n",
852 | "\n",
865 | "
\n",
866 | " \n",
867 | " \n",
868 | " \n",
869 | " age \n",
870 | " sex \n",
871 | " cp \n",
872 | " trestbps \n",
873 | " chol \n",
874 | " fbs \n",
875 | " restecg \n",
876 | " thalach \n",
877 | " exang \n",
878 | " oldpeak \n",
879 | " slope \n",
880 | " ca \n",
881 | " thal \n",
882 | " target \n",
883 | " \n",
884 | " \n",
885 | " \n",
886 | " \n",
887 | " count \n",
888 | " 303.000000 \n",
889 | " 303.000000 \n",
890 | " 303.000000 \n",
891 | " 303.000000 \n",
892 | " 303.000000 \n",
893 | " 303.000000 \n",
894 | " 303.000000 \n",
895 | " 303.000000 \n",
896 | " 303.000000 \n",
897 | " 303.000000 \n",
898 | " 303.000000 \n",
899 | " 303.000000 \n",
900 | " 303.000000 \n",
901 | " 303.000000 \n",
902 | " \n",
903 | " \n",
904 | " mean \n",
905 | " 54.366337 \n",
906 | " 0.683168 \n",
907 | " 0.966997 \n",
908 | " 131.623762 \n",
909 | " 246.264026 \n",
910 | " 0.148515 \n",
911 | " 0.528053 \n",
912 | " 149.646865 \n",
913 | " 0.326733 \n",
914 | " 1.039604 \n",
915 | " 1.399340 \n",
916 | " 0.729373 \n",
917 | " 2.313531 \n",
918 | " 0.544554 \n",
919 | " \n",
920 | " \n",
921 | " std \n",
922 | " 9.082101 \n",
923 | " 0.466011 \n",
924 | " 1.032052 \n",
925 | " 17.538143 \n",
926 | " 51.830751 \n",
927 | " 0.356198 \n",
928 | " 0.525860 \n",
929 | " 22.905161 \n",
930 | " 0.469794 \n",
931 | " 1.161075 \n",
932 | " 0.616226 \n",
933 | " 1.022606 \n",
934 | " 0.612277 \n",
935 | " 0.498835 \n",
936 | " \n",
937 | " \n",
938 | " min \n",
939 | " 29.000000 \n",
940 | " 0.000000 \n",
941 | " 0.000000 \n",
942 | " 94.000000 \n",
943 | " 126.000000 \n",
944 | " 0.000000 \n",
945 | " 0.000000 \n",
946 | " 71.000000 \n",
947 | " 0.000000 \n",
948 | " 0.000000 \n",
949 | " 0.000000 \n",
950 | " 0.000000 \n",
951 | " 0.000000 \n",
952 | " 0.000000 \n",
953 | " \n",
954 | " \n",
955 | " 25% \n",
956 | " 47.500000 \n",
957 | " 0.000000 \n",
958 | " 0.000000 \n",
959 | " 120.000000 \n",
960 | " 211.000000 \n",
961 | " 0.000000 \n",
962 | " 0.000000 \n",
963 | " 133.500000 \n",
964 | " 0.000000 \n",
965 | " 0.000000 \n",
966 | " 1.000000 \n",
967 | " 0.000000 \n",
968 | " 2.000000 \n",
969 | " 0.000000 \n",
970 | " \n",
971 | " \n",
972 | " 50% \n",
973 | " 55.000000 \n",
974 | " 1.000000 \n",
975 | " 1.000000 \n",
976 | " 130.000000 \n",
977 | " 240.000000 \n",
978 | " 0.000000 \n",
979 | " 1.000000 \n",
980 | " 153.000000 \n",
981 | " 0.000000 \n",
982 | " 0.800000 \n",
983 | " 1.000000 \n",
984 | " 0.000000 \n",
985 | " 2.000000 \n",
986 | " 1.000000 \n",
987 | " \n",
988 | " \n",
989 | " 75% \n",
990 | " 61.000000 \n",
991 | " 1.000000 \n",
992 | " 2.000000 \n",
993 | " 140.000000 \n",
994 | " 274.500000 \n",
995 | " 0.000000 \n",
996 | " 1.000000 \n",
997 | " 166.000000 \n",
998 | " 1.000000 \n",
999 | " 1.600000 \n",
1000 | " 2.000000 \n",
1001 | " 1.000000 \n",
1002 | " 3.000000 \n",
1003 | " 1.000000 \n",
1004 | " \n",
1005 | " \n",
1006 | " max \n",
1007 | " 77.000000 \n",
1008 | " 1.000000 \n",
1009 | " 3.000000 \n",
1010 | " 200.000000 \n",
1011 | " 564.000000 \n",
1012 | " 1.000000 \n",
1013 | " 2.000000 \n",
1014 | " 202.000000 \n",
1015 | " 1.000000 \n",
1016 | " 6.200000 \n",
1017 | " 2.000000 \n",
1018 | " 4.000000 \n",
1019 | " 3.000000 \n",
1020 | " 1.000000 \n",
1021 | " \n",
1022 | " \n",
1023 | "
\n",
1024 | "
\n",
1025 | "
\n",
1028 | "\n",
1029 | " \n",
1031 | " \n",
1032 | " \n",
1033 | " \n",
1034 | " \n",
1035 | "\n",
1036 | "\n",
1037 | "\n",
1038 | "
\n",
1039 | "
\n",
1042 | "\n",
1043 | "\n",
1045 | " \n",
1046 | " \n",
1047 | " \n",
1048 | " \n",
1049 | " \n",
1050 | "
\n",
1051 | "\n",
1052 | "\n",
1083 | "\n",
1084 | " \n",
1091 | "\n",
1092 | " \n",
1103 | " \n",
1140 | "\n",
1141 | " \n",
1165 | "
\n",
1166 | "
\n"
1167 | ]
1168 | },
1169 | "metadata": {},
1170 | "execution_count": 31
1171 | }
1172 | ]
1173 | },
1174 | {
1175 | "cell_type": "code",
1176 | "metadata": {
1177 | "id": "4InaOSIUsfWP",
1178 | "colab": {
1179 | "base_uri": "https://localhost:8080/"
1180 | },
1181 | "outputId": "2226e15f-0b21-4665-ff2c-8dcd9d919a65"
1182 | },
1183 | "source": [
1184 | "# class balance: how many rows carry each value of the target column\n",
1185 | "heart_data['target'].value_counts()"
1186 | ],
1187 | "execution_count": null,
1188 | "outputs": [
1189 | {
1190 | "output_type": "execute_result",
1191 | "data": {
1192 | "text/plain": [
1193 | "1 165\n",
1194 | "0 138\n",
1195 | "Name: target, dtype: int64"
1196 | ]
1197 | },
1198 | "metadata": {},
1199 | "execution_count": 32
1200 | }
1201 | ]
1202 | },
1203 | {
1204 | "cell_type": "markdown",
1205 | "metadata": {
1206 | "id": "aSOBu4qDtJy5"
1207 | },
1208 | "source": [
1209 | "1 --> Defective Heart\n",
1210 | "\n",
1211 | "0 --> Healthy Heart"
1212 | ]
1213 | },
1214 | {
1215 | "cell_type": "markdown",
1216 | "metadata": {
1217 | "id": "tW8i4igjtPRC"
1218 | },
1219 | "source": [
1220 | "Splitting the Features and Target"
1221 | ]
1222 | },
1223 | {
1224 | "cell_type": "code",
1225 | "metadata": {
1226 | "id": "Q6yfbswrs7m3"
1227 | },
1228 | "source": [
1229 | "X = heart_data.drop(columns='target')  # features: every column except the label\n",
1230 | "Y = heart_data['target']  # label: 1 = defective heart, 0 = healthy heart"
1231 | ],
1232 | "execution_count": null,
1233 | "outputs": []
1234 | },
1235 | {
1236 | "cell_type": "code",
1237 | "metadata": {
1238 | "id": "XJoCp4ZKtpZy",
1239 | "colab": {
1240 | "base_uri": "https://localhost:8080/"
1241 | },
1242 | "outputId": "10cf1a29-e527-486e-a31a-cbef50501491"
1243 | },
1244 | "source": [
1245 | "print(X)  # feature matrix: all columns except target"
1246 | ],
1247 | "execution_count": null,
1248 | "outputs": [
1249 | {
1250 | "output_type": "stream",
1251 | "name": "stdout",
1252 | "text": [
1253 | " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n",
1254 | "0 63 1 3 145 233 1 0 150 0 2.3 \n",
1255 | "1 37 1 2 130 250 0 1 187 0 3.5 \n",
1256 | "2 41 0 1 130 204 0 0 172 0 1.4 \n",
1257 | "3 56 1 1 120 236 0 1 178 0 0.8 \n",
1258 | "4 57 0 0 120 354 0 1 163 1 0.6 \n",
1259 | ".. ... ... .. ... ... ... ... ... ... ... \n",
1260 | "298 57 0 0 140 241 0 1 123 1 0.2 \n",
1261 | "299 45 1 3 110 264 0 1 132 0 1.2 \n",
1262 | "300 68 1 0 144 193 1 1 141 0 3.4 \n",
1263 | "301 57 1 0 130 131 0 1 115 1 1.2 \n",
1264 | "302 57 0 1 130 236 0 0 174 0 0.0 \n",
1265 | "\n",
1266 | " slope ca thal \n",
1267 | "0 0 0 1 \n",
1268 | "1 0 0 2 \n",
1269 | "2 2 0 2 \n",
1270 | "3 2 0 2 \n",
1271 | "4 2 0 2 \n",
1272 | ".. ... .. ... \n",
1273 | "298 1 0 3 \n",
1274 | "299 1 0 3 \n",
1275 | "300 1 2 3 \n",
1276 | "301 1 1 3 \n",
1277 | "302 1 1 2 \n",
1278 | "\n",
1279 | "[303 rows x 13 columns]\n"
1280 | ]
1281 | }
1282 | ]
1283 | },
1284 | {
1285 | "cell_type": "code",
1286 | "metadata": {
1287 | "id": "nukuj-YItq1w",
1288 | "colab": {
1289 | "base_uri": "https://localhost:8080/"
1290 | },
1291 | "outputId": "2445f02b-a639-4485-cb0b-a78a3622c6cf"
1292 | },
1293 | "source": [
1294 | "print(Y)  # target labels, one per row of X"
1295 | ],
1296 | "execution_count": null,
1297 | "outputs": [
1298 | {
1299 | "output_type": "stream",
1300 | "name": "stdout",
1301 | "text": [
1302 | "0 1\n",
1303 | "1 1\n",
1304 | "2 1\n",
1305 | "3 1\n",
1306 | "4 1\n",
1307 | " ..\n",
1308 | "298 0\n",
1309 | "299 0\n",
1310 | "300 0\n",
1311 | "301 0\n",
1312 | "302 0\n",
1313 | "Name: target, Length: 303, dtype: int64\n"
1314 | ]
1315 | }
1316 | ]
1317 | },
1318 | {
1319 | "cell_type": "markdown",
1320 | "metadata": {
1321 | "id": "_EcjSE3Et18n"
1322 | },
1323 | "source": [
1324 | "Splitting the Data into Training Data & Test Data"
1325 | ]
1326 | },
1327 | {
1328 | "cell_type": "code",
1329 | "metadata": {
1330 | "id": "a-UUfRUxtuga"
1331 | },
1332 | "source": [
1333 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)  # hold out 20% for testing; stratify=Y keeps the class ratio in both parts; fixed seed for repeatability"
1334 | ],
1335 | "execution_count": null,
1336 | "outputs": []
1337 | },
1338 | {
1339 | "cell_type": "code",
1340 | "metadata": {
1341 | "id": "x7PrjC6zuf6X",
1342 | "colab": {
1343 | "base_uri": "https://localhost:8080/"
1344 | },
1345 | "outputId": "513e921e-42ce-4f86-9100-8220174b5751"
1346 | },
1347 | "source": [
1348 | "print(X.shape, X_train.shape, X_test.shape)  # shapes of the full, training and test feature sets"
1349 | ],
1350 | "execution_count": null,
1351 | "outputs": [
1352 | {
1353 | "output_type": "stream",
1354 | "name": "stdout",
1355 | "text": [
1356 | "(303, 13) (242, 13) (61, 13)\n"
1357 | ]
1358 | }
1359 | ]
1360 | },
1361 | {
1362 | "cell_type": "markdown",
1363 | "metadata": {
1364 | "id": "beSkZmpVuvn9"
1365 | },
1366 | "source": [
1367 | "Model Training"
1368 | ]
1369 | },
1370 | {
1371 | "cell_type": "markdown",
1372 | "metadata": {
1373 | "id": "gi2NOWZjuxzw"
1374 | },
1375 | "source": [
1376 | "Logistic Regression"
1377 | ]
1378 | },
1379 | {
1380 | "cell_type": "code",
1381 | "metadata": {
1382 | "id": "4-Md74FYuqNL"
1383 | },
1384 | "source": [
1385 | "model = LogisticRegression(max_iter=1000)  # the default max_iter=100 stopped lbfgs before convergence on these unscaled features"
1386 | ],
1387 | "execution_count": null,
1388 | "outputs": []
1389 | },
1390 | {
1391 | "cell_type": "code",
1392 | "metadata": {
1393 | "id": "kCdHYxGUu7XD",
1394 | "colab": {
1395 | "base_uri": "https://localhost:8080/",
1396 | "height": 213
1397 | },
1398 | "outputId": "89c4c41a-5dc5-4474-9a9a-459a93ba3263"
1399 | },
1400 | "source": [
1401 | "# fit the LogisticRegression model on the training split only\n",
1402 | "model.fit(X_train, Y_train)"
1403 | ],
1404 | "execution_count": null,
1405 | "outputs": [
1406 | {
1407 | "output_type": "stream",
1408 | "name": "stderr",
1409 | "text": [
1410 | "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
1411 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
1412 | "\n",
1413 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
1414 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
1415 | "Please also refer to the documentation for alternative solver options:\n",
1416 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
1417 | " n_iter_i = _check_optimize_result(\n"
1418 | ]
1419 | },
1420 | {
1421 | "output_type": "execute_result",
1422 | "data": {
1423 | "text/plain": [
1424 | "LogisticRegression()"
1425 | ],
1426 | "text/html": [
1427 | "LogisticRegression() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
1428 | ]
1429 | },
1430 | "metadata": {},
1431 | "execution_count": 39
1432 | }
1433 | ]
1434 | },
1435 | {
1436 | "cell_type": "markdown",
1437 | "metadata": {
1438 | "id": "ZYIw8Gi9vXfU"
1439 | },
1440 | "source": [
1441 | "Model Evaluation"
1442 | ]
1443 | },
1444 | {
1445 | "cell_type": "markdown",
1446 | "metadata": {
1447 | "id": "wmxAekfZvZa9"
1448 | },
1449 | "source": [
1450 | "Accuracy Score"
1451 | ]
1452 | },
1453 | {
1454 | "cell_type": "code",
1455 | "metadata": {
1456 | "id": "g19JaUTMvPKy"
1457 | },
1458 | "source": [
1459 | "# accuracy on the training data; accuracy_score expects (y_true, y_pred) in that order\n",
1460 | "X_train_prediction = model.predict(X_train)\n",
1461 | "training_data_accuracy = accuracy_score(Y_train, X_train_prediction)"
1462 | ],
1463 | "execution_count": null,
1464 | "outputs": []
1465 | },
1466 | {
1467 | "cell_type": "code",
1468 | "metadata": {
1469 | "id": "uQBZvBh8v7R_",
1470 | "colab": {
1471 | "base_uri": "https://localhost:8080/"
1472 | },
1473 | "outputId": "09b4704b-7767-4baf-9eba-4c786d4065bf"
1474 | },
1475 | "source": [
1476 | "print('Accuracy on Training data : ', training_data_accuracy)  # fraction of training rows predicted correctly"
1477 | ],
1478 | "execution_count": null,
1479 | "outputs": [
1480 | {
1481 | "output_type": "stream",
1482 | "name": "stdout",
1483 | "text": [
1484 | "Accuracy on Training data : 0.8512396694214877\n"
1485 | ]
1486 | }
1487 | ]
1488 | },
1489 | {
1490 | "cell_type": "code",
1491 | "metadata": {
1492 | "id": "mDONDJdlwBIO"
1493 | },
1494 | "source": [
1495 | "# accuracy on the held-out test data; accuracy_score expects (y_true, y_pred) in that order\n",
1496 | "X_test_prediction = model.predict(X_test)\n",
1497 | "test_data_accuracy = accuracy_score(Y_test, X_test_prediction)"
1498 | ],
1499 | "execution_count": null,
1500 | "outputs": []
1501 | },
1502 | {
1503 | "cell_type": "code",
1504 | "metadata": {
1505 | "id": "_MBS-OqdwYpf",
1506 | "colab": {
1507 | "base_uri": "https://localhost:8080/"
1508 | },
1509 | "outputId": "3ba127e8-e117-4496-a73a-90993a586577"
1510 | },
1511 | "source": [
1512 | "print('Accuracy on Test data : ', test_data_accuracy)  # fraction of held-out test rows predicted correctly"
1513 | ],
1514 | "execution_count": null,
1515 | "outputs": [
1516 | {
1517 | "output_type": "stream",
1518 | "name": "stdout",
1519 | "text": [
1520 | "Accuracy on Test data : 0.819672131147541\n"
1521 | ]
1522 | }
1523 | ]
1524 | },
1525 | {
1526 | "cell_type": "markdown",
1527 | "metadata": {
1528 | "id": "jIruVh3Qwq0e"
1529 | },
1530 | "source": [
1531 | "Building a Predictive System"
1532 | ]
1533 | },
1534 | {
1535 | "cell_type": "code",
1536 | "metadata": {
1537 | "id": "9ercruC9wb4C",
1538 | "colab": {
1539 | "base_uri": "https://localhost:8080/"
1540 | },
1541 | "outputId": "75ca6857-fd9a-43e7-96c9-d7f5d8e1819b"
1542 | },
1543 | "source": [
1544 | "# one patient record; the 13 values follow the column order of X (age ... thal)\n",
1545 | "input_data = (44,1,1,120,263,0,1,173,0,0,2,0,3)\n",
1546 | "\n",
1547 | "# wrap the record in a one-row DataFrame that reuses the training column names,\n",
1548 | "# so predict() receives the same feature names the model was fitted with\n",
1549 | "input_data_frame = pd.DataFrame([input_data], columns=X.columns)\n",
1550 | "\n",
1551 | "prediction = model.predict(input_data_frame)\n",
1552 | "print(prediction)\n",
1553 | "\n",
1554 | "# label 0 is the healthy class, label 1 the heart-disease class\n",
1555 | "if prediction[0] == 0:\n",
1556 | "  print('The Person does not have a Heart Disease')\n",
1557 | "else:\n",
1558 | "  print('The Person has Heart Disease')"
1559 | ],
1560 | "execution_count": null,
1561 | "outputs": [
1562 | {
1563 | "output_type": "stream",
1564 | "name": "stdout",
1565 | "text": [
1566 | "[1]\n",
1567 | "The Person has Heart Disease\n"
1568 | ]
1569 | },
1570 | {
1571 | "output_type": "stream",
1572 | "name": "stderr",
1573 | "text": [
1574 | "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
1575 | " warnings.warn(\n"
1576 | ]
1577 | }
1578 | ]
1579 | }
1580 | ]
1581 | }
--------------------------------------------------------------------------------