├── README.md
└── Water_Potability_Prediction.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Water-Potability-Prediction
--------------------------------------------------------------------------------
/Water_Potability_Prediction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "water_potability.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "authorship_tag": "ABX9TyN4sXpUT6eXTo/ND/2nazBB",
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "language_info": {
17 | "name": "python"
18 | }
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
28 | "<img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "metadata": {
35 | "id": "rW1rmVrzSFjM"
36 | },
37 | "outputs": [],
38 | "source": [
39 | "import numpy as np\n",
40 | "import pandas as pd\n",
41 | "import matplotlib.pyplot as plt\n",
42 | "import seaborn as sns"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "source": [
48 | "df = pd.read_csv(\"water_potability 2.csv\")  # load the dataset from a CSV file in the working directory"
49 | ],
50 | "metadata": {
51 | "id": "4_tU8ewJSVCA"
52 | },
53 | "execution_count": 2,
54 | "outputs": []
55 | },
56 | {
57 | "cell_type": "code",
58 | "source": [
59 | "df.head()"
60 | ],
61 | "metadata": {
62 | "colab": {
63 | "base_uri": "https://localhost:8080/",
64 | "height": 206
65 | },
66 | "id": "i_VUnCLEScTw",
67 | "outputId": "bdfd2b71-cbcc-40bc-8108-b6ddfbc1d7f6"
68 | },
69 | "execution_count": 3,
70 | "outputs": [
71 | {
72 | "output_type": "execute_result",
73 | "data": {
74 | "text/plain": [
75 | " ph Hardness Solids Chloramines Sulfate Conductivity \\\n",
76 | "0 0.000000 214.846144 49456.58711 7.897539 NaN 583.448849 \n",
77 | "1 0.227499 152.530111 39028.59934 3.462492 283.693782 443.029232 \n",
78 | "2 0.975578 221.204114 31145.11074 7.615583 333.677843 439.112765 \n",
79 | "3 0.989912 133.216942 16922.85390 9.293289 444.375731 322.291191 \n",
80 | "4 1.431782 228.130383 12937.24689 6.214773 319.734136 495.379883 \n",
81 | "\n",
82 | " Organic_carbon Trihalomethanes Turbidity Potability \n",
83 | "0 7.702328 77.712891 4.928840 0 \n",
84 | "1 13.201943 62.322711 3.545741 1 \n",
85 | "2 21.145954 NaN 2.533996 0 \n",
86 | "3 10.430076 43.578466 5.160604 1 \n",
87 | "4 12.033344 61.141119 4.948443 0 "
88 | ],
89 | "text/html": [
90 | "\n",
91 | "
\n",
92 | "
\n",
93 | "
\n",
94 | "\n",
107 | "
\n",
108 | " \n",
109 | " \n",
110 | " | \n",
111 | " ph | \n",
112 | " Hardness | \n",
113 | " Solids | \n",
114 | " Chloramines | \n",
115 | " Sulfate | \n",
116 | " Conductivity | \n",
117 | " Organic_carbon | \n",
118 | " Trihalomethanes | \n",
119 | " Turbidity | \n",
120 | " Potability | \n",
121 | "
\n",
122 | " \n",
123 | " \n",
124 | " \n",
125 | " | 0 | \n",
126 | " 0.000000 | \n",
127 | " 214.846144 | \n",
128 | " 49456.58711 | \n",
129 | " 7.897539 | \n",
130 | " NaN | \n",
131 | " 583.448849 | \n",
132 | " 7.702328 | \n",
133 | " 77.712891 | \n",
134 | " 4.928840 | \n",
135 | " 0 | \n",
136 | "
\n",
137 | " \n",
138 | " | 1 | \n",
139 | " 0.227499 | \n",
140 | " 152.530111 | \n",
141 | " 39028.59934 | \n",
142 | " 3.462492 | \n",
143 | " 283.693782 | \n",
144 | " 443.029232 | \n",
145 | " 13.201943 | \n",
146 | " 62.322711 | \n",
147 | " 3.545741 | \n",
148 | " 1 | \n",
149 | "
\n",
150 | " \n",
151 | " | 2 | \n",
152 | " 0.975578 | \n",
153 | " 221.204114 | \n",
154 | " 31145.11074 | \n",
155 | " 7.615583 | \n",
156 | " 333.677843 | \n",
157 | " 439.112765 | \n",
158 | " 21.145954 | \n",
159 | " NaN | \n",
160 | " 2.533996 | \n",
161 | " 0 | \n",
162 | "
\n",
163 | " \n",
164 | " | 3 | \n",
165 | " 0.989912 | \n",
166 | " 133.216942 | \n",
167 | " 16922.85390 | \n",
168 | " 9.293289 | \n",
169 | " 444.375731 | \n",
170 | " 322.291191 | \n",
171 | " 10.430076 | \n",
172 | " 43.578466 | \n",
173 | " 5.160604 | \n",
174 | " 1 | \n",
175 | "
\n",
176 | " \n",
177 | " | 4 | \n",
178 | " 1.431782 | \n",
179 | " 228.130383 | \n",
180 | " 12937.24689 | \n",
181 | " 6.214773 | \n",
182 | " 319.734136 | \n",
183 | " 495.379883 | \n",
184 | " 12.033344 | \n",
185 | " 61.141119 | \n",
186 | " 4.948443 | \n",
187 | " 0 | \n",
188 | "
\n",
189 | " \n",
190 | "
\n",
191 | "
\n",
192 | "
\n",
202 | " \n",
203 | " \n",
240 | "\n",
241 | " \n",
265 | "
\n",
266 | "
\n",
267 | " "
268 | ]
269 | },
270 | "metadata": {},
271 | "execution_count": 3
272 | }
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "source": [
278 | "df.shape"
279 | ],
280 | "metadata": {
281 | "colab": {
282 | "base_uri": "https://localhost:8080/"
283 | },
284 | "id": "Adbgbp9OSdW7",
285 | "outputId": "6b5d8461-be1e-4208-d758-628ecd02f744"
286 | },
287 | "execution_count": 4,
288 | "outputs": [
289 | {
290 | "output_type": "execute_result",
291 | "data": {
292 | "text/plain": [
293 | "(3276, 10)"
294 | ]
295 | },
296 | "metadata": {},
297 | "execution_count": 4
298 | }
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "source": [
304 | "df.describe()"
305 | ],
306 | "metadata": {
307 | "colab": {
308 | "base_uri": "https://localhost:8080/",
309 | "height": 300
310 | },
311 | "id": "iCx3qrokSeNK",
312 | "outputId": "2bbb8903-52e8-4bbf-8810-1854f6965b48"
313 | },
314 | "execution_count": 5,
315 | "outputs": [
316 | {
317 | "output_type": "execute_result",
318 | "data": {
319 | "text/plain": [
320 | " ph Hardness Solids Chloramines Sulfate \\\n",
321 | "count 2785.000000 3276.000000 3276.000000 3276.000000 2495.000000 \n",
322 | "mean 7.080795 196.369496 22014.092526 7.122277 333.775777 \n",
323 | "std 1.594320 32.879761 8768.570828 1.583085 41.416840 \n",
324 | "min 0.000000 47.432000 320.942611 0.352000 129.000000 \n",
325 | "25% 6.093092 176.850538 15666.690300 6.127421 307.699498 \n",
326 | "50% 7.036752 196.967627 20927.833605 7.130299 333.073546 \n",
327 | "75% 8.062066 216.667456 27332.762125 8.114887 359.950170 \n",
328 | "max 14.000000 323.124000 61227.196010 13.127000 481.030642 \n",
329 | "\n",
330 | " Conductivity Organic_carbon Trihalomethanes Turbidity Potability \n",
331 | "count 3276.000000 3276.000000 3114.000000 3276.000000 3276.000000 \n",
332 | "mean 426.205111 14.284970 66.396293 3.966786 0.390110 \n",
333 | "std 80.824064 3.308162 16.175008 0.780382 0.487849 \n",
334 | "min 181.483754 2.200000 0.738000 1.450000 0.000000 \n",
335 | "25% 365.734414 12.065801 55.844536 3.439711 0.000000 \n",
336 | "50% 421.884968 14.218338 66.622485 3.955028 0.000000 \n",
337 | "75% 481.792305 16.557652 77.337473 4.500320 1.000000 \n",
338 | "max 753.342620 28.300000 124.000000 6.739000 1.000000 "
339 | ],
340 | "text/html": [
341 | "\n",
342 | " \n",
343 | "
\n",
344 | "
\n",
345 | "\n",
358 | "
\n",
359 | " \n",
360 | " \n",
361 | " | \n",
362 | " ph | \n",
363 | " Hardness | \n",
364 | " Solids | \n",
365 | " Chloramines | \n",
366 | " Sulfate | \n",
367 | " Conductivity | \n",
368 | " Organic_carbon | \n",
369 | " Trihalomethanes | \n",
370 | " Turbidity | \n",
371 | " Potability | \n",
372 | "
\n",
373 | " \n",
374 | " \n",
375 | " \n",
376 | " | count | \n",
377 | " 2785.000000 | \n",
378 | " 3276.000000 | \n",
379 | " 3276.000000 | \n",
380 | " 3276.000000 | \n",
381 | " 2495.000000 | \n",
382 | " 3276.000000 | \n",
383 | " 3276.000000 | \n",
384 | " 3114.000000 | \n",
385 | " 3276.000000 | \n",
386 | " 3276.000000 | \n",
387 | "
\n",
388 | " \n",
389 | " | mean | \n",
390 | " 7.080795 | \n",
391 | " 196.369496 | \n",
392 | " 22014.092526 | \n",
393 | " 7.122277 | \n",
394 | " 333.775777 | \n",
395 | " 426.205111 | \n",
396 | " 14.284970 | \n",
397 | " 66.396293 | \n",
398 | " 3.966786 | \n",
399 | " 0.390110 | \n",
400 | "
\n",
401 | " \n",
402 | " | std | \n",
403 | " 1.594320 | \n",
404 | " 32.879761 | \n",
405 | " 8768.570828 | \n",
406 | " 1.583085 | \n",
407 | " 41.416840 | \n",
408 | " 80.824064 | \n",
409 | " 3.308162 | \n",
410 | " 16.175008 | \n",
411 | " 0.780382 | \n",
412 | " 0.487849 | \n",
413 | "
\n",
414 | " \n",
415 | " | min | \n",
416 | " 0.000000 | \n",
417 | " 47.432000 | \n",
418 | " 320.942611 | \n",
419 | " 0.352000 | \n",
420 | " 129.000000 | \n",
421 | " 181.483754 | \n",
422 | " 2.200000 | \n",
423 | " 0.738000 | \n",
424 | " 1.450000 | \n",
425 | " 0.000000 | \n",
426 | "
\n",
427 | " \n",
428 | " | 25% | \n",
429 | " 6.093092 | \n",
430 | " 176.850538 | \n",
431 | " 15666.690300 | \n",
432 | " 6.127421 | \n",
433 | " 307.699498 | \n",
434 | " 365.734414 | \n",
435 | " 12.065801 | \n",
436 | " 55.844536 | \n",
437 | " 3.439711 | \n",
438 | " 0.000000 | \n",
439 | "
\n",
440 | " \n",
441 | " | 50% | \n",
442 | " 7.036752 | \n",
443 | " 196.967627 | \n",
444 | " 20927.833605 | \n",
445 | " 7.130299 | \n",
446 | " 333.073546 | \n",
447 | " 421.884968 | \n",
448 | " 14.218338 | \n",
449 | " 66.622485 | \n",
450 | " 3.955028 | \n",
451 | " 0.000000 | \n",
452 | "
\n",
453 | " \n",
454 | " | 75% | \n",
455 | " 8.062066 | \n",
456 | " 216.667456 | \n",
457 | " 27332.762125 | \n",
458 | " 8.114887 | \n",
459 | " 359.950170 | \n",
460 | " 481.792305 | \n",
461 | " 16.557652 | \n",
462 | " 77.337473 | \n",
463 | " 4.500320 | \n",
464 | " 1.000000 | \n",
465 | "
\n",
466 | " \n",
467 | " | max | \n",
468 | " 14.000000 | \n",
469 | " 323.124000 | \n",
470 | " 61227.196010 | \n",
471 | " 13.127000 | \n",
472 | " 481.030642 | \n",
473 | " 753.342620 | \n",
474 | " 28.300000 | \n",
475 | " 124.000000 | \n",
476 | " 6.739000 | \n",
477 | " 1.000000 | \n",
478 | "
\n",
479 | " \n",
480 | "
\n",
481 | "
\n",
482 | "
\n",
492 | " \n",
493 | " \n",
530 | "\n",
531 | " \n",
555 | "
\n",
556 | "
\n",
557 | " "
558 | ]
559 | },
560 | "metadata": {},
561 | "execution_count": 5
562 | }
563 | ]
564 | },
565 | {
566 | "cell_type": "code",
567 | "source": [
568 | "df.info()"
569 | ],
570 | "metadata": {
571 | "colab": {
572 | "base_uri": "https://localhost:8080/"
573 | },
574 | "id": "TeWJO-SJSfU-",
575 | "outputId": "83ed9f93-3b18-4e5c-9d0a-2947694e39a5"
576 | },
577 | "execution_count": 6,
578 | "outputs": [
579 | {
580 | "output_type": "stream",
581 | "name": "stdout",
582 | "text": [
583 | "\n",
584 | "RangeIndex: 3276 entries, 0 to 3275\n",
585 | "Data columns (total 10 columns):\n",
586 | " # Column Non-Null Count Dtype \n",
587 | "--- ------ -------------- ----- \n",
588 | " 0 ph 2785 non-null float64\n",
589 | " 1 Hardness 3276 non-null float64\n",
590 | " 2 Solids 3276 non-null float64\n",
591 | " 3 Chloramines 3276 non-null float64\n",
592 | " 4 Sulfate 2495 non-null float64\n",
593 | " 5 Conductivity 3276 non-null float64\n",
594 | " 6 Organic_carbon 3276 non-null float64\n",
595 | " 7 Trihalomethanes 3114 non-null float64\n",
596 | " 8 Turbidity 3276 non-null float64\n",
597 | " 9 Potability 3276 non-null int64 \n",
598 | "dtypes: float64(9), int64(1)\n",
599 | "memory usage: 256.1 KB\n"
600 | ]
601 | }
602 | ]
603 | },
604 | {
605 | "cell_type": "code",
606 | "source": [
607 | "df.isnull().sum()"
608 | ],
609 | "metadata": {
610 | "colab": {
611 | "base_uri": "https://localhost:8080/"
612 | },
613 | "id": "M3q6HIlzSgX0",
614 | "outputId": "76ad9538-8ef1-432f-80d2-1a09687fb182"
615 | },
616 | "execution_count": 7,
617 | "outputs": [
618 | {
619 | "output_type": "execute_result",
620 | "data": {
621 | "text/plain": [
622 | "ph 491\n",
623 | "Hardness 0\n",
624 | "Solids 0\n",
625 | "Chloramines 0\n",
626 | "Sulfate 781\n",
627 | "Conductivity 0\n",
628 | "Organic_carbon 0\n",
629 | "Trihalomethanes 162\n",
630 | "Turbidity 0\n",
631 | "Potability 0\n",
632 | "dtype: int64"
633 | ]
634 | },
635 | "metadata": {},
636 | "execution_count": 7
637 | }
638 | ]
639 | },
640 | {
641 | "cell_type": "code",
642 | "source": [
643 | "df = df.dropna(axis=0, how=\"any\")  # keep only fully populated rows; NOTE(review): Sulfate alone is null in 781 of 3276 rows, so this discards a large share of the data - consider imputation"
644 | ],
645 | "metadata": {
646 | "id": "mlPSxNacSjAN"
647 | },
648 | "execution_count": 8,
649 | "outputs": []
650 | },
651 | {
652 | "cell_type": "code",
653 | "source": [
654 | "from sklearn.model_selection import train_test_split\n",
655 | "y = df[\"Potability\"]  # target: the Potability column\n",
656 | "X = df.drop(columns=\"Potability\")  # features: every remaining column\n",
657 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=21, test_size=0.2)  # hold out 20% of the rows for testing (fixed seed)"
658 | ],
659 | "metadata": {
660 | "id": "_rpwzpMEwk_w"
661 | },
662 | "execution_count": 54,
663 | "outputs": []
664 | },
665 | {
666 | "cell_type": "code",
667 | "source": [
668 | "from sklearn.preprocessing import StandardScaler\n",
669 | "# Standardize the features: the scaler is fitted on the training split only,\n",
670 | "# and that same fitted transformation is then applied to both splits.\n",
671 | "scaler = StandardScaler().fit(X_train)\n",
672 | "X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)"
673 | ],
674 | "metadata": {
675 | "id": "S4RT4PJzweLe"
676 | },
677 | "execution_count": 55,
678 | "outputs": []
679 | },
680 | {
681 | "cell_type": "code",
682 | "source": [
683 | "from sklearn.ensemble import GradientBoostingClassifier\n",
684 | "from sklearn.metrics import precision_score\n",
685 | "\n",
686 | "# Gradient boosting: 500 estimators, learning rate 0.04, fixed random_state; scored by precision on the test split.\n",
687 | "gb_model = GradientBoostingClassifier(learning_rate=0.04, n_estimators=500, random_state=1).fit(X_train, y_train)\n",
688 | "gb_pred = gb_model.predict(X_test)\n",
689 | "print(\"Precision: \", precision_score(y_test, gb_pred))"
690 | ],
691 | "metadata": {
692 | "colab": {
693 | "base_uri": "https://localhost:8080/"
694 | },
695 | "id": "okuozGexSsiK",
696 | "outputId": "5e9ab7a1-16fa-4bd5-a162-6945ae0a75cd"
697 | },
698 | "execution_count": 56,
699 | "outputs": [
700 | {
701 | "output_type": "stream",
702 | "name": "stdout",
703 | "text": [
704 | "Precision: 0.6761904761904762\n"
705 | ]
706 | }
707 | ]
708 | },
709 | {
710 | "cell_type": "code",
711 | "source": [
712 | "from sklearn.ensemble import RandomForestClassifier\n",
713 | "from sklearn.metrics import precision_score  # imported here as well, so this cell does not depend on the gradient-boosting cell having run\n",
714 | "rf_model = RandomForestClassifier(random_state=1).fit(X_train, y_train)  # random forest with default hyperparameters and a fixed seed\n",
715 | "rf_pred = rf_model.predict(X_test)\n",
716 | "print(\"Precision: \", precision_score(y_test, rf_pred))  # precision of the forest on the held-out test split"
717 | ],
718 | "metadata": {
719 | "id": "nbB0XTA_UZ0c",
720 | "colab": {
721 | "base_uri": "https://localhost:8080/"
722 | },
723 | "outputId": "17341e1d-c011-47b1-96c1-9964077afb98"
724 | },
725 | "execution_count": 57,
726 | "outputs": [
727 | {
728 | "output_type": "stream",
729 | "name": "stdout",
730 | "text": [
731 | "Precision: 0.7628865979381443\n"
732 | ]
733 | }
734 | ]
735 | }
736 | ]
737 | }
--------------------------------------------------------------------------------