├── README.md └── Water_Potability_Prediction.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Water-Potability-Prediction -------------------------------------------------------------------------------- /Water_Potability_Prediction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "water_potability.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "authorship_tag": "ABX9TyN4sXpUT6eXTo/ND/2nazBB", 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | }, 16 | "language_info": { 17 | "name": "python" 18 | } 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": { 35 | "id": "rW1rmVrzSFjM" 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import numpy as np\n", 40 | "import pandas as pd\n", 41 | "import matplotlib.pyplot as plt\n", 42 | "import seaborn as sns" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "source": [ 48 | "df=pd.read_csv(\"water_potability 2.csv\")" 49 | ], 50 | "metadata": { 51 | "id": "4_tU8ewJSVCA" 52 | }, 53 | "execution_count": 2, 54 | "outputs": [] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "source": [ 59 | "df.head()" 60 | ], 61 | "metadata": { 62 | "colab": { 63 | "base_uri": "https://localhost:8080/", 64 | "height": 206 65 | }, 66 | "id": "i_VUnCLEScTw", 67 | "outputId": "bdfd2b71-cbcc-40bc-8108-b6ddfbc1d7f6" 68 | }, 69 | "execution_count": 3, 70 | "outputs": [ 71 | { 72 | "output_type": "execute_result", 73 | "data": { 74 | "text/plain": [ 75 | " ph Hardness Solids Chloramines Sulfate Conductivity \\\n", 76 | "0 0.000000 214.846144 49456.58711 7.897539 NaN 
583.448849 \n", 77 | "1 0.227499 152.530111 39028.59934 3.462492 283.693782 443.029232 \n", 78 | "2 0.975578 221.204114 31145.11074 7.615583 333.677843 439.112765 \n", 79 | "3 0.989912 133.216942 16922.85390 9.293289 444.375731 322.291191 \n", 80 | "4 1.431782 228.130383 12937.24689 6.214773 319.734136 495.379883 \n", 81 | "\n", 82 | " Organic_carbon Trihalomethanes Turbidity Potability \n", 83 | "0 7.702328 77.712891 4.928840 0 \n", 84 | "1 13.201943 62.322711 3.545741 1 \n", 85 | "2 21.145954 NaN 2.533996 0 \n", 86 | "3 10.430076 43.578466 5.160604 1 \n", 87 | "4 12.033344 61.141119 4.948443 0 " 88 | ], 89 | "text/html": [ 90 | "\n", 91 | "
\n", 92 | "
\n", 93 | "
\n", 94 | "\n", 107 | "\n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | "
phHardnessSolidsChloraminesSulfateConductivityOrganic_carbonTrihalomethanesTurbidityPotability
00.000000214.84614449456.587117.897539NaN583.4488497.70232877.7128914.9288400
10.227499152.53011139028.599343.462492283.693782443.02923213.20194362.3227113.5457411
20.975578221.20411431145.110747.615583333.677843439.11276521.145954NaN2.5339960
30.989912133.21694216922.853909.293289444.375731322.29119110.43007643.5784665.1606041
41.431782228.13038312937.246896.214773319.734136495.37988312.03334461.1411194.9484430
\n", 191 | "
\n", 192 | " \n", 202 | " \n", 203 | " \n", 240 | "\n", 241 | " \n", 265 | "
\n", 266 | "
\n", 267 | " " 268 | ] 269 | }, 270 | "metadata": {}, 271 | "execution_count": 3 272 | } 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "source": [ 278 | "df.shape" 279 | ], 280 | "metadata": { 281 | "colab": { 282 | "base_uri": "https://localhost:8080/" 283 | }, 284 | "id": "Adbgbp9OSdW7", 285 | "outputId": "6b5d8461-be1e-4208-d758-628ecd02f744" 286 | }, 287 | "execution_count": 4, 288 | "outputs": [ 289 | { 290 | "output_type": "execute_result", 291 | "data": { 292 | "text/plain": [ 293 | "(3276, 10)" 294 | ] 295 | }, 296 | "metadata": {}, 297 | "execution_count": 4 298 | } 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "source": [ 304 | "df.describe()" 305 | ], 306 | "metadata": { 307 | "colab": { 308 | "base_uri": "https://localhost:8080/", 309 | "height": 300 310 | }, 311 | "id": "iCx3qrokSeNK", 312 | "outputId": "2bbb8903-52e8-4bbf-8810-1854f6965b48" 313 | }, 314 | "execution_count": 5, 315 | "outputs": [ 316 | { 317 | "output_type": "execute_result", 318 | "data": { 319 | "text/plain": [ 320 | " ph Hardness Solids Chloramines Sulfate \\\n", 321 | "count 2785.000000 3276.000000 3276.000000 3276.000000 2495.000000 \n", 322 | "mean 7.080795 196.369496 22014.092526 7.122277 333.775777 \n", 323 | "std 1.594320 32.879761 8768.570828 1.583085 41.416840 \n", 324 | "min 0.000000 47.432000 320.942611 0.352000 129.000000 \n", 325 | "25% 6.093092 176.850538 15666.690300 6.127421 307.699498 \n", 326 | "50% 7.036752 196.967627 20927.833605 7.130299 333.073546 \n", 327 | "75% 8.062066 216.667456 27332.762125 8.114887 359.950170 \n", 328 | "max 14.000000 323.124000 61227.196010 13.127000 481.030642 \n", 329 | "\n", 330 | " Conductivity Organic_carbon Trihalomethanes Turbidity Potability \n", 331 | "count 3276.000000 3276.000000 3114.000000 3276.000000 3276.000000 \n", 332 | "mean 426.205111 14.284970 66.396293 3.966786 0.390110 \n", 333 | "std 80.824064 3.308162 16.175008 0.780382 0.487849 \n", 334 | "min 181.483754 2.200000 0.738000 1.450000 0.000000 
\n", 335 | "25% 365.734414 12.065801 55.844536 3.439711 0.000000 \n", 336 | "50% 421.884968 14.218338 66.622485 3.955028 0.000000 \n", 337 | "75% 481.792305 16.557652 77.337473 4.500320 1.000000 \n", 338 | "max 753.342620 28.300000 124.000000 6.739000 1.000000 " 339 | ], 340 | "text/html": [ 341 | "\n", 342 | "
\n", 343 | "
\n", 344 | "
\n", 345 | "\n", 358 | "\n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | "
phHardnessSolidsChloraminesSulfateConductivityOrganic_carbonTrihalomethanesTurbidityPotability
count2785.0000003276.0000003276.0000003276.0000002495.0000003276.0000003276.0000003114.0000003276.0000003276.000000
mean7.080795196.36949622014.0925267.122277333.775777426.20511114.28497066.3962933.9667860.390110
std1.59432032.8797618768.5708281.58308541.41684080.8240643.30816216.1750080.7803820.487849
min0.00000047.432000320.9426110.352000129.000000181.4837542.2000000.7380001.4500000.000000
25%6.093092176.85053815666.6903006.127421307.699498365.73441412.06580155.8445363.4397110.000000
50%7.036752196.96762720927.8336057.130299333.073546421.88496814.21833866.6224853.9550280.000000
75%8.062066216.66745627332.7621258.114887359.950170481.79230516.55765277.3374734.5003201.000000
max14.000000323.12400061227.19601013.127000481.030642753.34262028.300000124.0000006.7390001.000000
\n", 481 | "
\n", 482 | " \n", 492 | " \n", 493 | " \n", 530 | "\n", 531 | " \n", 555 | "
\n", 556 | "
\n", 557 | " " 558 | ] 559 | }, 560 | "metadata": {}, 561 | "execution_count": 5 562 | } 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "source": [ 568 | "df.info()" 569 | ], 570 | "metadata": { 571 | "colab": { 572 | "base_uri": "https://localhost:8080/" 573 | }, 574 | "id": "TeWJO-SJSfU-", 575 | "outputId": "83ed9f93-3b18-4e5c-9d0a-2947694e39a5" 576 | }, 577 | "execution_count": 6, 578 | "outputs": [ 579 | { 580 | "output_type": "stream", 581 | "name": "stdout", 582 | "text": [ 583 | "\n", 584 | "RangeIndex: 3276 entries, 0 to 3275\n", 585 | "Data columns (total 10 columns):\n", 586 | " # Column Non-Null Count Dtype \n", 587 | "--- ------ -------------- ----- \n", 588 | " 0 ph 2785 non-null float64\n", 589 | " 1 Hardness 3276 non-null float64\n", 590 | " 2 Solids 3276 non-null float64\n", 591 | " 3 Chloramines 3276 non-null float64\n", 592 | " 4 Sulfate 2495 non-null float64\n", 593 | " 5 Conductivity 3276 non-null float64\n", 594 | " 6 Organic_carbon 3276 non-null float64\n", 595 | " 7 Trihalomethanes 3114 non-null float64\n", 596 | " 8 Turbidity 3276 non-null float64\n", 597 | " 9 Potability 3276 non-null int64 \n", 598 | "dtypes: float64(9), int64(1)\n", 599 | "memory usage: 256.1 KB\n" 600 | ] 601 | } 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "source": [ 607 | "df.isnull().sum()" 608 | ], 609 | "metadata": { 610 | "colab": { 611 | "base_uri": "https://localhost:8080/" 612 | }, 613 | "id": "M3q6HIlzSgX0", 614 | "outputId": "76ad9538-8ef1-432f-80d2-1a09687fb182" 615 | }, 616 | "execution_count": 7, 617 | "outputs": [ 618 | { 619 | "output_type": "execute_result", 620 | "data": { 621 | "text/plain": [ 622 | "ph 491\n", 623 | "Hardness 0\n", 624 | "Solids 0\n", 625 | "Chloramines 0\n", 626 | "Sulfate 781\n", 627 | "Conductivity 0\n", 628 | "Organic_carbon 0\n", 629 | "Trihalomethanes 162\n", 630 | "Turbidity 0\n", 631 | "Potability 0\n", 632 | "dtype: int64" 633 | ] 634 | }, 635 | "metadata": {}, 636 | "execution_count": 7 637 | } 638 | 
]
639 |     },
640 |     {
641 |       "cell_type": "code",
642 |       "source": [
643 |         "# Modelling uses complete cases only. Bind them to a new name so the raw\n",
644 |         "# frame inspected above stays available and this cell is safe to re-run.\n",
645 |         "df_clean=df.dropna()"
646 |       ],
647 |       "metadata": {
648 |         "id": "mlPSxNacSjAN"
649 |       },
650 |       "execution_count": 8,
651 |       "outputs": []
652 |     },
653 |     {
654 |       "cell_type": "code",
655 |       "source": [
656 |         "from sklearn.model_selection import train_test_split\n",
657 |         "# Features: every column except the target label 'Potability'.\n",
658 |         "X=df_clean.drop('Potability', axis=1)\n",
659 |         "y=df_clean.Potability\n",
660 |         "# Hold out 20% of the rows; the fixed random_state keeps the split reproducible.\n",
661 |         "X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=21)"
662 |       ],
663 |       "metadata": {
664 |         "id": "_rpwzpMEwk_w"
665 |       },
666 |       "execution_count": 54,
667 |       "outputs": []
668 |     },
669 |     {
670 |       "cell_type": "code",
671 |       "source": [
672 |         "from sklearn.preprocessing import StandardScaler\n",
673 |         "# Fit the scaler on the training split only, then apply that same transform\n",
674 |         "# to both splits. Results go to new names so the unscaled splits are not\n",
675 |         "# overwritten and re-running this cell never re-scales scaled data.\n",
676 |         "scaler=StandardScaler()\n",
677 |         "X_train_scaled=scaler.fit_transform(X_train)\n",
678 |         "X_test_scaled=scaler.transform(X_test)"
679 |       ],
680 |       "metadata": {
681 |         "id": "S4RT4PJzweLe"
682 |       },
683 |       "execution_count": 55,
684 |       "outputs": []
685 |     },
686 |     {
687 |       "cell_type": "code",
688 |       "source": [
689 |         "from sklearn.ensemble import GradientBoostingClassifier\n",
690 |         "from sklearn.metrics import precision_score\n",
691 |         "# Gradient-boosting classifier; random_state is fixed so repeated runs match.\n",
692 |         "gb_model=GradientBoostingClassifier(n_estimators=500, learning_rate=0.04, random_state=1)\n",
693 |         "gb_model.fit(X_train_scaled, y_train)\n",
694 |         "\n",
695 |         "gb_pred=gb_model.predict(X_test_scaled)\n",
696 |         "print(\"Precision: \", precision_score(y_test, gb_pred))"
697 |       ],
698 |       "metadata": {
699 |         "colab": {
700 |           "base_uri": "https://localhost:8080/"
701 |         },
702 |         "id": "okuozGexSsiK",
703 |         "outputId": "5e9ab7a1-16fa-4bd5-a162-6945ae0a75cd"
704 |       },
705 |       "execution_count": 56,
706 |       "outputs": [
707 |         {
708 |           "output_type": "stream",
709 |           "name": "stdout",
710 |           "text": [
711 |             "Precision: 0.6761904761904762\n"
712 |           ]
713 |         }
714 |       ]
715 |     },
716 |     {
717 |       "cell_type": "code",
718 |       "source": [
719 |         "from sklearn.ensemble import RandomForestClassifier\n",
720 |         "# Imported here as well so this cell runs even if the gradient-boosting cell is skipped.\n",
721 |         "from sklearn.metrics import precision_score\n",
722 |         "# Random forest with default hyperparameters, scored on the same held-out split.\n",
723 |         "rf_model=RandomForestClassifier(random_state=1)\n",
724 |         "rf_model.fit(X_train_scaled, y_train)\n",
725 |         "rf_pred=rf_model.predict(X_test_scaled)\n",
726 |         "print(\"Precision: \", precision_score(y_test, rf_pred))"
727 |       ],
728 |       "metadata": {
729 |         "id": "nbB0XTA_UZ0c",
730 |         "colab": {
731 |           "base_uri": "https://localhost:8080/"
732 |         },
733 |         "outputId": "17341e1d-c011-47b1-96c1-9964077afb98"
734 |       },
735 |       "execution_count": 57,
736 |       "outputs": [
737 |         {
738 |           "output_type": "stream",
739 |           "name": "stdout",
740 |           "text": [
741 |             "Precision: 0.7628865979381443\n"
742 |           ]
743 |         }
744 |       ]
745 |     }
746 |   ]
747 | }
--------------------------------------------------------------------------------