├── README.md └── Breast_Cancer_Classification_with_NN.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Breast_cancer_classification_with_NN -------------------------------------------------------------------------------- /Breast_Cancer_Classification_with_NN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7dbb63c8", 6 | "metadata": {}, 7 | "source": [ 8 | "# Breast Cancer Classification with NN" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "2cda65cb", 14 | "metadata": {}, 15 | "source": [ 16 | "### Importing Dependencies" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "id": "67fab22d", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy as np\n", 27 | "import pandas as pd\n", 28 | "import matplotlib.pyplot as plt\n", 29 | "import sklearn.datasets\n", 30 | "from sklearn.model_selection import train_test_split" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "7a224747", 36 | "metadata": {}, 37 | "source": [ 38 | "### Data Collection and Processing" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "id": "f5cc06e5", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "breast_cancer_dataset = sklearn.datasets.load_breast_cancer()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "id": "185c187a", 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,\n", 62 | " 1.189e-01],\n", 63 | " [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,\n", 64 | " 8.902e-02],\n", 65 | " [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,\n", 66 | " 8.758e-02],\n", 67 | " ...,\n", 68 | " [1.660e+01, 2.808e+01, 
1.083e+02, ..., 1.418e-01, 2.218e-01,\n", 69 | " 7.820e-02],\n", 70 | " [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,\n", 71 | " 1.240e-01],\n", 72 | " [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,\n", 73 | " 7.039e-02]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n", 74 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", 75 | " 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,\n", 76 | " 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,\n", 77 | " 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,\n", 78 | " 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,\n", 79 | " 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,\n", 80 | " 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n", 81 | " 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,\n", 82 | " 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,\n", 83 | " 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,\n", 84 | " 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 85 | " 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,\n", 86 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,\n", 87 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,\n", 88 | " 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,\n", 89 | " 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,\n", 90 | " 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,\n", 91 | " 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,\n", 92 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,\n", 93 | " 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,\n", 94 | " 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n", 95 | " 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 
1, 1, 0, 0, 1, 0, 1, 0, 1, 1,\n", 96 | " 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n", 97 | " 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 98 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]), 'frame': None, 'target_names': array(['malignant', 'benign'], dtype='\n", 136 | "\n", 149 | "\n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " 
\n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst radiusworst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimension
017.9910.38122.801001.00.118400.277600.30010.147100.24190.07871...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
120.5717.77132.901326.00.084740.078640.08690.070170.18120.05667...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
219.6921.25130.001203.00.109600.159900.19740.127900.20690.05999...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
311.4220.3877.58386.10.142500.283900.24140.105200.25970.09744...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
420.2914.34135.101297.00.100300.132800.19800.104300.18090.05883...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n", 299 | "

5 rows × 30 columns

\n", 300 | "" 301 | ], 302 | "text/plain": [ 303 | " mean radius mean texture mean perimeter mean area mean smoothness \\\n", 304 | "0 17.99 10.38 122.80 1001.0 0.11840 \n", 305 | "1 20.57 17.77 132.90 1326.0 0.08474 \n", 306 | "2 19.69 21.25 130.00 1203.0 0.10960 \n", 307 | "3 11.42 20.38 77.58 386.1 0.14250 \n", 308 | "4 20.29 14.34 135.10 1297.0 0.10030 \n", 309 | "\n", 310 | " mean compactness mean concavity mean concave points mean symmetry \\\n", 311 | "0 0.27760 0.3001 0.14710 0.2419 \n", 312 | "1 0.07864 0.0869 0.07017 0.1812 \n", 313 | "2 0.15990 0.1974 0.12790 0.2069 \n", 314 | "3 0.28390 0.2414 0.10520 0.2597 \n", 315 | "4 0.13280 0.1980 0.10430 0.1809 \n", 316 | "\n", 317 | " mean fractal dimension ... worst radius worst texture worst perimeter \\\n", 318 | "0 0.07871 ... 25.38 17.33 184.60 \n", 319 | "1 0.05667 ... 24.99 23.41 158.80 \n", 320 | "2 0.05999 ... 23.57 25.53 152.50 \n", 321 | "3 0.09744 ... 14.91 26.50 98.87 \n", 322 | "4 0.05883 ... 22.54 16.67 152.20 \n", 323 | "\n", 324 | " worst area worst smoothness worst compactness worst concavity \\\n", 325 | "0 2019.0 0.1622 0.6656 0.7119 \n", 326 | "1 1956.0 0.1238 0.1866 0.2416 \n", 327 | "2 1709.0 0.1444 0.4245 0.4504 \n", 328 | "3 567.7 0.2098 0.8663 0.6869 \n", 329 | "4 1575.0 0.1374 0.2050 0.4000 \n", 330 | "\n", 331 | " worst concave points worst symmetry worst fractal dimension \n", 332 | "0 0.2654 0.4601 0.11890 \n", 333 | "1 0.1860 0.2750 0.08902 \n", 334 | "2 0.2430 0.3613 0.08758 \n", 335 | "3 0.2575 0.6638 0.17300 \n", 336 | "4 0.1625 0.2364 0.07678 \n", 337 | "\n", 338 | "[5 rows x 30 columns]" 339 | ] 340 | }, 341 | "execution_count": 5, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "data_frame.head()" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 6, 353 | "id": "b9995d57", 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "# adding target column to dataframe\n", 358 | 
"data_frame[\"label\"] = breast_cancer_dataset.target" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 7, 364 | "id": "548a7537", 365 | "metadata": {}, 366 | "outputs": [ 367 | { 368 | "data": { 369 | "text/html": [ 370 | "
\n", 371 | "\n", 384 | "\n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensionlabel
56421.5622.39142.001479.00.111000.115900.243900.138900.17260.05623...26.40166.102027.00.141000.211300.41070.22160.20600.071150
56520.1328.25131.201261.00.097800.103400.144000.097910.17520.05533...38.25155.001731.00.116600.192200.32150.16280.25720.066370
56616.6028.08108.30858.10.084550.102300.092510.053020.15900.05648...34.12126.701124.00.113900.309400.34030.14180.22180.078200
56720.6029.33140.101265.00.117800.277000.351400.152000.23970.07016...39.42184.601821.00.165000.868100.93870.26500.40870.124000
5687.7624.5447.92181.00.052630.043620.000000.000000.15870.05884...30.3759.16268.60.089960.064440.00000.00000.28710.070391
\n", 534 | "

5 rows × 31 columns

\n", 535 | "
" 536 | ], 537 | "text/plain": [ 538 | " mean radius mean texture mean perimeter mean area mean smoothness \\\n", 539 | "564 21.56 22.39 142.00 1479.0 0.11100 \n", 540 | "565 20.13 28.25 131.20 1261.0 0.09780 \n", 541 | "566 16.60 28.08 108.30 858.1 0.08455 \n", 542 | "567 20.60 29.33 140.10 1265.0 0.11780 \n", 543 | "568 7.76 24.54 47.92 181.0 0.05263 \n", 544 | "\n", 545 | " mean compactness mean concavity mean concave points mean symmetry \\\n", 546 | "564 0.11590 0.24390 0.13890 0.1726 \n", 547 | "565 0.10340 0.14400 0.09791 0.1752 \n", 548 | "566 0.10230 0.09251 0.05302 0.1590 \n", 549 | "567 0.27700 0.35140 0.15200 0.2397 \n", 550 | "568 0.04362 0.00000 0.00000 0.1587 \n", 551 | "\n", 552 | " mean fractal dimension ... worst texture worst perimeter worst area \\\n", 553 | "564 0.05623 ... 26.40 166.10 2027.0 \n", 554 | "565 0.05533 ... 38.25 155.00 1731.0 \n", 555 | "566 0.05648 ... 34.12 126.70 1124.0 \n", 556 | "567 0.07016 ... 39.42 184.60 1821.0 \n", 557 | "568 0.05884 ... 30.37 59.16 268.6 \n", 558 | "\n", 559 | " worst smoothness worst compactness worst concavity \\\n", 560 | "564 0.14100 0.21130 0.4107 \n", 561 | "565 0.11660 0.19220 0.3215 \n", 562 | "566 0.11390 0.30940 0.3403 \n", 563 | "567 0.16500 0.86810 0.9387 \n", 564 | "568 0.08996 0.06444 0.0000 \n", 565 | "\n", 566 | " worst concave points worst symmetry worst fractal dimension label \n", 567 | "564 0.2216 0.2060 0.07115 0 \n", 568 | "565 0.1628 0.2572 0.06637 0 \n", 569 | "566 0.1418 0.2218 0.07820 0 \n", 570 | "567 0.2650 0.4087 0.12400 0 \n", 571 | "568 0.0000 0.2871 0.07039 1 \n", 572 | "\n", 573 | "[5 rows x 31 columns]" 574 | ] 575 | }, 576 | "execution_count": 7, 577 | "metadata": {}, 578 | "output_type": "execute_result" 579 | } 580 | ], 581 | "source": [ 582 | "data_frame.tail()" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 8, 588 | "id": "abd815ef", 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/plain": [ 594 | "(569, 
31)" 595 | ] 596 | }, 597 | "execution_count": 8, 598 | "metadata": {}, 599 | "output_type": "execute_result" 600 | } 601 | ], 602 | "source": [ 603 | "data_frame.shape" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 9, 609 | "id": "fb0b4f10", 610 | "metadata": {}, 611 | "outputs": [ 612 | { 613 | "name": "stdout", 614 | "output_type": "stream", 615 | "text": [ 616 | "\n", 617 | "RangeIndex: 569 entries, 0 to 568\n", 618 | "Data columns (total 31 columns):\n", 619 | " # Column Non-Null Count Dtype \n", 620 | "--- ------ -------------- ----- \n", 621 | " 0 mean radius 569 non-null float64\n", 622 | " 1 mean texture 569 non-null float64\n", 623 | " 2 mean perimeter 569 non-null float64\n", 624 | " 3 mean area 569 non-null float64\n", 625 | " 4 mean smoothness 569 non-null float64\n", 626 | " 5 mean compactness 569 non-null float64\n", 627 | " 6 mean concavity 569 non-null float64\n", 628 | " 7 mean concave points 569 non-null float64\n", 629 | " 8 mean symmetry 569 non-null float64\n", 630 | " 9 mean fractal dimension 569 non-null float64\n", 631 | " 10 radius error 569 non-null float64\n", 632 | " 11 texture error 569 non-null float64\n", 633 | " 12 perimeter error 569 non-null float64\n", 634 | " 13 area error 569 non-null float64\n", 635 | " 14 smoothness error 569 non-null float64\n", 636 | " 15 compactness error 569 non-null float64\n", 637 | " 16 concavity error 569 non-null float64\n", 638 | " 17 concave points error 569 non-null float64\n", 639 | " 18 symmetry error 569 non-null float64\n", 640 | " 19 fractal dimension error 569 non-null float64\n", 641 | " 20 worst radius 569 non-null float64\n", 642 | " 21 worst texture 569 non-null float64\n", 643 | " 22 worst perimeter 569 non-null float64\n", 644 | " 23 worst area 569 non-null float64\n", 645 | " 24 worst smoothness 569 non-null float64\n", 646 | " 25 worst compactness 569 non-null float64\n", 647 | " 26 worst concavity 569 non-null float64\n", 648 | " 27 worst concave 
points 569 non-null float64\n", 649 | " 28 worst symmetry 569 non-null float64\n", 650 | " 29 worst fractal dimension 569 non-null float64\n", 651 | " 30 label 569 non-null int32 \n", 652 | "dtypes: float64(30), int32(1)\n", 653 | "memory usage: 135.7 KB\n" 654 | ] 655 | } 656 | ], 657 | "source": [ 658 | "# getting information of dataset\n", 659 | "data_frame.info()" 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": 10, 665 | "id": "7310971a", 666 | "metadata": {}, 667 | "outputs": [ 668 | { 669 | "data": { 670 | "text/plain": [ 671 | "mean radius 0\n", 672 | "mean texture 0\n", 673 | "mean perimeter 0\n", 674 | "mean area 0\n", 675 | "mean smoothness 0\n", 676 | "mean compactness 0\n", 677 | "mean concavity 0\n", 678 | "mean concave points 0\n", 679 | "mean symmetry 0\n", 680 | "mean fractal dimension 0\n", 681 | "radius error 0\n", 682 | "texture error 0\n", 683 | "perimeter error 0\n", 684 | "area error 0\n", 685 | "smoothness error 0\n", 686 | "compactness error 0\n", 687 | "concavity error 0\n", 688 | "concave points error 0\n", 689 | "symmetry error 0\n", 690 | "fractal dimension error 0\n", 691 | "worst radius 0\n", 692 | "worst texture 0\n", 693 | "worst perimeter 0\n", 694 | "worst area 0\n", 695 | "worst smoothness 0\n", 696 | "worst compactness 0\n", 697 | "worst concavity 0\n", 698 | "worst concave points 0\n", 699 | "worst symmetry 0\n", 700 | "worst fractal dimension 0\n", 701 | "label 0\n", 702 | "dtype: int64" 703 | ] 704 | }, 705 | "execution_count": 10, 706 | "metadata": {}, 707 | "output_type": "execute_result" 708 | } 709 | ], 710 | "source": [ 711 | "# checking null values\n", 712 | "data_frame.isnull().sum()" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": 11, 718 | "id": "2cbe26fc", 719 | "metadata": {}, 720 | "outputs": [ 721 | { 722 | "data": { 723 | "text/html": [ 724 | "
\n", 725 | "\n", 738 | "\n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " 
\n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensionlabel
count569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000...569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000
mean14.12729219.28964991.969033654.8891040.0963600.1043410.0887990.0489190.1811620.062798...25.677223107.261213880.5831280.1323690.2542650.2721880.1146060.2900760.0839460.627417
std3.5240494.30103624.298981351.9141290.0140640.0528130.0797200.0388030.0274140.007060...6.14625833.602542569.3569930.0228320.1573360.2086240.0657320.0618670.0180610.483918
min6.9810009.71000043.790000143.5000000.0526300.0193800.0000000.0000000.1060000.049960...12.02000050.410000185.2000000.0711700.0272900.0000000.0000000.1565000.0550400.000000
25%11.70000016.17000075.170000420.3000000.0863700.0649200.0295600.0203100.1619000.057700...21.08000084.110000515.3000000.1166000.1472000.1145000.0649300.2504000.0714600.000000
50%13.37000018.84000086.240000551.1000000.0958700.0926300.0615400.0335000.1792000.061540...25.41000097.660000686.5000000.1313000.2119000.2267000.0999300.2822000.0800401.000000
75%15.78000021.800000104.100000782.7000000.1053000.1304000.1307000.0740000.1957000.066120...29.720000125.4000001084.0000000.1460000.3391000.3829000.1614000.3179000.0920801.000000
max28.11000039.280000188.5000002501.0000000.1634000.3454000.4268000.2012000.3040000.097440...49.540000251.2000004254.0000000.2226001.0580001.2520000.2910000.6638000.2075001.000000
\n", 960 | "

8 rows × 31 columns

\n", 961 | "
" 962 | ], 963 | "text/plain": [ 964 | " mean radius mean texture mean perimeter mean area \\\n", 965 | "count 569.000000 569.000000 569.000000 569.000000 \n", 966 | "mean 14.127292 19.289649 91.969033 654.889104 \n", 967 | "std 3.524049 4.301036 24.298981 351.914129 \n", 968 | "min 6.981000 9.710000 43.790000 143.500000 \n", 969 | "25% 11.700000 16.170000 75.170000 420.300000 \n", 970 | "50% 13.370000 18.840000 86.240000 551.100000 \n", 971 | "75% 15.780000 21.800000 104.100000 782.700000 \n", 972 | "max 28.110000 39.280000 188.500000 2501.000000 \n", 973 | "\n", 974 | " mean smoothness mean compactness mean concavity mean concave points \\\n", 975 | "count 569.000000 569.000000 569.000000 569.000000 \n", 976 | "mean 0.096360 0.104341 0.088799 0.048919 \n", 977 | "std 0.014064 0.052813 0.079720 0.038803 \n", 978 | "min 0.052630 0.019380 0.000000 0.000000 \n", 979 | "25% 0.086370 0.064920 0.029560 0.020310 \n", 980 | "50% 0.095870 0.092630 0.061540 0.033500 \n", 981 | "75% 0.105300 0.130400 0.130700 0.074000 \n", 982 | "max 0.163400 0.345400 0.426800 0.201200 \n", 983 | "\n", 984 | " mean symmetry mean fractal dimension ... worst texture \\\n", 985 | "count 569.000000 569.000000 ... 569.000000 \n", 986 | "mean 0.181162 0.062798 ... 25.677223 \n", 987 | "std 0.027414 0.007060 ... 6.146258 \n", 988 | "min 0.106000 0.049960 ... 12.020000 \n", 989 | "25% 0.161900 0.057700 ... 21.080000 \n", 990 | "50% 0.179200 0.061540 ... 25.410000 \n", 991 | "75% 0.195700 0.066120 ... 29.720000 \n", 992 | "max 0.304000 0.097440 ... 
49.540000 \n", 993 | "\n", 994 | " worst perimeter worst area worst smoothness worst compactness \\\n", 995 | "count 569.000000 569.000000 569.000000 569.000000 \n", 996 | "mean 107.261213 880.583128 0.132369 0.254265 \n", 997 | "std 33.602542 569.356993 0.022832 0.157336 \n", 998 | "min 50.410000 185.200000 0.071170 0.027290 \n", 999 | "25% 84.110000 515.300000 0.116600 0.147200 \n", 1000 | "50% 97.660000 686.500000 0.131300 0.211900 \n", 1001 | "75% 125.400000 1084.000000 0.146000 0.339100 \n", 1002 | "max 251.200000 4254.000000 0.222600 1.058000 \n", 1003 | "\n", 1004 | " worst concavity worst concave points worst symmetry \\\n", 1005 | "count 569.000000 569.000000 569.000000 \n", 1006 | "mean 0.272188 0.114606 0.290076 \n", 1007 | "std 0.208624 0.065732 0.061867 \n", 1008 | "min 0.000000 0.000000 0.156500 \n", 1009 | "25% 0.114500 0.064930 0.250400 \n", 1010 | "50% 0.226700 0.099930 0.282200 \n", 1011 | "75% 0.382900 0.161400 0.317900 \n", 1012 | "max 1.252000 0.291000 0.663800 \n", 1013 | "\n", 1014 | " worst fractal dimension label \n", 1015 | "count 569.000000 569.000000 \n", 1016 | "mean 0.083946 0.627417 \n", 1017 | "std 0.018061 0.483918 \n", 1018 | "min 0.055040 0.000000 \n", 1019 | "25% 0.071460 0.000000 \n", 1020 | "50% 0.080040 1.000000 \n", 1021 | "75% 0.092080 1.000000 \n", 1022 | "max 0.207500 1.000000 \n", 1023 | "\n", 1024 | "[8 rows x 31 columns]" 1025 | ] 1026 | }, 1027 | "execution_count": 11, 1028 | "metadata": {}, 1029 | "output_type": "execute_result" 1030 | } 1031 | ], 1032 | "source": [ 1033 | "# statistical measures of dataset\n", 1034 | "data_frame.describe()" 1035 | ] 1036 | }, 1037 | { 1038 | "cell_type": "code", 1039 | "execution_count": 12, 1040 | "id": "b7177f0a", 1041 | "metadata": {}, 1042 | "outputs": [ 1043 | { 1044 | "data": { 1045 | "text/plain": [ 1046 | "1 357\n", 1047 | "0 212\n", 1048 | "Name: label, dtype: int64" 1049 | ] 1050 | }, 1051 | "execution_count": 12, 1052 | "metadata": {}, 1053 | "output_type": 
"execute_result" 1054 | } 1055 | ], 1056 | "source": [ 1057 | "# class distribution of the target label (0 = malignant, 1 = benign)\n", 1058 | "data_frame[\"label\"].value_counts()" 1059 | ] 1060 | }, 1061 | { 1062 | "cell_type": "code", 1063 | "execution_count": 13, 1064 | "id": "19911c84", 1065 | "metadata": {}, 1066 | "outputs": [ 1067 | { 1068 | "data": { 1069 | "text/html": [ 1070 | "
\n", 1071 | "\n", 1084 | "\n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst radiusworst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimension
label
017.46283021.604906115.365377978.3764150.1028980.1451880.1607750.0879900.1929090.062680...21.13481129.318208141.3703301422.2863210.1448450.3748240.4506060.1822370.3234680.091530
112.14652417.91476278.075406462.7901960.0924780.0800850.0460580.0257170.1741860.062867...13.37980123.51507087.005938558.8994400.1249590.1826730.1662380.0744440.2702460.079442
\n", 1186 | "

2 rows × 30 columns

\n", 1187 | "
" 1188 | ], 1189 | "text/plain": [ 1190 | " mean radius mean texture mean perimeter mean area mean smoothness \\\n", 1191 | "label \n", 1192 | "0 17.462830 21.604906 115.365377 978.376415 0.102898 \n", 1193 | "1 12.146524 17.914762 78.075406 462.790196 0.092478 \n", 1194 | "\n", 1195 | " mean compactness mean concavity mean concave points mean symmetry \\\n", 1196 | "label \n", 1197 | "0 0.145188 0.160775 0.087990 0.192909 \n", 1198 | "1 0.080085 0.046058 0.025717 0.174186 \n", 1199 | "\n", 1200 | " mean fractal dimension ... worst radius worst texture \\\n", 1201 | "label ... \n", 1202 | "0 0.062680 ... 21.134811 29.318208 \n", 1203 | "1 0.062867 ... 13.379801 23.515070 \n", 1204 | "\n", 1205 | " worst perimeter worst area worst smoothness worst compactness \\\n", 1206 | "label \n", 1207 | "0 141.370330 1422.286321 0.144845 0.374824 \n", 1208 | "1 87.005938 558.899440 0.124959 0.182673 \n", 1209 | "\n", 1210 | " worst concavity worst concave points worst symmetry \\\n", 1211 | "label \n", 1212 | "0 0.450606 0.182237 0.323468 \n", 1213 | "1 0.166238 0.074444 0.270246 \n", 1214 | "\n", 1215 | " worst fractal dimension \n", 1216 | "label \n", 1217 | "0 0.091530 \n", 1218 | "1 0.079442 \n", 1219 | "\n", 1220 | "[2 rows x 30 columns]" 1221 | ] 1222 | }, 1223 | "execution_count": 13, 1224 | "metadata": {}, 1225 | "output_type": "execute_result" 1226 | } 1227 | ], 1228 | "source": [ 1229 | "# mean values\n", 1230 | "data_frame.groupby(\"label\").mean()" 1231 | ] 1232 | }, 1233 | { 1234 | "cell_type": "code", 1235 | "execution_count": 14, 1236 | "id": "52e48efe", 1237 | "metadata": {}, 1238 | "outputs": [], 1239 | "source": [ 1240 | "# separating features and target\n", 1241 | "X = data_frame.drop(columns = \"label\", axis = 1)\n", 1242 | "Y = data_frame[\"label\"]" 1243 | ] 1244 | }, 1245 | { 1246 | "cell_type": "code", 1247 | "execution_count": 15, 1248 | "id": "d3b94af9", 1249 | "metadata": {}, 1250 | "outputs": [ 1251 | { 1252 | "name": "stdout", 1253 | "output_type": 
"stream", 1254 | "text": [ 1255 | " mean radius mean texture mean perimeter mean area mean smoothness \\\n", 1256 | "0 17.99 10.38 122.80 1001.0 0.11840 \n", 1257 | "1 20.57 17.77 132.90 1326.0 0.08474 \n", 1258 | "2 19.69 21.25 130.00 1203.0 0.10960 \n", 1259 | "3 11.42 20.38 77.58 386.1 0.14250 \n", 1260 | "4 20.29 14.34 135.10 1297.0 0.10030 \n", 1261 | ".. ... ... ... ... ... \n", 1262 | "564 21.56 22.39 142.00 1479.0 0.11100 \n", 1263 | "565 20.13 28.25 131.20 1261.0 0.09780 \n", 1264 | "566 16.60 28.08 108.30 858.1 0.08455 \n", 1265 | "567 20.60 29.33 140.10 1265.0 0.11780 \n", 1266 | "568 7.76 24.54 47.92 181.0 0.05263 \n", 1267 | "\n", 1268 | " mean compactness mean concavity mean concave points mean symmetry \\\n", 1269 | "0 0.27760 0.30010 0.14710 0.2419 \n", 1270 | "1 0.07864 0.08690 0.07017 0.1812 \n", 1271 | "2 0.15990 0.19740 0.12790 0.2069 \n", 1272 | "3 0.28390 0.24140 0.10520 0.2597 \n", 1273 | "4 0.13280 0.19800 0.10430 0.1809 \n", 1274 | ".. ... ... ... ... \n", 1275 | "564 0.11590 0.24390 0.13890 0.1726 \n", 1276 | "565 0.10340 0.14400 0.09791 0.1752 \n", 1277 | "566 0.10230 0.09251 0.05302 0.1590 \n", 1278 | "567 0.27700 0.35140 0.15200 0.2397 \n", 1279 | "568 0.04362 0.00000 0.00000 0.1587 \n", 1280 | "\n", 1281 | " mean fractal dimension ... worst radius worst texture \\\n", 1282 | "0 0.07871 ... 25.380 17.33 \n", 1283 | "1 0.05667 ... 24.990 23.41 \n", 1284 | "2 0.05999 ... 23.570 25.53 \n", 1285 | "3 0.09744 ... 14.910 26.50 \n", 1286 | "4 0.05883 ... 22.540 16.67 \n", 1287 | ".. ... ... ... ... \n", 1288 | "564 0.05623 ... 25.450 26.40 \n", 1289 | "565 0.05533 ... 23.690 38.25 \n", 1290 | "566 0.05648 ... 18.980 34.12 \n", 1291 | "567 0.07016 ... 25.740 39.42 \n", 1292 | "568 0.05884 ... 
9.456 30.37 \n", 1293 | "\n", 1294 | " worst perimeter worst area worst smoothness worst compactness \\\n", 1295 | "0 184.60 2019.0 0.16220 0.66560 \n", 1296 | "1 158.80 1956.0 0.12380 0.18660 \n", 1297 | "2 152.50 1709.0 0.14440 0.42450 \n", 1298 | "3 98.87 567.7 0.20980 0.86630 \n", 1299 | "4 152.20 1575.0 0.13740 0.20500 \n", 1300 | ".. ... ... ... ... \n", 1301 | "564 166.10 2027.0 0.14100 0.21130 \n", 1302 | "565 155.00 1731.0 0.11660 0.19220 \n", 1303 | "566 126.70 1124.0 0.11390 0.30940 \n", 1304 | "567 184.60 1821.0 0.16500 0.86810 \n", 1305 | "568 59.16 268.6 0.08996 0.06444 \n", 1306 | "\n", 1307 | " worst concavity worst concave points worst symmetry \\\n", 1308 | "0 0.7119 0.2654 0.4601 \n", 1309 | "1 0.2416 0.1860 0.2750 \n", 1310 | "2 0.4504 0.2430 0.3613 \n", 1311 | "3 0.6869 0.2575 0.6638 \n", 1312 | "4 0.4000 0.1625 0.2364 \n", 1313 | ".. ... ... ... \n", 1314 | "564 0.4107 0.2216 0.2060 \n", 1315 | "565 0.3215 0.1628 0.2572 \n", 1316 | "566 0.3403 0.1418 0.2218 \n", 1317 | "567 0.9387 0.2650 0.4087 \n", 1318 | "568 0.0000 0.0000 0.2871 \n", 1319 | "\n", 1320 | " worst fractal dimension \n", 1321 | "0 0.11890 \n", 1322 | "1 0.08902 \n", 1323 | "2 0.08758 \n", 1324 | "3 0.17300 \n", 1325 | "4 0.07678 \n", 1326 | ".. ... 
\n", 1327 | "564 0.07115 \n", 1328 | "565 0.06637 \n", 1329 | "566 0.07820 \n", 1330 | "567 0.12400 \n", 1331 | "568 0.07039 \n", 1332 | "\n", 1333 | "[569 rows x 30 columns]\n", 1334 | "0 0\n", 1335 | "1 0\n", 1336 | "2 0\n", 1337 | "3 0\n", 1338 | "4 0\n", 1339 | " ..\n", 1340 | "564 0\n", 1341 | "565 0\n", 1342 | "566 0\n", 1343 | "567 0\n", 1344 | "568 1\n", 1345 | "Name: label, Length: 569, dtype: int32\n" 1346 | ] 1347 | } 1348 | ], 1349 | "source": [ 1350 | "print(X)\n", 1351 | "print(Y)" 1352 | ] 1353 | }, 1354 | { 1355 | "cell_type": "markdown", 1356 | "id": "b882a92b", 1357 | "metadata": {}, 1358 | "source": [ 1359 | "#### Data Standardization" 1360 | ] 1361 | }, 1362 | { 1363 | "cell_type": "code", 1364 | "execution_count": 16, 1365 | "id": "9a40be69", 1366 | "metadata": {}, 1367 | "outputs": [], 1368 | "source": [ 1369 | "from sklearn.preprocessing import StandardScaler" 1370 | ] 1371 | }, 1372 | { 1373 | "cell_type": "code", 1374 | "execution_count": 17, 1375 | "id": "1e2f152e", 1376 | "metadata": {}, 1377 | "outputs": [], 1378 | "source": [ 1379 | "scaler = StandardScaler()" 1380 | ] 1381 | }, 1382 | { 1383 | "cell_type": "code", 1384 | "execution_count": 18, 1385 | "id": "83a72223", 1386 | "metadata": {}, 1387 | "outputs": [], 1388 | "source": [ 1389 | "standardized_data = scaler.fit_transform(X)" 1390 | ] 1391 | }, 1392 | { 1393 | "cell_type": "code", 1394 | "execution_count": 19, 1395 | "id": "4a0720ca", 1396 | "metadata": {}, 1397 | "outputs": [ 1398 | { 1399 | "name": "stdout", 1400 | "output_type": "stream", 1401 | "text": [ 1402 | "[[ 1.09706398 -2.07333501 1.26993369 ... 2.29607613 2.75062224\n", 1403 | " 1.93701461]\n", 1404 | " [ 1.82982061 -0.35363241 1.68595471 ... 1.0870843 -0.24388967\n", 1405 | " 0.28118999]\n", 1406 | " [ 1.57988811 0.45618695 1.56650313 ... 1.95500035 1.152255\n", 1407 | " 0.20139121]\n", 1408 | " ...\n", 1409 | " [ 0.70228425 2.0455738 0.67267578 ... 
0.41406869 -1.10454895\n", 1410 | " -0.31840916]\n", 1411 | " [ 1.83834103 2.33645719 1.98252415 ... 2.28998549 1.91908301\n", 1412 | " 2.21963528]\n", 1413 | " [-1.80840125 1.22179204 -1.81438851 ... -1.74506282 -0.04813821\n", 1414 | " -0.75120669]]\n" 1415 | ] 1416 | } 1417 | ], 1418 | "source": [ 1419 | "print(standardized_data)" 1420 | ] 1421 | }, 1422 | { 1423 | "cell_type": "code", 1424 | "execution_count": 20, 1425 | "id": "87980ccc", 1426 | "metadata": {}, 1427 | "outputs": [ 1428 | { 1429 | "name": "stdout", 1430 | "output_type": "stream", 1431 | "text": [ 1432 | "[[ 1.09706398 -2.07333501 1.26993369 ... 2.29607613 2.75062224\n", 1433 | " 1.93701461]\n", 1434 | " [ 1.82982061 -0.35363241 1.68595471 ... 1.0870843 -0.24388967\n", 1435 | " 0.28118999]\n", 1436 | " [ 1.57988811 0.45618695 1.56650313 ... 1.95500035 1.152255\n", 1437 | " 0.20139121]\n", 1438 | " ...\n", 1439 | " [ 0.70228425 2.0455738 0.67267578 ... 0.41406869 -1.10454895\n", 1440 | " -0.31840916]\n", 1441 | " [ 1.83834103 2.33645719 1.98252415 ... 2.28998549 1.91908301\n", 1442 | " 2.21963528]\n", 1443 | " [-1.80840125 1.22179204 -1.81438851 ... 
-1.74506282 -0.04813821\n", 1444 | " -0.75120669]]\n" 1445 | ] 1446 | } 1447 | ], 1448 | "source": [ 1449 | "# updating features with standardized data\n", 1450 | "X = standardized_data\n", 1451 | "print(X)" 1452 | ] 1453 | }, 1454 | { 1455 | "cell_type": "code", 1456 | "execution_count": 21, 1457 | "id": "096d41d3", 1458 | "metadata": {}, 1459 | "outputs": [ 1460 | { 1461 | "name": "stdout", 1462 | "output_type": "stream", 1463 | "text": [ 1464 | "0 0\n", 1465 | "1 0\n", 1466 | "2 0\n", 1467 | "3 0\n", 1468 | "4 0\n", 1469 | " ..\n", 1470 | "564 0\n", 1471 | "565 0\n", 1472 | "566 0\n", 1473 | "567 0\n", 1474 | "568 1\n", 1475 | "Name: label, Length: 569, dtype: int32\n" 1476 | ] 1477 | } 1478 | ], 1479 | "source": [ 1480 | "print(Y)" 1481 | ] 1482 | }, 1483 | { 1484 | "cell_type": "markdown", 1485 | "id": "79bf7648", 1486 | "metadata": {}, 1487 | "source": [ 1488 | "### Split Train and Test data" 1489 | ] 1490 | }, 1491 | { 1492 | "cell_type": "code", 1493 | "execution_count": 29, 1494 | "id": "a4b5e395", 1495 | "metadata": {}, 1496 | "outputs": [], 1497 | "source": [ 1498 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 2)" 1499 | ] 1500 | }, 1501 | { 1502 | "cell_type": "code", 1503 | "execution_count": 30, 1504 | "id": "ba92150d", 1505 | "metadata": {}, 1506 | "outputs": [ 1507 | { 1508 | "name": "stdout", 1509 | "output_type": "stream", 1510 | "text": [ 1511 | "Total data: (569, 30)\n", 1512 | "Train data: (455, 30)\n", 1513 | "Test data: (114, 30)\n" 1514 | ] 1515 | } 1516 | ], 1517 | "source": [ 1518 | "print(\"Total data: \", X.shape)\n", 1519 | "print(\"Train data: \", X_train.shape)\n", 1520 | "print(\"Test data: \", X_test.shape)" 1521 | ] 1522 | }, 1523 | { 1524 | "cell_type": "markdown", 1525 | "id": "f81c4e85", 1526 | "metadata": {}, 1527 | "source": [ 1528 | "### Building Neural Network" 1529 | ] 1530 | }, 1531 | { 1532 | "cell_type": "markdown", 1533 | "id": "012dd59e", 1534 | "metadata": {}, 1535 | 
"source": [ 1536 | "#### Importing Tensorflow and Keras" 1537 | ] 1538 | }, 1539 | { 1540 | "cell_type": "code", 1541 | "execution_count": 24, 1542 | "id": "17fdc50c", 1543 | "metadata": {}, 1544 | "outputs": [], 1545 | "source": [ 1546 | "import tensorflow as tf\n", 1547 | "tf.random.set_seed(3)\n", 1548 | "from tensorflow import keras" 1549 | ] 1550 | }, 1551 | { 1552 | "cell_type": "code", 1553 | "execution_count": 27, 1554 | "id": "292c2393", 1555 | "metadata": {}, 1556 | "outputs": [], 1557 | "source": [ 1558 | "# setting up the layers of Neural Network\n", 1559 | "\n", 1560 | "model = keras.Sequential([\n", 1561 | " \n", 1562 | " keras.layers.Flatten(input_shape = (30,)),\n", 1563 | " keras.layers.Dense(20, activation = 'relu'),\n", 1564 | " keras.layers.Dense(2, activation = \"sigmoid\")\n", 1565 | "])" 1566 | ] 1567 | }, 1568 | { 1569 | "cell_type": "code", 1570 | "execution_count": 28, 1571 | "id": "03b1f4e3", 1572 | "metadata": {}, 1573 | "outputs": [], 1574 | "source": [ 1575 | "# compiling the Neural Network\n", 1576 | "\n", 1577 | "model.compile(optimizer = \"adam\",\n", 1578 | " loss = \"sparse_categorical_crossentropy\",\n", 1579 | " metrics = [\"accuracy\"])" 1580 | ] 1581 | }, 1582 | { 1583 | "cell_type": "code", 1584 | "execution_count": 31, 1585 | "id": "ba0a717f", 1586 | "metadata": {}, 1587 | "outputs": [ 1588 | { 1589 | "name": "stdout", 1590 | "output_type": "stream", 1591 | "text": [ 1592 | "Epoch 1/10\n", 1593 | "13/13 [==============================] - 0s 10ms/step - loss: 0.7720 - accuracy: 0.4523 - val_loss: 0.6600 - val_accuracy: 0.5870\n", 1594 | "Epoch 2/10\n", 1595 | "13/13 [==============================] - 0s 3ms/step - loss: 0.5253 - accuracy: 0.7922 - val_loss: 0.4667 - val_accuracy: 0.8478\n", 1596 | "Epoch 3/10\n", 1597 | "13/13 [==============================] - 0s 2ms/step - loss: 0.3880 - accuracy: 0.8875 - val_loss: 0.3581 - val_accuracy: 0.9130\n", 1598 | "Epoch 4/10\n", 1599 | "13/13 [==============================] - 0s 
2ms/step - loss: 0.3069 - accuracy: 0.9144 - val_loss: 0.2917 - val_accuracy: 0.9348\n", 1600 | "Epoch 5/10\n", 1601 | "13/13 [==============================] - 0s 2ms/step - loss: 0.2517 - accuracy: 0.9315 - val_loss: 0.2496 - val_accuracy: 0.9348\n", 1602 | "Epoch 6/10\n", 1603 | "13/13 [==============================] - 0s 2ms/step - loss: 0.2144 - accuracy: 0.9389 - val_loss: 0.2199 - val_accuracy: 0.9348\n", 1604 | "Epoch 7/10\n", 1605 | "13/13 [==============================] - 0s 2ms/step - loss: 0.1877 - accuracy: 0.9462 - val_loss: 0.1978 - val_accuracy: 0.9348\n", 1606 | "Epoch 8/10\n", 1607 | "13/13 [==============================] - 0s 4ms/step - loss: 0.1671 - accuracy: 0.9462 - val_loss: 0.1803 - val_accuracy: 0.9348\n", 1608 | "Epoch 9/10\n", 1609 | "13/13 [==============================] - 0s 2ms/step - loss: 0.1514 - accuracy: 0.9462 - val_loss: 0.1657 - val_accuracy: 0.9348\n", 1610 | "Epoch 10/10\n", 1611 | "13/13 [==============================] - 0s 2ms/step - loss: 0.1376 - accuracy: 0.9560 - val_loss: 0.1543 - val_accuracy: 0.9565\n" 1612 | ] 1613 | } 1614 | ], 1615 | "source": [ 1616 | "# training the Neural Network\n", 1617 | "\n", 1618 | "history = model.fit(X_train, Y_train, validation_split = 0.1, epochs = 10)" 1619 | ] 1620 | }, 1621 | { 1622 | "cell_type": "code", 1623 | "execution_count": 34, 1624 | "id": "30a2c0fa", 1625 | "metadata": {}, 1626 | "outputs": [ 1627 | { 1628 | "data": { 1629 | "text/plain": [ 1630 | "" 1631 | ] 1632 | }, 1633 | "execution_count": 34, 1634 | "metadata": {}, 1635 | "output_type": "execute_result" 1636 | }, 1637 | { 1638 | "data": { 1639 | "image/png": "\n", 1640 | "text/plain": [ 1641 | "
" 1642 | ] 1643 | }, 1644 | "metadata": {}, 1645 | "output_type": "display_data" 1646 | } 1647 | ], 1648 | "source": [ 1649 | "# visualize train validation accuracy\n", 1650 | "\n", 1651 | "plt.plot(history.history[\"accuracy\"])\n", 1652 | "plt.plot(history.history[\"val_accuracy\"])\n", 1653 | "\n", 1654 | "plt.title(\"Model Accuracy\")\n", 1655 | "plt.ylabel(\"accuracy\")\n", 1656 | "plt.xlabel(\"epoch\")\n", 1657 | "\n", 1658 | "plt.legend([\"training\", \"validation\"], loc = \"lower right\")" 1659 | ] 1660 | }, 1661 | { 1662 | "cell_type": "code", 1663 | "execution_count": 35, 1664 | "id": "f33a8681", 1665 | "metadata": {}, 1666 | "outputs": [ 1667 | { 1668 | "data": { 1669 | "text/plain": [ 1670 | "" 1671 | ] 1672 | }, 1673 | "execution_count": 35, 1674 | "metadata": {}, 1675 | "output_type": "execute_result" 1676 | }, 1677 | { 1678 | "data": { 1679 | "image/png": "\n", 1680 | "text/plain": [ 1681 | "
" 1682 | ] 1683 | }, 1684 | "metadata": {}, 1685 | "output_type": "display_data" 1686 | } 1687 | ], 1688 | "source": [ 1689 | "# visualize train validation loss\n", 1690 | "\n", 1691 | "plt.plot(history.history[\"loss\"])\n", 1692 | "plt.plot(history.history[\"val_loss\"])\n", 1693 | "\n", 1694 | "plt.title(\"Model Accuracy\")\n", 1695 | "plt.ylabel(\"loss\")\n", 1696 | "plt.xlabel(\"epoch\")\n", 1697 | "\n", 1698 | "plt.legend([\"training\", \"validation\"], loc = \"upper right\")" 1699 | ] 1700 | }, 1701 | { 1702 | "cell_type": "markdown", 1703 | "id": "a3da5f45", 1704 | "metadata": {}, 1705 | "source": [ 1706 | "### Accuracy on test data" 1707 | ] 1708 | }, 1709 | { 1710 | "cell_type": "code", 1711 | "execution_count": 36, 1712 | "id": "718e3596", 1713 | "metadata": {}, 1714 | "outputs": [ 1715 | { 1716 | "name": "stdout", 1717 | "output_type": "stream", 1718 | "text": [ 1719 | "4/4 [==============================] - 0s 1ms/step - loss: 0.1402 - accuracy: 0.9649\n" 1720 | ] 1721 | } 1722 | ], 1723 | "source": [ 1724 | "loss, accuracy = model.evaluate(X_test, Y_test)" 1725 | ] 1726 | }, 1727 | { 1728 | "cell_type": "code", 1729 | "execution_count": 37, 1730 | "id": "7d48070a", 1731 | "metadata": {}, 1732 | "outputs": [ 1733 | { 1734 | "name": "stdout", 1735 | "output_type": "stream", 1736 | "text": [ 1737 | "0.9649122953414917\n" 1738 | ] 1739 | } 1740 | ], 1741 | "source": [ 1742 | "print(accuracy)" 1743 | ] 1744 | }, 1745 | { 1746 | "cell_type": "code", 1747 | "execution_count": 38, 1748 | "id": "446d9e4b", 1749 | "metadata": {}, 1750 | "outputs": [ 1751 | { 1752 | "name": "stdout", 1753 | "output_type": "stream", 1754 | "text": [ 1755 | "(114, 30)\n" 1756 | ] 1757 | } 1758 | ], 1759 | "source": [ 1760 | "print(X_test.shape)" 1761 | ] 1762 | }, 1763 | { 1764 | "cell_type": "code", 1765 | "execution_count": 39, 1766 | "id": "3d6f5662", 1767 | "metadata": {}, 1768 | "outputs": [ 1769 | { 1770 | "name": "stdout", 1771 | "output_type": "stream", 1772 | "text": [ 
1773 | "[-0.05319351 -1.42408329 -0.06833592 -0.17260604 2.02392516 -0.1286992\n", 1774 | " 0.15317924 0.44445235 0.60014708 0.25125475 0.50863855 2.57300558\n", 1775 | " 0.60638318 0.09687179 0.99971423 0.3907719 0.52176832 2.71439431\n", 1776 | " -0.41073446 0.65554594 -0.3415184 -1.67683887 -0.3795077 -0.3995439\n", 1777 | " 0.30822795 -0.74978636 -0.55789673 -0.19956318 -1.19838023 -0.63261684]\n" 1778 | ] 1779 | } 1780 | ], 1781 | "source": [ 1782 | "print(X_test[0])" 1783 | ] 1784 | }, 1785 | { 1786 | "cell_type": "code", 1787 | "execution_count": 40, 1788 | "id": "6516d2d1", 1789 | "metadata": {}, 1790 | "outputs": [ 1791 | { 1792 | "name": "stdout", 1793 | "output_type": "stream", 1794 | "text": [ 1795 | "4/4 [==============================] - 0s 1ms/step\n" 1796 | ] 1797 | } 1798 | ], 1799 | "source": [ 1800 | "Y_pred = model.predict(X_test)" 1801 | ] 1802 | }, 1803 | { 1804 | "cell_type": "code", 1805 | "execution_count": 41, 1806 | "id": "6608d010", 1807 | "metadata": {}, 1808 | "outputs": [ 1809 | { 1810 | "name": "stdout", 1811 | "output_type": "stream", 1812 | "text": [ 1813 | "(114, 2)\n" 1814 | ] 1815 | } 1816 | ], 1817 | "source": [ 1818 | "print(Y_pred.shape)" 1819 | ] 1820 | }, 1821 | { 1822 | "cell_type": "code", 1823 | "execution_count": 43, 1824 | "id": "4a8d8292", 1825 | "metadata": { 1826 | "scrolled": false 1827 | }, 1828 | "outputs": [ 1829 | { 1830 | "name": "stdout", 1831 | "output_type": "stream", 1832 | "text": [ 1833 | "[[2.80580282e-01 3.25303674e-01]\n", 1834 | " [4.00586158e-01 5.64572453e-01]\n", 1835 | " [6.19726144e-02 5.79110444e-01]\n", 1836 | " [9.41306710e-01 6.86075306e-04]\n", 1837 | " [1.68490976e-01 4.10280824e-01]\n", 1838 | " [9.01930094e-01 1.44960303e-02]\n", 1839 | " [2.20659822e-01 5.39898217e-01]\n", 1840 | " [1.84332635e-02 7.08341122e-01]\n", 1841 | " [8.40566605e-02 6.30798757e-01]\n", 1842 | " [9.57605615e-02 7.16690063e-01]\n", 1843 | " [4.59069788e-01 5.63535154e-01]\n", 1844 | " [2.30619729e-01 
4.18735981e-01]\n", 1845 | " [4.11641061e-01 5.36629915e-01]\n", 1846 | " [3.38374466e-01 6.69319749e-01]\n", 1847 | " [8.86892453e-02 6.04105711e-01]\n", 1848 | " [5.49484789e-01 1.51873782e-01]\n", 1849 | " [1.24987289e-01 6.41290069e-01]\n", 1850 | " [9.08953175e-02 5.09522319e-01]\n", 1851 | " [6.10249490e-02 4.89589632e-01]\n", 1852 | " [8.38953197e-01 8.91702157e-03]\n", 1853 | " [3.35946912e-04 2.48149671e-02]\n", 1854 | " [3.45254317e-02 6.04846776e-01]\n", 1855 | " [1.22320369e-01 6.81798637e-01]\n", 1856 | " [3.36710177e-02 6.93576634e-01]\n", 1857 | " [9.28178653e-02 5.79511881e-01]\n", 1858 | " [7.41548419e-01 5.25906943e-02]\n", 1859 | " [1.13643177e-01 6.32771552e-01]\n", 1860 | " [1.10372379e-01 5.29826641e-01]\n", 1861 | " [5.56508362e-01 1.85755029e-01]\n", 1862 | " [7.58250356e-01 1.14575617e-01]\n", 1863 | " [7.64246807e-02 5.49830735e-01]\n", 1864 | " [1.13285542e-01 6.15059435e-01]\n", 1865 | " [2.27489136e-02 4.76285458e-01]\n", 1866 | " [9.23382461e-01 1.48010645e-02]\n", 1867 | " [7.96139002e-01 4.50374186e-02]\n", 1868 | " [1.60566449e-01 5.30571580e-01]\n", 1869 | " [5.77757470e-02 8.24828923e-01]\n", 1870 | " [1.88226983e-01 7.08311856e-01]\n", 1871 | " [7.21279532e-02 7.24554360e-01]\n", 1872 | " [1.55912802e-01 5.92655540e-01]\n", 1873 | " [9.57738638e-01 1.16276264e-03]\n", 1874 | " [5.54833293e-01 3.50607336e-01]\n", 1875 | " [2.13276252e-01 9.20545995e-01]\n", 1876 | " [3.90536450e-02 4.76125807e-01]\n", 1877 | " [5.19536972e-01 1.52727619e-01]\n", 1878 | " [5.14298901e-02 5.60706258e-01]\n", 1879 | " [9.65917204e-03 4.51839358e-01]\n", 1880 | " [1.76396757e-01 7.21280813e-01]\n", 1881 | " [7.81166434e-01 7.52157271e-02]\n", 1882 | " [7.30869889e-01 9.94213820e-02]\n", 1883 | " [5.55938259e-02 6.73402965e-01]\n", 1884 | " [4.30722773e-01 2.65711039e-01]\n", 1885 | " [3.78956407e-01 6.11069143e-01]\n", 1886 | " [7.18913674e-02 6.62696481e-01]\n", 1887 | " [3.49075720e-02 6.05360210e-01]\n", 1888 | " [2.85496801e-01 2.59709179e-01]\n", 
1889 | " [2.35385716e-01 6.94373608e-01]\n", 1890 | " [2.07911190e-02 5.30921221e-01]\n", 1891 | " [9.55710784e-02 1.07712522e-02]\n", 1892 | " [3.61065045e-02 3.88472795e-01]\n", 1893 | " [2.25749448e-01 4.79985386e-01]\n", 1894 | " [5.28910100e-01 3.18630971e-02]\n", 1895 | " [4.80409935e-02 7.21798480e-01]\n", 1896 | " [7.91918635e-01 5.19000180e-02]\n", 1897 | " [4.98691261e-01 2.71469623e-01]\n", 1898 | " [2.80347671e-02 1.45101041e-01]\n", 1899 | " [8.27616811e-01 2.52969917e-02]\n", 1900 | " [6.72371805e-01 1.26031891e-01]\n", 1901 | " [1.92288235e-01 6.54888928e-01]\n", 1902 | " [2.15822905e-01 2.24455699e-01]\n", 1903 | " [5.35945535e-01 1.19255506e-01]\n", 1904 | " [6.75195634e-01 2.52579190e-02]\n", 1905 | " [9.35380384e-02 5.91325700e-01]\n", 1906 | " [6.36132717e-01 1.62417501e-01]\n", 1907 | " [2.02159826e-02 6.71649992e-01]\n", 1908 | " [6.77214265e-01 1.75438479e-01]\n", 1909 | " [1.21988885e-01 7.28516936e-01]\n", 1910 | " [2.45469604e-02 5.26511133e-01]\n", 1911 | " [2.12290779e-01 4.50889975e-01]\n", 1912 | " [5.53406477e-01 2.08686411e-01]\n", 1913 | " [7.70385325e-01 3.61663550e-02]\n", 1914 | " [6.86095595e-01 1.66715428e-01]\n", 1915 | " [8.50891411e-01 2.74228845e-02]\n", 1916 | " [1.58213526e-01 5.27098060e-01]\n", 1917 | " [2.23039418e-01 6.33844852e-01]\n", 1918 | " [3.96386027e-01 5.78070164e-01]\n", 1919 | " [7.69431842e-03 3.95019352e-01]\n", 1920 | " [3.33974175e-02 4.86973792e-01]\n", 1921 | " [6.41181469e-02 2.79889673e-01]\n", 1922 | " [8.26448083e-01 4.15967330e-02]\n", 1923 | " [2.83282716e-02 5.45532167e-01]\n", 1924 | " [1.49629772e-01 7.27436781e-01]\n", 1925 | " [1.01870149e-02 2.52338976e-01]\n", 1926 | " [6.90371513e-01 9.95781571e-02]\n", 1927 | " [6.29738212e-01 2.17616633e-01]\n", 1928 | " [3.75147648e-02 5.43697119e-01]\n", 1929 | " [7.79797971e-01 2.86676846e-02]\n", 1930 | " [7.10010529e-01 4.98198941e-02]\n", 1931 | " [2.43353173e-01 7.77844369e-01]\n", 1932 | " [2.85720751e-02 6.62522793e-01]\n", 1933 | " 
[1.57105438e-02 6.36715531e-01]\n", 1934 | " [4.78853673e-01 2.22064272e-01]\n", 1935 | " [8.50502133e-01 1.56207411e-02]\n", 1936 | " [9.01700020e-01 4.88460856e-03]\n", 1937 | " [1.34612456e-01 6.90749466e-01]\n", 1938 | " [3.48112062e-02 5.00106275e-01]\n", 1939 | " [1.70416590e-02 3.76978278e-01]\n", 1940 | " [2.30908077e-02 5.70067883e-01]\n", 1941 | " [3.25023048e-02 7.93150604e-01]\n", 1942 | " [3.74827772e-01 7.78844476e-01]\n", 1943 | " [7.72616923e-01 4.94829752e-02]\n", 1944 | " [8.58565569e-01 2.32815985e-02]\n", 1945 | " [3.91242266e-01 5.54798543e-01]\n", 1946 | " [7.11177230e-01 6.61827102e-02]]\n" 1947 | ] 1948 | } 1949 | ], 1950 | "source": [ 1951 | "print(Y_pred)" 1952 | ] 1953 | }, 1954 | { 1955 | "cell_type": "code", 1956 | "execution_count": 45, 1957 | "id": "38e856d1", 1958 | "metadata": {}, 1959 | "outputs": [ 1960 | { 1961 | "name": "stdout", 1962 | "output_type": "stream", 1963 | "text": [ 1964 | "[1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0]\n" 1965 | ] 1966 | } 1967 | ], 1968 | "source": [ 1969 | "# converting the prediction probability to class labels\n", 1970 | "Y_pred_labels = [np.argmax(i) for i in Y_pred]\n", 1971 | "print(Y_pred_labels)" 1972 | ] 1973 | }, 1974 | { 1975 | "cell_type": "markdown", 1976 | "id": "85ac4191", 1977 | "metadata": {}, 1978 | "source": [ 1979 | "### Predictive System" 1980 | ] 1981 | }, 1982 | { 1983 | "cell_type": "markdown", 1984 | "id": "273cd4ea", 1985 | "metadata": {}, 1986 | "source": [ 1987 | "#### taking data directly from dataset so we need to do some preprocessing" 1988 | ] 1989 | }, 1990 | { 1991 | "cell_type": "code", 1992 | "execution_count": 93, 1993 | "id": "a9bfcd06", 1994 | "metadata": {}, 1995 | 
"outputs": [], 1996 | "source": [ 1997 | "sample = data_frame.drop(\"label\", axis = 1).iloc[0].values" 1998 | ] 1999 | }, 2000 | { 2001 | "cell_type": "code", 2002 | "execution_count": 96, 2003 | "id": "6bcbab6b", 2004 | "metadata": {}, 2005 | "outputs": [ 2006 | { 2007 | "name": "stdout", 2008 | "output_type": "stream", 2009 | "text": [ 2010 | "[1.799e+01 1.038e+01 1.228e+02 1.001e+03 1.184e-01 2.776e-01 3.001e-01\n", 2011 | " 1.471e-01 2.419e-01 7.871e-02 1.095e+00 9.053e-01 8.589e+00 1.534e+02\n", 2012 | " 6.399e-03 4.904e-02 5.373e-02 1.587e-02 3.003e-02 6.193e-03 2.538e+01\n", 2013 | " 1.733e+01 1.846e+02 2.019e+03 1.622e-01 6.656e-01 7.119e-01 2.654e-01\n", 2014 | " 4.601e-01 1.189e-01]\n", 2015 | "[[ 1.09706398 -2.07333501 1.26993369 0.9843749 1.56846633 3.28351467\n", 2016 | " 2.65287398 2.53247522 2.21751501 2.25574689 2.48973393 -0.56526506\n", 2017 | " 2.83303087 2.48757756 -0.21400165 1.31686157 0.72402616 0.66081994\n", 2018 | " 1.14875667 0.90708308 1.88668963 -1.35929347 2.30360062 2.00123749\n", 2019 | " 1.30768627 2.61666502 2.10952635 2.29607613 2.75062224 1.93701461]]\n", 2020 | "1/1 [==============================] - 0s 12ms/step\n", 2021 | "[[0.7400751 0.00730012]]\n" 2022 | ] 2023 | }, 2024 | { 2025 | "name": "stderr", 2026 | "output_type": "stream", 2027 | "text": [ 2028 | "C:\\Users\\TanveerKader\\anaconda3\\lib\\site-packages\\sklearn\\base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", 2029 | " warnings.warn(\n" 2030 | ] 2031 | } 2032 | ], 2033 | "source": [ 2034 | "input_data = (sample)\n", 2035 | "print(input_data)\n", 2036 | "\n", 2037 | "# reshape the array as we are predicting for one instance\n", 2038 | "input_data_reshaped = input_data.reshape(1, -1)\n", 2039 | "\n", 2040 | "# standardize the input data as model is trained as such\n", 2041 | "std_data = scaler.transform(input_data_reshaped)\n", 2042 | "print(std_data)\n", 2043 | "\n", 2044 | "prediction = 
model.predict(std_data)\n", 2045 | "print(prediction)\n" 2046 | ] 2047 | }, 2048 | { 2049 | "cell_type": "code", 2050 | "execution_count": 97, 2051 | "id": "3cf4d2ea", 2052 | "metadata": {}, 2053 | "outputs": [ 2054 | { 2055 | "name": "stdout", 2056 | "output_type": "stream", 2057 | "text": [ 2058 | "[0]\n", 2059 | "The tumor is Malignant.\n" 2060 | ] 2061 | } 2062 | ], 2063 | "source": [ 2064 | "predictin_label = [np.argmax(prediction)]\n", 2065 | "print(predictin_label)\n", 2066 | "\n", 2067 | "if(predictin_label[0] == 0):\n", 2068 | " print(\"The tumor is Malignant.\")\n", 2069 | "else:\n", 2070 | " print(\"The tumor is Benign.\")" 2071 | ] 2072 | }, 2073 | { 2074 | "cell_type": "markdown", 2075 | "id": "ac060dd2", 2076 | "metadata": {}, 2077 | "source": [ 2078 | "##### data was standardised before split so we can use it directly after reshaping" 2079 | ] 2080 | }, 2081 | { 2082 | "cell_type": "code", 2083 | "execution_count": 98, 2084 | "id": "903ca0aa", 2085 | "metadata": {}, 2086 | "outputs": [ 2087 | { 2088 | "name": "stdout", 2089 | "output_type": "stream", 2090 | "text": [ 2091 | "[-0.05319351 -1.42408329 -0.06833592 -0.17260604 2.02392516 -0.1286992\n", 2092 | " 0.15317924 0.44445235 0.60014708 0.25125475 0.50863855 2.57300558\n", 2093 | " 0.60638318 0.09687179 0.99971423 0.3907719 0.52176832 2.71439431\n", 2094 | " -0.41073446 0.65554594 -0.3415184 -1.67683887 -0.3795077 -0.3995439\n", 2095 | " 0.30822795 -0.74978636 -0.55789673 -0.19956318 -1.19838023 -0.63261684]\n", 2096 | "[[-0.05319351 -1.42408329 -0.06833592 -0.17260604 2.02392516 -0.1286992\n", 2097 | " 0.15317924 0.44445235 0.60014708 0.25125475 0.50863855 2.57300558\n", 2098 | " 0.60638318 0.09687179 0.99971423 0.3907719 0.52176832 2.71439431\n", 2099 | " -0.41073446 0.65554594 -0.3415184 -1.67683887 -0.3795077 -0.3995439\n", 2100 | " 0.30822795 -0.74978636 -0.55789673 -0.19956318 -1.19838023 -0.63261684]]\n", 2101 | "1/1 [==============================] - 0s 12ms/step\n", 2102 | "[[0.28058025 
0.3253037 ]]\n" 2103 | ] 2104 | } 2105 | ], 2106 | "source": [ 2107 | "input_data = X_test[0]\n", 2108 | "print(input_data)\n", 2109 | "\n", 2110 | "# reshape the array as we are predicting for one instance\n", 2111 | "input_data_reshaped = input_data.reshape(1, -1)\n", 2112 | "\n", 2113 | "print(input_data_reshaped)\n", 2114 | "\n", 2115 | "prediction = model.predict(input_data_reshaped)\n", 2116 | "print(prediction)" 2117 | ] 2118 | }, 2119 | { 2120 | "cell_type": "code", 2121 | "execution_count": 99, 2122 | "id": "02232014", 2123 | "metadata": {}, 2124 | "outputs": [ 2125 | { 2126 | "name": "stdout", 2127 | "output_type": "stream", 2128 | "text": [ 2129 | "[1]\n", 2130 | "The tumor is Benign.\n" 2131 | ] 2132 | } 2133 | ], 2134 | "source": [ 2135 | "predictin_label = [np.argmax(prediction)]\n", 2136 | "print(predictin_label)\n", 2137 | "\n", 2138 | "if(predictin_label[0] == 0):\n", 2139 | " print(\"The tumor is Malignant.\")\n", 2140 | "else:\n", 2141 | " print(\"The tumor is Benign.\")" 2142 | ] 2143 | }, 2144 | { 2145 | "cell_type": "code", 2146 | "execution_count": null, 2147 | "id": "965857ca", 2148 | "metadata": {}, 2149 | "outputs": [], 2150 | "source": [] 2151 | } 2152 | ], 2153 | "metadata": { 2154 | "kernelspec": { 2155 | "display_name": "Python 3 (ipykernel)", 2156 | "language": "python", 2157 | "name": "python3" 2158 | }, 2159 | "language_info": { 2160 | "codemirror_mode": { 2161 | "name": "ipython", 2162 | "version": 3 2163 | }, 2164 | "file_extension": ".py", 2165 | "mimetype": "text/x-python", 2166 | "name": "python", 2167 | "nbconvert_exporter": "python", 2168 | "pygments_lexer": "ipython3", 2169 | "version": "3.9.13" 2170 | } 2171 | }, 2172 | "nbformat": 4, 2173 | "nbformat_minor": 5 2174 | } 2175 | --------------------------------------------------------------------------------