├── README.md ├── DSBDA_Assignment1(A).ipynb ├── DSBDA A=3 part2 (1).ipynb ├── DSBDA-Assignment3DSBDA.ipynb └── DSBDA A=1 PART2 (1) (1).ipynb /README.md: -------------------------------------------------------------------------------- 1 | # DSBDA-PRACTICALS 2 | 3 | - [Assignment 1 - A](https://github.com/vaishnaviraykar/DSBDA-PRACTICALS/blob/main/DSBDA_Assignment1(A).ipynb) 4 | 5 | - [Assignment 1 - B](https://github.com/vaishnaviraykar/DSBDA-PRACTICALS/blob/main/DSBDA%20A%3D1%20PART2%20(1)%20(1).ipynb) 6 | 7 | - [Assignment 2](https://github.com/vaishnaviraykar/DSBDA-PRACTICALS/blob/main/Asssignment2.ipynb) 8 | 9 | - [Assignment 3 - A](https://github.com/vaishnaviraykar/DSBDA-PRACTICALS/blob/main/DSBDA-Assignment3DSBDA.ipynb) 10 | 11 | - [Assignment 3 - B](https://github.com/vaishnaviraykar/DSBDA-PRACTICALS/blob/main/DSBDA%20A%3D3%20part2%20(1).ipynb) 12 | -------------------------------------------------------------------------------- /DSBDA_Assignment1(A).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "id": "S1Du7cyo_jpQ" 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "[1] #1. Import all required python libraries\n", 26 | "\n", 27 | "#import numpy as np\n", 28 | "import pandas as pd\n", 29 | "\n", 30 | "import matplotlib\n", 31 | "from matplotlib import pyplot as plt\n", 32 | "#matplotlib.style.use(ggplot)\n", 33 | "\n", 34 | "\n", 35 | "import seaborn as sns" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "source": [ 41 | "#2. 
preparing to load data into colab from local drive\n", 42 | "from google.colab import files\n", 43 | "uploaded = files.upload()" 44 | ], 45 | "metadata": { 46 | "colab": { 47 | "base_uri": "https://localhost:8080/", 48 | "height": 73 49 | }, 50 | "id": "v64Wxr8TeNX1", 51 | "outputId": "7c3b830e-e744-47f0-86ff-5ec9757ef906" 52 | }, 53 | "execution_count": 2, 54 | "outputs": [ 55 | { 56 | "output_type": "display_data", 57 | "data": { 58 | "text/plain": [ 59 | "" 60 | ], 61 | "text/html": [ 62 | "\n", 63 | " \n", 65 | " \n", 66 | " Upload widget is only available when the cell has been executed in the\n", 67 | " current browser session. Please rerun this cell to enable.\n", 68 | " \n", 69 | " " 245 | ] 246 | }, 247 | "metadata": {} 248 | }, 249 | { 250 | "output_type": "stream", 251 | "name": "stdout", 252 | "text": [ 253 | "Saving iris.data.csv to iris.data.csv\n" 254 | ] 255 | } 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "source": [ 261 | " #3.loading data into dataframe\n", 262 | "import io\n", 263 | "df = pd.read_csv(io.BytesIO(uploaded['iris.data.csv'])) #import csv file in dataframe called df\n", 264 | "#Dataset is now stored in a Pandas Dataframe\n", 265 | "\n", 266 | " \n" 267 | ], 268 | "metadata": { 269 | "id": "4-p5hfYfrXrW" 270 | }, 271 | "execution_count": 3, 272 | "outputs": [] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "source": [ 277 | " #classes = df ['species'].unique().tolist()\n", 278 | "#classes" 279 | ], 280 | "metadata": { 281 | "id": "e0e_XMdXyD8a" 282 | }, 283 | "execution_count": 4, 284 | "outputs": [] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "source": [ 289 | " #4.1 Information of the data present in dataset\n", 290 | "df.info()" 291 | ], 292 | "metadata": { 293 | "colab": { 294 | "base_uri": "https://localhost:8080/" 295 | }, 296 | "id": "lf2-KAVUsJ0W", 297 | "outputId": "4ade49ec-5463-439b-f8b2-c51cf7631ba2" 298 | }, 299 | "execution_count": 5, 300 | "outputs": [ 301 | { 302 | "output_type": "stream", 303 | 
"name": "stdout", 304 | "text": [ 305 | "\n", 306 | "RangeIndex: 149 entries, 0 to 148\n", 307 | "Data columns (total 5 columns):\n", 308 | " # Column Non-Null Count Dtype \n", 309 | "--- ------ -------------- ----- \n", 310 | " 0 5.1 149 non-null float64\n", 311 | " 1 3.5 149 non-null float64\n", 312 | " 2 1.4 149 non-null float64\n", 313 | " 3 0.2 149 non-null float64\n", 314 | " 4 Iris-setosa 149 non-null object \n", 315 | "dtypes: float64(4), object(1)\n", 316 | "memory usage: 5.9+ KB\n" 317 | ] 318 | } 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "source": [ 324 | "# 4.2 Getting the dimension/shape of the data.\n", 325 | "#Expect it to be 150 rows and 5 columns\n", 326 | "\n", 327 | "print(df.shape)" 328 | ], 329 | "metadata": { 330 | "colab": { 331 | "base_uri": "https://localhost:8080/" 332 | }, 333 | "id": "zQcdTTr7tO51", 334 | "outputId": "abe46c73-e167-488f-8fbe-a55e696125b2" 335 | }, 336 | "execution_count": 6, 337 | "outputs": [ 338 | { 339 | "output_type": "stream", 340 | "name": "stdout", 341 | "text": [ 342 | "(149, 5)\n" 343 | ] 344 | } 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "source": [ 350 | "# Print the first 10 data points -- the head of the dataset\n", 351 | "print(df.head(10))" 352 | ], 353 | "metadata": { 354 | "id": "PPFKzFXtt-B-", 355 | "colab": { 356 | "base_uri": "https://localhost:8080/" 357 | }, 358 | "outputId": "790522fb-ae7b-4eb5-c26c-49b222fc4c89" 359 | }, 360 | "execution_count": 7, 361 | "outputs": [ 362 | { 363 | "output_type": "stream", 364 | "name": "stdout", 365 | "text": [ 366 | " 5.1 3.5 1.4 0.2 Iris-setosa\n", 367 | "0 4.9 3.0 1.4 0.2 Iris-setosa\n", 368 | "1 4.7 3.2 1.3 0.2 Iris-setosa\n", 369 | "2 4.6 3.1 1.5 0.2 Iris-setosa\n", 370 | "3 5.0 3.6 1.4 0.2 Iris-setosa\n", 371 | "4 5.4 3.9 1.7 0.4 Iris-setosa\n", 372 | "5 4.6 3.4 1.4 0.3 Iris-setosa\n", 373 | "6 5.0 3.4 1.5 0.2 Iris-setosa\n", 374 | "7 4.4 2.9 1.4 0.2 Iris-setosa\n", 375 | "8 4.9 3.1 1.5 0.1 Iris-setosa\n", 376 | "9 5.4 3.7 1.5 0.2 
Iris-setosa\n" 377 | ] 378 | } 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "source": [ 384 | "#4.3 Describing the data present in the dataset\n", 385 | "#Use the describe function to describe some of the \n", 386 | "#statistical properties of the data.\n", 387 | "print(df.describe())\n", 388 | "\n", 389 | "\n", 390 | "\n", 391 | "\n", 392 | "\n", 393 | "\n", 394 | "\n" 395 | ], 396 | "metadata": { 397 | "id": "VGtPCZjKyr2R", 398 | "colab": { 399 | "base_uri": "https://localhost:8080/" 400 | }, 401 | "outputId": "a000221a-5ed6-457b-a74b-c55e316f0c8d" 402 | }, 403 | "execution_count": 8, 404 | "outputs": [ 405 | { 406 | "output_type": "stream", 407 | "name": "stdout", 408 | "text": [ 409 | " 5.1 3.5 1.4 0.2\n", 410 | "count 149.000000 149.000000 149.000000 149.000000\n", 411 | "mean 5.848322 3.051007 3.774497 1.205369\n", 412 | "std 0.828594 0.433499 1.759651 0.761292\n", 413 | "min 4.300000 2.000000 1.000000 0.100000\n", 414 | "25% 5.100000 2.800000 1.600000 0.300000\n", 415 | "50% 5.800000 3.000000 4.400000 1.300000\n", 416 | "75% 6.400000 3.300000 5.100000 1.800000\n", 417 | "max 7.900000 4.400000 6.900000 2.500000\n" 418 | ] 419 | } 420 | ] 421 | } 422 | ] 423 | } -------------------------------------------------------------------------------- /DSBDA A=3 part2 (1).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "4cf43d87", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt \n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "id": "5abb9d2c", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "df=pd.read_csv('C:/Users/coeco/Downloads/Assign_3_Mall_Customers.csv')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "id": "c536e6b9", 29 | "metadata": {}, 
30 | "outputs": [], 31 | "source": [ 32 | "df1=df\n", 33 | "df2=df1\n", 34 | "df3=df\n", 35 | "df4=df" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 4, 41 | "id": "cf5df6f0", 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/html": [ 47 | "
\n", 48 | "\n", 61 | "\n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
01MaleTeen191539
12MaleMiddle Age211581
23FemaleMiddle Age20166
34FemaleMiddle Age231677
45FemaleMiddle Age311740
.....................
195196FemaleMiddle Age3512079
196197FemaleElder4512628
197198MaleMiddle Age3212674
198199MaleMiddle Age3213718
199200MaleMiddle Age3013783
\n", 175 | "

200 rows × 6 columns

\n", 176 | "
" 177 | ], 178 | "text/plain": [ 179 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 180 | "0 1 Male Teen 19 15 39\n", 181 | "1 2 Male Middle Age 21 15 81\n", 182 | "2 3 Female Middle Age 20 16 6\n", 183 | "3 4 Female Middle Age 23 16 77\n", 184 | "4 5 Female Middle Age 31 17 40\n", 185 | ".. ... ... ... ... ... ...\n", 186 | "195 196 Female Middle Age 35 120 79\n", 187 | "196 197 Female Elder 45 126 28\n", 188 | "197 198 Male Middle Age 32 126 74\n", 189 | "198 199 Male Middle Age 32 137 18\n", 190 | "199 200 Male Middle Age 30 137 83\n", 191 | "\n", 192 | "[200 rows x 6 columns]" 193 | ] 194 | }, 195 | "execution_count": 4, 196 | "metadata": {}, 197 | "output_type": "execute_result" 198 | } 199 | ], 200 | "source": [ 201 | "df" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 5, 207 | "id": "02c6d88f", 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "name": "stdout", 212 | "output_type": "stream", 213 | "text": [ 214 | "\n", 215 | "RangeIndex: 200 entries, 0 to 199\n", 216 | "Data columns (total 6 columns):\n", 217 | " # Column Non-Null Count Dtype \n", 218 | "--- ------ -------------- ----- \n", 219 | " 0 CustomerID 200 non-null int64 \n", 220 | " 1 Gender 200 non-null object\n", 221 | " 2 Age Group 200 non-null object\n", 222 | " 3 Age 200 non-null int64 \n", 223 | " 4 Salary 200 non-null int64 \n", 224 | " 5 Spending Score (1-100) 200 non-null int64 \n", 225 | "dtypes: int64(4), object(2)\n", 226 | "memory usage: 9.5+ KB\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "df.info()" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 6, 237 | "id": "1dd78e6a", 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "text/plain": [ 243 | "(200, 6)" 244 | ] 245 | }, 246 | "execution_count": 6, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "df.shape" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | 
"execution_count": 7, 258 | "id": "c1d4e462", 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/html": [ 264 | "
\n", 265 | "\n", 278 | "\n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
01MaleTeen191539
12MaleMiddle Age211581
23FemaleMiddle Age20166
34FemaleMiddle Age231677
45FemaleMiddle Age311740
\n", 338 | "
" 339 | ], 340 | "text/plain": [ 341 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 342 | "0 1 Male Teen 19 15 39\n", 343 | "1 2 Male Middle Age 21 15 81\n", 344 | "2 3 Female Middle Age 20 16 6\n", 345 | "3 4 Female Middle Age 23 16 77\n", 346 | "4 5 Female Middle Age 31 17 40" 347 | ] 348 | }, 349 | "execution_count": 7, 350 | "metadata": {}, 351 | "output_type": "execute_result" 352 | } 353 | ], 354 | "source": [ 355 | "df.head()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 8, 361 | "id": "0fe2e323", 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/html": [ 367 | "
\n", 368 | "\n", 381 | "\n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
195196FemaleMiddle Age3512079
196197FemaleElder4512628
197198MaleMiddle Age3212674
198199MaleMiddle Age3213718
199200MaleMiddle Age3013783
\n", 441 | "
" 442 | ], 443 | "text/plain": [ 444 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 445 | "195 196 Female Middle Age 35 120 79\n", 446 | "196 197 Female Elder 45 126 28\n", 447 | "197 198 Male Middle Age 32 126 74\n", 448 | "198 199 Male Middle Age 32 137 18\n", 449 | "199 200 Male Middle Age 30 137 83" 450 | ] 451 | }, 452 | "execution_count": 8, 453 | "metadata": {}, 454 | "output_type": "execute_result" 455 | } 456 | ], 457 | "source": [ 458 | "df.tail()" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 9, 464 | "id": "d1080869", 465 | "metadata": {}, 466 | "outputs": [ 467 | { 468 | "data": { 469 | "text/html": [ 470 | "
\n", 471 | "\n", 484 | "\n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | "
CustomerIDAgeSalarySpending Score (1-100)
count200.000000200.000000200.000000200.000000
mean100.50000038.85000060.56000050.200000
std57.87918513.96900726.26472125.823522
min1.00000018.00000015.0000001.000000
25%50.75000028.75000041.50000034.750000
50%100.50000036.00000061.50000050.000000
75%150.25000049.00000078.00000073.000000
max200.00000070.000000137.00000099.000000
\n", 553 | "
" 554 | ], 555 | "text/plain": [ 556 | " CustomerID Age Salary Spending Score (1-100)\n", 557 | "count 200.000000 200.000000 200.000000 200.000000\n", 558 | "mean 100.500000 38.850000 60.560000 50.200000\n", 559 | "std 57.879185 13.969007 26.264721 25.823522\n", 560 | "min 1.000000 18.000000 15.000000 1.000000\n", 561 | "25% 50.750000 28.750000 41.500000 34.750000\n", 562 | "50% 100.500000 36.000000 61.500000 50.000000\n", 563 | "75% 150.250000 49.000000 78.000000 73.000000\n", 564 | "max 200.000000 70.000000 137.000000 99.000000" 565 | ] 566 | }, 567 | "execution_count": 9, 568 | "metadata": {}, 569 | "output_type": "execute_result" 570 | } 571 | ], 572 | "source": [ 573 | "df.describe()" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 10, 579 | "id": "dcd52d13", 580 | "metadata": {}, 581 | "outputs": [], 582 | "source": [ 583 | "import pandas as pd\n", 584 | "import numpy as np\n", 585 | "import requests\n", 586 | "df=pd.read_csv('C:/Users/coeco/Downloads/Assign_3_Mall_Customers.csv')" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 11, 592 | "id": "b0d17e5e", 593 | "metadata": {}, 594 | "outputs": [ 595 | { 596 | "data": { 597 | "text/plain": [ 598 | "CustomerID 100.50\n", 599 | "Salary 60.56\n", 600 | "dtype: float64" 601 | ] 602 | }, 603 | "execution_count": 11, 604 | "metadata": {}, 605 | "output_type": "execute_result" 606 | } 607 | ], 608 | "source": [ 609 | "df[['CustomerID','Salary']].mean()" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": 12, 615 | "id": "e90b6adc", 616 | "metadata": {}, 617 | "outputs": [ 618 | { 619 | "data": { 620 | "text/plain": [ 621 | "CustomerID 1\n", 622 | "Gender Female\n", 623 | "Age Group Elder\n", 624 | "Age 18\n", 625 | "Salary 15\n", 626 | "Spending Score (1-100) 1\n", 627 | "dtype: object" 628 | ] 629 | }, 630 | "execution_count": 12, 631 | "metadata": {}, 632 | "output_type": "execute_result" 633 | } 634 | ], 635 | "source": [ 636 | 
"df[['CustomerID','Gender','Age Group','Age','Salary','Spending Score (1-100)']].min()" 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": 13, 642 | "id": "130b0a2d", 643 | "metadata": {}, 644 | "outputs": [ 645 | { 646 | "data": { 647 | "text/plain": [ 648 | "CustomerID 100.5\n", 649 | "Spending Score (1-100) 50.0\n", 650 | "dtype: float64" 651 | ] 652 | }, 653 | "execution_count": 13, 654 | "metadata": {}, 655 | "output_type": "execute_result" 656 | } 657 | ], 658 | "source": [ 659 | "df[['CustomerID','Spending Score (1-100)']].median()" 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": 14, 665 | "id": "0a4c0f0e", 666 | "metadata": {}, 667 | "outputs": [ 668 | { 669 | "data": { 670 | "text/plain": [ 671 | "CustomerID 200\n", 672 | "Gender Male\n", 673 | "Age Group Teen\n", 674 | "Age 70\n", 675 | "Salary 137\n", 676 | "Spending Score (1-100) 99\n", 677 | "dtype: object" 678 | ] 679 | }, 680 | "execution_count": 14, 681 | "metadata": {}, 682 | "output_type": "execute_result" 683 | } 684 | ], 685 | "source": [ 686 | "df[['CustomerID','Gender','Age Group','Age','Salary','Spending Score (1-100)']].max()" 687 | ] 688 | }, 689 | { 690 | "cell_type": "code", 691 | "execution_count": 15, 692 | "id": "6f3a27dc", 693 | "metadata": {}, 694 | "outputs": [ 695 | { 696 | "data": { 697 | "text/plain": [ 698 | "Salary 26.264721\n", 699 | "Spending Score (1-100) 25.823522\n", 700 | "dtype: float64" 701 | ] 702 | }, 703 | "execution_count": 15, 704 | "metadata": {}, 705 | "output_type": "execute_result" 706 | } 707 | ], 708 | "source": [ 709 | "df[['Salary','Spending Score (1-100)']].std()" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": 16, 715 | "id": "5533ecbf", 716 | "metadata": {}, 717 | "outputs": [], 718 | "source": [ 719 | "df2=df.groupby('Gender')" 720 | ] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "execution_count": 17, 725 | "id": "43f3c88e", 726 | "metadata": {}, 727 | "outputs": [ 
728 | { 729 | "data": { 730 | "text/plain": [ 731 | "" 732 | ] 733 | }, 734 | "execution_count": 17, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "df2" 741 | ] 742 | }, 743 | { 744 | "cell_type": "code", 745 | "execution_count": 18, 746 | "id": "f915ab77", 747 | "metadata": {}, 748 | "outputs": [ 749 | { 750 | "name": "stdout", 751 | "output_type": "stream", 752 | "text": [ 753 | "Female\n", 754 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 755 | "2 3 Female Middle Age 20 16 6\n", 756 | "3 4 Female Middle Age 23 16 77\n", 757 | "4 5 Female Middle Age 31 17 40\n", 758 | "5 6 Female Middle Age 22 17 76\n", 759 | "6 7 Female Middle Age 35 18 6\n", 760 | ".. ... ... ... ... ... ...\n", 761 | "191 192 Female Middle Age 32 103 69\n", 762 | "193 194 Female Middle Age 38 113 91\n", 763 | "194 195 Female Elder 47 120 16\n", 764 | "195 196 Female Middle Age 35 120 79\n", 765 | "196 197 Female Elder 45 126 28\n", 766 | "\n", 767 | "[112 rows x 6 columns]\n", 768 | "Male\n", 769 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 770 | "0 1 Male Teen 19 15 39\n", 771 | "1 2 Male Middle Age 21 15 81\n", 772 | "8 9 Male Elder 64 19 3\n", 773 | "10 11 Male Elder 67 19 14\n", 774 | "14 15 Male Middle Age 37 20 13\n", 775 | ".. ... ... ... ... ... ...\n", 776 | "187 188 Male Middle Age 28 101 68\n", 777 | "192 193 Male Middle Age 33 113 8\n", 778 | "197 198 Male Middle Age 32 126 74\n", 779 | "198 199 Male Middle Age 32 137 18\n", 780 | "199 200 Male Middle Age 30 137 83\n", 781 | "\n", 782 | "[88 rows x 6 columns]\n" 783 | ] 784 | } 785 | ], 786 | "source": [ 787 | "for Gender,Gender_f in df2:\n", 788 | " print(Gender)\n", 789 | " print(Gender_f)" 790 | ] 791 | }, 792 | { 793 | "cell_type": "code", 794 | "execution_count": 19, 795 | "id": "62be058c", 796 | "metadata": {}, 797 | "outputs": [ 798 | { 799 | "data": { 800 | "text/html": [ 801 | "
\n", 802 | "\n", 815 | "\n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
23FemaleMiddle Age20166
34FemaleMiddle Age231677
45FemaleMiddle Age311740
56FemaleMiddle Age221776
67FemaleMiddle Age35186
.....................
191192FemaleMiddle Age3210369
193194FemaleMiddle Age3811391
194195FemaleElder4712016
195196FemaleMiddle Age3512079
196197FemaleElder4512628
\n", 929 | "

112 rows × 6 columns

\n", 930 | "
" 931 | ], 932 | "text/plain": [ 933 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 934 | "2 3 Female Middle Age 20 16 6\n", 935 | "3 4 Female Middle Age 23 16 77\n", 936 | "4 5 Female Middle Age 31 17 40\n", 937 | "5 6 Female Middle Age 22 17 76\n", 938 | "6 7 Female Middle Age 35 18 6\n", 939 | ".. ... ... ... ... ... ...\n", 940 | "191 192 Female Middle Age 32 103 69\n", 941 | "193 194 Female Middle Age 38 113 91\n", 942 | "194 195 Female Elder 47 120 16\n", 943 | "195 196 Female Middle Age 35 120 79\n", 944 | "196 197 Female Elder 45 126 28\n", 945 | "\n", 946 | "[112 rows x 6 columns]" 947 | ] 948 | }, 949 | "execution_count": 19, 950 | "metadata": {}, 951 | "output_type": "execute_result" 952 | } 953 | ], 954 | "source": [ 955 | "df2.get_group('Female')" 956 | ] 957 | }, 958 | { 959 | "cell_type": "code", 960 | "execution_count": 20, 961 | "id": "6e269f8d", 962 | "metadata": {}, 963 | "outputs": [ 964 | { 965 | "data": { 966 | "text/html": [ 967 | "
\n", 968 | "\n", 981 | "\n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
01MaleTeen191539
12MaleMiddle Age211581
89MaleElder64193
1011MaleElder671914
1415MaleMiddle Age372013
.....................
187188MaleMiddle Age2810168
192193MaleMiddle Age331138
197198MaleMiddle Age3212674
198199MaleMiddle Age3213718
199200MaleMiddle Age3013783
\n", 1095 | "

88 rows × 6 columns

\n", 1096 | "
" 1097 | ], 1098 | "text/plain": [ 1099 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 1100 | "0 1 Male Teen 19 15 39\n", 1101 | "1 2 Male Middle Age 21 15 81\n", 1102 | "8 9 Male Elder 64 19 3\n", 1103 | "10 11 Male Elder 67 19 14\n", 1104 | "14 15 Male Middle Age 37 20 13\n", 1105 | ".. ... ... ... ... ... ...\n", 1106 | "187 188 Male Middle Age 28 101 68\n", 1107 | "192 193 Male Middle Age 33 113 8\n", 1108 | "197 198 Male Middle Age 32 126 74\n", 1109 | "198 199 Male Middle Age 32 137 18\n", 1110 | "199 200 Male Middle Age 30 137 83\n", 1111 | "\n", 1112 | "[88 rows x 6 columns]" 1113 | ] 1114 | }, 1115 | "execution_count": 20, 1116 | "metadata": {}, 1117 | "output_type": "execute_result" 1118 | } 1119 | ], 1120 | "source": [ 1121 | "df2.get_group('Male')" 1122 | ] 1123 | }, 1124 | { 1125 | "cell_type": "code", 1126 | "execution_count": 21, 1127 | "id": "5edd89ba", 1128 | "metadata": {}, 1129 | "outputs": [ 1130 | { 1131 | "data": { 1132 | "text/html": [ 1133 | "
\n", 1134 | "\n", 1147 | "\n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
Gender
Female197FemaleTeen6812699
Male200MaleTeen7013797
\n", 1189 | "
" 1190 | ], 1191 | "text/plain": [ 1192 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 1193 | "Gender \n", 1194 | "Female 197 Female Teen 68 126 99\n", 1195 | "Male 200 Male Teen 70 137 97" 1196 | ] 1197 | }, 1198 | "execution_count": 21, 1199 | "metadata": {}, 1200 | "output_type": "execute_result" 1201 | } 1202 | ], 1203 | "source": [ 1204 | "df2[['CustomerID','Gender','Age Group','Age','Salary','Spending Score (1-100)']].max()" 1205 | ] 1206 | }, 1207 | { 1208 | "cell_type": "code", 1209 | "execution_count": 22, 1210 | "id": "64af1317", 1211 | "metadata": {}, 1212 | "outputs": [ 1213 | { 1214 | "data": { 1215 | "text/html": [ 1216 | "
\n", 1217 | "\n", 1230 | "\n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
Gender
Female3FemaleElder18165
Male1MaleElder18151
\n", 1272 | "
" 1273 | ], 1274 | "text/plain": [ 1275 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 1276 | "Gender \n", 1277 | "Female 3 Female Elder 18 16 5\n", 1278 | "Male 1 Male Elder 18 15 1" 1279 | ] 1280 | }, 1281 | "execution_count": 22, 1282 | "metadata": {}, 1283 | "output_type": "execute_result" 1284 | } 1285 | ], 1286 | "source": [ 1287 | "df2[['CustomerID','Gender','Age Group','Age','Salary','Spending Score (1-100)']].min()" 1288 | ] 1289 | }, 1290 | { 1291 | "cell_type": "code", 1292 | "execution_count": 23, 1293 | "id": "2768daf5", 1294 | "metadata": {}, 1295 | "outputs": [ 1296 | { 1297 | "data": { 1298 | "text/html": [ 1299 | "
\n", 1300 | "\n", 1313 | "\n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | "
CustomerIDAgeSalarySpending Score (1-100)
Gender
Female97.56250038.09821459.25000051.526786
Male104.23863639.80681862.22727348.511364
\n", 1347 | "
" 1348 | ], 1349 | "text/plain": [ 1350 | " CustomerID Age Salary Spending Score (1-100)\n", 1351 | "Gender \n", 1352 | "Female 97.562500 38.098214 59.250000 51.526786\n", 1353 | "Male 104.238636 39.806818 62.227273 48.511364" 1354 | ] 1355 | }, 1356 | "execution_count": 23, 1357 | "metadata": {}, 1358 | "output_type": "execute_result" 1359 | } 1360 | ], 1361 | "source": [ 1362 | "df2[['CustomerID','Gender','Age Group','Age','Salary','Spending Score (1-100)']].mean()" 1363 | ] 1364 | }, 1365 | { 1366 | "cell_type": "code", 1367 | "execution_count": 24, 1368 | "id": "6dd4b774", 1369 | "metadata": {}, 1370 | "outputs": [ 1371 | { 1372 | "data": { 1373 | "text/html": [ 1374 | "
\n", 1375 | "\n", 1388 | "\n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | "
CustomerIDAgeSalarySpending Score (1-100)
Gender
Female58.27641212.64409526.01195224.11495
Male57.48383015.51481226.63837327.89677
\n", 1422 | "
" 1423 | ], 1424 | "text/plain": [ 1425 | " CustomerID Age Salary Spending Score (1-100)\n", 1426 | "Gender \n", 1427 | "Female 58.276412 12.644095 26.011952 24.11495\n", 1428 | "Male 57.483830 15.514812 26.638373 27.89677" 1429 | ] 1430 | }, 1431 | "execution_count": 24, 1432 | "metadata": {}, 1433 | "output_type": "execute_result" 1434 | } 1435 | ], 1436 | "source": [ 1437 | "df2[['CustomerID','Gender','Age Group','Age','Salary','Spending Score (1-100)']].std()" 1438 | ] 1439 | }, 1440 | { 1441 | "cell_type": "code", 1442 | "execution_count": 25, 1443 | "id": "0665061d", 1444 | "metadata": {}, 1445 | "outputs": [], 1446 | "source": [ 1447 | "a=np.percentile(df3['Age'],50)" 1448 | ] 1449 | }, 1450 | { 1451 | "cell_type": "code", 1452 | "execution_count": 26, 1453 | "id": "4656b1a0", 1454 | "metadata": {}, 1455 | "outputs": [ 1456 | { 1457 | "data": { 1458 | "text/plain": [ 1459 | "36.0" 1460 | ] 1461 | }, 1462 | "execution_count": 26, 1463 | "metadata": {}, 1464 | "output_type": "execute_result" 1465 | } 1466 | ], 1467 | "source": [ 1468 | "a" 1469 | ] 1470 | }, 1471 | { 1472 | "cell_type": "code", 1473 | "execution_count": 27, 1474 | "id": "724e267c", 1475 | "metadata": {}, 1476 | "outputs": [], 1477 | "source": [ 1478 | "b=np.percentile(df3['Age'],100)" 1479 | ] 1480 | }, 1481 | { 1482 | "cell_type": "code", 1483 | "execution_count": 28, 1484 | "id": "bdb3d4a7", 1485 | "metadata": {}, 1486 | "outputs": [ 1487 | { 1488 | "data": { 1489 | "text/plain": [ 1490 | "70.0" 1491 | ] 1492 | }, 1493 | "execution_count": 28, 1494 | "metadata": {}, 1495 | "output_type": "execute_result" 1496 | } 1497 | ], 1498 | "source": [ 1499 | "b" 1500 | ] 1501 | }, 1502 | { 1503 | "cell_type": "code", 1504 | "execution_count": 29, 1505 | "id": "20b12e3e", 1506 | "metadata": {}, 1507 | "outputs": [], 1508 | "source": [ 1509 | "a=np.percentile(df3['Salary'],50)" 1510 | ] 1511 | }, 1512 | { 1513 | "cell_type": "code", 1514 | "execution_count": 30, 1515 | "id": "a3236481", 1516 | 
"metadata": {}, 1517 | "outputs": [ 1518 | { 1519 | "data": { 1520 | "text/plain": [ 1521 | "61.5" 1522 | ] 1523 | }, 1524 | "execution_count": 30, 1525 | "metadata": {}, 1526 | "output_type": "execute_result" 1527 | } 1528 | ], 1529 | "source": [ 1530 | "a" 1531 | ] 1532 | }, 1533 | { 1534 | "cell_type": "code", 1535 | "execution_count": 31, 1536 | "id": "049dbcf6", 1537 | "metadata": {}, 1538 | "outputs": [], 1539 | "source": [ 1540 | "b=np.percentile(df3['Salary'],60)" 1541 | ] 1542 | }, 1543 | { 1544 | "cell_type": "code", 1545 | "execution_count": 32, 1546 | "id": "ac441aab", 1547 | "metadata": {}, 1548 | "outputs": [ 1549 | { 1550 | "data": { 1551 | "text/plain": [ 1552 | "67.0" 1553 | ] 1554 | }, 1555 | "execution_count": 32, 1556 | "metadata": {}, 1557 | "output_type": "execute_result" 1558 | } 1559 | ], 1560 | "source": [ 1561 | "b" 1562 | ] 1563 | }, 1564 | { 1565 | "cell_type": "code", 1566 | "execution_count": 33, 1567 | "id": "b1586c26", 1568 | "metadata": {}, 1569 | "outputs": [], 1570 | "source": [ 1571 | "a=np.percentile(df3['CustomerID'],50)" 1572 | ] 1573 | }, 1574 | { 1575 | "cell_type": "code", 1576 | "execution_count": 34, 1577 | "id": "ce7e4eec", 1578 | "metadata": {}, 1579 | "outputs": [ 1580 | { 1581 | "data": { 1582 | "text/plain": [ 1583 | "100.5" 1584 | ] 1585 | }, 1586 | "execution_count": 34, 1587 | "metadata": {}, 1588 | "output_type": "execute_result" 1589 | } 1590 | ], 1591 | "source": [ 1592 | "a" 1593 | ] 1594 | }, 1595 | { 1596 | "cell_type": "code", 1597 | "execution_count": 35, 1598 | "id": "c420aa15", 1599 | "metadata": {}, 1600 | "outputs": [], 1601 | "source": [ 1602 | "a=np.percentile(df3['CustomerID'],80)" 1603 | ] 1604 | }, 1605 | { 1606 | "cell_type": "code", 1607 | "execution_count": 36, 1608 | "id": "dc96fdf1", 1609 | "metadata": {}, 1610 | "outputs": [ 1611 | { 1612 | "data": { 1613 | "text/plain": [ 1614 | "160.20000000000002" 1615 | ] 1616 | }, 1617 | "execution_count": 36, 1618 | "metadata": {}, 1619 | 
"output_type": "execute_result" 1620 | } 1621 | ], 1622 | "source": [ 1623 | "a" 1624 | ] 1625 | }, 1626 | { 1627 | "cell_type": "code", 1628 | "execution_count": 37, 1629 | "id": "2d88404c", 1630 | "metadata": {}, 1631 | "outputs": [], 1632 | "source": [ 1633 | "a=np.percentile(df3['Spending Score (1-100)'],50)" 1634 | ] 1635 | }, 1636 | { 1637 | "cell_type": "code", 1638 | "execution_count": 38, 1639 | "id": "d4c15884", 1640 | "metadata": {}, 1641 | "outputs": [ 1642 | { 1643 | "data": { 1644 | "text/plain": [ 1645 | "50.0" 1646 | ] 1647 | }, 1648 | "execution_count": 38, 1649 | "metadata": {}, 1650 | "output_type": "execute_result" 1651 | } 1652 | ], 1653 | "source": [ 1654 | "a" 1655 | ] 1656 | }, 1657 | { 1658 | "cell_type": "code", 1659 | "execution_count": 39, 1660 | "id": "37df2d34", 1661 | "metadata": {}, 1662 | "outputs": [], 1663 | "source": [ 1664 | "a=np.percentile(df3['Spending Score (1-100)'],40)" 1665 | ] 1666 | }, 1667 | { 1668 | "cell_type": "code", 1669 | "execution_count": 40, 1670 | "id": "f378a9f4", 1671 | "metadata": {}, 1672 | "outputs": [ 1673 | { 1674 | "data": { 1675 | "text/plain": [ 1676 | "45.60000000000001" 1677 | ] 1678 | }, 1679 | "execution_count": 40, 1680 | "metadata": {}, 1681 | "output_type": "execute_result" 1682 | } 1683 | ], 1684 | "source": [ 1685 | "a" 1686 | ] 1687 | }, 1688 | { 1689 | "cell_type": "code", 1690 | "execution_count": null, 1691 | "id": "f6bcae27", 1692 | "metadata": {}, 1693 | "outputs": [], 1694 | "source": [] 1695 | } 1696 | ], 1697 | "metadata": { 1698 | "kernelspec": { 1699 | "display_name": "Python 3 (ipykernel)", 1700 | "language": "python", 1701 | "name": "python3" 1702 | }, 1703 | "language_info": { 1704 | "codemirror_mode": { 1705 | "name": "ipython", 1706 | "version": 3 1707 | }, 1708 | "file_extension": ".py", 1709 | "mimetype": "text/x-python", 1710 | "name": "python", 1711 | "nbconvert_exporter": "python", 1712 | "pygments_lexer": "ipython3", 1713 | "version": "3.9.13" 1714 | } 1715 | }, 1716 | 
"nbformat": 4, 1717 | "nbformat_minor": 5 1718 | } 1719 | -------------------------------------------------------------------------------- /DSBDA-Assignment3DSBDA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "a5132be9", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "id": "8d2ecc01", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "df=pd.read_csv('C:/Users/coeco/Downloads/Assign_3_Mall_Customers.csv')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "id": "2d5e22b6", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "df1=df\n", 33 | "df2=df1\n", 34 | "df3=df\n", 35 | "df4=df" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 4, 41 | "id": "0a8c9753", 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/html": [ 47 | "
\n", 48 | "\n", 61 | "\n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
01MaleTeen191539
12MaleMiddle Age211581
23FemaleMiddle Age20166
34FemaleMiddle Age231677
45FemaleMiddle Age311740
.....................
195196FemaleMiddle Age3512079
196197FemaleElder4512628
197198MaleMiddle Age3212674
198199MaleMiddle Age3213718
199200MaleMiddle Age3013783
\n", 175 | "

200 rows × 6 columns

\n", 176 | "
" 177 | ], 178 | "text/plain": [ 179 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 180 | "0 1 Male Teen 19 15 39\n", 181 | "1 2 Male Middle Age 21 15 81\n", 182 | "2 3 Female Middle Age 20 16 6\n", 183 | "3 4 Female Middle Age 23 16 77\n", 184 | "4 5 Female Middle Age 31 17 40\n", 185 | ".. ... ... ... ... ... ...\n", 186 | "195 196 Female Middle Age 35 120 79\n", 187 | "196 197 Female Elder 45 126 28\n", 188 | "197 198 Male Middle Age 32 126 74\n", 189 | "198 199 Male Middle Age 32 137 18\n", 190 | "199 200 Male Middle Age 30 137 83\n", 191 | "\n", 192 | "[200 rows x 6 columns]" 193 | ] 194 | }, 195 | "execution_count": 4, 196 | "metadata": {}, 197 | "output_type": "execute_result" 198 | } 199 | ], 200 | "source": [ 201 | "df" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 5, 207 | "id": "ac5610db", 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/plain": [ 213 | "Index(['CustomerID', 'Gender', 'Age Group', 'Age', 'Salary',\n", 214 | " 'Spending Score (1-100)'],\n", 215 | " dtype='object')" 216 | ] 217 | }, 218 | "execution_count": 5, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "df.columns" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 6, 230 | "id": "d2311b86", 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "name": "stdout", 235 | "output_type": "stream", 236 | "text": [ 237 | "60.56\n" 238 | ] 239 | } 240 | ], 241 | "source": [ 242 | "print(df['Salary'].mean())" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 7, 248 | "id": "43e22a92", 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "0 54\n", 256 | "1 78\n", 257 | "Name: Salary, dtype: int64\n" 258 | ] 259 | } 260 | ], 261 | "source": [ 262 | "print(df['Salary'].mode())" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | 
"execution_count": 8, 268 | "id": "8ec3d0ab", 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "61.5\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "print(df['Salary'].median())" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 9, 286 | "id": "b3371013", 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "name": "stdout", 291 | "output_type": "stream", 292 | "text": [ 293 | "100.5\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "print(df['CustomerID'].mean())" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 10, 304 | "id": "8c7cf63c", 305 | "metadata": {}, 306 | "outputs": [ 307 | { 308 | "name": "stdout", 309 | "output_type": "stream", 310 | "text": [ 311 | "0 1\n", 312 | "1 2\n", 313 | "2 3\n", 314 | "3 4\n", 315 | "4 5\n", 316 | " ... \n", 317 | "195 196\n", 318 | "196 197\n", 319 | "197 198\n", 320 | "198 199\n", 321 | "199 200\n", 322 | "Name: CustomerID, Length: 200, dtype: int64\n" 323 | ] 324 | } 325 | ], 326 | "source": [ 327 | "print(df['CustomerID'].mode())" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 11, 333 | "id": "04f51439", 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "name": "stdout", 338 | "output_type": "stream", 339 | "text": [ 340 | "100.5\n" 341 | ] 342 | } 343 | ], 344 | "source": [ 345 | "print(df['CustomerID'].median())" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 12, 351 | "id": "4cfacd4d", 352 | "metadata": {}, 353 | "outputs": [ 354 | { 355 | "name": "stdout", 356 | "output_type": "stream", 357 | "text": [ 358 | "0 Female\n", 359 | "Name: Gender, dtype: object\n" 360 | ] 361 | } 362 | ], 363 | "source": [ 364 | "print(df['Gender'].mode())" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 13, 370 | "id": "10975ab6", 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "name": "stdout", 375 
| "output_type": "stream", 376 | "text": [ 377 | "0 Middle Age\n", 378 | "Name: Age Group, dtype: object\n" 379 | ] 380 | } 381 | ], 382 | "source": [ 383 | "print(df['Age Group'].mode())" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 14, 389 | "id": "4a2fb1fb", 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "38.85\n" 397 | ] 398 | } 399 | ], 400 | "source": [ 401 | "print(df['Age'].mean())" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 15, 407 | "id": "eaf2e715", 408 | "metadata": {}, 409 | "outputs": [ 410 | { 411 | "name": "stdout", 412 | "output_type": "stream", 413 | "text": [ 414 | "0 32\n", 415 | "Name: Age, dtype: int64\n" 416 | ] 417 | } 418 | ], 419 | "source": [ 420 | "print(df['Age'].mode())" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 16, 426 | "id": "87536fb0", 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "name": "stdout", 431 | "output_type": "stream", 432 | "text": [ 433 | "36.0\n" 434 | ] 435 | } 436 | ], 437 | "source": [ 438 | "print(df['Age'].median())" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": 17, 444 | "id": "78c21e09", 445 | "metadata": {}, 446 | "outputs": [ 447 | { 448 | "name": "stdout", 449 | "output_type": "stream", 450 | "text": [ 451 | "50.2\n" 452 | ] 453 | } 454 | ], 455 | "source": [ 456 | "print(df['Spending Score (1-100)'].mean())" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 18, 462 | "id": "ed836d66", 463 | "metadata": {}, 464 | "outputs": [ 465 | { 466 | "name": "stdout", 467 | "output_type": "stream", 468 | "text": [ 469 | "0 42\n", 470 | "Name: Spending Score (1-100), dtype: int64\n" 471 | ] 472 | } 473 | ], 474 | "source": [ 475 | "print(df['Spending Score (1-100)'].mode())" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 19, 481 | "id": "792372c1", 
482 | "metadata": {}, 483 | "outputs": [ 484 | { 485 | "name": "stdout", 486 | "output_type": "stream", 487 | "text": [ 488 | "50.0\n" 489 | ] 490 | } 491 | ], 492 | "source": [ 493 | "print(df['Spending Score (1-100)'].median())" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 20, 499 | "id": "2e6f8867", 500 | "metadata": {}, 501 | "outputs": [ 502 | { 503 | "data": { 504 | "text/plain": [ 505 | "0 1\n", 506 | "1 2\n", 507 | "2 3\n", 508 | "3 4\n", 509 | "4 5\n", 510 | " ... \n", 511 | "195 196\n", 512 | "196 197\n", 513 | "197 198\n", 514 | "198 199\n", 515 | "199 200\n", 516 | "Name: CustomerID, Length: 200, dtype: int64" 517 | ] 518 | }, 519 | "execution_count": 20, 520 | "metadata": {}, 521 | "output_type": "execute_result" 522 | } 523 | ], 524 | "source": [ 525 | "df['CustomerID']" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 21, 531 | "id": "a3c61e3e", 532 | "metadata": {}, 533 | "outputs": [ 534 | { 535 | "data": { 536 | "text/plain": [ 537 | "0 Male\n", 538 | "1 Male\n", 539 | "2 Female\n", 540 | "3 Female\n", 541 | "4 Female\n", 542 | " ... \n", 543 | "195 Female\n", 544 | "196 Female\n", 545 | "197 Male\n", 546 | "198 Male\n", 547 | "199 Male\n", 548 | "Name: Gender, Length: 200, dtype: object" 549 | ] 550 | }, 551 | "execution_count": 21, 552 | "metadata": {}, 553 | "output_type": "execute_result" 554 | } 555 | ], 556 | "source": [ 557 | "df['Gender']" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 22, 563 | "id": "997bb2bf", 564 | "metadata": {}, 565 | "outputs": [ 566 | { 567 | "data": { 568 | "text/plain": [ 569 | "0 Teen\n", 570 | "1 Middle Age\n", 571 | "2 Middle Age\n", 572 | "3 Middle Age\n", 573 | "4 Middle Age\n", 574 | " ... 
\n", 575 | "195 Middle Age\n", 576 | "196 Elder\n", 577 | "197 Middle Age\n", 578 | "198 Middle Age\n", 579 | "199 Middle Age\n", 580 | "Name: Age Group, Length: 200, dtype: object" 581 | ] 582 | }, 583 | "execution_count": 22, 584 | "metadata": {}, 585 | "output_type": "execute_result" 586 | } 587 | ], 588 | "source": [ 589 | "df['Age Group']" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": 23, 595 | "id": "2717f589", 596 | "metadata": {}, 597 | "outputs": [ 598 | { 599 | "data": { 600 | "text/plain": [ 601 | "0 19\n", 602 | "1 21\n", 603 | "2 20\n", 604 | "3 23\n", 605 | "4 31\n", 606 | " ..\n", 607 | "195 35\n", 608 | "196 45\n", 609 | "197 32\n", 610 | "198 32\n", 611 | "199 30\n", 612 | "Name: Age, Length: 200, dtype: int64" 613 | ] 614 | }, 615 | "execution_count": 23, 616 | "metadata": {}, 617 | "output_type": "execute_result" 618 | } 619 | ], 620 | "source": [ 621 | "df['Age']" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 24, 627 | "id": "7d2713b9", 628 | "metadata": {}, 629 | "outputs": [ 630 | { 631 | "data": { 632 | "text/plain": [ 633 | "0 15\n", 634 | "1 15\n", 635 | "2 16\n", 636 | "3 16\n", 637 | "4 17\n", 638 | " ... 
\n", 639 | "195 120\n", 640 | "196 126\n", 641 | "197 126\n", 642 | "198 137\n", 643 | "199 137\n", 644 | "Name: Salary, Length: 200, dtype: int64" 645 | ] 646 | }, 647 | "execution_count": 24, 648 | "metadata": {}, 649 | "output_type": "execute_result" 650 | } 651 | ], 652 | "source": [ 653 | "df['Salary']" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 25, 659 | "id": "ca503190", 660 | "metadata": {}, 661 | "outputs": [ 662 | { 663 | "data": { 664 | "text/plain": [ 665 | "0 39\n", 666 | "1 81\n", 667 | "2 6\n", 668 | "3 77\n", 669 | "4 40\n", 670 | " ..\n", 671 | "195 79\n", 672 | "196 28\n", 673 | "197 74\n", 674 | "198 18\n", 675 | "199 83\n", 676 | "Name: Spending Score (1-100), Length: 200, dtype: int64" 677 | ] 678 | }, 679 | "execution_count": 25, 680 | "metadata": {}, 681 | "output_type": "execute_result" 682 | } 683 | ], 684 | "source": [ 685 | "df['Spending Score (1-100)']" 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": 26, 691 | "id": "cf5ed0bc", 692 | "metadata": {}, 693 | "outputs": [ 694 | { 695 | "data": { 696 | "text/html": [ 697 | "
\n", 698 | "\n", 711 | "\n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
0FalseFalseFalseFalseFalseFalse
1FalseFalseFalseFalseFalseFalse
2FalseFalseFalseFalseFalseFalse
3FalseFalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalseFalse
.....................
195FalseFalseFalseFalseFalseFalse
196FalseFalseFalseFalseFalseFalse
197FalseFalseFalseFalseFalseFalse
198FalseFalseFalseFalseFalseFalse
199FalseFalseFalseFalseFalseFalse
\n", 825 | "

200 rows × 6 columns

\n", 826 | "
" 827 | ], 828 | "text/plain": [ 829 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 830 | "0 False False False False False False\n", 831 | "1 False False False False False False\n", 832 | "2 False False False False False False\n", 833 | "3 False False False False False False\n", 834 | "4 False False False False False False\n", 835 | ".. ... ... ... ... ... ...\n", 836 | "195 False False False False False False\n", 837 | "196 False False False False False False\n", 838 | "197 False False False False False False\n", 839 | "198 False False False False False False\n", 840 | "199 False False False False False False\n", 841 | "\n", 842 | "[200 rows x 6 columns]" 843 | ] 844 | }, 845 | "execution_count": 26, 846 | "metadata": {}, 847 | "output_type": "execute_result" 848 | } 849 | ], 850 | "source": [ 851 | "df.isnull()" 852 | ] 853 | }, 854 | { 855 | "cell_type": "code", 856 | "execution_count": 27, 857 | "id": "57e87b15", 858 | "metadata": {}, 859 | "outputs": [ 860 | { 861 | "data": { 862 | "text/plain": [ 863 | "CustomerID 0\n", 864 | "Gender 0\n", 865 | "Age Group 0\n", 866 | "Age 0\n", 867 | "Salary 0\n", 868 | "Spending Score (1-100) 0\n", 869 | "dtype: int64" 870 | ] 871 | }, 872 | "execution_count": 27, 873 | "metadata": {}, 874 | "output_type": "execute_result" 875 | } 876 | ], 877 | "source": [ 878 | "df.isnull().sum()" 879 | ] 880 | }, 881 | { 882 | "cell_type": "code", 883 | "execution_count": 28, 884 | "id": "53903062", 885 | "metadata": {}, 886 | "outputs": [ 887 | { 888 | "data": { 889 | "text/html": [ 890 | "
\n", 891 | "\n", 904 | "\n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | "
CustomerIDAgeSalarySpending Score (1-100)
count200.000000200.000000200.000000200.000000
mean100.50000038.85000060.56000050.200000
std57.87918513.96900726.26472125.823522
min1.00000018.00000015.0000001.000000
25%50.75000028.75000041.50000034.750000
50%100.50000036.00000061.50000050.000000
75%150.25000049.00000078.00000073.000000
max200.00000070.000000137.00000099.000000
\n", 973 | "
" 974 | ], 975 | "text/plain": [ 976 | " CustomerID Age Salary Spending Score (1-100)\n", 977 | "count 200.000000 200.000000 200.000000 200.000000\n", 978 | "mean 100.500000 38.850000 60.560000 50.200000\n", 979 | "std 57.879185 13.969007 26.264721 25.823522\n", 980 | "min 1.000000 18.000000 15.000000 1.000000\n", 981 | "25% 50.750000 28.750000 41.500000 34.750000\n", 982 | "50% 100.500000 36.000000 61.500000 50.000000\n", 983 | "75% 150.250000 49.000000 78.000000 73.000000\n", 984 | "max 200.000000 70.000000 137.000000 99.000000" 985 | ] 986 | }, 987 | "execution_count": 28, 988 | "metadata": {}, 989 | "output_type": "execute_result" 990 | } 991 | ], 992 | "source": [ 993 | "df.describe()" 994 | ] 995 | }, 996 | { 997 | "cell_type": "code", 998 | "execution_count": 29, 999 | "id": "a06157ff", 1000 | "metadata": {}, 1001 | "outputs": [ 1002 | { 1003 | "data": { 1004 | "text/html": [ 1005 | "
\n", 1006 | "\n", 1019 | "\n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
01MaleTeen191539
12MaleMiddle Age211581
23FemaleMiddle Age20166
34FemaleMiddle Age231677
45FemaleMiddle Age311740
\n", 1079 | "
" 1080 | ], 1081 | "text/plain": [ 1082 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 1083 | "0 1 Male Teen 19 15 39\n", 1084 | "1 2 Male Middle Age 21 15 81\n", 1085 | "2 3 Female Middle Age 20 16 6\n", 1086 | "3 4 Female Middle Age 23 16 77\n", 1087 | "4 5 Female Middle Age 31 17 40" 1088 | ] 1089 | }, 1090 | "execution_count": 29, 1091 | "metadata": {}, 1092 | "output_type": "execute_result" 1093 | } 1094 | ], 1095 | "source": [ 1096 | "df2.head()" 1097 | ] 1098 | }, 1099 | { 1100 | "cell_type": "code", 1101 | "execution_count": 30, 1102 | "id": "b2c01dd9", 1103 | "metadata": {}, 1104 | "outputs": [ 1105 | { 1106 | "data": { 1107 | "text/html": [ 1108 | "
\n", 1109 | "\n", 1122 | "\n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
01MaleTeen191539
12MaleMiddle Age211581
23FemaleMiddle Age20166
34FemaleMiddle Age231677
45FemaleMiddle Age311740
\n", 1182 | "
" 1183 | ], 1184 | "text/plain": [ 1185 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 1186 | "0 1 Male Teen 19 15 39\n", 1187 | "1 2 Male Middle Age 21 15 81\n", 1188 | "2 3 Female Middle Age 20 16 6\n", 1189 | "3 4 Female Middle Age 23 16 77\n", 1190 | "4 5 Female Middle Age 31 17 40" 1191 | ] 1192 | }, 1193 | "execution_count": 30, 1194 | "metadata": {}, 1195 | "output_type": "execute_result" 1196 | } 1197 | ], 1198 | "source": [ 1199 | "df1.head()" 1200 | ] 1201 | }, 1202 | { 1203 | "cell_type": "code", 1204 | "execution_count": 31, 1205 | "id": "da0f648e", 1206 | "metadata": {}, 1207 | "outputs": [ 1208 | { 1209 | "data": { 1210 | "text/html": [ 1211 | "
\n", 1212 | "\n", 1225 | "\n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
01MaleTeen191539
12MaleMiddle Age211581
23FemaleMiddle Age20166
34FemaleMiddle Age231677
45FemaleMiddle Age311740
\n", 1285 | "
" 1286 | ], 1287 | "text/plain": [ 1288 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 1289 | "0 1 Male Teen 19 15 39\n", 1290 | "1 2 Male Middle Age 21 15 81\n", 1291 | "2 3 Female Middle Age 20 16 6\n", 1292 | "3 4 Female Middle Age 23 16 77\n", 1293 | "4 5 Female Middle Age 31 17 40" 1294 | ] 1295 | }, 1296 | "execution_count": 31, 1297 | "metadata": {}, 1298 | "output_type": "execute_result" 1299 | } 1300 | ], 1301 | "source": [ 1302 | "df3.head()" 1303 | ] 1304 | }, 1305 | { 1306 | "cell_type": "code", 1307 | "execution_count": 32, 1308 | "id": "93e1980c", 1309 | "metadata": {}, 1310 | "outputs": [ 1311 | { 1312 | "data": { 1313 | "text/html": [ 1314 | "
\n", 1315 | "\n", 1328 | "\n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
01MaleTeen191539
12MaleMiddle Age211581
23FemaleMiddle Age20166
34FemaleMiddle Age231677
45FemaleMiddle Age311740
\n", 1388 | "
" 1389 | ], 1390 | "text/plain": [ 1391 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 1392 | "0 1 Male Teen 19 15 39\n", 1393 | "1 2 Male Middle Age 21 15 81\n", 1394 | "2 3 Female Middle Age 20 16 6\n", 1395 | "3 4 Female Middle Age 23 16 77\n", 1396 | "4 5 Female Middle Age 31 17 40" 1397 | ] 1398 | }, 1399 | "execution_count": 32, 1400 | "metadata": {}, 1401 | "output_type": "execute_result" 1402 | } 1403 | ], 1404 | "source": [ 1405 | "df4.head()" 1406 | ] 1407 | }, 1408 | { 1409 | "cell_type": "code", 1410 | "execution_count": 33, 1411 | "id": "d5a0ba61", 1412 | "metadata": {}, 1413 | "outputs": [ 1414 | { 1415 | "data": { 1416 | "text/html": [ 1417 | "
\n", 1418 | "\n", 1431 | "\n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | "
CustomerIDGenderAge GroupAgeSalarySpending Score (1-100)
01MaleTeen191539
12MaleMiddle Age211581
23FemaleMiddle Age20166
34FemaleMiddle Age231677
45FemaleMiddle Age311740
.....................
195196FemaleMiddle Age3512079
196197FemaleElder4512628
197198MaleMiddle Age3212674
198199MaleMiddle Age3213718
199200MaleMiddle Age3013783
\n", 1545 | "

200 rows × 6 columns

\n", 1546 | "
" 1547 | ], 1548 | "text/plain": [ 1549 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 1550 | "0 1 Male Teen 19 15 39\n", 1551 | "1 2 Male Middle Age 21 15 81\n", 1552 | "2 3 Female Middle Age 20 16 6\n", 1553 | "3 4 Female Middle Age 23 16 77\n", 1554 | "4 5 Female Middle Age 31 17 40\n", 1555 | ".. ... ... ... ... ... ...\n", 1556 | "195 196 Female Middle Age 35 120 79\n", 1557 | "196 197 Female Elder 45 126 28\n", 1558 | "197 198 Male Middle Age 32 126 74\n", 1559 | "198 199 Male Middle Age 32 137 18\n", 1560 | "199 200 Male Middle Age 30 137 83\n", 1561 | "\n", 1562 | "[200 rows x 6 columns]" 1563 | ] 1564 | }, 1565 | "execution_count": 33, 1566 | "metadata": {}, 1567 | "output_type": "execute_result" 1568 | } 1569 | ], 1570 | "source": [ 1571 | "df.replace(to_replace=np.nan,value=80)" 1572 | ] 1573 | }, 1574 | { 1575 | "cell_type": "code", 1576 | "execution_count": 34, 1577 | "id": "3c07d865", 1578 | "metadata": {}, 1579 | "outputs": [ 1580 | { 1581 | "name": "stdout", 1582 | "output_type": "stream", 1583 | "text": [ 1584 | "\n", 1585 | "RangeIndex: 200 entries, 0 to 199\n", 1586 | "Data columns (total 6 columns):\n", 1587 | " # Column Non-Null Count Dtype \n", 1588 | "--- ------ -------------- ----- \n", 1589 | " 0 CustomerID 200 non-null int64 \n", 1590 | " 1 Gender 200 non-null object\n", 1591 | " 2 Age Group 200 non-null object\n", 1592 | " 3 Age 200 non-null int64 \n", 1593 | " 4 Salary 200 non-null int64 \n", 1594 | " 5 Spending Score (1-100) 200 non-null int64 \n", 1595 | "dtypes: int64(4), object(2)\n", 1596 | "memory usage: 9.5+ KB\n" 1597 | ] 1598 | } 1599 | ], 1600 | "source": [ 1601 | "df.info()" 1602 | ] 1603 | }, 1604 | { 1605 | "cell_type": "code", 1606 | "execution_count": 35, 1607 | "id": "fbbe0a22", 1608 | "metadata": {}, 1609 | "outputs": [ 1610 | { 1611 | "name": "stdout", 1612 | "output_type": "stream", 1613 | "text": [ 1614 | "(200, 6)\n" 1615 | ] 1616 | } 1617 | ], 1618 | "source": [ 1619 | "print(df.shape)" 1620 | 
] 1621 | }, 1622 | { 1623 | "cell_type": "code", 1624 | "execution_count": 36, 1625 | "id": "e796e277", 1626 | "metadata": {}, 1627 | "outputs": [ 1628 | { 1629 | "name": "stdout", 1630 | "output_type": "stream", 1631 | "text": [ 1632 | " CustomerID Gender Age Group Age Salary Spending Score (1-100)\n", 1633 | "0 1 Male Teen 19 15 39\n", 1634 | "1 2 Male Middle Age 21 15 81\n", 1635 | "2 3 Female Middle Age 20 16 6\n", 1636 | "3 4 Female Middle Age 23 16 77\n", 1637 | "4 5 Female Middle Age 31 17 40\n", 1638 | "5 6 Female Middle Age 22 17 76\n", 1639 | "6 7 Female Middle Age 35 18 6\n", 1640 | "7 8 Female Middle Age 23 18 94\n", 1641 | "8 9 Male Elder 64 19 3\n", 1642 | "9 10 Female Middle Age 30 19 72\n", 1643 | "10 11 Male Elder 67 19 14\n", 1644 | "11 12 Female Middle Age 35 19 99\n", 1645 | "12 13 Female Elder 58 20 15\n", 1646 | "13 14 Female Middle Age 24 20 77\n", 1647 | "14 15 Male Middle Age 37 20 13\n", 1648 | "15 16 Male Middle Age 22 20 79\n", 1649 | "16 17 Female Middle Age 35 21 35\n", 1650 | "17 18 Male Middle Age 20 21 66\n", 1651 | "18 19 Male Elder 52 23 29\n", 1652 | "19 20 Female Middle Age 35 23 98\n" 1653 | ] 1654 | } 1655 | ], 1656 | "source": [ 1657 | "print(df.head(20))" 1658 | ] 1659 | }, 1660 | { 1661 | "cell_type": "code", 1662 | "execution_count": 37, 1663 | "id": "656589f5", 1664 | "metadata": {}, 1665 | "outputs": [ 1666 | { 1667 | "data": { 1668 | "text/plain": [ 1669 | "Middle Age 0.52\n", 1670 | "Elder 0.42\n", 1671 | "Teen 0.06\n", 1672 | "Name: Age Group, dtype: float64" 1673 | ] 1674 | }, 1675 | "execution_count": 37, 1676 | "metadata": {}, 1677 | "output_type": "execute_result" 1678 | } 1679 | ], 1680 | "source": [ 1681 | "df['Age Group'].value_counts(normalize=True)" 1682 | ] 1683 | }, 1684 | { 1685 | "cell_type": "code", 1686 | "execution_count": 38, 1687 | "id": "71877d43", 1688 | "metadata": {}, 1689 | "outputs": [ 1690 | { 1691 | "data": { 1692 | "text/plain": [ 1693 | "1 0.005\n", 1694 | "138 0.005\n", 1695 | "128 
0.005\n", 1696 | "129 0.005\n", 1697 | "130 0.005\n", 1698 | " ... \n", 1699 | "70 0.005\n", 1700 | "71 0.005\n", 1701 | "72 0.005\n", 1702 | "73 0.005\n", 1703 | "200 0.005\n", 1704 | "Name: CustomerID, Length: 200, dtype: float64" 1705 | ] 1706 | }, 1707 | "execution_count": 38, 1708 | "metadata": {}, 1709 | "output_type": "execute_result" 1710 | } 1711 | ], 1712 | "source": [ 1713 | "df['CustomerID'].value_counts(normalize=True)" 1714 | ] 1715 | }, 1716 | { 1717 | "cell_type": "code", 1718 | "execution_count": 39, 1719 | "id": "738abec1", 1720 | "metadata": {}, 1721 | "outputs": [ 1722 | { 1723 | "data": { 1724 | "text/plain": [ 1725 | "Female 0.56\n", 1726 | "Male 0.44\n", 1727 | "Name: Gender, dtype: float64" 1728 | ] 1729 | }, 1730 | "execution_count": 39, 1731 | "metadata": {}, 1732 | "output_type": "execute_result" 1733 | } 1734 | ], 1735 | "source": [ 1736 | "df['Gender'].value_counts(normalize=True)" 1737 | ] 1738 | }, 1739 | { 1740 | "cell_type": "code", 1741 | "execution_count": 40, 1742 | "id": "b8ad6ee3", 1743 | "metadata": {}, 1744 | "outputs": [ 1745 | { 1746 | "data": { 1747 | "text/plain": [ 1748 | "32 0.055\n", 1749 | "35 0.045\n", 1750 | "19 0.040\n", 1751 | "31 0.040\n", 1752 | "30 0.035\n", 1753 | "49 0.035\n", 1754 | "40 0.030\n", 1755 | "38 0.030\n", 1756 | "47 0.030\n", 1757 | "27 0.030\n", 1758 | "36 0.030\n", 1759 | "23 0.030\n", 1760 | "34 0.025\n", 1761 | "20 0.025\n", 1762 | "29 0.025\n", 1763 | "50 0.025\n", 1764 | "48 0.025\n", 1765 | "21 0.025\n", 1766 | "24 0.020\n", 1767 | "18 0.020\n", 1768 | "28 0.020\n", 1769 | "67 0.020\n", 1770 | "59 0.020\n", 1771 | "54 0.020\n", 1772 | "43 0.015\n", 1773 | "60 0.015\n", 1774 | "45 0.015\n", 1775 | "39 0.015\n", 1776 | "33 0.015\n", 1777 | "37 0.015\n", 1778 | "22 0.015\n", 1779 | "25 0.015\n", 1780 | "46 0.015\n", 1781 | "68 0.015\n", 1782 | "52 0.010\n", 1783 | "44 0.010\n", 1784 | "66 0.010\n", 1785 | "57 0.010\n", 1786 | "26 0.010\n", 1787 | "53 0.010\n", 1788 | "42 0.010\n", 1789 | "63 
0.010\n", 1790 | "70 0.010\n", 1791 | "51 0.010\n", 1792 | "58 0.010\n", 1793 | "65 0.010\n", 1794 | "41 0.010\n", 1795 | "55 0.005\n", 1796 | "69 0.005\n", 1797 | "64 0.005\n", 1798 | "56 0.005\n", 1799 | "Name: Age, dtype: float64" 1800 | ] 1801 | }, 1802 | "execution_count": 40, 1803 | "metadata": {}, 1804 | "output_type": "execute_result" 1805 | } 1806 | ], 1807 | "source": [ 1808 | "df['Age'].value_counts(normalize=True)" 1809 | ] 1810 | }, 1811 | { 1812 | "cell_type": "code", 1813 | "execution_count": 41, 1814 | "id": "2dd5a141", 1815 | "metadata": {}, 1816 | "outputs": [ 1817 | { 1818 | "data": { 1819 | "text/plain": [ 1820 | "54 0.06\n", 1821 | "78 0.06\n", 1822 | "48 0.03\n", 1823 | "71 0.03\n", 1824 | "63 0.03\n", 1825 | " ... \n", 1826 | "58 0.01\n", 1827 | "59 0.01\n", 1828 | "16 0.01\n", 1829 | "64 0.01\n", 1830 | "137 0.01\n", 1831 | "Name: Salary, Length: 64, dtype: float64" 1832 | ] 1833 | }, 1834 | "execution_count": 41, 1835 | "metadata": {}, 1836 | "output_type": "execute_result" 1837 | } 1838 | ], 1839 | "source": [ 1840 | "df['Salary'].value_counts(normalize=True)" 1841 | ] 1842 | }, 1843 | { 1844 | "cell_type": "code", 1845 | "execution_count": 42, 1846 | "id": "9b27978c", 1847 | "metadata": {}, 1848 | "outputs": [ 1849 | { 1850 | "data": { 1851 | "text/plain": [ 1852 | "42 0.040\n", 1853 | "55 0.035\n", 1854 | "46 0.030\n", 1855 | "73 0.030\n", 1856 | "35 0.025\n", 1857 | " ... 
\n", 1858 | "31 0.005\n", 1859 | "44 0.005\n", 1860 | "53 0.005\n", 1861 | "65 0.005\n", 1862 | "18 0.005\n", 1863 | "Name: Spending Score (1-100), Length: 84, dtype: float64" 1864 | ] 1865 | }, 1866 | "execution_count": 42, 1867 | "metadata": {}, 1868 | "output_type": "execute_result" 1869 | } 1870 | ], 1871 | "source": [ 1872 | "df['Spending Score (1-100)'].value_counts(normalize=True)" 1873 | ] 1874 | }, 1875 | { 1876 | "cell_type": "code", 1877 | "execution_count": 43, 1878 | "id": "2cf26351", 1879 | "metadata": {}, 1880 | "outputs": [ 1881 | { 1882 | "data": { 1883 | "text/html": [ 1884 | "
\n", 1885 | "\n", 1898 | "\n", 1899 | " \n", 1900 | " \n", 1901 | " \n", 1902 | " \n", 1903 | " \n", 1904 | " \n", 1905 | " \n", 1906 | " \n", 1907 | " \n", 1908 | " \n", 1909 | " \n", 1910 | " \n", 1911 | " \n", 1912 | " \n", 1913 | " \n", 1914 | " \n", 1915 | " \n", 1916 | " \n", 1917 | " \n", 1918 | " \n", 1919 | " \n", 1920 | " \n", 1921 | " \n", 1922 | " \n", 1923 | " \n", 1924 | " \n", 1925 | " \n", 1926 | " \n", 1927 | "
GenderAgeSalary
0Male1915
1Female2217
2Female3521
\n", 1928 | "
" 1929 | ], 1930 | "text/plain": [ 1931 | " Gender Age Salary\n", 1932 | "0 Male 19 15\n", 1933 | "1 Female 22 17\n", 1934 | "2 Female 35 21" 1935 | ] 1936 | }, 1937 | "execution_count": 43, 1938 | "metadata": {}, 1939 | "output_type": "execute_result" 1940 | } 1941 | ], 1942 | "source": [ 1943 | "df = pd.DataFrame([['Male', 19, 15],\n", 1944 | " ['Female', 22, 17],\n", 1945 | " ['Female', 35, 21]])\n", 1946 | "df.columns = ['Gender','Age','Salary']\n", 1947 | "df.head()" 1948 | ] 1949 | }, 1950 | { 1951 | "cell_type": "code", 1952 | "execution_count": null, 1953 | "id": "ddae8cd4", 1954 | "metadata": {}, 1955 | "outputs": [], 1956 | "source": [] 1957 | }, 1958 | { 1959 | "cell_type": "code", 1960 | "execution_count": null, 1961 | "id": "fd3e1027", 1962 | "metadata": {}, 1963 | "outputs": [], 1964 | "source": [] 1965 | }, 1966 | { 1967 | "cell_type": "code", 1968 | "execution_count": null, 1969 | "id": "f86138b7", 1970 | "metadata": {}, 1971 | "outputs": [], 1972 | "source": [] 1973 | }, 1974 | { 1975 | "cell_type": "code", 1976 | "execution_count": null, 1977 | "id": "07f42db6", 1978 | "metadata": {}, 1979 | "outputs": [], 1980 | "source": [] 1981 | } 1982 | ], 1983 | "metadata": { 1984 | "kernelspec": { 1985 | "display_name": "Python 3 (ipykernel)", 1986 | "language": "python", 1987 | "name": "python3" 1988 | }, 1989 | "language_info": { 1990 | "codemirror_mode": { 1991 | "name": "ipython", 1992 | "version": 3 1993 | }, 1994 | "file_extension": ".py", 1995 | "mimetype": "text/x-python", 1996 | "name": "python", 1997 | "nbconvert_exporter": "python", 1998 | "pygments_lexer": "ipython3", 1999 | "version": "3.9.13" 2000 | } 2001 | }, 2002 | "nbformat": 4, 2003 | "nbformat_minor": 5 2004 | } 2005 | -------------------------------------------------------------------------------- /DSBDA A=1 PART2 (1) (1).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 
| "id": "6d8e9d37", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "e0077717", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "csv_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "id": "eabd856b", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "iris=pd.read_csv(csv_url, header = None)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 4, 36 | "id": "4e936a14", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "col_name = ['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width','Species']" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 5, 46 | "id": "f01bba3d", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "iris = pd.read_csv(csv_url, names = col_name)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 6, 56 | "id": "5e3ad842", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "df1=df=iris" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 7, 66 | "id": "84845444", 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/html": [ 72 | "
\n", 73 | "\n", 86 | "\n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | "
Sepal_LengthSepal_WidthPetal_LengthPetal_WidthSpecies
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
55.43.91.70.4Iris-setosa
64.63.41.40.3Iris-setosa
75.03.41.50.2Iris-setosa
\n", 164 | "
" 165 | ], 166 | "text/plain": [ 167 | " Sepal_Length Sepal_Width Petal_Length Petal_Width Species\n", 168 | "0 5.1 3.5 1.4 0.2 Iris-setosa\n", 169 | "1 4.9 3.0 1.4 0.2 Iris-setosa\n", 170 | "2 4.7 3.2 1.3 0.2 Iris-setosa\n", 171 | "3 4.6 3.1 1.5 0.2 Iris-setosa\n", 172 | "4 5.0 3.6 1.4 0.2 Iris-setosa\n", 173 | "5 5.4 3.9 1.7 0.4 Iris-setosa\n", 174 | "6 4.6 3.4 1.4 0.3 Iris-setosa\n", 175 | "7 5.0 3.4 1.5 0.2 Iris-setosa" 176 | ] 177 | }, 178 | "execution_count": 7, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": [ 184 | "iris.head(8)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 8, 190 | "id": "cf3b452c", 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/html": [ 196 | "
\n", 197 | "\n", 210 | "\n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | "
Sepal_LengthSepal_WidthPetal_LengthPetal_WidthSpecies
1446.73.35.72.5Iris-virginica
1456.73.05.22.3Iris-virginica
1466.32.55.01.9Iris-virginica
1476.53.05.22.0Iris-virginica
1486.23.45.42.3Iris-virginica
1495.93.05.11.8Iris-virginica
\n", 272 | "
" 273 | ], 274 | "text/plain": [ 275 | " Sepal_Length Sepal_Width Petal_Length Petal_Width Species\n", 276 | "144 6.7 3.3 5.7 2.5 Iris-virginica\n", 277 | "145 6.7 3.0 5.2 2.3 Iris-virginica\n", 278 | "146 6.3 2.5 5.0 1.9 Iris-virginica\n", 279 | "147 6.5 3.0 5.2 2.0 Iris-virginica\n", 280 | "148 6.2 3.4 5.4 2.3 Iris-virginica\n", 281 | "149 5.9 3.0 5.1 1.8 Iris-virginica" 282 | ] 283 | }, 284 | "execution_count": 8, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": [ 290 | "iris.tail(6)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 9, 296 | "id": "1b91f6a5", 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "data": { 301 | "text/plain": [ 302 | "RangeIndex(start=0, stop=150, step=1)" 303 | ] 304 | }, 305 | "execution_count": 9, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "iris.index" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 10, 317 | "id": "88afcc1e", 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "Index(['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width',\n", 324 | " 'Species'],\n", 325 | " dtype='object')" 326 | ] 327 | }, 328 | "execution_count": 10, 329 | "metadata": {}, 330 | "output_type": "execute_result" 331 | } 332 | ], 333 | "source": [ 334 | "iris.columns" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 11, 340 | "id": "08a54b7d", 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "data": { 345 | "text/plain": [ 346 | "(150, 5)" 347 | ] 348 | }, 349 | "execution_count": 11, 350 | "metadata": {}, 351 | "output_type": "execute_result" 352 | } 353 | ], 354 | "source": [ 355 | "iris.shape" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 12, 361 | "id": "7a05d054", 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/plain": [ 367 | "Sepal_Length 
float64\n", 368 | "Sepal_Width float64\n", 369 | "Petal_Length float64\n", 370 | "Petal_Width float64\n", 371 | "Species object\n", 372 | "dtype: object" 373 | ] 374 | }, 375 | "execution_count": 12, 376 | "metadata": {}, 377 | "output_type": "execute_result" 378 | } 379 | ], 380 | "source": [ 381 | "iris.dtypes" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 13, 387 | "id": "e8f6a558", 388 | "metadata": {}, 389 | "outputs": [ 390 | { 391 | "data": { 392 | "text/html": [ 393 | "
\n", 394 | "\n", 407 | "\n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | "
Sepal_LengthSepal_WidthPetal_LengthPetal_Width
count150.000000150.000000150.000000150.000000
mean5.8433333.0540003.7586671.198667
std0.8280660.4335941.7644200.763161
min4.3000002.0000001.0000000.100000
25%5.1000002.8000001.6000000.300000
50%5.8000003.0000004.3500001.300000
75%6.4000003.3000005.1000001.800000
max7.9000004.4000006.9000002.500000
\n", 476 | "
" 477 | ], 478 | "text/plain": [ 479 | " Sepal_Length Sepal_Width Petal_Length Petal_Width\n", 480 | "count 150.000000 150.000000 150.000000 150.000000\n", 481 | "mean 5.843333 3.054000 3.758667 1.198667\n", 482 | "std 0.828066 0.433594 1.764420 0.763161\n", 483 | "min 4.300000 2.000000 1.000000 0.100000\n", 484 | "25% 5.100000 2.800000 1.600000 0.300000\n", 485 | "50% 5.800000 3.000000 4.350000 1.300000\n", 486 | "75% 6.400000 3.300000 5.100000 1.800000\n", 487 | "max 7.900000 4.400000 6.900000 2.500000" 488 | ] 489 | }, 490 | "execution_count": 13, 491 | "metadata": {}, 492 | "output_type": "execute_result" 493 | } 494 | ], 495 | "source": [ 496 | "iris.describe()" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 14, 502 | "id": "e183dfeb", 503 | "metadata": {}, 504 | "outputs": [ 505 | { 506 | "data": { 507 | "text/plain": [ 508 | "array(['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width',\n", 509 | " 'Species'], dtype=object)" 510 | ] 511 | }, 512 | "execution_count": 14, 513 | "metadata": {}, 514 | "output_type": "execute_result" 515 | } 516 | ], 517 | "source": [ 518 | "iris.columns.values" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": 15, 524 | "id": "bc4ba47f", 525 | "metadata": {}, 526 | "outputs": [ 527 | { 528 | "data": { 529 | "text/plain": [ 530 | "Sepal_Length 4.4\n", 531 | "Sepal_Width 2.9\n", 532 | "Petal_Length 1.4\n", 533 | "Petal_Width 0.2\n", 534 | "Species Iris-setosa\n", 535 | "Name: 8, dtype: object" 536 | ] 537 | }, 538 | "execution_count": 15, 539 | "metadata": {}, 540 | "output_type": "execute_result" 541 | } 542 | ], 543 | "source": [ 544 | "iris.iloc[8]" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 16, 550 | "id": "a1d0794f", 551 | "metadata": {}, 552 | "outputs": [ 553 | { 554 | "data": { 555 | "text/html": [ 556 | "
\n", 557 | "\n", 570 | "\n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | "
Sepal_LengthSepal_WidthPetal_LengthPetal_WidthSpecies
474.63.21.40.2Iris-setosa
485.33.71.50.2Iris-setosa
495.03.31.40.2Iris-setosa
507.03.24.71.4Iris-versicolor
\n", 616 | "
" 617 | ], 618 | "text/plain": [ 619 | " Sepal_Length Sepal_Width Petal_Length Petal_Width Species\n", 620 | "47 4.6 3.2 1.4 0.2 Iris-setosa\n", 621 | "48 5.3 3.7 1.5 0.2 Iris-setosa\n", 622 | "49 5.0 3.3 1.4 0.2 Iris-setosa\n", 623 | "50 7.0 3.2 4.7 1.4 Iris-versicolor" 624 | ] 625 | }, 626 | "execution_count": 16, 627 | "metadata": {}, 628 | "output_type": "execute_result" 629 | } 630 | ], 631 | "source": [ 632 | "iris[47:51]" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": 17, 638 | "id": "6bd30eb9", 639 | "metadata": {}, 640 | "outputs": [ 641 | { 642 | "data": { 643 | "text/html": [ 644 | "
\n", 645 | "\n", 658 | "\n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | "
Sepal_LengthSepal_Width
05.13.5
14.93.0
24.73.2
34.63.1
45.03.6
.........
1456.73.0
1466.32.5
1476.53.0
1486.23.4
1495.93.0
\n", 724 | "

150 rows × 2 columns

\n", 725 | "
" 726 | ], 727 | "text/plain": [ 728 | " Sepal_Length Sepal_Width\n", 729 | "0 5.1 3.5\n", 730 | "1 4.9 3.0\n", 731 | "2 4.7 3.2\n", 732 | "3 4.6 3.1\n", 733 | "4 5.0 3.6\n", 734 | ".. ... ...\n", 735 | "145 6.7 3.0\n", 736 | "146 6.3 2.5\n", 737 | "147 6.5 3.0\n", 738 | "148 6.2 3.4\n", 739 | "149 5.9 3.0\n", 740 | "\n", 741 | "[150 rows x 2 columns]" 742 | ] 743 | }, 744 | "execution_count": 17, 745 | "metadata": {}, 746 | "output_type": "execute_result" 747 | } 748 | ], 749 | "source": [ 750 | "iris.loc[:,[\"Sepal_Length\",\"Sepal_Width\"]]" 751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": 18, 756 | "id": "495cb730", 757 | "metadata": {}, 758 | "outputs": [ 759 | { 760 | "data": { 761 | "text/html": [ 762 | "
\n", 763 | "\n", 776 | "\n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | "
Petal_LengthPetal_Width
01.40.2
11.40.2
21.30.2
31.50.2
41.40.2
.........
1455.22.3
1465.01.9
1475.22.0
1485.42.3
1495.11.8
\n", 842 | "

150 rows × 2 columns

\n", 843 | "
" 844 | ], 845 | "text/plain": [ 846 | " Petal_Length Petal_Width\n", 847 | "0 1.4 0.2\n", 848 | "1 1.4 0.2\n", 849 | "2 1.3 0.2\n", 850 | "3 1.5 0.2\n", 851 | "4 1.4 0.2\n", 852 | ".. ... ...\n", 853 | "145 5.2 2.3\n", 854 | "146 5.0 1.9\n", 855 | "147 5.2 2.0\n", 856 | "148 5.4 2.3\n", 857 | "149 5.1 1.8\n", 858 | "\n", 859 | "[150 rows x 2 columns]" 860 | ] 861 | }, 862 | "execution_count": 18, 863 | "metadata": {}, 864 | "output_type": "execute_result" 865 | } 866 | ], 867 | "source": [ 868 | "cols_2_4=iris.columns[2:4]\n", 869 | "iris[cols_2_4]" 870 | ] 871 | }, 872 | { 873 | "cell_type": "code", 874 | "execution_count": 19, 875 | "id": "8099b16e", 876 | "metadata": {}, 877 | "outputs": [ 878 | { 879 | "data": { 880 | "text/plain": [ 881 | "Sepal_Length False\n", 882 | "Sepal_Width False\n", 883 | "Petal_Length False\n", 884 | "Petal_Width False\n", 885 | "Species False\n", 886 | "dtype: bool" 887 | ] 888 | }, 889 | "execution_count": 19, 890 | "metadata": {}, 891 | "output_type": "execute_result" 892 | } 893 | ], 894 | "source": [ 895 | "iris.isnull().any()" 896 | ] 897 | }, 898 | { 899 | "cell_type": "code", 900 | "execution_count": 20, 901 | "id": "c4f4fd2a", 902 | "metadata": {}, 903 | "outputs": [ 904 | { 905 | "data": { 906 | "text/plain": [ 907 | "Sepal_Length 0\n", 908 | "Sepal_Width 0\n", 909 | "Petal_Length 0\n", 910 | "Petal_Width 0\n", 911 | "Species 0\n", 912 | "dtype: int64" 913 | ] 914 | }, 915 | "execution_count": 20, 916 | "metadata": {}, 917 | "output_type": "execute_result" 918 | } 919 | ], 920 | "source": [ 921 | "iris.isnull().sum()" 922 | ] 923 | }, 924 | { 925 | "cell_type": "code", 926 | "execution_count": 21, 927 | "id": "166c41d5", 928 | "metadata": {}, 929 | "outputs": [ 930 | { 931 | "data": { 932 | "text/plain": [ 933 | "Sepal_Length float64\n", 934 | "Sepal_Width float64\n", 935 | "Petal_Length float64\n", 936 | "Petal_Width float64\n", 937 | "Species object\n", 938 | "dtype: object" 939 | ] 940 | }, 941 | "execution_count": 21, 
942 | "metadata": {}, 943 | "output_type": "execute_result" 944 | } 945 | ], 946 | "source": [ 947 | "iris.dtypes" 948 | ] 949 | }, 950 | { 951 | "cell_type": "code", 952 | "execution_count": 22, 953 | "id": "7075f286", 954 | "metadata": {}, 955 | "outputs": [], 956 | "source": [ 957 | "df=iris\n", 958 | "df['petal length(cm)']= iris['Petal_Length'].astype('int')" 959 | ] 960 | }, 961 | { 962 | "cell_type": "code", 963 | "execution_count": 23, 964 | "id": "21fa96d2", 965 | "metadata": {}, 966 | "outputs": [], 967 | "source": [ 968 | "df1=df" 969 | ] 970 | }, 971 | { 972 | "cell_type": "code", 973 | "execution_count": 24, 974 | "id": "efa7b7db", 975 | "metadata": {}, 976 | "outputs": [ 977 | { 978 | "data": { 979 | "text/html": [ 980 | "
\n", 981 | "\n", 994 | "\n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | "
Sepal_LengthSepal_WidthPetal_LengthPetal_WidthSpeciespetal length(cm)
05.13.51.40.2Iris-setosa1
14.93.01.40.2Iris-setosa1
24.73.21.30.2Iris-setosa1
34.63.11.50.2Iris-setosa1
45.03.61.40.2Iris-setosa1
.....................
1456.73.05.22.3Iris-virginica5
1466.32.55.01.9Iris-virginica5
1476.53.05.22.0Iris-virginica5
1486.23.45.42.3Iris-virginica5
1495.93.05.11.8Iris-virginica5
\n", 1108 | "

150 rows × 6 columns

\n", 1109 | "
" 1110 | ], 1111 | "text/plain": [ 1112 | " Sepal_Length Sepal_Width Petal_Length Petal_Width Species \\\n", 1113 | "0 5.1 3.5 1.4 0.2 Iris-setosa \n", 1114 | "1 4.9 3.0 1.4 0.2 Iris-setosa \n", 1115 | "2 4.7 3.2 1.3 0.2 Iris-setosa \n", 1116 | "3 4.6 3.1 1.5 0.2 Iris-setosa \n", 1117 | "4 5.0 3.6 1.4 0.2 Iris-setosa \n", 1118 | ".. ... ... ... ... ... \n", 1119 | "145 6.7 3.0 5.2 2.3 Iris-virginica \n", 1120 | "146 6.3 2.5 5.0 1.9 Iris-virginica \n", 1121 | "147 6.5 3.0 5.2 2.0 Iris-virginica \n", 1122 | "148 6.2 3.4 5.4 2.3 Iris-virginica \n", 1123 | "149 5.9 3.0 5.1 1.8 Iris-virginica \n", 1124 | "\n", 1125 | " petal length(cm) \n", 1126 | "0 1 \n", 1127 | "1 1 \n", 1128 | "2 1 \n", 1129 | "3 1 \n", 1130 | "4 1 \n", 1131 | ".. ... \n", 1132 | "145 5 \n", 1133 | "146 5 \n", 1134 | "147 5 \n", 1135 | "148 5 \n", 1136 | "149 5 \n", 1137 | "\n", 1138 | "[150 rows x 6 columns]" 1139 | ] 1140 | }, 1141 | "execution_count": 24, 1142 | "metadata": {}, 1143 | "output_type": "execute_result" 1144 | } 1145 | ], 1146 | "source": [ 1147 | "df" 1148 | ] 1149 | }, 1150 | { 1151 | "cell_type": "markdown", 1152 | "id": "08a1cbe3", 1153 | "metadata": {}, 1154 | "source": [ 1155 | "# Data Normalization" 1156 | ] 1157 | }, 1158 | { 1159 | "cell_type": "code", 1160 | "execution_count": 25, 1161 | "id": "53f8141c", 1162 | "metadata": {}, 1163 | "outputs": [], 1164 | "source": [ 1165 | "from sklearn import preprocessing \n", 1166 | "min_max_scaler = preprocessing.MinMaxScaler()" 1167 | ] 1168 | }, 1169 | { 1170 | "cell_type": "code", 1171 | "execution_count": 26, 1172 | "id": "8aa308ab", 1173 | "metadata": {}, 1174 | "outputs": [], 1175 | "source": [ 1176 | "x=iris.iloc[:,:4]" 1177 | ] 1178 | }, 1179 | { 1180 | "cell_type": "code", 1181 | "execution_count": 27, 1182 | "id": "a2bd881b", 1183 | "metadata": {}, 1184 | "outputs": [ 1185 | { 1186 | "data": { 1187 | "text/html": [ 1188 | "
\n", 1189 | "\n", 1202 | "\n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | "
Sepal_LengthSepal_WidthPetal_LengthPetal_Width
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
...............
1456.73.05.22.3
1466.32.55.01.9
1476.53.05.22.0
1486.23.45.42.3
1495.93.05.11.8
\n", 1292 | "

150 rows × 4 columns

\n", 1293 | "
" 1294 | ], 1295 | "text/plain": [ 1296 | " Sepal_Length Sepal_Width Petal_Length Petal_Width\n", 1297 | "0 5.1 3.5 1.4 0.2\n", 1298 | "1 4.9 3.0 1.4 0.2\n", 1299 | "2 4.7 3.2 1.3 0.2\n", 1300 | "3 4.6 3.1 1.5 0.2\n", 1301 | "4 5.0 3.6 1.4 0.2\n", 1302 | ".. ... ... ... ...\n", 1303 | "145 6.7 3.0 5.2 2.3\n", 1304 | "146 6.3 2.5 5.0 1.9\n", 1305 | "147 6.5 3.0 5.2 2.0\n", 1306 | "148 6.2 3.4 5.4 2.3\n", 1307 | "149 5.9 3.0 5.1 1.8\n", 1308 | "\n", 1309 | "[150 rows x 4 columns]" 1310 | ] 1311 | }, 1312 | "execution_count": 27, 1313 | "metadata": {}, 1314 | "output_type": "execute_result" 1315 | } 1316 | ], 1317 | "source": [ 1318 | "x" 1319 | ] 1320 | }, 1321 | { 1322 | "cell_type": "code", 1323 | "execution_count": 28, 1324 | "id": "d3836f0b", 1325 | "metadata": {}, 1326 | "outputs": [], 1327 | "source": [ 1328 | "x_scaled = min_max_scaler.fit_transform(x)" 1329 | ] 1330 | }, 1331 | { 1332 | "cell_type": "code", 1333 | "execution_count": 29, 1334 | "id": "b69004aa", 1335 | "metadata": {}, 1336 | "outputs": [], 1337 | "source": [ 1338 | "x_scaled = min_max_scaler.fit_transform(x)" 1339 | ] 1340 | }, 1341 | { 1342 | "cell_type": "code", 1343 | "execution_count": 30, 1344 | "id": "19c17e6a", 1345 | "metadata": {}, 1346 | "outputs": [], 1347 | "source": [ 1348 | "df_normalized = pd.DataFrame(x_scaled)" 1349 | ] 1350 | }, 1351 | { 1352 | "cell_type": "code", 1353 | "execution_count": 31, 1354 | "id": "f84253d3", 1355 | "metadata": {}, 1356 | "outputs": [ 1357 | { 1358 | "data": { 1359 | "text/html": [ 1360 | "
\n", 1361 | "\n", 1374 | "\n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | "
0123
00.2222220.6250000.0677970.041667
10.1666670.4166670.0677970.041667
20.1111110.5000000.0508470.041667
30.0833330.4583330.0847460.041667
40.1944440.6666670.0677970.041667
...............
1450.6666670.4166670.7118640.916667
1460.5555560.2083330.6779660.750000
1470.6111110.4166670.7118640.791667
1480.5277780.5833330.7457630.916667
1490.4444440.4166670.6949150.708333
\n", 1464 | "

150 rows × 4 columns

\n", 1465 | "
" 1466 | ], 1467 | "text/plain": [ 1468 | " 0 1 2 3\n", 1469 | "0 0.222222 0.625000 0.067797 0.041667\n", 1470 | "1 0.166667 0.416667 0.067797 0.041667\n", 1471 | "2 0.111111 0.500000 0.050847 0.041667\n", 1472 | "3 0.083333 0.458333 0.084746 0.041667\n", 1473 | "4 0.194444 0.666667 0.067797 0.041667\n", 1474 | ".. ... ... ... ...\n", 1475 | "145 0.666667 0.416667 0.711864 0.916667\n", 1476 | "146 0.555556 0.208333 0.677966 0.750000\n", 1477 | "147 0.611111 0.416667 0.711864 0.791667\n", 1478 | "148 0.527778 0.583333 0.745763 0.916667\n", 1479 | "149 0.444444 0.416667 0.694915 0.708333\n", 1480 | "\n", 1481 | "[150 rows x 4 columns]" 1482 | ] 1483 | }, 1484 | "execution_count": 31, 1485 | "metadata": {}, 1486 | "output_type": "execute_result" 1487 | } 1488 | ], 1489 | "source": [ 1490 | "df_normalized" 1491 | ] 1492 | }, 1493 | { 1494 | "cell_type": "markdown", 1495 | "id": "dbfd1a7c", 1496 | "metadata": {}, 1497 | "source": [ 1498 | "# Handling categorical variable" 1499 | ] 1500 | }, 1501 | { 1502 | "cell_type": "markdown", 1503 | "id": "0dd1b634", 1504 | "metadata": {}, 1505 | "source": [ 1506 | "label encoder" 1507 | ] 1508 | }, 1509 | { 1510 | "cell_type": "code", 1511 | "execution_count": 32, 1512 | "id": "faa8476f", 1513 | "metadata": {}, 1514 | "outputs": [ 1515 | { 1516 | "data": { 1517 | "text/plain": [ 1518 | "array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)" 1519 | ] 1520 | }, 1521 | "execution_count": 32, 1522 | "metadata": {}, 1523 | "output_type": "execute_result" 1524 | } 1525 | ], 1526 | "source": [ 1527 | "df2=df1\n", 1528 | "df2['Species'].unique()" 1529 | ] 1530 | }, 1531 | { 1532 | "cell_type": "code", 1533 | "execution_count": 33, 1534 | "id": "89b90e42", 1535 | "metadata": {}, 1536 | "outputs": [], 1537 | "source": [ 1538 | "from sklearn import preprocessing \n", 1539 | "enc = preprocessing.OneHotEncoder()" 1540 | ] 1541 | }, 1542 | { 1543 | "cell_type": "code", 1544 | "execution_count": 34, 1545 | "id": "84a0a869", 
1546 | "metadata": {}, 1547 | "outputs": [], 1548 | "source": [ 1549 | "features_df=df2.drop(columns=['Species'])" 1550 | ] 1551 | }, 1552 | { 1553 | "cell_type": "code", 1554 | "execution_count": 35, 1555 | "id": "6943e7bc", 1556 | "metadata": {}, 1557 | "outputs": [ 1558 | { 1559 | "data": { 1560 | "text/html": [ 1561 | "
\n", 1562 | "\n", 1575 | "\n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | "
Sepal_LengthSepal_WidthPetal_LengthPetal_Widthpetal length(cm)
05.13.51.40.21
14.93.01.40.21
24.73.21.30.21
34.63.11.50.21
45.03.61.40.21
..................
1456.73.05.22.35
1466.32.55.01.95
1476.53.05.22.05
1486.23.45.42.35
1495.93.05.11.85
\n", 1677 | "

150 rows × 5 columns

\n", 1678 | "
" 1679 | ], 1680 | "text/plain": [ 1681 | " Sepal_Length Sepal_Width Petal_Length Petal_Width petal length(cm)\n", 1682 | "0 5.1 3.5 1.4 0.2 1\n", 1683 | "1 4.9 3.0 1.4 0.2 1\n", 1684 | "2 4.7 3.2 1.3 0.2 1\n", 1685 | "3 4.6 3.1 1.5 0.2 1\n", 1686 | "4 5.0 3.6 1.4 0.2 1\n", 1687 | ".. ... ... ... ... ...\n", 1688 | "145 6.7 3.0 5.2 2.3 5\n", 1689 | "146 6.3 2.5 5.0 1.9 5\n", 1690 | "147 6.5 3.0 5.2 2.0 5\n", 1691 | "148 6.2 3.4 5.4 2.3 5\n", 1692 | "149 5.9 3.0 5.1 1.8 5\n", 1693 | "\n", 1694 | "[150 rows x 5 columns]" 1695 | ] 1696 | }, 1697 | "execution_count": 35, 1698 | "metadata": {}, 1699 | "output_type": "execute_result" 1700 | } 1701 | ], 1702 | "source": [ 1703 | "features_df" 1704 | ] 1705 | }, 1706 | { 1707 | "cell_type": "code", 1708 | "execution_count": 36, 1709 | "id": "db194e38", 1710 | "metadata": {}, 1711 | "outputs": [], 1712 | "source": [ 1713 | "enc_df=(enc. fit_transform(df2[['Species']])).toarray()" 1714 | ] 1715 | }, 1716 | { 1717 | "cell_type": "code", 1718 | "execution_count": 37, 1719 | "id": "9e9f56c9", 1720 | "metadata": {}, 1721 | "outputs": [], 1722 | "source": [ 1723 | "enc_df = pd.DataFrame(enc_df, columns = ['Iris-Setosa','Iris-Vrsicolor','Iris-virginca'])" 1724 | ] 1725 | }, 1726 | { 1727 | "cell_type": "code", 1728 | "execution_count": 38, 1729 | "id": "b9dd8e20", 1730 | "metadata": {}, 1731 | "outputs": [], 1732 | "source": [ 1733 | "df_encode = features_df.join(enc_df)" 1734 | ] 1735 | }, 1736 | { 1737 | "cell_type": "code", 1738 | "execution_count": 39, 1739 | "id": "52e6f717", 1740 | "metadata": {}, 1741 | "outputs": [ 1742 | { 1743 | "data": { 1744 | "text/html": [ 1745 | "
\n", 1746 | "\n", 1759 | "\n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " \n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | " \n", 1803 | " \n", 1804 | " \n", 1805 | " \n", 1806 | " \n", 1807 | " \n", 1808 | " \n", 1809 | " \n", 1810 | " \n", 1811 | " \n", 1812 | " \n", 1813 | " \n", 1814 | " \n", 1815 | " \n", 1816 | " \n", 1817 | " \n", 1818 | " \n", 1819 | " \n", 1820 | " \n", 1821 | " \n", 1822 | " \n", 1823 | " \n", 1824 | " \n", 1825 | " \n", 1826 | " \n", 1827 | " \n", 1828 | " \n", 1829 | " \n", 1830 | " \n", 1831 | " \n", 1832 | " \n", 1833 | " \n", 1834 | " \n", 1835 | " \n", 1836 | " \n", 1837 | " \n", 1838 | " \n", 1839 | " \n", 1840 | " \n", 1841 | " \n", 1842 | " \n", 1843 | " \n", 1844 | " \n", 1845 | " \n", 1846 | " \n", 1847 | " \n", 1848 | " \n", 1849 | " \n", 1850 | " \n", 1851 | " \n", 1852 | " \n", 1853 | " \n", 1854 | " \n", 1855 | " \n", 1856 | " \n", 1857 | " \n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | " \n", 1868 | " \n", 1869 | " \n", 1870 | " \n", 1871 | " \n", 1872 | " \n", 1873 | " \n", 1874 | " \n", 1875 | " \n", 1876 | " \n", 1877 | " \n", 1878 | " \n", 1879 | " \n", 1880 | " \n", 1881 | " \n", 1882 | " \n", 1883 | " \n", 1884 | " \n", 1885 | " \n", 1886 | " \n", 1887 | " \n", 1888 | " \n", 1889 | " \n", 1890 | " \n", 1891 | " \n", 1892 | " \n", 1893 | " \n", 1894 | " \n", 1895 | " \n", 1896 | "
Sepal_LengthSepal_WidthPetal_LengthPetal_Widthpetal length(cm)Iris-SetosaIris-VrsicolorIris-virginca
05.13.51.40.211.00.00.0
14.93.01.40.211.00.00.0
24.73.21.30.211.00.00.0
34.63.11.50.211.00.00.0
45.03.61.40.211.00.00.0
...........................
1456.73.05.22.350.00.01.0
1466.32.55.01.950.00.01.0
1476.53.05.22.050.00.01.0
1486.23.45.42.350.00.01.0
1495.93.05.11.850.00.01.0
\n", 1897 | "

150 rows × 8 columns

\n", 1898 | "
" 1899 | ], 1900 | "text/plain": [ 1901 | " Sepal_Length Sepal_Width Petal_Length Petal_Width petal length(cm) \\\n", 1902 | "0 5.1 3.5 1.4 0.2 1 \n", 1903 | "1 4.9 3.0 1.4 0.2 1 \n", 1904 | "2 4.7 3.2 1.3 0.2 1 \n", 1905 | "3 4.6 3.1 1.5 0.2 1 \n", 1906 | "4 5.0 3.6 1.4 0.2 1 \n", 1907 | ".. ... ... ... ... ... \n", 1908 | "145 6.7 3.0 5.2 2.3 5 \n", 1909 | "146 6.3 2.5 5.0 1.9 5 \n", 1910 | "147 6.5 3.0 5.2 2.0 5 \n", 1911 | "148 6.2 3.4 5.4 2.3 5 \n", 1912 | "149 5.9 3.0 5.1 1.8 5 \n", 1913 | "\n", 1914 | " Iris-Setosa Iris-Vrsicolor Iris-virginca \n", 1915 | "0 1.0 0.0 0.0 \n", 1916 | "1 1.0 0.0 0.0 \n", 1917 | "2 1.0 0.0 0.0 \n", 1918 | "3 1.0 0.0 0.0 \n", 1919 | "4 1.0 0.0 0.0 \n", 1920 | ".. ... ... ... \n", 1921 | "145 0.0 0.0 1.0 \n", 1922 | "146 0.0 0.0 1.0 \n", 1923 | "147 0.0 0.0 1.0 \n", 1924 | "148 0.0 0.0 1.0 \n", 1925 | "149 0.0 0.0 1.0 \n", 1926 | "\n", 1927 | "[150 rows x 8 columns]" 1928 | ] 1929 | }, 1930 | "execution_count": 39, 1931 | "metadata": {}, 1932 | "output_type": "execute_result" 1933 | } 1934 | ], 1935 | "source": [ 1936 | "df_encode" 1937 | ] 1938 | }, 1939 | { 1940 | "cell_type": "code", 1941 | "execution_count": null, 1942 | "id": "4122e8aa", 1943 | "metadata": {}, 1944 | "outputs": [], 1945 | "source": [] 1946 | }, 1947 | { 1948 | "cell_type": "code", 1949 | "execution_count": null, 1950 | "id": "0c97b5d4", 1951 | "metadata": {}, 1952 | "outputs": [], 1953 | "source": [] 1954 | }, 1955 | { 1956 | "cell_type": "code", 1957 | "execution_count": null, 1958 | "id": "11f191f1", 1959 | "metadata": {}, 1960 | "outputs": [], 1961 | "source": [] 1962 | }, 1963 | { 1964 | "cell_type": "code", 1965 | "execution_count": null, 1966 | "id": "d318f1e8", 1967 | "metadata": {}, 1968 | "outputs": [], 1969 | "source": [] 1970 | }, 1971 | { 1972 | "cell_type": "code", 1973 | "execution_count": null, 1974 | "id": "bf4ab003", 1975 | "metadata": {}, 1976 | "outputs": [], 1977 | "source": [] 1978 | } 1979 | ], 1980 | "metadata": { 1981 | "kernelspec": { 
1982 | "display_name": "Python 3 (ipykernel)", 1983 | "language": "python", 1984 | "name": "python3" 1985 | }, 1986 | "language_info": { 1987 | "codemirror_mode": { 1988 | "name": "ipython", 1989 | "version": 3 1990 | }, 1991 | "file_extension": ".py", 1992 | "mimetype": "text/x-python", 1993 | "name": "python", 1994 | "nbconvert_exporter": "python", 1995 | "pygments_lexer": "ipython3", 1996 | "version": "3.9.13" 1997 | } 1998 | }, 1999 | "nbformat": 4, 2000 | "nbformat_minor": 5 2001 | } 2002 | --------------------------------------------------------------------------------