├── 10 Univariate non-graphical EDA.ipynb ├── 11 Univariate visualizations (Categorical).ipynb ├── 12 Univariate visualizations (Numerical).ipynb ├── 13 Bivariate numerical.ipynb ├── 14 Bivariate Categorical - Numerical.ipynb ├── 15 Bivariate categorical .ipynb ├── 2 Understanding your data.ipynb ├── 3 Missing values.ipynb ├── 4 Duplicated values.ipynb ├── 6 Outliers(z_score).ipynb ├── 7 Outliers(IQR).ipynb ├── 8 Outliers(Percentile).ipynb ├── 9 Correction of datatype.ipynb ├── AB_NYC_2019.csv ├── EDA 5 (Outliers).pptx ├── README.md ├── scholarship.csv └── weight-height.csv /10 Univariate non-graphical EDA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "8abdfe09", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "119d4726", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "df = pd.read_csv(\"AB_NYC_2019.csv\")" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "id": "217b1e02", 27 | "metadata": { 28 | "scrolled": true 29 | }, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/html": [ 34 | "
\n", 35 | "\n", 48 | "\n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02539Clean & quiet apt home by the park2787JohnBrooklynKensington40.64749-73.97237Private room1491919-10-20180.216365
12595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt22514521-05-20190.382355
23647THE VILLAGE OF HARLEM....NEW YORK !4632ElisabethManhattanHarlem40.80902-73.94190Private room15030NaNNaN1365
33831Cozy Entire Floor of Brownstone4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt89127005-07-20194.641194
45022Entire Apt: Spacious Studio/Loft by central park7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt8010919-11-20180.1010
\n", 168 | "
" 169 | ], 170 | "text/plain": [ 171 | " id name host_id \\\n", 172 | "0 2539 Clean & quiet apt home by the park 2787 \n", 173 | "1 2595 Skylit Midtown Castle 2845 \n", 174 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n", 175 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n", 176 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n", 177 | "\n", 178 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n", 179 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n", 180 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", 181 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n", 182 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", 183 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n", 184 | "\n", 185 | " room_type price minimum_nights number_of_reviews last_review \\\n", 186 | "0 Private room 149 1 9 19-10-2018 \n", 187 | "1 Entire home/apt 225 1 45 21-05-2019 \n", 188 | "2 Private room 150 3 0 NaN \n", 189 | "3 Entire home/apt 89 1 270 05-07-2019 \n", 190 | "4 Entire home/apt 80 10 9 19-11-2018 \n", 191 | "\n", 192 | " reviews_per_month calculated_host_listings_count availability_365 \n", 193 | "0 0.21 6 365 \n", 194 | "1 0.38 2 355 \n", 195 | "2 NaN 1 365 \n", 196 | "3 4.64 1 194 \n", 197 | "4 0.10 1 0 " 198 | ] 199 | }, 200 | "execution_count": 3, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "df.head()" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 4, 212 | "id": "94df84f5", 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "df[\"id\"]=df[\"id\"].astype(str)\n", 217 | "df[\"host_id\"]=df[\"host_id\"].astype(str)\n", 218 | "df[\"latitude\"]=df[\"latitude\"].astype(str)\n", 219 | "df[\"longitude\"]=df[\"longitude\"].astype(str)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "id": "56db9221", 225 | "metadata": {}, 226 | "source": [ 227 | "How does the data look mathematically?" 
228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 5, 233 | "id": "95b30309", 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "data": { 238 | "text/html": [ 239 | "
\n", 240 | "\n", 253 | "\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | "
priceminimum_nightsnumber_of_reviewsreviews_per_monthcalculated_host_listings_countavailability_365
count48906.00000048906.00000048906.00000038854.00000048906.00000048906.000000
mean152.7113247.03161223.3004541.3731517.142702112.782031
std240.12871320.51248944.6071751.68027032.948926131.620370
min0.0000001.0000000.0000000.0100001.0000000.000000
25%69.0000001.0000001.0000000.1900001.0000000.000000
50%106.0000003.0000005.0000000.7200001.00000045.000000
75%175.0000005.00000024.0000002.0200002.000000227.000000
max10000.0000001250.000000629.00000058.500000327.000000365.000000
\n", 340 | "
" 341 | ], 342 | "text/plain": [ 343 | " price minimum_nights number_of_reviews reviews_per_month \\\n", 344 | "count 48906.000000 48906.000000 48906.000000 38854.000000 \n", 345 | "mean 152.711324 7.031612 23.300454 1.373151 \n", 346 | "std 240.128713 20.512489 44.607175 1.680270 \n", 347 | "min 0.000000 1.000000 0.000000 0.010000 \n", 348 | "25% 69.000000 1.000000 1.000000 0.190000 \n", 349 | "50% 106.000000 3.000000 5.000000 0.720000 \n", 350 | "75% 175.000000 5.000000 24.000000 2.020000 \n", 351 | "max 10000.000000 1250.000000 629.000000 58.500000 \n", 352 | "\n", 353 | " calculated_host_listings_count availability_365 \n", 354 | "count 48906.000000 48906.000000 \n", 355 | "mean 7.142702 112.782031 \n", 356 | "std 32.948926 131.620370 \n", 357 | "min 1.000000 0.000000 \n", 358 | "25% 1.000000 0.000000 \n", 359 | "50% 1.000000 45.000000 \n", 360 | "75% 2.000000 227.000000 \n", 361 | "max 327.000000 365.000000 " 362 | ] 363 | }, 364 | "execution_count": 5, 365 | "metadata": {}, 366 | "output_type": "execute_result" 367 | } 368 | ], 369 | "source": [ 370 | "df.describe()" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "id": "138bafa9", 376 | "metadata": {}, 377 | "source": [ 378 | "The minimum nights for listings range from 1 to 1250." 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "id": "a6b70275", 384 | "metadata": {}, 385 | "source": [ 386 | "# Categorical Data" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 6, 392 | "id": "74b3cb63", 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "data": { 397 | "text/plain": [ 398 | "id 48895\n", 399 | "name 47896\n", 400 | "host_id 37457\n", 401 | "host_name 11452\n", 402 | "neighbourhood_group 5\n", 403 | "neighbourhood 221\n", 404 | "latitude 19048\n", 405 | "longitude 14718\n", 406 | "room_type 3\n", 407 | "price 674\n", 408 | "minimum_nights 109\n", 409 | "number_of_reviews 394\n", 410 | "last_review 1764\n", 411 | "reviews_per_month 937\n", 412 | 
"calculated_host_listings_count 47\n", 413 | "availability_365 366\n", 414 | "dtype: int64" 415 | ] 416 | }, 417 | "execution_count": 6, 418 | "metadata": {}, 419 | "output_type": "execute_result" 420 | } 421 | ], 422 | "source": [ 423 | "df.nunique()" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 7, 429 | "id": "56c9d1dc", 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "data": { 434 | "text/plain": [ 435 | "Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood_group',\n", 436 | " 'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',\n", 437 | " 'minimum_nights', 'number_of_reviews', 'last_review',\n", 438 | " 'reviews_per_month', 'calculated_host_listings_count',\n", 439 | " 'availability_365'],\n", 440 | " dtype='object')" 441 | ] 442 | }, 443 | "execution_count": 7, 444 | "metadata": {}, 445 | "output_type": "execute_result" 446 | } 447 | ], 448 | "source": [ 449 | "df.columns" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 8, 455 | "id": "fa8aa028", 456 | "metadata": {}, 457 | "outputs": [ 458 | { 459 | "data": { 460 | "text/plain": [ 461 | "Entire home/apt 25414\n", 462 | "Private room 22332\n", 463 | "Shared room 1160\n", 464 | "Name: room_type, dtype: int64" 465 | ] 466 | }, 467 | "execution_count": 8, 468 | "metadata": {}, 469 | "output_type": "execute_result" 470 | } 471 | ], 472 | "source": [ 473 | "df[\"room_type\"].value_counts()" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 9, 479 | "id": "3401f55e", 480 | "metadata": {}, 481 | "outputs": [ 482 | { 483 | "data": { 484 | "text/plain": [ 485 | "Entire home/apt 0.519650\n", 486 | "Private room 0.456631\n", 487 | "Shared room 0.023719\n", 488 | "Name: room_type, dtype: float64" 489 | ] 490 | }, 491 | "execution_count": 9, 492 | "metadata": {}, 493 | "output_type": "execute_result" 494 | } 495 | ], 496 | "source": [ 497 | "df[\"room_type\"].value_counts(normalize = True)" 498 | ] 499 | }, 500 
| { 501 | "cell_type": "code", 502 | "execution_count": 10, 503 | "id": "5b722d04", 504 | "metadata": {}, 505 | "outputs": [ 506 | { 507 | "data": { 508 | "text/plain": [ 509 | "Manhattan 21669\n", 510 | "Brooklyn 20107\n", 511 | "Queens 5666\n", 512 | "Bronx 1091\n", 513 | "Staten Island 373\n", 514 | "Name: neighbourhood_group, dtype: int64" 515 | ] 516 | }, 517 | "execution_count": 10, 518 | "metadata": {}, 519 | "output_type": "execute_result" 520 | } 521 | ], 522 | "source": [ 523 | "df[\"neighbourhood_group\"].value_counts()" 524 | ] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "id": "388dfebe", 529 | "metadata": {}, 530 | "source": [ 531 | "# Numerical Data" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 11, 537 | "id": "c45b4650", 538 | "metadata": {}, 539 | "outputs": [ 540 | { 541 | "data": { 542 | "text/plain": [ 543 | "(-10.001, 2000.0] 48820\n", 544 | "(2000.0, 4000.0] 54\n", 545 | "(4000.0, 6000.0] 16\n", 546 | "(6000.0, 8000.0] 9\n", 547 | "(8000.0, 10000.0] 7\n", 548 | "Name: price, dtype: int64" 549 | ] 550 | }, 551 | "execution_count": 11, 552 | "metadata": {}, 553 | "output_type": "execute_result" 554 | } 555 | ], 556 | "source": [ 557 | "df[\"price\"].value_counts(bins = 5)" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 12, 563 | "id": "7f3ce5a3", 564 | "metadata": { 565 | "scrolled": false 566 | }, 567 | "outputs": [ 568 | { 569 | "data": { 570 | "text/plain": [ 571 | "(50.0, 100.0] 17373\n", 572 | "(100.0, 200.0] 16588\n", 573 | "(200.0, 500.0] 7340\n", 574 | "(0.0, 50.0] 6550\n", 575 | "(500.0, 800.0] 624\n", 576 | "(800.0, 2000.0] 334\n", 577 | "(2000.0, 4000.0] 54\n", 578 | "(4000.0, 10000.0] 32\n", 579 | "(-10.001, 0.0] 11\n", 580 | "Name: price, dtype: int64" 581 | ] 582 | }, 583 | "execution_count": 12, 584 | "metadata": {}, 585 | "output_type": "execute_result" 586 | } 587 | ], 588 | "source": [ 589 | "bins = [-10,0, 50,100, 200,500,800,2000,4000,10000]\n", 590 | 
"df[\"price\"].value_counts(bins = bins)" 591 | ] 592 | }, 593 | { 594 | "cell_type": "markdown", 595 | "id": "993f989b", 596 | "metadata": {}, 597 | "source": [ 598 | "It is mainly helpful in small datasets." 599 | ] 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "id": "c76d8cea", 604 | "metadata": {}, 605 | "source": [ 606 | "## Measures of central tendency" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": 13, 612 | "id": "329245f5", 613 | "metadata": { 614 | "scrolled": true 615 | }, 616 | "outputs": [ 617 | { 618 | "data": { 619 | "text/plain": [ 620 | "152.71132376395533" 621 | ] 622 | }, 623 | "execution_count": 13, 624 | "metadata": {}, 625 | "output_type": "execute_result" 626 | } 627 | ], 628 | "source": [ 629 | "df[\"price\"].mean()" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 14, 635 | "id": "b48226ef", 636 | "metadata": {}, 637 | "outputs": [ 638 | { 639 | "data": { 640 | "text/plain": [ 641 | "106.0" 642 | ] 643 | }, 644 | "execution_count": 14, 645 | "metadata": {}, 646 | "output_type": "execute_result" 647 | } 648 | ], 649 | "source": [ 650 | "df[\"price\"].median()" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": 15, 656 | "id": "00bae1ee", 657 | "metadata": {}, 658 | "outputs": [ 659 | { 660 | "data": { 661 | "text/plain": [ 662 | "240.1287131622509" 663 | ] 664 | }, 665 | "execution_count": 15, 666 | "metadata": {}, 667 | "output_type": "execute_result" 668 | } 669 | ], 670 | "source": [ 671 | "df[\"price\"].std()" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 16, 677 | "id": "33404a5d", 678 | "metadata": {}, 679 | "outputs": [ 680 | { 681 | "data": { 682 | "text/plain": [ 683 | "7.031611663190611" 684 | ] 685 | }, 686 | "execution_count": 16, 687 | "metadata": {}, 688 | "output_type": "execute_result" 689 | } 690 | ], 691 | "source": [ 692 | "df[\"minimum_nights\"].mean()" 693 | ] 694 | }, 695 | { 696 | "cell_type": 
"code", 697 | "execution_count": 17, 698 | "id": "a45e32fe", 699 | "metadata": {}, 700 | "outputs": [ 701 | { 702 | "data": { 703 | "text/plain": [ 704 | "3.0" 705 | ] 706 | }, 707 | "execution_count": 17, 708 | "metadata": {}, 709 | "output_type": "execute_result" 710 | } 711 | ], 712 | "source": [ 713 | "df[\"minimum_nights\"].median()" 714 | ] 715 | }, 716 | { 717 | "cell_type": "markdown", 718 | "id": "ce2e309d", 719 | "metadata": {}, 720 | "source": [ 721 | "## Measures of Shape (skewness and kurtosis)" 722 | ] 723 | }, 724 | { 725 | "cell_type": "code", 726 | "execution_count": 18, 727 | "id": "12649a50", 728 | "metadata": {}, 729 | "outputs": [ 730 | { 731 | "data": { 732 | "text/plain": [ 733 | "19.120831694826197" 734 | ] 735 | }, 736 | "execution_count": 18, 737 | "metadata": {}, 738 | "output_type": "execute_result" 739 | } 740 | ], 741 | "source": [ 742 | "df[\"price\"].skew()" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 19, 748 | "id": "0e856dc2", 749 | "metadata": {}, 750 | "outputs": [ 751 | { 752 | "data": { 753 | "text/plain": [ 754 | "585.7930484394186" 755 | ] 756 | }, 757 | "execution_count": 19, 758 | "metadata": {}, 759 | "output_type": "execute_result" 760 | } 761 | ], 762 | "source": [ 763 | "df[\"price\"].kurt()" 764 | ] 765 | }, 766 | { 767 | "cell_type": "markdown", 768 | "id": "8e4ad1db", 769 | "metadata": {}, 770 | "source": [ 771 | "How many listings are available throughout the year (365 days)?" 772 | ] 773 | }, 774 | { 775 | "cell_type": "code", 776 | "execution_count": 20, 777 | "id": "e1cc1b65", 778 | "metadata": {}, 779 | "outputs": [ 780 | { 781 | "data": { 782 | "text/plain": [ 783 | "1295" 784 | ] 785 | }, 786 | "execution_count": 20, 787 | "metadata": {}, 788 | "output_type": "execute_result" 789 | } 790 | ], 791 | "source": [ 792 | "df[df[\"availability_365\"]==365].shape[0]" 793 | ] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": 21, 798 | "id": "87d7ac64", 799 | "metadata": {}, 800 | 
"outputs": [ 801 | { 802 | "name": "stderr", 803 | "output_type": "stream", 804 | "text": [ 805 | "C:\\Users\\GFG19189\\AppData\\Local\\Temp\\ipykernel_1056\\1134722465.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", 806 | " df.corr()\n" 807 | ] 808 | }, 809 | { 810 | "data": { 811 | "text/html": [ 812 | "
\n", 813 | "\n", 826 | "\n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | "
priceminimum_nightsnumber_of_reviewsreviews_per_monthcalculated_host_listings_countavailability_365
price1.0000000.042771-0.048014-0.0306080.0574780.081817
minimum_nights0.0427711.000000-0.080093-0.1217720.1279170.144146
number_of_reviews-0.048014-0.0800931.0000000.549291-0.0723750.172002
reviews_per_month-0.030608-0.1217720.5492911.000000-0.0094140.185818
calculated_host_listings_count0.0574780.127917-0.072375-0.0094141.0000000.225680
availability_3650.0818170.1441460.1720020.1858180.2256801.000000
\n", 895 | "
" 896 | ], 897 | "text/plain": [ 898 | " price minimum_nights number_of_reviews \\\n", 899 | "price 1.000000 0.042771 -0.048014 \n", 900 | "minimum_nights 0.042771 1.000000 -0.080093 \n", 901 | "number_of_reviews -0.048014 -0.080093 1.000000 \n", 902 | "reviews_per_month -0.030608 -0.121772 0.549291 \n", 903 | "calculated_host_listings_count 0.057478 0.127917 -0.072375 \n", 904 | "availability_365 0.081817 0.144146 0.172002 \n", 905 | "\n", 906 | " reviews_per_month \\\n", 907 | "price -0.030608 \n", 908 | "minimum_nights -0.121772 \n", 909 | "number_of_reviews 0.549291 \n", 910 | "reviews_per_month 1.000000 \n", 911 | "calculated_host_listings_count -0.009414 \n", 912 | "availability_365 0.185818 \n", 913 | "\n", 914 | " calculated_host_listings_count \\\n", 915 | "price 0.057478 \n", 916 | "minimum_nights 0.127917 \n", 917 | "number_of_reviews -0.072375 \n", 918 | "reviews_per_month -0.009414 \n", 919 | "calculated_host_listings_count 1.000000 \n", 920 | "availability_365 0.225680 \n", 921 | "\n", 922 | " availability_365 \n", 923 | "price 0.081817 \n", 924 | "minimum_nights 0.144146 \n", 925 | "number_of_reviews 0.172002 \n", 926 | "reviews_per_month 0.185818 \n", 927 | "calculated_host_listings_count 0.225680 \n", 928 | "availability_365 1.000000 " 929 | ] 930 | }, 931 | "execution_count": 21, 932 | "metadata": {}, 933 | "output_type": "execute_result" 934 | } 935 | ], 936 | "source": [ 937 | "df.corr()" 938 | ] 939 | } 940 | ], 941 | "metadata": { 942 | "kernelspec": { 943 | "display_name": "Python 3 (ipykernel)", 944 | "language": "python", 945 | "name": "python3" 946 | }, 947 | "language_info": { 948 | "codemirror_mode": { 949 | "name": "ipython", 950 | "version": 3 951 | }, 952 | "file_extension": ".py", 953 | "mimetype": "text/x-python", 954 | "name": "python", 955 | "nbconvert_exporter": "python", 956 | "pygments_lexer": "ipython3", 957 | "version": "3.11.1" 958 | } 959 | }, 960 | "nbformat": 4, 961 | "nbformat_minor": 5 962 | } 963 | 
-------------------------------------------------------------------------------- /2 Understanding your data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 29, 6 | "id": "8eb8b9b5", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "import seaborn as sns" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 30, 17 | "id": "ec399f59", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "df = pd.read_csv(\"AB_NYC_2019.csv\")" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "c7f041e4", 27 | "metadata": {}, 28 | "source": [ 29 | "## 1. What are the dimensions of the data?" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 31, 35 | "id": "ab628eec", 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "(48906, 16)" 42 | ] 43 | }, 44 | "execution_count": 31, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "df.shape" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "id": "bae2d9b7", 56 | "metadata": {}, 57 | "source": [ 58 | "## 2. What does the data look like?" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 32, 64 | "id": "a3bff2be", 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/html": [ 70 | "
\n", 71 | "\n", 84 | "\n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02539Clean & quiet apt home by the park2787JohnBrooklynKensington40.64749-73.97237Private room1491919-10-20180.216365
12595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt22514521-05-20190.382355
23647THE VILLAGE OF HARLEM....NEW YORK !4632ElisabethManhattanHarlem40.80902-73.94190Private room15030NaNNaN1365
33831Cozy Entire Floor of Brownstone4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt89127005-07-20194.641194
45022Entire Apt: Spacious Studio/Loft by central park7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt8010919-11-20180.1010
\n", 204 | "
" 205 | ], 206 | "text/plain": [ 207 | " id name host_id \\\n", 208 | "0 2539 Clean & quiet apt home by the park 2787 \n", 209 | "1 2595 Skylit Midtown Castle 2845 \n", 210 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n", 211 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n", 212 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n", 213 | "\n", 214 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n", 215 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n", 216 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", 217 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n", 218 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", 219 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n", 220 | "\n", 221 | " room_type price minimum_nights number_of_reviews last_review \\\n", 222 | "0 Private room 149 1 9 19-10-2018 \n", 223 | "1 Entire home/apt 225 1 45 21-05-2019 \n", 224 | "2 Private room 150 3 0 NaN \n", 225 | "3 Entire home/apt 89 1 270 05-07-2019 \n", 226 | "4 Entire home/apt 80 10 9 19-11-2018 \n", 227 | "\n", 228 | " reviews_per_month calculated_host_listings_count availability_365 \n", 229 | "0 0.21 6 365 \n", 230 | "1 0.38 2 355 \n", 231 | "2 NaN 1 365 \n", 232 | "3 4.64 1 194 \n", 233 | "4 0.10 1 0 " 234 | ] 235 | }, 236 | "execution_count": 32, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | } 240 | ], 241 | "source": [ 242 | "df.head()" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 33, 248 | "id": "29978697", 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/html": [ 254 | "
\n", 255 | "\n", 268 | "\n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
489015441Central Manhattan/near Broadway7989KateManhattanHell's Kitchen40.76076-73.98867Private room85218823-06-20191.50139
489025803Lovely Room 1, Garden, Best Area, Legal rental9744LaurieBrooklynSouth Slope40.66829-73.98779Private room89416724-06-20191.343314
489036021Wonderful Guest Bedroom in Manhattan for SINGLES11528ClaudioManhattanUpper West Side40.79826-73.96113Private room85211305-07-20190.911333
489046090West Village Nest - Superhost11975AlinaManhattanWest Village40.73530-74.00525Entire home/apt120902731-10-20180.2210
489056848Only 2 stops to Manhattan studio15991Allen & IrinaBrooklynWilliamsburg40.70837-73.95352Entire home/apt140214829-06-20191.20146
\n", 388 | "
" 389 | ], 390 | "text/plain": [ 391 | " id name host_id \\\n", 392 | "48901 5441 Central Manhattan/near Broadway 7989 \n", 393 | "48902 5803 Lovely Room 1, Garden, Best Area, Legal rental 9744 \n", 394 | "48903 6021 Wonderful Guest Bedroom in Manhattan for SINGLES 11528 \n", 395 | "48904 6090 West Village Nest - Superhost 11975 \n", 396 | "48905 6848 Only 2 stops to Manhattan studio 15991 \n", 397 | "\n", 398 | " host_name neighbourhood_group neighbourhood latitude \\\n", 399 | "48901 Kate Manhattan Hell's Kitchen 40.76076 \n", 400 | "48902 Laurie Brooklyn South Slope 40.66829 \n", 401 | "48903 Claudio Manhattan Upper West Side 40.79826 \n", 402 | "48904 Alina Manhattan West Village 40.73530 \n", 403 | "48905 Allen & Irina Brooklyn Williamsburg 40.70837 \n", 404 | "\n", 405 | " longitude room_type price minimum_nights number_of_reviews \\\n", 406 | "48901 -73.98867 Private room 85 2 188 \n", 407 | "48902 -73.98779 Private room 89 4 167 \n", 408 | "48903 -73.96113 Private room 85 2 113 \n", 409 | "48904 -74.00525 Entire home/apt 120 90 27 \n", 410 | "48905 -73.95352 Entire home/apt 140 2 148 \n", 411 | "\n", 412 | " last_review reviews_per_month calculated_host_listings_count \\\n", 413 | "48901 23-06-2019 1.50 1 \n", 414 | "48902 24-06-2019 1.34 3 \n", 415 | "48903 05-07-2019 0.91 1 \n", 416 | "48904 31-10-2018 0.22 1 \n", 417 | "48905 29-06-2019 1.20 1 \n", 418 | "\n", 419 | " availability_365 \n", 420 | "48901 39 \n", 421 | "48902 314 \n", 422 | "48903 333 \n", 423 | "48904 0 \n", 424 | "48905 46 " 425 | ] 426 | }, 427 | "execution_count": 33, 428 | "metadata": {}, 429 | "output_type": "execute_result" 430 | } 431 | ], 432 | "source": [ 433 | "df.tail()" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 34, 439 | "id": "e1ee689c", 440 | "metadata": {}, 441 | "outputs": [ 442 | { 443 | "data": { 444 | "text/html": [ 445 | "
\n", 446 | "\n", 459 | "\n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
70245050156Wake Up In The City That Never Sleeps193488JaneManhattanEast Village40.73273-73.98969Private room13511426-05-20190.282332
32211923153Cozy Vintage Inspired East Village9965028SashaManhattanEast Village40.72709-73.98442Entire home/apt45021815-01-20170.281363
1863714741623Cozy Private Bedroom and Bathroom in Brooklyn39181402SarahBrooklynBedford-Stuyvesant40.68179-73.95252Private room4836722-06-20191.95170
1580512786165Cozy Private room in Chelsea !69598657VhManhattanChelsea40.75094-73.99748Private room75211523-06-20192.99137
2402819368455Chelsea Gem70154608DonnaManhattanChelsea40.74625-73.99911Entire home/apt18551701-07-20190.88115
\n", 579 | "
" 580 | ], 581 | "text/plain": [ 582 | " id name host_id \\\n", 583 | "7024 5050156 Wake Up In The City That Never Sleeps 193488 \n", 584 | "3221 1923153 Cozy Vintage Inspired East Village 9965028 \n", 585 | "18637 14741623 Cozy Private Bedroom and Bathroom in Brooklyn 39181402 \n", 586 | "15805 12786165 Cozy Private room in Chelsea ! 69598657 \n", 587 | "24028 19368455 Chelsea Gem 70154608 \n", 588 | "\n", 589 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n", 590 | "7024 Jane Manhattan East Village 40.73273 -73.98969 \n", 591 | "3221 Sasha Manhattan East Village 40.72709 -73.98442 \n", 592 | "18637 Sarah Brooklyn Bedford-Stuyvesant 40.68179 -73.95252 \n", 593 | "15805 Vh Manhattan Chelsea 40.75094 -73.99748 \n", 594 | "24028 Donna Manhattan Chelsea 40.74625 -73.99911 \n", 595 | "\n", 596 | " room_type price minimum_nights number_of_reviews last_review \\\n", 597 | "7024 Private room 135 1 14 26-05-2019 \n", 598 | "3221 Entire home/apt 450 2 18 15-01-2017 \n", 599 | "18637 Private room 48 3 67 22-06-2019 \n", 600 | "15805 Private room 75 2 115 23-06-2019 \n", 601 | "24028 Entire home/apt 185 5 17 01-07-2019 \n", 602 | "\n", 603 | " reviews_per_month calculated_host_listings_count availability_365 \n", 604 | "7024 0.28 2 332 \n", 605 | "3221 0.28 1 363 \n", 606 | "18637 1.95 1 70 \n", 607 | "15805 2.99 1 37 \n", 608 | "24028 0.88 1 15 " 609 | ] 610 | }, 611 | "execution_count": 34, 612 | "metadata": {}, 613 | "output_type": "execute_result" 614 | } 615 | ], 616 | "source": [ 617 | "df.sample(5)" 618 | ] 619 | }, 620 | { 621 | "cell_type": "markdown", 622 | "id": "14ded357", 623 | "metadata": {}, 624 | "source": [ 625 | "## 3. What is the datatype of cols?" 
626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 36, 631 | "id": "3b6ba267", 632 | "metadata": {}, 633 | "outputs": [ 634 | { 635 | "name": "stdout", 636 | "output_type": "stream", 637 | "text": [ 638 | "\n", 639 | "RangeIndex: 48906 entries, 0 to 48905\n", 640 | "Data columns (total 16 columns):\n", 641 | " # Column Non-Null Count Dtype \n", 642 | "--- ------ -------------- ----- \n", 643 | " 0 id 48906 non-null int64 \n", 644 | " 1 name 48890 non-null object \n", 645 | " 2 host_id 48906 non-null int64 \n", 646 | " 3 host_name 48885 non-null object \n", 647 | " 4 neighbourhood_group 48906 non-null object \n", 648 | " 5 neighbourhood 48906 non-null object \n", 649 | " 6 latitude 48906 non-null float64\n", 650 | " 7 longitude 48906 non-null float64\n", 651 | " 8 room_type 48906 non-null object \n", 652 | " 9 price 48906 non-null int64 \n", 653 | " 10 minimum_nights 48906 non-null int64 \n", 654 | " 11 number_of_reviews 48906 non-null int64 \n", 655 | " 12 last_review 38854 non-null object \n", 656 | " 13 reviews_per_month 38854 non-null float64\n", 657 | " 14 calculated_host_listings_count 48906 non-null int64 \n", 658 | " 15 availability_365 48906 non-null int64 \n", 659 | "dtypes: float64(3), int64(7), object(6)\n", 660 | "memory usage: 6.0+ MB\n" 661 | ] 662 | } 663 | ], 664 | "source": [ 665 | "df.info()" 666 | ] 667 | }, 668 | { 669 | "cell_type": "markdown", 670 | "id": "5f2c538a", 671 | "metadata": {}, 672 | "source": [ 673 | "## 4. Are there any missing values?" 
674 | ] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": 37, 679 | "id": "fe257b4f", 680 | "metadata": {}, 681 | "outputs": [ 682 | { 683 | "data": { 684 | "text/plain": [ 685 | "id 0\n", 686 | "name 16\n", 687 | "host_id 0\n", 688 | "host_name 21\n", 689 | "neighbourhood_group 0\n", 690 | "neighbourhood 0\n", 691 | "latitude 0\n", 692 | "longitude 0\n", 693 | "room_type 0\n", 694 | "price 0\n", 695 | "minimum_nights 0\n", 696 | "number_of_reviews 0\n", 697 | "last_review 10052\n", 698 | "reviews_per_month 10052\n", 699 | "calculated_host_listings_count 0\n", 700 | "availability_365 0\n", 701 | "dtype: int64" 702 | ] 703 | }, 704 | "execution_count": 37, 705 | "metadata": {}, 706 | "output_type": "execute_result" 707 | } 708 | ], 709 | "source": [ 710 | "df.isna().sum()" 711 | ] 712 | }, 713 | { 714 | "cell_type": "markdown", 715 | "id": "b8b5f3a1", 716 | "metadata": {}, 717 | "source": [ 718 | "## 5. What does the data look like mathematically?" 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": 38, 724 | "id": "fa4bc8f7", 725 | "metadata": {}, 726 | "outputs": [ 727 | { 728 | "data": { 729 | "text/html": [ 730 | "
\n", 731 | "\n", 744 | "\n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | "
idhost_idlatitudelongitudepriceminimum_nightsnumber_of_reviewsreviews_per_monthcalculated_host_listings_countavailability_365
count4.890600e+044.890600e+0448906.00000048906.00000048906.00000048906.00000048906.00000038854.00000048906.00000048906.000000
mean1.901287e+076.760480e+0740.728952-73.952175152.7113247.03161223.3004541.3731517.142702112.782031
std1.098557e+077.860866e+070.0545290.046154240.12871320.51248944.6071751.68027032.948926131.620370
min2.539000e+032.438000e+0340.499790-74.2444200.0000001.0000000.0000000.0100001.0000000.000000
25%9.464662e+067.809567e+0640.690100-73.98308069.0000001.0000001.0000000.1900001.0000000.000000
50%1.967545e+073.078463e+0740.723080-73.955685106.0000003.0000005.0000000.7200001.00000045.000000
75%2.915085e+071.074344e+0840.763120-73.936283175.0000005.00000024.0000002.0200002.000000227.000000
max3.648724e+072.743213e+0840.913060-73.71299010000.0000001250.000000629.00000058.500000327.000000365.000000
\n", 867 | "
" 868 | ], 869 | "text/plain": [ 870 | " id host_id latitude longitude price \\\n", 871 | "count 4.890600e+04 4.890600e+04 48906.000000 48906.000000 48906.000000 \n", 872 | "mean 1.901287e+07 6.760480e+07 40.728952 -73.952175 152.711324 \n", 873 | "std 1.098557e+07 7.860866e+07 0.054529 0.046154 240.128713 \n", 874 | "min 2.539000e+03 2.438000e+03 40.499790 -74.244420 0.000000 \n", 875 | "25% 9.464662e+06 7.809567e+06 40.690100 -73.983080 69.000000 \n", 876 | "50% 1.967545e+07 3.078463e+07 40.723080 -73.955685 106.000000 \n", 877 | "75% 2.915085e+07 1.074344e+08 40.763120 -73.936283 175.000000 \n", 878 | "max 3.648724e+07 2.743213e+08 40.913060 -73.712990 10000.000000 \n", 879 | "\n", 880 | " minimum_nights number_of_reviews reviews_per_month \\\n", 881 | "count 48906.000000 48906.000000 38854.000000 \n", 882 | "mean 7.031612 23.300454 1.373151 \n", 883 | "std 20.512489 44.607175 1.680270 \n", 884 | "min 1.000000 0.000000 0.010000 \n", 885 | "25% 1.000000 1.000000 0.190000 \n", 886 | "50% 3.000000 5.000000 0.720000 \n", 887 | "75% 5.000000 24.000000 2.020000 \n", 888 | "max 1250.000000 629.000000 58.500000 \n", 889 | "\n", 890 | " calculated_host_listings_count availability_365 \n", 891 | "count 48906.000000 48906.000000 \n", 892 | "mean 7.142702 112.782031 \n", 893 | "std 32.948926 131.620370 \n", 894 | "min 1.000000 0.000000 \n", 895 | "25% 1.000000 0.000000 \n", 896 | "50% 1.000000 45.000000 \n", 897 | "75% 2.000000 227.000000 \n", 898 | "max 327.000000 365.000000 " 899 | ] 900 | }, 901 | "execution_count": 38, 902 | "metadata": {}, 903 | "output_type": "execute_result" 904 | } 905 | ], 906 | "source": [ 907 | "df.describe()" 908 | ] 909 | }, 910 | { 911 | "cell_type": "markdown", 912 | "id": "290855e4", 913 | "metadata": {}, 914 | "source": [ 915 | "## 6. Are there any duplicate values?" 
916 | ] 917 | }, 918 | { 919 | "cell_type": "code", 920 | "execution_count": 39, 921 | "id": "0298e5a3", 922 | "metadata": {}, 923 | "outputs": [ 924 | { 925 | "data": { 926 | "text/plain": [ 927 | "11" 928 | ] 929 | }, 930 | "execution_count": 39, 931 | "metadata": {}, 932 | "output_type": "execute_result" 933 | } 934 | ], 935 | "source": [ 936 | "df.duplicated().sum()" 937 | ] 938 | }, 939 | { 940 | "cell_type": "markdown", 941 | "id": "59b998f0", 942 | "metadata": {}, 943 | "source": [ 944 | "## 7. How many unique values are there in each column?" 945 | ] 946 | }, 947 | { 948 | "cell_type": "code", 949 | "execution_count": 40, 950 | "id": "6541d846", 951 | "metadata": {}, 952 | "outputs": [ 953 | { 954 | "data": { 955 | "text/plain": [ 956 | "id 48895\n", 957 | "name 47896\n", 958 | "host_id 37457\n", 959 | "host_name 11452\n", 960 | "neighbourhood_group 5\n", 961 | "neighbourhood 221\n", 962 | "latitude 19048\n", 963 | "longitude 14718\n", 964 | "room_type 3\n", 965 | "price 674\n", 966 | "minimum_nights 109\n", 967 | "number_of_reviews 394\n", 968 | "last_review 1764\n", 969 | "reviews_per_month 937\n", 970 | "calculated_host_listings_count 47\n", 971 | "availability_365 366\n", 972 | "dtype: int64" 973 | ] 974 | }, 975 | "execution_count": 40, 976 | "metadata": {}, 977 | "output_type": "execute_result" 978 | } 979 | ], 980 | "source": [ 981 | "df.nunique()" 982 | ] 983 | }, 984 | { 985 | "cell_type": "code", 986 | "execution_count": 41, 987 | "id": "ce2dc51b", 988 | "metadata": {}, 989 | "outputs": [ 990 | { 991 | "data": { 992 | "text/plain": [ 993 | "array(['Brooklyn', 'Manhattan', 'Queens', 'Staten Island', 'Bronx'],\n", 994 | " dtype=object)" 995 | ] 996 | }, 997 | "execution_count": 41, 998 | "metadata": {}, 999 | "output_type": "execute_result" 1000 | } 1001 | ], 1002 | "source": [ 1003 | "df[\"neighbourhood_group\"].unique()" 1004 | ] 1005 | }, 1006 | { 1007 | "cell_type": "markdown", 1008 | "id": "61865104", 1009 | "metadata": {}, 1010 | "source": [ 
1011 | "## 8. Are there any outliers in the numerical columns?" 1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": 42, 1017 | "id": "03a7266d", 1018 | "metadata": {}, 1019 | "outputs": [ 1020 | { 1021 | "data": { 1022 | "text/plain": [ 1023 | "" 1024 | ] 1025 | }, 1026 | "execution_count": 42, 1027 | "metadata": {}, 1028 | "output_type": "execute_result" 1029 | }, 1030 | { 1031 | "data": { 1032 | "image/png": "\n", 1033 | "text/plain": [ 1034 | "
" 1035 | ] 1036 | }, 1037 | "metadata": {}, 1038 | "output_type": "display_data" 1039 | } 1040 | ], 1041 | "source": [ 1042 | "sns.boxplot(df[\"price\"])" 1043 | ] 1044 | }, 1045 | { 1046 | "cell_type": "code", 1047 | "execution_count": 43, 1048 | "id": "35d534e9", 1049 | "metadata": {}, 1050 | "outputs": [ 1051 | { 1052 | "data": { 1053 | "text/plain": [ 1054 | "" 1055 | ] 1056 | }, 1057 | "execution_count": 43, 1058 | "metadata": {}, 1059 | "output_type": "execute_result" 1060 | }, 1061 | { 1062 | "data": { 1063 | "image/png": "\n", 1064 | "text/plain": [ 1065 | "
" 1066 | ] 1067 | }, 1068 | "metadata": {}, 1069 | "output_type": "display_data" 1070 | } 1071 | ], 1072 | "source": [ 1073 | "sns.boxplot(df[\"availability_365\"])" 1074 | ] 1075 | } 1076 | ], 1077 | "metadata": { 1078 | "kernelspec": { 1079 | "display_name": "Python 3 (ipykernel)", 1080 | "language": "python", 1081 | "name": "python3" 1082 | }, 1083 | "language_info": { 1084 | "codemirror_mode": { 1085 | "name": "ipython", 1086 | "version": 3 1087 | }, 1088 | "file_extension": ".py", 1089 | "mimetype": "text/x-python", 1090 | "name": "python", 1091 | "nbconvert_exporter": "python", 1092 | "pygments_lexer": "ipython3", 1093 | "version": "3.11.1" 1094 | } 1095 | }, 1096 | "nbformat": 4, 1097 | "nbformat_minor": 5 1098 | } 1099 | -------------------------------------------------------------------------------- /3 Missing values.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "bb76a3b8", 6 | "metadata": {}, 7 | "source": [ 8 | "In the field of data-related research, it is very important to handle missing data either by deleting or imputation(handling the missing values with some estimation)." 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "2dbb8dd1", 14 | "metadata": {}, 15 | "source": [ 16 | "Different Methods of Dealing With Missing Data\n", 17 | "1. Deleting the column with missing data\n", 18 | "2. Deleting the row with missing data\n", 19 | "3. Filling the Missing Values – Imputation\n", 20 | " \n", 21 | " (i) Numerical data - use mean\n", 22 | " \n", 23 | " (ii) categorical data \n", 24 | " - use mode\n", 25 | " - assign the NaN values their own category\n", 26 | " \n", 27 | " \n", 28 | "4. 
Advanced Imputation" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "id": "e9dcf94b", 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import pandas as pd" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "id": "d2efcf45", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "data = pd.read_csv(\"AB_NYC_2019.csv\")" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 178, 54 | "id": "63308fe4", 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/html": [ 60 | "
\n", 61 | "\n", 74 | "\n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02539Clean & quiet apt home by the park2787JohnBrooklynKensington40.64749-73.97237Private room149192018-10-190.216365
12595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt2251452019-05-210.382355
23647THE VILLAGE OF HARLEM....NEW YORK !4632ElisabethManhattanHarlem40.80902-73.94190Private room15030NaNNaN1365
33831Cozy Entire Floor of Brownstone4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt8912702019-07-054.641194
45022Entire Apt: Spacious Studio/Loft by central park7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt801092018-11-190.1010
\n", 194 | "
" 195 | ], 196 | "text/plain": [ 197 | " id name host_id \\\n", 198 | "0 2539 Clean & quiet apt home by the park 2787 \n", 199 | "1 2595 Skylit Midtown Castle 2845 \n", 200 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n", 201 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n", 202 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n", 203 | "\n", 204 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n", 205 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n", 206 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", 207 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n", 208 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", 209 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n", 210 | "\n", 211 | " room_type price minimum_nights number_of_reviews last_review \\\n", 212 | "0 Private room 149 1 9 2018-10-19 \n", 213 | "1 Entire home/apt 225 1 45 2019-05-21 \n", 214 | "2 Private room 150 3 0 NaN \n", 215 | "3 Entire home/apt 89 1 270 2019-07-05 \n", 216 | "4 Entire home/apt 80 10 9 2018-11-19 \n", 217 | "\n", 218 | " reviews_per_month calculated_host_listings_count availability_365 \n", 219 | "0 0.21 6 365 \n", 220 | "1 0.38 2 355 \n", 221 | "2 NaN 1 365 \n", 222 | "3 4.64 1 194 \n", 223 | "4 0.10 1 0 " 224 | ] 225 | }, 226 | "execution_count": 178, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "data.head()" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 5, 238 | "id": "312e5057", 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "data": { 243 | "text/plain": [ 244 | "(48915, 16)" 245 | ] 246 | }, 247 | "execution_count": 5, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "data.shape" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 4, 259 | "id": "899d0abc", 260 | "metadata": { 261 | "scrolled": true 262 | }, 263 | "outputs": [ 264 | 
{ 265 | "data": { 266 | "text/plain": [ 267 | "id 0\n", 268 | "name 16\n", 269 | "host_id 0\n", 270 | "host_name 21\n", 271 | "neighbourhood_group 0\n", 272 | "neighbourhood 0\n", 273 | "latitude 0\n", 274 | "longitude 0\n", 275 | "room_type 0\n", 276 | "price 0\n", 277 | "minimum_nights 0\n", 278 | "number_of_reviews 0\n", 279 | "last_review 10052\n", 280 | "reviews_per_month 10052\n", 281 | "calculated_host_listings_count 0\n", 282 | "availability_365 0\n", 283 | "dtype: int64" 284 | ] 285 | }, 286 | "execution_count": 4, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "data.isna().sum()" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "id": "835ab45b", 298 | "metadata": {}, 299 | "source": [ 300 | "### Removing column" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 6, 306 | "id": "6ff99b23", 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [ 310 | "df1 = data.drop(\"last_review\",axis =1)" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 7, 316 | "id": "c19a92e7", 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "data": { 321 | "text/plain": [ 322 | "id 0\n", 323 | "name 16\n", 324 | "host_id 0\n", 325 | "host_name 21\n", 326 | "neighbourhood_group 0\n", 327 | "neighbourhood 0\n", 328 | "latitude 0\n", 329 | "longitude 0\n", 330 | "room_type 0\n", 331 | "price 0\n", 332 | "minimum_nights 0\n", 333 | "number_of_reviews 0\n", 334 | "reviews_per_month 10052\n", 335 | "calculated_host_listings_count 0\n", 336 | "availability_365 0\n", 337 | "dtype: int64" 338 | ] 339 | }, 340 | "execution_count": 7, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "df1.isna().sum()" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 184, 352 | "id": "7678f06e", 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "data": { 357 | "text/plain": [ 358 | "(48895, 15)" 
359 | ] 360 | }, 361 | "execution_count": 184, 362 | "metadata": {}, 363 | "output_type": "execute_result" 364 | } 365 | ], 366 | "source": [ 367 | "df1.shape" 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "id": "e872761c", 373 | "metadata": {}, 374 | "source": [ 375 | "### Removing rows" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 185, 381 | "id": "c73417f6", 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "df2 = data.dropna()" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 186, 391 | "id": "6fae1f74", 392 | "metadata": {}, 393 | "outputs": [ 394 | { 395 | "data": { 396 | "text/plain": [ 397 | "(38821, 16)" 398 | ] 399 | }, 400 | "execution_count": 186, 401 | "metadata": {}, 402 | "output_type": "execute_result" 403 | } 404 | ], 405 | "source": [ 406 | "df2.shape" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 187, 412 | "id": "1c655dc5", 413 | "metadata": {}, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/plain": [ 418 | "id 0\n", 419 | "name 0\n", 420 | "host_id 0\n", 421 | "host_name 0\n", 422 | "neighbourhood_group 0\n", 423 | "neighbourhood 0\n", 424 | "latitude 0\n", 425 | "longitude 0\n", 426 | "room_type 0\n", 427 | "price 0\n", 428 | "minimum_nights 0\n", 429 | "number_of_reviews 0\n", 430 | "last_review 0\n", 431 | "reviews_per_month 0\n", 432 | "calculated_host_listings_count 0\n", 433 | "availability_365 0\n", 434 | "dtype: int64" 435 | ] 436 | }, 437 | "execution_count": 187, 438 | "metadata": {}, 439 | "output_type": "execute_result" 440 | } 441 | ], 442 | "source": [ 443 | "df2.isna().sum()" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "id": "25afd242", 449 | "metadata": {}, 450 | "source": [ 451 | "### Filling the missing values - Imputation" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 188, 457 | "id": "427c815c", 458 | "metadata": {}, 459 | "outputs": [], 460 | 
"source": [ 461 | "df3 = pd.read_csv(\"AB_NYC_2019.csv\")" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 189, 467 | "id": "9a34dc54", 468 | "metadata": {}, 469 | "outputs": [ 470 | { 471 | "data": { 472 | "text/plain": [ 473 | "(48895, 16)" 474 | ] 475 | }, 476 | "execution_count": 189, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [ 482 | "df3.shape" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 190, 488 | "id": "a32e8937", 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "data": { 493 | "text/html": [ 494 | "
\n", 495 | "\n", 508 | "\n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02539Clean & quiet apt home by the park2787JohnBrooklynKensington40.64749-73.97237Private room149192018-10-190.216365
12595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt2251452019-05-210.382355
23647THE VILLAGE OF HARLEM....NEW YORK !4632ElisabethManhattanHarlem40.80902-73.94190Private room15030NaNNaN1365
33831Cozy Entire Floor of Brownstone4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt8912702019-07-054.641194
45022Entire Apt: Spacious Studio/Loft by central park7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt801092018-11-190.1010
\n", 628 | "
" 629 | ], 630 | "text/plain": [ 631 | " id name host_id \\\n", 632 | "0 2539 Clean & quiet apt home by the park 2787 \n", 633 | "1 2595 Skylit Midtown Castle 2845 \n", 634 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n", 635 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n", 636 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n", 637 | "\n", 638 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n", 639 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n", 640 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", 641 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n", 642 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", 643 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n", 644 | "\n", 645 | " room_type price minimum_nights number_of_reviews last_review \\\n", 646 | "0 Private room 149 1 9 2018-10-19 \n", 647 | "1 Entire home/apt 225 1 45 2019-05-21 \n", 648 | "2 Private room 150 3 0 NaN \n", 649 | "3 Entire home/apt 89 1 270 2019-07-05 \n", 650 | "4 Entire home/apt 80 10 9 2018-11-19 \n", 651 | "\n", 652 | " reviews_per_month calculated_host_listings_count availability_365 \n", 653 | "0 0.21 6 365 \n", 654 | "1 0.38 2 355 \n", 655 | "2 NaN 1 365 \n", 656 | "3 4.64 1 194 \n", 657 | "4 0.10 1 0 " 658 | ] 659 | }, 660 | "execution_count": 190, 661 | "metadata": {}, 662 | "output_type": "execute_result" 663 | } 664 | ], 665 | "source": [ 666 | "df3.head()" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": 191, 672 | "id": "413c8f0f", 673 | "metadata": {}, 674 | "outputs": [ 675 | { 676 | "data": { 677 | "text/plain": [ 678 | "id 0\n", 679 | "name 16\n", 680 | "host_id 0\n", 681 | "host_name 21\n", 682 | "neighbourhood_group 0\n", 683 | "neighbourhood 0\n", 684 | "latitude 0\n", 685 | "longitude 0\n", 686 | "room_type 0\n", 687 | "price 0\n", 688 | "minimum_nights 0\n", 689 | "number_of_reviews 0\n", 690 | "last_review 10052\n", 691 | "reviews_per_month 10052\n", 692 | 
"calculated_host_listings_count 0\n", 693 | "availability_365 0\n", 694 | "dtype: int64" 695 | ] 696 | }, 697 | "execution_count": 191, 698 | "metadata": {}, 699 | "output_type": "execute_result" 700 | } 701 | ], 702 | "source": [ 703 | "df3.isna().sum()" 704 | ] 705 | }, 706 | { 707 | "cell_type": "markdown", 708 | "id": "c509b61c", 709 | "metadata": {}, 710 | "source": [ 711 | "#### Numeric data (Use Mean)" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": 192, 717 | "id": "3c56ea41", 718 | "metadata": {}, 719 | "outputs": [], 720 | "source": [ 721 | "mean_value = df3['reviews_per_month'].mean()\n", 722 | "df3['reviews_per_month'].fillna(mean_value, inplace=True)" 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": 193, 728 | "id": "a391aa62", 729 | "metadata": {}, 730 | "outputs": [ 731 | { 732 | "data": { 733 | "text/html": [ 734 | "
\n", 735 | "\n", 748 | "\n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02539Clean & quiet apt home by the park2787JohnBrooklynKensington40.64749-73.97237Private room149192018-10-190.2100006365
12595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt2251452019-05-210.3800002355
23647THE VILLAGE OF HARLEM....NEW YORK !4632ElisabethManhattanHarlem40.80902-73.94190Private room15030NaN1.3732211365
33831Cozy Entire Floor of Brownstone4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt8912702019-07-054.6400001194
45022Entire Apt: Spacious Studio/Loft by central park7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt801092018-11-190.10000010
\n", 868 | "
" 869 | ], 870 | "text/plain": [ 871 | " id name host_id \\\n", 872 | "0 2539 Clean & quiet apt home by the park 2787 \n", 873 | "1 2595 Skylit Midtown Castle 2845 \n", 874 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n", 875 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n", 876 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n", 877 | "\n", 878 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n", 879 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n", 880 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", 881 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n", 882 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", 883 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n", 884 | "\n", 885 | " room_type price minimum_nights number_of_reviews last_review \\\n", 886 | "0 Private room 149 1 9 2018-10-19 \n", 887 | "1 Entire home/apt 225 1 45 2019-05-21 \n", 888 | "2 Private room 150 3 0 NaN \n", 889 | "3 Entire home/apt 89 1 270 2019-07-05 \n", 890 | "4 Entire home/apt 80 10 9 2018-11-19 \n", 891 | "\n", 892 | " reviews_per_month calculated_host_listings_count availability_365 \n", 893 | "0 0.210000 6 365 \n", 894 | "1 0.380000 2 355 \n", 895 | "2 1.373221 1 365 \n", 896 | "3 4.640000 1 194 \n", 897 | "4 0.100000 1 0 " 898 | ] 899 | }, 900 | "execution_count": 193, 901 | "metadata": {}, 902 | "output_type": "execute_result" 903 | } 904 | ], 905 | "source": [ 906 | "df3.head()" 907 | ] 908 | }, 909 | { 910 | "cell_type": "code", 911 | "execution_count": 194, 912 | "id": "4d16c6c0", 913 | "metadata": { 914 | "scrolled": false 915 | }, 916 | "outputs": [ 917 | { 918 | "data": { 919 | "text/html": [ 920 | "
\n", 921 | "\n", 934 | "\n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
23647THE VILLAGE OF HARLEM....NEW YORK !4632ElisabethManhattanHarlem40.80902-73.94190Private room15030NaN1.3732211365
197750Huge 2 BR Upper East Cental Park17985SingManhattanEast Harlem40.79685-73.94872Entire home/apt19070NaN1.3732212249
\n", 997 | "
" 998 | ], 999 | "text/plain": [ 1000 | " id name host_id host_name \\\n", 1001 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 Elisabeth \n", 1002 | "19 7750 Huge 2 BR Upper East Cental Park 17985 Sing \n", 1003 | "\n", 1004 | " neighbourhood_group neighbourhood latitude longitude room_type \\\n", 1005 | "2 Manhattan Harlem 40.80902 -73.94190 Private room \n", 1006 | "19 Manhattan East Harlem 40.79685 -73.94872 Entire home/apt \n", 1007 | "\n", 1008 | " price minimum_nights number_of_reviews last_review reviews_per_month \\\n", 1009 | "2 150 3 0 NaN 1.373221 \n", 1010 | "19 190 7 0 NaN 1.373221 \n", 1011 | "\n", 1012 | " calculated_host_listings_count availability_365 \n", 1013 | "2 1 365 \n", 1014 | "19 2 249 " 1015 | ] 1016 | }, 1017 | "execution_count": 194, 1018 | "metadata": {}, 1019 | "output_type": "execute_result" 1020 | } 1021 | ], 1022 | "source": [ 1023 | "df3[df3[\"number_of_reviews\"]==0].head(2)" 1024 | ] 1025 | }, 1026 | { 1027 | "cell_type": "code", 1028 | "execution_count": 195, 1029 | "id": "033155b6", 1030 | "metadata": {}, 1031 | "outputs": [ 1032 | { 1033 | "data": { 1034 | "text/plain": [ 1035 | "id 0\n", 1036 | "name 16\n", 1037 | "host_id 0\n", 1038 | "host_name 21\n", 1039 | "neighbourhood_group 0\n", 1040 | "neighbourhood 0\n", 1041 | "latitude 0\n", 1042 | "longitude 0\n", 1043 | "room_type 0\n", 1044 | "price 0\n", 1045 | "minimum_nights 0\n", 1046 | "number_of_reviews 0\n", 1047 | "last_review 10052\n", 1048 | "reviews_per_month 0\n", 1049 | "calculated_host_listings_count 0\n", 1050 | "availability_365 0\n", 1051 | "dtype: int64" 1052 | ] 1053 | }, 1054 | "execution_count": 195, 1055 | "metadata": {}, 1056 | "output_type": "execute_result" 1057 | } 1058 | ], 1059 | "source": [ 1060 | "df3.isna().sum()" 1061 | ] 1062 | }, 1063 | { 1064 | "cell_type": "markdown", 1065 | "id": "a0819870", 1066 | "metadata": {}, 1067 | "source": [ 1068 | "#### Categorical Data (Use mode value)" 1069 | ] 1070 | }, 1071 | { 1072 | "cell_type": "code", 
1073 | "execution_count": 196, 1074 | "id": "7b666009", 1075 | "metadata": {}, 1076 | "outputs": [ 1077 | { 1078 | "data": { 1079 | "text/plain": [ 1080 | "0 2018-10-19\n", 1081 | "1 2019-05-21\n", 1082 | "2 2019-06-23\n", 1083 | "3 2019-07-05\n", 1084 | "4 2018-11-19\n", 1085 | " ... \n", 1086 | "48890 2019-06-23\n", 1087 | "48891 2019-06-23\n", 1088 | "48892 2019-06-23\n", 1089 | "48893 2019-06-23\n", 1090 | "48894 2019-06-23\n", 1091 | "Name: last_review, Length: 48895, dtype: object" 1092 | ] 1093 | }, 1094 | "execution_count": 196, 1095 | "metadata": {}, 1096 | "output_type": "execute_result" 1097 | } 1098 | ], 1099 | "source": [ 1100 | "df3[\"last_review\"].fillna(df3[\"last_review\"].value_counts().index[0])" 1101 | ] 1102 | }, 1103 | { 1104 | "cell_type": "markdown", 1105 | "id": "ddb737e0", 1106 | "metadata": {}, 1107 | "source": [ 1108 | "#### Categorical data (Use new category)" 1109 | ] 1110 | }, 1111 | { 1112 | "cell_type": "code", 1113 | "execution_count": 197, 1114 | "id": "5c4904a4", 1115 | "metadata": {}, 1116 | "outputs": [], 1117 | "source": [ 1118 | "df3[\"last_review\"].fillna(\"Not Reviewed\", inplace = True)" 1119 | ] 1120 | }, 1121 | { 1122 | "cell_type": "code", 1123 | "execution_count": 198, 1124 | "id": "15dfab92", 1125 | "metadata": {}, 1126 | "outputs": [ 1127 | { 1128 | "data": { 1129 | "text/html": [ 1130 | "
\n", 1131 | "\n", 1144 | "\n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02539Clean & quiet apt home by the park2787JohnBrooklynKensington40.64749-73.97237Private room149192018-10-190.2100006365
12595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt2251452019-05-210.3800002355
23647THE VILLAGE OF HARLEM....NEW YORK !4632ElisabethManhattanHarlem40.80902-73.94190Private room15030Not Reviewed1.3732211365
33831Cozy Entire Floor of Brownstone4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt8912702019-07-054.6400001194
45022Entire Apt: Spacious Studio/Loft by central park7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt801092018-11-190.10000010
\n", 1264 | "
" 1265 | ], 1266 | "text/plain": [ 1267 | " id name host_id \\\n", 1268 | "0 2539 Clean & quiet apt home by the park 2787 \n", 1269 | "1 2595 Skylit Midtown Castle 2845 \n", 1270 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n", 1271 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n", 1272 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n", 1273 | "\n", 1274 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n", 1275 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n", 1276 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", 1277 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n", 1278 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", 1279 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n", 1280 | "\n", 1281 | " room_type price minimum_nights number_of_reviews last_review \\\n", 1282 | "0 Private room 149 1 9 2018-10-19 \n", 1283 | "1 Entire home/apt 225 1 45 2019-05-21 \n", 1284 | "2 Private room 150 3 0 Not Reviewed \n", 1285 | "3 Entire home/apt 89 1 270 2019-07-05 \n", 1286 | "4 Entire home/apt 80 10 9 2018-11-19 \n", 1287 | "\n", 1288 | " reviews_per_month calculated_host_listings_count availability_365 \n", 1289 | "0 0.210000 6 365 \n", 1290 | "1 0.380000 2 355 \n", 1291 | "2 1.373221 1 365 \n", 1292 | "3 4.640000 1 194 \n", 1293 | "4 0.100000 1 0 " 1294 | ] 1295 | }, 1296 | "execution_count": 198, 1297 | "metadata": {}, 1298 | "output_type": "execute_result" 1299 | } 1300 | ], 1301 | "source": [ 1302 | "df3.head()" 1303 | ] 1304 | }, 1305 | { 1306 | "cell_type": "code", 1307 | "execution_count": 199, 1308 | "id": "ec36e3a0", 1309 | "metadata": {}, 1310 | "outputs": [ 1311 | { 1312 | "data": { 1313 | "text/plain": [ 1314 | "id 0\n", 1315 | "name 16\n", 1316 | "host_id 0\n", 1317 | "host_name 21\n", 1318 | "neighbourhood_group 0\n", 1319 | "neighbourhood 0\n", 1320 | "latitude 0\n", 1321 | "longitude 0\n", 1322 | "room_type 0\n", 1323 | "price 0\n", 1324 | "minimum_nights 0\n", 1325 | 
"number_of_reviews 0\n", 1326 | "last_review 0\n", 1327 | "reviews_per_month 0\n", 1328 | "calculated_host_listings_count 0\n", 1329 | "availability_365 0\n", 1330 | "dtype: int64" 1331 | ] 1332 | }, 1333 | "execution_count": 199, 1334 | "metadata": {}, 1335 | "output_type": "execute_result" 1336 | } 1337 | ], 1338 | "source": [ 1339 | "df3.isna().sum()" 1340 | ] 1341 | }, 1342 | { 1343 | "cell_type": "code", 1344 | "execution_count": 200, 1345 | "id": "e73131e5", 1346 | "metadata": {}, 1347 | "outputs": [], 1348 | "source": [ 1349 | "df3[\"host_name\"].fillna(\"Unknown\", inplace = True)" 1350 | ] 1351 | }, 1352 | { 1353 | "cell_type": "code", 1354 | "execution_count": 201, 1355 | "id": "4b4b6401", 1356 | "metadata": {}, 1357 | "outputs": [ 1358 | { 1359 | "data": { 1360 | "text/plain": [ 1361 | "id 0\n", 1362 | "name 16\n", 1363 | "host_id 0\n", 1364 | "host_name 0\n", 1365 | "neighbourhood_group 0\n", 1366 | "neighbourhood 0\n", 1367 | "latitude 0\n", 1368 | "longitude 0\n", 1369 | "room_type 0\n", 1370 | "price 0\n", 1371 | "minimum_nights 0\n", 1372 | "number_of_reviews 0\n", 1373 | "last_review 0\n", 1374 | "reviews_per_month 0\n", 1375 | "calculated_host_listings_count 0\n", 1376 | "availability_365 0\n", 1377 | "dtype: int64" 1378 | ] 1379 | }, 1380 | "execution_count": 201, 1381 | "metadata": {}, 1382 | "output_type": "execute_result" 1383 | } 1384 | ], 1385 | "source": [ 1386 | "df3.isna().sum()" 1387 | ] 1388 | }, 1389 | { 1390 | "cell_type": "code", 1391 | "execution_count": 202, 1392 | "id": "2578e4a2", 1393 | "metadata": {}, 1394 | "outputs": [], 1395 | "source": [ 1396 | "df3[\"name\"].fillna(method = \"pad\", inplace = True)" 1397 | ] 1398 | }, 1399 | { 1400 | "cell_type": "code", 1401 | "execution_count": 203, 1402 | "id": "65efa95f", 1403 | "metadata": {}, 1404 | "outputs": [ 1405 | { 1406 | "data": { 1407 | "text/plain": [ 1408 | "id 0\n", 1409 | "name 0\n", 1410 | "host_id 0\n", 1411 | "host_name 0\n", 1412 | "neighbourhood_group 0\n", 1413 | 
"neighbourhood 0\n", 1414 | "latitude 0\n", 1415 | "longitude 0\n", 1416 | "room_type 0\n", 1417 | "price 0\n", 1418 | "minimum_nights 0\n", 1419 | "number_of_reviews 0\n", 1420 | "last_review 0\n", 1421 | "reviews_per_month 0\n", 1422 | "calculated_host_listings_count 0\n", 1423 | "availability_365 0\n", 1424 | "dtype: int64" 1425 | ] 1426 | }, 1427 | "execution_count": 203, 1428 | "metadata": {}, 1429 | "output_type": "execute_result" 1430 | } 1431 | ], 1432 | "source": [ 1433 | "df3.isna().sum()" 1434 | ] 1435 | }, 1436 | { 1437 | "cell_type": "code", 1438 | "execution_count": 204, 1439 | "id": "d8622bff", 1440 | "metadata": { 1441 | "scrolled": true 1442 | }, 1443 | "outputs": [ 1444 | { 1445 | "data": { 1446 | "text/html": [ 1447 | "
\n", 1448 | "\n", 1461 | "\n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02539Clean & quiet apt home by the park2787JohnBrooklynKensington40.64749-73.97237Private room149192018-10-190.2100006365
12595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt2251452019-05-210.3800002355
23647THE VILLAGE OF HARLEM....NEW YORK !4632ElisabethManhattanHarlem40.80902-73.94190Private room15030Not Reviewed1.3732211365
33831Cozy Entire Floor of Brownstone4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt8912702019-07-054.6400001194
45022Entire Apt: Spacious Studio/Loft by central park7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt801092018-11-190.10000010
\n", 1581 | "
" 1582 | ], 1583 | "text/plain": [ 1584 | " id name host_id \\\n", 1585 | "0 2539 Clean & quiet apt home by the park 2787 \n", 1586 | "1 2595 Skylit Midtown Castle 2845 \n", 1587 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n", 1588 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n", 1589 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n", 1590 | "\n", 1591 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n", 1592 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n", 1593 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", 1594 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n", 1595 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", 1596 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n", 1597 | "\n", 1598 | " room_type price minimum_nights number_of_reviews last_review \\\n", 1599 | "0 Private room 149 1 9 2018-10-19 \n", 1600 | "1 Entire home/apt 225 1 45 2019-05-21 \n", 1601 | "2 Private room 150 3 0 Not Reviewed \n", 1602 | "3 Entire home/apt 89 1 270 2019-07-05 \n", 1603 | "4 Entire home/apt 80 10 9 2018-11-19 \n", 1604 | "\n", 1605 | " reviews_per_month calculated_host_listings_count availability_365 \n", 1606 | "0 0.210000 6 365 \n", 1607 | "1 0.380000 2 355 \n", 1608 | "2 1.373221 1 365 \n", 1609 | "3 4.640000 1 194 \n", 1610 | "4 0.100000 1 0 " 1611 | ] 1612 | }, 1613 | "execution_count": 204, 1614 | "metadata": {}, 1615 | "output_type": "execute_result" 1616 | } 1617 | ], 1618 | "source": [ 1619 | "df3.head()" 1620 | ] 1621 | }, 1622 | { 1623 | "cell_type": "markdown", 1624 | "id": "de6d9cb6", 1625 | "metadata": {}, 1626 | "source": [ 1627 | "### Advanced Imputation" 1628 | ] 1629 | }, 1630 | { 1631 | "cell_type": "code", 1632 | "execution_count": 205, 1633 | "id": "c4e67106", 1634 | "metadata": {}, 1635 | "outputs": [], 1636 | "source": [ 1637 | "df = data" 1638 | ] 1639 | }, 1640 | { 1641 | "cell_type": "code", 1642 | "execution_count": 207, 1643 | "id": "43a21a00", 1644 | 
"metadata": {}, 1645 | "outputs": [ 1646 | { 1647 | "data": { 1648 | "text/plain": [ 1649 | "id 0\n", 1650 | "name 16\n", 1651 | "host_id 0\n", 1652 | "host_name 21\n", 1653 | "neighbourhood_group 0\n", 1654 | "neighbourhood 0\n", 1655 | "latitude 0\n", 1656 | "longitude 0\n", 1657 | "room_type 0\n", 1658 | "price 0\n", 1659 | "minimum_nights 0\n", 1660 | "number_of_reviews 0\n", 1661 | "last_review 10052\n", 1662 | "reviews_per_month 10052\n", 1663 | "calculated_host_listings_count 0\n", 1664 | "availability_365 0\n", 1665 | "dtype: int64" 1666 | ] 1667 | }, 1668 | "execution_count": 207, 1669 | "metadata": {}, 1670 | "output_type": "execute_result" 1671 | } 1672 | ], 1673 | "source": [ 1674 | "df.isna().sum()" 1675 | ] 1676 | }, 1677 | { 1678 | "cell_type": "code", 1679 | "execution_count": 209, 1680 | "id": "a4df50d0", 1681 | "metadata": {}, 1682 | "outputs": [], 1683 | "source": [ 1684 | "df[\"reviews_per_month\"].interpolate(inplace = True)" 1685 | ] 1686 | }, 1687 | { 1688 | "cell_type": "code", 1689 | "execution_count": 211, 1690 | "id": "55a6d283", 1691 | "metadata": {}, 1692 | "outputs": [ 1693 | { 1694 | "data": { 1695 | "text/html": [ 1696 | "
\n", 1697 | "\n", 1710 | "\n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " \n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | " \n", 1803 | " \n", 1804 | " \n", 1805 | " \n", 1806 | " \n", 1807 | " \n", 1808 | " \n", 1809 | " \n", 1810 | " \n", 1811 | " \n", 1812 | " \n", 1813 | " \n", 1814 | " \n", 1815 | " \n", 1816 | " \n", 1817 | " \n", 1818 | " \n", 1819 | " \n", 1820 | " \n", 1821 | " \n", 1822 | " \n", 1823 | " \n", 1824 | " \n", 1825 | " \n", 1826 | " \n", 1827 | " \n", 1828 | " \n", 1829 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02539Clean & quiet apt home by the park2787JohnBrooklynKensington40.64749-73.97237Private room149192018-10-190.216365
12595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt2251452019-05-210.382355
23647THE VILLAGE OF HARLEM....NEW YORK !4632ElisabethManhattanHarlem40.80902-73.94190Private room15030NaN2.511365
33831Cozy Entire Floor of Brownstone4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt8912702019-07-054.641194
45022Entire Apt: Spacious Studio/Loft by central park7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt801092018-11-190.1010
\n", 1830 | "
" 1831 | ], 1832 | "text/plain": [ 1833 | " id name host_id \\\n", 1834 | "0 2539 Clean & quiet apt home by the park 2787 \n", 1835 | "1 2595 Skylit Midtown Castle 2845 \n", 1836 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n", 1837 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n", 1838 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n", 1839 | "\n", 1840 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n", 1841 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n", 1842 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n", 1843 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n", 1844 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n", 1845 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n", 1846 | "\n", 1847 | " room_type price minimum_nights number_of_reviews last_review \\\n", 1848 | "0 Private room 149 1 9 2018-10-19 \n", 1849 | "1 Entire home/apt 225 1 45 2019-05-21 \n", 1850 | "2 Private room 150 3 0 NaN \n", 1851 | "3 Entire home/apt 89 1 270 2019-07-05 \n", 1852 | "4 Entire home/apt 80 10 9 2018-11-19 \n", 1853 | "\n", 1854 | " reviews_per_month calculated_host_listings_count availability_365 \n", 1855 | "0 0.21 6 365 \n", 1856 | "1 0.38 2 355 \n", 1857 | "2 2.51 1 365 \n", 1858 | "3 4.64 1 194 \n", 1859 | "4 0.10 1 0 " 1860 | ] 1861 | }, 1862 | "execution_count": 211, 1863 | "metadata": {}, 1864 | "output_type": "execute_result" 1865 | } 1866 | ], 1867 | "source": [ 1868 | "df.head()" 1869 | ] 1870 | }, 1871 | { 1872 | "cell_type": "markdown", 1873 | "id": "332397ae", 1874 | "metadata": {}, 1875 | "source": [ 1876 | "### Handling Duplicate data" 1877 | ] 1878 | }, 1879 | { 1880 | "cell_type": "code", 1881 | "execution_count": 174, 1882 | "id": "d6f88ece", 1883 | "metadata": {}, 1884 | "outputs": [ 1885 | { 1886 | "data": { 1887 | "text/plain": [ 1888 | "0" 1889 | ] 1890 | }, 1891 | "execution_count": 174, 1892 | "metadata": {}, 1893 | "output_type": "execute_result" 1894 | } 1895 | ], 1896 
| "source": [ 1897 | "data.duplicated().sum()" 1898 | ] 1899 | }, 1900 | { 1901 | "cell_type": "code", 1902 | "execution_count": 212, 1903 | "id": "32248270", 1904 | "metadata": {}, 1905 | "outputs": [], 1906 | "source": [ 1907 | "data.drop_duplicates(inplace = True)" 1908 | ] 1909 | } 1910 | ], 1911 | "metadata": { 1912 | "kernelspec": { 1913 | "display_name": "Python 3 (ipykernel)", 1914 | "language": "python", 1915 | "name": "python3" 1916 | }, 1917 | "language_info": { 1918 | "codemirror_mode": { 1919 | "name": "ipython", 1920 | "version": 3 1921 | }, 1922 | "file_extension": ".py", 1923 | "mimetype": "text/x-python", 1924 | "name": "python", 1925 | "nbconvert_exporter": "python", 1926 | "pygments_lexer": "ipython3", 1927 | "version": "3.11.1" 1928 | } 1929 | }, 1930 | "nbformat": 4, 1931 | "nbformat_minor": 5 1932 | } 1933 | -------------------------------------------------------------------------------- /9 Correction of datatype.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "6332b62b", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "06f8ba29", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "df = pd.read_csv(\"AB_NYC_2019.csv\")" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 4, 26 | "id": "5d1321df", 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/html": [ 32 | "
\n", 33 | "\n", 46 | "\n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
02539Clean & quiet apt home by the park2787JohnBrooklynKensington40.64749-73.97237Private room1491919-10-20180.216365
12595Skylit Midtown Castle2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt22514521-05-20190.382355
\n", 109 | "
" 110 | ], 111 | "text/plain": [ 112 | " id name host_id host_name \\\n", 113 | "0 2539 Clean & quiet apt home by the park 2787 John \n", 114 | "1 2595 Skylit Midtown Castle 2845 Jennifer \n", 115 | "\n", 116 | " neighbourhood_group neighbourhood latitude longitude room_type \\\n", 117 | "0 Brooklyn Kensington 40.64749 -73.97237 Private room \n", 118 | "1 Manhattan Midtown 40.75362 -73.98377 Entire home/apt \n", 119 | "\n", 120 | " price minimum_nights number_of_reviews last_review reviews_per_month \\\n", 121 | "0 149 1 9 19-10-2018 0.21 \n", 122 | "1 225 1 45 21-05-2019 0.38 \n", 123 | "\n", 124 | " calculated_host_listings_count availability_365 \n", 125 | "0 6 365 \n", 126 | "1 2 355 " 127 | ] 128 | }, 129 | "execution_count": 4, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "df.head(2)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 3, 141 | "id": "2fc99302", 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | "\n", 149 | "RangeIndex: 48906 entries, 0 to 48905\n", 150 | "Data columns (total 16 columns):\n", 151 | " # Column Non-Null Count Dtype \n", 152 | "--- ------ -------------- ----- \n", 153 | " 0 id 48906 non-null int64 \n", 154 | " 1 name 48890 non-null object \n", 155 | " 2 host_id 48906 non-null int64 \n", 156 | " 3 host_name 48885 non-null object \n", 157 | " 4 neighbourhood_group 48906 non-null object \n", 158 | " 5 neighbourhood 48906 non-null object \n", 159 | " 6 latitude 48906 non-null float64\n", 160 | " 7 longitude 48906 non-null float64\n", 161 | " 8 room_type 48906 non-null object \n", 162 | " 9 price 48906 non-null int64 \n", 163 | " 10 minimum_nights 48906 non-null int64 \n", 164 | " 11 number_of_reviews 48906 non-null int64 \n", 165 | " 12 last_review 38854 non-null object \n", 166 | " 13 reviews_per_month 38854 non-null float64\n", 167 | " 14 calculated_host_listings_count 48906 
non-null int64 \n", 168 | " 15 availability_365 48906 non-null int64 \n", 169 | "dtypes: float64(3), int64(7), object(6)\n", 170 | "memory usage: 6.0+ MB\n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "df.info()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 9, 181 | "id": "bff805e2", 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "df[\"id\"] = df[\"id\"].astype(str)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 18, 191 | "id": "e30541cc", 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "dtype('O')" 198 | ] 199 | }, 200 | "execution_count": 18, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "df[\"id\"].dtype" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 19, 212 | "id": "0ff5acec", 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "df[\"host_id\"] = df[\"host_id\"].astype(str)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 20, 222 | "id": "8dd6d327", 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "name": "stderr", 227 | "output_type": "stream", 228 | "text": [ 229 | "C:\\Users\\GFG19189\\AppData\\Local\\Temp\\ipykernel_3288\\3465608367.py:1: UserWarning: Parsing dates in DD/MM/YYYY format when dayfirst=False (the default) was specified. This may lead to inconsistently parsed dates! 
Specify a format to ensure consistent parsing.\n", 230 | " df[\"last_review\"] = pd.to_datetime(df[\"last_review\"])\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "df[\"last_review\"] = pd.to_datetime(df[\"last_review\"])" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 22, 241 | "id": "8bfd344f", 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stdout", 246 | "output_type": "stream", 247 | "text": [ 248 | "\n", 249 | "RangeIndex: 48906 entries, 0 to 48905\n", 250 | "Data columns (total 16 columns):\n", 251 | " # Column Non-Null Count Dtype \n", 252 | "--- ------ -------------- ----- \n", 253 | " 0 id 48906 non-null object \n", 254 | " 1 name 48890 non-null object \n", 255 | " 2 host_id 48906 non-null object \n", 256 | " 3 host_name 48885 non-null object \n", 257 | " 4 neighbourhood_group 48906 non-null object \n", 258 | " 5 neighbourhood 48906 non-null object \n", 259 | " 6 latitude 48906 non-null float64 \n", 260 | " 7 longitude 48906 non-null float64 \n", 261 | " 8 room_type 48906 non-null object \n", 262 | " 9 price 48906 non-null int64 \n", 263 | " 10 minimum_nights 48906 non-null int64 \n", 264 | " 11 number_of_reviews 48906 non-null int64 \n", 265 | " 12 last_review 38854 non-null datetime64[ns]\n", 266 | " 13 reviews_per_month 38854 non-null float64 \n", 267 | " 14 calculated_host_listings_count 48906 non-null int64 \n", 268 | " 15 availability_365 48906 non-null int64 \n", 269 | "dtypes: datetime64[ns](1), float64(3), int64(5), object(7)\n", 270 | "memory usage: 6.0+ MB\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "df.info()" 276 | ] 277 | } 278 | ], 279 | "metadata": { 280 | "kernelspec": { 281 | "display_name": "Python 3 (ipykernel)", 282 | "language": "python", 283 | "name": "python3" 284 | }, 285 | "language_info": { 286 | "codemirror_mode": { 287 | "name": "ipython", 288 | "version": 3 289 | }, 290 | "file_extension": ".py", 291 | "mimetype": "text/x-python", 292 | "name": "python", 293 | 
"nbconvert_exporter": "python", 294 | "pygments_lexer": "ipython3", 295 | "version": "3.11.1" 296 | } 297 | }, 298 | "nbformat": 4, 299 | "nbformat_minor": 5 300 | } 301 | -------------------------------------------------------------------------------- /EDA 5 (Outliers).pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sakshisinghal936/eda_data_science_course/80bc747c0d9ad6cfb2535812458a184d29d0d61d/EDA 5 (Outliers).pptx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction to Exploratory Data Analysis (EDA) Course 2 | 3 | ## Overview 4 | 5 | Welcome to the "Introduction to Exploratory Data Analysis (EDA)" course! This course will help you to effectively analyze and gain insights from various datasets. 6 | 7 | ## Course Content 8 | 9 | This course consists of the following modules: 10 | 11 | 1. **Understanding your data:** Before starting analysis, we first understand the data. 12 | 13 | 2. **Dealing with Missing Values:** Learn how to identify and handle missing data in a dataset using appropriate techniques. 14 | 15 | 3. **Dealing with Duplicate Data:** Explore methods to detect and manage duplicate records in your data. 16 | 17 | 4. **Dealing with Outliers:** Discover how to identify and handle outliers that can impact your analysis. 18 | 19 | 5. **Correction of DataType:** According to the data and analysis, we will see how to change the datatype. 20 | 21 | 6. **Univariate Analysis:** Learn how to perform univariate analysis to examine the distribution and characteristics of individual variables. 22 | 23 | 11. **Univariate Analysis Visualization:** Discover visualization techniques for univariate analysis to gain deeper insights into data patterns. 24 | 25 | 12. 
**Bivariate Analysis:** Understand how to perform bivariate analysis to explore relationships between two variables. 26 | 27 | ## Code Files 28 | 29 | The code files for each module are provided in this repository. These files contain hands-on examples and implementations of the concepts covered in the course. You can use these code files to practice and reinforce your understanding of EDA techniques. 30 | 31 | ## Requirements 32 | 33 | Before you begin the course, ensure you have the following: 34 | 35 | - Basic knowledge of Python programming language (for running code examples). 36 | - Familiarity with data handling concepts and basic statistical methods. 37 | 38 | ## How to Use the Code Files 39 | 40 | 1. Clone or download this repository to your local machine. 41 | 2. Open the code files in your preferred Python IDE or Jupyter Notebook. 42 | 3. Experiment with the code and datasets to enhance your understanding. 43 | -------------------------------------------------------------------------------- /scholarship.csv: -------------------------------------------------------------------------------- 1 | semester_percentage,scholarship_exam_marks,got_scholarship 2 | 71.9,26,1 3 | 74.6,38,1 4 | 75.4,40,1 5 | 64.2,8,1 6 | 72.3,17,0 7 | 73,23,1 8 | 66.9,11,0 9 | 71.2,39,1 10 | 64.5,38,0 11 | 77.5,94,1 12 | 68.2,16,1 13 | 63.8,7,1 14 | 65.8,16,1 15 | 56.8,26,0 16 | 79.1,43,0 17 | 71,21,0 18 | 65.3,19,0 19 | 75.6,22,1 20 | 69.3,27,0 21 | 76.3,29,0 22 | 66.9,47,0 23 | 74.3,33,1 24 | 67.6,54,1 25 | 60.5,11,0 26 | 64.4,11,0 27 | 62.8,58,1 28 | 74.5,8,1 29 | 65.3,46,0 30 | 72.3,19,0 31 | 65.1,15,1 32 | 74.6,16,0 33 | 76.6,44,0 34 | 59.1,11,1 35 | 62.3,27,0 36 | 81.5,9,0 37 | 74.8,12,0 38 | 68.5,16,1 39 | 85.1,9,1 40 | 65.8,20,1 41 | 72.5,17,0 42 | 66,86,1 43 | 67,38,0 44 | 74.6,71,1 45 | 78.5,63,0 46 | 78.8,55,0 47 | 69.2,10,1 48 | 73,15,0 49 | 69.2,46,0 50 | 62.9,42,0 51 | 82.3,28,1 52 | 62,18,1 53 | 62.4,28,1 54 | 75.3,27,1 55 | 66.9,33,1 56 | 84.2,36,1 57 | 81.2,15,0 58 | 
69.3,15,1 59 | 77.3,24,0 60 | 73.4,43,0 61 | 77,32,1 62 | 79.4,31,1 63 | 75.1,86,0 64 | 76.1,22,0 65 | 62.2,26,0 66 | 69.8,52,1 67 | 62.7,28,1 68 | 66.7,31,1 69 | 80.4,18,1 70 | 65.8,30,0 71 | 74.1,44,0 72 | 69.8,17,1 73 | 61.3,20,1 74 | 79,19,0 75 | 63.9,12,1 76 | 61.6,3,1 77 | 67.7,56,1 78 | 83.5,12,0 79 | 71,28,1 80 | 82,12,0 81 | 76.3,19,1 82 | 68.7,46,1 83 | 71.2,44,1 84 | 72.2,12,0 85 | 73.8,20,1 86 | 56.5,35,1 87 | 60.3,28,0 88 | 71.9,22,1 89 | 69.6,19,1 90 | 67.8,39,0 91 | 65.6,17,0 92 | 76.1,15,1 93 | 74.2,6,1 94 | 64.8,63,0 95 | 70,30,0 96 | 71.5,21,1 97 | 68.9,35,1 98 | 70.9,28,1 99 | 73.3,52,1 100 | 81.2,76,0 101 | 74.6,23,0 102 | 65.6,46,0 103 | 64.8,26,1 104 | 59.2,22,1 105 | 64.4,18,1 106 | 68.2,47,1 107 | 67.2,12,1 108 | 65.9,22,0 109 | 73.2,9,0 110 | 59.4,10,0 111 | 72.3,21,1 112 | 69.6,31,0 113 | 64.8,33,0 114 | 72.3,15,0 115 | 72.4,10,0 116 | 63,77,1 117 | 72.8,37,0 118 | 71.6,21,0 119 | 71.1,0,1 120 | 69,54,0 121 | 70,39,0 122 | 69.9,46,1 123 | 59.1,27,0 124 | 59.7,51,0 125 | 62.5,52,0 126 | 62,30,0 127 | 74.6,55,1 128 | 71.6,37,0 129 | 69.5,23,1 130 | 67.6,25,0 131 | 68.1,39,1 132 | 59.1,53,0 133 | 81.9,19,1 134 | 77.1,18,0 135 | 76.5,64,1 136 | 63.3,93,0 137 | 66.3,47,0 138 | 72.8,15,0 139 | 56.9,53,0 140 | 75.3,8,1 141 | 80.9,5,1 142 | 68.5,22,1 143 | 71.9,18,0 144 | 76.6,41,1 145 | 63.2,33,1 146 | 66.7,17,0 147 | 70.8,28,0 148 | 67.5,22,1 149 | 66.8,11,0 150 | 68.6,49,1 151 | 68.1,58,0 152 | 85.3,22,0 153 | 72.9,16,1 154 | 77.8,56,0 155 | 62.9,10,1 156 | 75.7,54,1 157 | 78.4,12,0 158 | 64,34,0 159 | 70.7,81,1 160 | 57.4,50,0 161 | 67,22,0 162 | 73.6,10,1 163 | 62.6,52,1 164 | 78,90,0 165 | 67.7,42,1 166 | 62.9,34,1 167 | 75.3,7,0 168 | 77.6,59,0 169 | 66.5,48,1 170 | 65.1,37,1 171 | 61.9,16,1 172 | 61.7,37,0 173 | 62,38,1 174 | 72.4,43,1 175 | 67.1,48,0 176 | 73.1,21,1 177 | 75.7,26,1 178 | 77.2,30,1 179 | 62.2,40,1 180 | 75.1,39,0 181 | 64,45,1 182 | 71.8,29,0 183 | 73,42,0 184 | 68.7,76,0 185 | 61.1,18,1 186 | 77.8,33,0 187 | 73,31,0 188 | 
69.7,63,1 189 | 66.3,10,1 190 | 59.2,55,1 191 | 66.3,9,0 192 | 75.4,23,0 193 | 66.3,8,0 194 | 66.7,38,0 195 | 63.1,14,0 196 | 81.9,33,0 197 | 67.1,44,0 198 | 72.8,34,1 199 | 67.5,27,0 200 | 82.9,44,1 201 | 74.3,25,1 202 | 57.7,22,0 203 | 66.7,15,0 204 | 65.1,48,1 205 | 79.3,52,0 206 | 70.1,5,1 207 | 69.9,47,1 208 | 75,25,0 209 | 70.5,16,0 210 | 72.5,12,1 211 | 68.8,35,0 212 | 65.8,19,1 213 | 73.6,14,0 214 | 62.3,57,0 215 | 66.2,19,1 216 | 74.2,60,1 217 | 77.2,22,0 218 | 54.2,36,1 219 | 69.2,16,0 220 | 66.2,52,0 221 | 54.8,3,0 222 | 77.5,16,1 223 | 67.8,24,1 224 | 69.7,78,0 225 | 61.5,11,1 226 | 71.5,19,0 227 | 73.6,13,1 228 | 67.9,21,1 229 | 70.3,19,0 230 | 76.1,15,0 231 | 66.8,13,0 232 | 71.2,14,1 233 | 64,35,0 234 | 77.7,34,0 235 | 72.5,41,0 236 | 80.9,36,0 237 | 72,52,1 238 | 77,36,1 239 | 71.4,52,1 240 | 74.1,78,0 241 | 67.1,16,1 242 | 72.5,38,0 243 | 72.2,18,0 244 | 59.9,44,0 245 | 67.8,55,0 246 | 76.8,44,0 247 | 67.8,62,0 248 | 76.7,26,0 249 | 62.5,49,0 250 | 69,44,0 251 | 74.6,52,0 252 | 74,13,1 253 | 64.6,45,0 254 | 62.4,54,1 255 | 69.8,76,0 256 | 67,64,0 257 | 77.6,10,0 258 | 73.7,48,1 259 | 57.4,76,0 260 | 61.2,6,1 261 | 71.6,10,0 262 | 56.6,57,1 263 | 71.7,11,1 264 | 71.9,10,1 265 | 81.1,50,1 266 | 78.2,28,1 267 | 69.4,57,0 268 | 77.3,50,1 269 | 60,65,0 270 | 73.8,21,0 271 | 64.7,16,0 272 | 68,16,1 273 | 76.4,21,1 274 | 62.3,56,1 275 | 70,9,0 276 | 71.3,4,1 277 | 80.3,39,0 278 | 69.6,46,0 279 | 64.5,15,1 280 | 52.3,17,1 281 | 84.9,40,1 282 | 66.2,55,0 283 | 76.8,22,1 284 | 64.3,44,1 285 | 70.9,87,0 286 | 69.9,62,1 287 | 74.2,57,1 288 | 62.9,58,1 289 | 62.4,24,1 290 | 70.7,21,0 291 | 67.3,9,1 292 | 83.8,87,0 293 | 74.6,14,1 294 | 58.2,41,0 295 | 69.7,11,1 296 | 61.2,17,0 297 | 60.2,64,0 298 | 69.9,19,0 299 | 70.3,32,1 300 | 61.6,48,0 301 | 67.5,15,1 302 | 73.6,52,1 303 | 78.4,5,1 304 | 79.5,34,0 305 | 73.5,56,0 306 | 71.2,33,1 307 | 66.7,32,1 308 | 68.6,19,0 309 | 74.7,28,1 310 | 72,47,1 311 | 74.6,31,1 312 | 55.6,50,0 313 | 69.7,87,1 314 | 70.2,55,1 315 
| 65.1,18,1 316 | 74.3,14,1 317 | 79.6,37,0 318 | 70.7,38,1 319 | 74.7,19,0 320 | 67.4,60,0 321 | 75.3,13,0 322 | 71.8,51,1 323 | 68.2,47,0 324 | 67.7,38,1 325 | 66,38,0 326 | 66.4,90,0 327 | 66.5,13,0 328 | 75.4,13,0 329 | 82.1,56,0 330 | 74,28,1 331 | 73,35,0 332 | 66.8,16,1 333 | 61.2,22,1 334 | 70.3,39,0 335 | 67.4,20,1 336 | 65.6,19,1 337 | 69,17,1 338 | 79.3,9,1 339 | 69.5,68,0 340 | 74.5,46,1 341 | 73.2,18,1 342 | 74.5,51,0 343 | 74.1,8,1 344 | 63.7,23,0 345 | 72,70,1 346 | 72,26,1 347 | 62.6,13,1 348 | 73.1,33,0 349 | 72.4,34,1 350 | 61.3,17,1 351 | 81.2,16,0 352 | 72.1,70,0 353 | 69.2,10,0 354 | 65.1,4,1 355 | 68.2,28,1 356 | 67.3,23,1 357 | 64.2,18,1 358 | 59.1,50,1 359 | 79.2,42,1 360 | 67.4,52,0 361 | 76.1,40,0 362 | 72.4,33,0 363 | 63.7,37,1 364 | 72.5,21,1 365 | 76.1,13,0 366 | 68.7,9,1 367 | 70.7,25,0 368 | 67.8,42,0 369 | 64.1,30,0 370 | 70.9,32,0 371 | 66.9,36,1 372 | 57,79,1 373 | 59,29,0 374 | 59.5,8,0 375 | 55.8,41,0 376 | 73.4,41,0 377 | 73.1,31,0 378 | 62.5,60,0 379 | 67,47,1 380 | 72.7,25,1 381 | 73.7,65,0 382 | 71.2,31,1 383 | 61.7,53,0 384 | 75.5,61,0 385 | 72.6,4,1 386 | 70.3,33,0 387 | 73.3,21,0 388 | 76.3,43,0 389 | 72.5,7,1 390 | 72.8,23,0 391 | 59,16,1 392 | 66.3,16,1 393 | 77.7,66,1 394 | 67.6,35,0 395 | 66,18,0 396 | 79.4,18,1 397 | 72.9,11,0 398 | 72.8,10,1 399 | 75.5,63,0 400 | 75.5,48,0 401 | 57.4,43,1 402 | 60,33,1 403 | 67.4,22,0 404 | 60.8,34,1 405 | 67.1,83,0 406 | 80.2,35,1 407 | 66,32,1 408 | 71.4,17,1 409 | 82.3,20,1 410 | 78,24,1 411 | 69.2,24,0 412 | 73.6,37,1 413 | 60.4,66,0 414 | 65,45,1 415 | 66.5,28,1 416 | 73,36,1 417 | 63.7,46,1 418 | 75.1,55,1 419 | 69,19,0 420 | 63,29,0 421 | 62.2,46,0 422 | 75.9,49,0 423 | 62.9,46,0 424 | 70.9,19,0 425 | 82.7,26,0 426 | 69.1,30,0 427 | 74.5,15,0 428 | 72.6,21,0 429 | 61.7,39,1 430 | 78.9,46,0 431 | 73.5,55,0 432 | 72.2,19,1 433 | 76.4,21,0 434 | 66.7,26,1 435 | 57.7,11,1 436 | 70.1,5,0 437 | 74.5,37,0 438 | 63.1,19,0 439 | 67.3,22,0 440 | 73.1,7,0 441 | 66.7,10,1 442 | 85.6,22,0 
443 | 78,15,1 444 | 70.8,48,1 445 | 69.2,77,0 446 | 66.6,14,1 447 | 55.6,10,0 448 | 66,23,1 449 | 65,17,1 450 | 63.2,31,0 451 | 62.1,37,0 452 | 64.4,31,1 453 | 59.4,31,0 454 | 65.9,42,1 455 | 73.1,24,1 456 | 71.3,53,0 457 | 69.2,43,0 458 | 67.6,12,1 459 | 65.8,20,0 460 | 71.3,47,0 461 | 66.2,79,1 462 | 69.1,4,1 463 | 73.9,12,0 464 | 67.1,9,1 465 | 68.2,49,1 466 | 62.8,37,1 467 | 76.5,49,0 468 | 64.5,16,0 469 | 66.5,7,0 470 | 67.3,70,0 471 | 58.6,46,1 472 | 64.6,28,0 473 | 54.8,18,0 474 | 61.9,38,1 475 | 63.8,25,0 476 | 71.1,25,1 477 | 73.8,10,0 478 | 68.3,34,0 479 | 57.8,18,0 480 | 73.2,36,1 481 | 63.8,33,0 482 | 68,37,0 483 | 73.3,32,0 484 | 75.6,8,0 485 | 79.3,25,0 486 | 58.1,18,0 487 | 49.2,44,1 488 | 76.7,65,1 489 | 69.5,13,0 490 | 67.5,43,0 491 | 72.4,50,0 492 | 73.4,32,0 493 | 77.7,49,1 494 | 85,21,1 495 | 68,18,0 496 | 68.3,51,0 497 | 69.5,64,1 498 | 75.2,35,1 499 | 68,37,1 500 | 68.9,14,0 501 | 64.2,31,0 502 | 60.7,24,1 503 | 65.2,31,0 504 | 71.8,50,1 505 | 69.7,10,1 506 | 72.4,25,0 507 | 64.8,18,1 508 | 78.4,8,1 509 | 67.7,75,1 510 | 64.1,14,1 511 | 78.3,48,0 512 | 68.4,34,1 513 | 75.3,50,0 514 | 70.8,9,1 515 | 64.3,46,0 516 | 73.6,15,0 517 | 77.5,44,0 518 | 68.3,28,0 519 | 63.7,38,1 520 | 75.3,22,0 521 | 66,8,1 522 | 73.9,20,1 523 | 57.2,37,1 524 | 67.1,63,1 525 | 61.3,19,0 526 | 68.5,21,0 527 | 63.3,17,0 528 | 71.2,34,0 529 | 77.2,37,0 530 | 69,38,1 531 | 71.9,34,0 532 | 62.8,18,1 533 | 70,81,1 534 | 69,20,0 535 | 71.9,15,0 536 | 60.5,45,1 537 | 64.9,62,0 538 | 68.7,48,1 539 | 71.3,9,1 540 | 75.8,27,0 541 | 67.9,14,0 542 | 63.7,63,0 543 | 61.1,18,1 544 | 70.6,22,0 545 | 74.8,16,0 546 | 71.1,14,1 547 | 63.5,17,1 548 | 67.5,18,1 549 | 73.3,39,0 550 | 74.8,30,1 551 | 73.7,79,1 552 | 66.2,38,1 553 | 65.5,66,1 554 | 64,24,0 555 | 74.6,17,0 556 | 68.4,9,1 557 | 64.6,32,0 558 | 67,15,1 559 | 64.7,25,0 560 | 72.8,14,0 561 | 66.1,58,1 562 | 71.1,67,0 563 | 70.7,58,0 564 | 60.6,23,0 565 | 68.1,14,1 566 | 71.7,34,1 567 | 66.1,54,1 568 | 60.3,25,0 569 | 61.8,36,1 
570 | 81.3,4,0 571 | 70.8,30,0 572 | 70.4,18,0 573 | 64.7,66,1 574 | 76.6,37,1 575 | 72.3,54,1 576 | 65.4,33,0 577 | 75.5,16,0 578 | 61.2,8,1 579 | 68.5,24,1 580 | 75.2,47,0 581 | 71.3,13,0 582 | 79,63,1 583 | 65.7,25,0 584 | 70.5,27,1 585 | 71,68,0 586 | 65.7,60,1 587 | 78.5,49,1 588 | 65.8,15,0 589 | 75.8,14,1 590 | 63.8,25,0 591 | 66,16,0 592 | 66.8,22,1 593 | 78.9,35,0 594 | 70.9,27,1 595 | 65.5,62,0 596 | 69.5,64,0 597 | 61.3,7,1 598 | 84.8,25,1 599 | 78.5,32,0 600 | 75.9,27,1 601 | 70.2,55,0 602 | 62.4,38,1 603 | 68.9,17,1 604 | 71.6,32,0 605 | 65.5,54,1 606 | 68.9,40,1 607 | 66.4,42,0 608 | 67.9,24,0 609 | 77.6,49,1 610 | 73.3,16,1 611 | 65.5,4,0 612 | 73.2,61,1 613 | 72,19,1 614 | 61.4,60,1 615 | 66.6,55,1 616 | 80.5,41,0 617 | 65.1,12,0 618 | 70.9,33,1 619 | 70.8,22,0 620 | 67,22,0 621 | 68.1,43,0 622 | 70.7,18,1 623 | 78.5,28,1 624 | 74,19,1 625 | 73.5,14,1 626 | 73.5,80,1 627 | 76,30,1 628 | 76.3,19,1 629 | 63.1,30,1 630 | 71.2,9,0 631 | 81.5,11,1 632 | 65.6,96,1 633 | 72.6,16,0 634 | 63.7,13,0 635 | 66.3,39,1 636 | 76.9,39,0 637 | 66.9,10,0 638 | 63.9,43,1 639 | 72.5,74,0 640 | 74.6,74,1 641 | 64.2,43,1 642 | 63.6,61,1 643 | 74.9,22,0 644 | 78.9,39,1 645 | 75.9,28,0 646 | 76.5,29,1 647 | 67.5,36,0 648 | 73.8,45,1 649 | 66.9,4,1 650 | 71.5,24,0 651 | 76,65,1 652 | 61.9,27,0 653 | 65.7,39,1 654 | 76,43,1 655 | 69.3,48,0 656 | 70.3,76,0 657 | 73.6,34,0 658 | 73.9,72,1 659 | 76,23,0 660 | 75.7,9,0 661 | 66.8,37,0 662 | 59.7,26,1 663 | 57.2,47,0 664 | 73.8,15,1 665 | 70.5,31,1 666 | 72.3,34,0 667 | 61.6,10,0 668 | 76.4,27,1 669 | 71.7,54,0 670 | 72.4,47,0 671 | 69.4,8,1 672 | 75.1,17,1 673 | 74.5,31,0 674 | 77.5,13,0 675 | 68.7,20,0 676 | 67.3,31,1 677 | 61,20,0 678 | 67.3,21,1 679 | 67.5,24,0 680 | 66.5,34,1 681 | 67,35,0 682 | 73.5,15,0 683 | 67.8,10,1 684 | 63.4,17,1 685 | 65,69,0 686 | 80.2,67,0 687 | 60.5,87,1 688 | 71.5,62,1 689 | 68.1,23,0 690 | 65.9,36,0 691 | 80.2,67,0 692 | 70.1,26,0 693 | 73.2,14,0 694 | 69.5,43,0 695 | 66.4,42,1 696 | 71.2,51,0 
697 | 67.5,11,1 698 | 69.4,20,0 699 | 72.3,49,1 700 | 72.7,16,0 701 | 66.4,20,0 702 | 68.2,16,1 703 | 67.4,2,1 704 | 62.4,39,0 705 | 70.4,41,0 706 | 69.1,45,1 707 | 65.5,35,1 708 | 71.1,13,1 709 | 68.6,11,1 710 | 79.1,50,0 711 | 69.7,42,1 712 | 75.6,6,1 713 | 69.7,30,1 714 | 63.5,8,1 715 | 78,24,0 716 | 70.6,19,1 717 | 74.1,17,1 718 | 70.6,19,1 719 | 64.9,21,0 720 | 71.2,36,1 721 | 71.7,26,0 722 | 68.7,43,0 723 | 69.8,11,0 724 | 68.6,29,0 725 | 73.5,10,1 726 | 79.5,21,0 727 | 63.4,10,0 728 | 79.3,25,0 729 | 70,30,0 730 | 66.3,23,0 731 | 67.9,58,1 732 | 61.4,90,1 733 | 67.4,5,0 734 | 72.6,55,1 735 | 70.7,10,0 736 | 67.7,50,1 737 | 72.3,39,1 738 | 68.6,24,1 739 | 65.2,12,1 740 | 70.4,17,1 741 | 63.4,39,0 742 | 62.1,74,1 743 | 75.8,66,0 744 | 67.9,29,0 745 | 76.2,15,1 746 | 74.9,58,0 747 | 68.2,13,1 748 | 67.2,25,1 749 | 62.6,18,0 750 | 70.2,9,0 751 | 73.1,61,1 752 | 64.3,8,1 753 | 66.9,20,1 754 | 73.7,35,0 755 | 68.7,42,0 756 | 79.1,17,0 757 | 69.4,26,0 758 | 74.1,30,1 759 | 68.4,15,0 760 | 65.5,29,0 761 | 65.5,22,1 762 | 71.3,48,1 763 | 80.2,6,0 764 | 67.9,14,1 765 | 71.6,59,1 766 | 64.7,70,1 767 | 65.5,28,0 768 | 71.7,34,0 769 | 73.3,31,0 770 | 65.5,63,1 771 | 66.8,18,1 772 | 73.3,67,1 773 | 73.1,86,1 774 | 66.3,26,0 775 | 80.6,79,0 776 | 69.3,69,0 777 | 67.6,24,0 778 | 70.8,30,0 779 | 76.7,23,0 780 | 69.8,34,0 781 | 63.9,32,0 782 | 56.2,31,0 783 | 71.7,49,1 784 | 76.5,41,0 785 | 62.3,24,1 786 | 61.9,19,0 787 | 60.7,29,1 788 | 75,10,1 789 | 70.2,9,0 790 | 65.4,19,0 791 | 64.6,74,0 792 | 74.8,47,0 793 | 76.5,12,1 794 | 79.2,32,0 795 | 72.3,9,0 796 | 62.4,17,1 797 | 70,9,1 798 | 60.2,60,0 799 | 81.4,20,1 800 | 58.6,56,0 801 | 68.9,53,1 802 | 65,28,0 803 | 63.1,14,0 804 | 61,16,0 805 | 71.9,20,1 806 | 64.9,81,0 807 | 62.3,13,0 808 | 63.9,15,0 809 | 71.6,19,1 810 | 76.8,31,0 811 | 63.9,22,1 812 | 71.4,21,1 813 | 80.4,14,0 814 | 59.9,23,0 815 | 74.3,52,1 816 | 77.8,17,1 817 | 52.7,43,0 818 | 71.9,23,1 819 | 78.4,26,0 820 | 55.2,62,0 821 | 69.3,10,0 822 | 62.7,16,0 823 | 
61,9,0 824 | 73.3,32,1 825 | 66.7,18,0 826 | 74.8,27,1 827 | 63,23,0 828 | 70.5,32,0 829 | 64.6,21,0 830 | 64.4,63,1 831 | 68.7,37,0 832 | 64.6,28,0 833 | 68.1,55,1 834 | 69.1,59,0 835 | 72.9,23,1 836 | 62.2,52,1 837 | 66.7,65,1 838 | 70.9,33,1 839 | 66.5,24,0 840 | 68.4,32,0 841 | 64.4,24,1 842 | 68.9,22,1 843 | 74.5,27,1 844 | 73.3,14,1 845 | 63.1,60,0 846 | 76.2,18,1 847 | 59.8,36,0 848 | 69.9,97,0 849 | 72,54,1 850 | 72,27,1 851 | 78.9,8,1 852 | 83.9,37,1 853 | 62.9,17,0 854 | 65.4,43,1 855 | 77.4,22,0 856 | 58.3,68,1 857 | 78.9,9,1 858 | 75.2,6,1 859 | 57.2,64,1 860 | 75.1,26,0 861 | 65.2,22,0 862 | 75.1,21,0 863 | 63.2,28,0 864 | 60.4,37,1 865 | 80.4,2,1 866 | 63.2,5,0 867 | 67.7,16,0 868 | 72.9,35,0 869 | 80.4,26,0 870 | 73.3,36,0 871 | 55.1,58,1 872 | 66.7,15,1 873 | 65.6,60,0 874 | 69.5,54,0 875 | 79,42,1 876 | 66,24,0 877 | 57.3,16,1 878 | 63.3,20,0 879 | 63.2,9,0 880 | 69.8,34,0 881 | 68.3,31,0 882 | 64,13,0 883 | 77.2,42,0 884 | 72.5,35,1 885 | 79.8,6,1 886 | 71.1,4,1 887 | 71.3,16,1 888 | 72.2,17,1 889 | 71.9,17,1 890 | 76.2,23,1 891 | 73.9,38,0 892 | 77.3,30,0 893 | 74.2,26,1 894 | 59.6,18,1 895 | 70.7,32,1 896 | 65,16,0 897 | 78.2,35,1 898 | 57.3,38,0 899 | 71.3,35,1 900 | 71.4,18,1 901 | 76.2,8,1 902 | 60.8,19,1 903 | 71,50,1 904 | 70.3,42,1 905 | 75.6,28,1 906 | 73.6,32,0 907 | 70.8,29,0 908 | 75.6,34,1 909 | 70.3,44,1 910 | 60.6,17,1 911 | 65.7,11,1 912 | 78.2,29,1 913 | 74.5,36,1 914 | 74.8,38,1 915 | 67.8,7,1 916 | 69.7,25,0 917 | 68.8,34,0 918 | 68.8,11,1 919 | 59.5,100,0 920 | 70.9,9,0 921 | 65.9,21,0 922 | 79.7,52,1 923 | 69.1,38,1 924 | 76.8,23,1 925 | 65.2,10,0 926 | 68.5,23,1 927 | 69.1,56,1 928 | 70.2,14,0 929 | 73.7,52,1 930 | 66.3,32,0 931 | 68.9,25,0 932 | 69.4,7,1 933 | 71.9,33,0 934 | 68.9,71,0 935 | 60.1,9,0 936 | 71.5,9,0 937 | 81.2,37,1 938 | 73.2,20,0 939 | 64.2,34,0 940 | 62.7,24,0 941 | 69.8,41,0 942 | 61.6,38,0 943 | 71.8,8,0 944 | 70.3,6,0 945 | 73.6,2,0 946 | 72.1,21,1 947 | 67.1,26,1 948 | 79.5,12,0 949 | 64.9,70,0 950 | 
63.8,15,0 951 | 79.4,11,0 952 | 66.5,33,1 953 | 75.2,48,1 954 | 77.1,25,1 955 | 69.1,44,0 956 | 80,39,1 957 | 72,26,1 958 | 68.6,21,0 959 | 65.1,27,1 960 | 74.3,11,0 961 | 67.6,27,1 962 | 68.1,10,1 963 | 76.5,9,0 964 | 70.9,46,1 965 | 67.4,8,0 966 | 80.9,54,1 967 | 73.7,52,1 968 | 62.4,72,1 969 | 73.5,59,0 970 | 68.4,26,0 971 | 68.6,41,0 972 | 72.8,42,1 973 | 66.9,18,1 974 | 70.2,18,0 975 | 61,37,1 976 | 64.2,38,0 977 | 84.3,25,1 978 | 69.6,48,0 979 | 71.8,49,0 980 | 67.8,12,0 981 | 70,13,1 982 | 61.8,27,0 983 | 80.6,37,0 984 | 68.7,22,0 985 | 68.1,11,1 986 | 68.9,16,1 987 | 71,26,0 988 | 70.9,19,1 989 | 67.7,62,0 990 | 56.4,37,1 991 | 62.3,31,0 992 | 61.7,33,1 993 | 70.4,57,0 994 | 62.6,12,0 995 | 67.3,21,1 996 | 64.8,63,0 997 | 88.7,44,1 998 | 91.2,65,1 999 | 48.9,34,0 1000 | 86.2,46,1 1001 | 49,10,1 1002 | --------------------------------------------------------------------------------