├── 10 Univariate non-graphical EDA.ipynb
├── 11 Univariate visualizations (Categorical).ipynb
├── 12 Univariate visualizations (Numerical).ipynb
├── 13 Bivariate numerical.ipynb
├── 14 Bivariate Categorical - Numerical.ipynb
├── 15 Bivariate categorical .ipynb
├── 2 Understanding your data.ipynb
├── 3 Missing values.ipynb
├── 4 Duplicated values.ipynb
├── 6 Outliers(z_score).ipynb
├── 7 Outliers(IQR).ipynb
├── 8 Outliers(Percentile).ipynb
├── 9 Correction of datatype.ipynb
├── AB_NYC_2019.csv
├── EDA 5 (Outliers).pptx
├── README.md
├── scholarship.csv
└── weight-height.csv
/10 Univariate non-graphical EDA.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "8abdfe09",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "id": "119d4726",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "df = pd.read_csv(\"AB_NYC_2019.csv\")"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 3,
26 | "id": "217b1e02",
27 | "metadata": {
28 | "scrolled": true
29 | },
30 | "outputs": [
31 | {
32 | "data": {
33 | "text/html": [
34 | "
\n",
35 | "\n",
48 | "
\n",
49 | " \n",
50 | " \n",
51 | " | \n",
52 | " id | \n",
53 | " name | \n",
54 | " host_id | \n",
55 | " host_name | \n",
56 | " neighbourhood_group | \n",
57 | " neighbourhood | \n",
58 | " latitude | \n",
59 | " longitude | \n",
60 | " room_type | \n",
61 | " price | \n",
62 | " minimum_nights | \n",
63 | " number_of_reviews | \n",
64 | " last_review | \n",
65 | " reviews_per_month | \n",
66 | " calculated_host_listings_count | \n",
67 | " availability_365 | \n",
68 | "
\n",
69 | " \n",
70 | " \n",
71 | " \n",
72 | " 0 | \n",
73 | " 2539 | \n",
74 | " Clean & quiet apt home by the park | \n",
75 | " 2787 | \n",
76 | " John | \n",
77 | " Brooklyn | \n",
78 | " Kensington | \n",
79 | " 40.64749 | \n",
80 | " -73.97237 | \n",
81 | " Private room | \n",
82 | " 149 | \n",
83 | " 1 | \n",
84 | " 9 | \n",
85 | " 19-10-2018 | \n",
86 | " 0.21 | \n",
87 | " 6 | \n",
88 | " 365 | \n",
89 | "
\n",
90 | " \n",
91 | " 1 | \n",
92 | " 2595 | \n",
93 | " Skylit Midtown Castle | \n",
94 | " 2845 | \n",
95 | " Jennifer | \n",
96 | " Manhattan | \n",
97 | " Midtown | \n",
98 | " 40.75362 | \n",
99 | " -73.98377 | \n",
100 | " Entire home/apt | \n",
101 | " 225 | \n",
102 | " 1 | \n",
103 | " 45 | \n",
104 | " 21-05-2019 | \n",
105 | " 0.38 | \n",
106 | " 2 | \n",
107 | " 355 | \n",
108 | "
\n",
109 | " \n",
110 | " 2 | \n",
111 | " 3647 | \n",
112 | " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
113 | " 4632 | \n",
114 | " Elisabeth | \n",
115 | " Manhattan | \n",
116 | " Harlem | \n",
117 | " 40.80902 | \n",
118 | " -73.94190 | \n",
119 | " Private room | \n",
120 | " 150 | \n",
121 | " 3 | \n",
122 | " 0 | \n",
123 | " NaN | \n",
124 | " NaN | \n",
125 | " 1 | \n",
126 | " 365 | \n",
127 | "
\n",
128 | " \n",
129 | " 3 | \n",
130 | " 3831 | \n",
131 | " Cozy Entire Floor of Brownstone | \n",
132 | " 4869 | \n",
133 | " LisaRoxanne | \n",
134 | " Brooklyn | \n",
135 | " Clinton Hill | \n",
136 | " 40.68514 | \n",
137 | " -73.95976 | \n",
138 | " Entire home/apt | \n",
139 | " 89 | \n",
140 | " 1 | \n",
141 | " 270 | \n",
142 | " 05-07-2019 | \n",
143 | " 4.64 | \n",
144 | " 1 | \n",
145 | " 194 | \n",
146 | "
\n",
147 | " \n",
148 | " 4 | \n",
149 | " 5022 | \n",
150 | " Entire Apt: Spacious Studio/Loft by central park | \n",
151 | " 7192 | \n",
152 | " Laura | \n",
153 | " Manhattan | \n",
154 | " East Harlem | \n",
155 | " 40.79851 | \n",
156 | " -73.94399 | \n",
157 | " Entire home/apt | \n",
158 | " 80 | \n",
159 | " 10 | \n",
160 | " 9 | \n",
161 | " 19-11-2018 | \n",
162 | " 0.10 | \n",
163 | " 1 | \n",
164 | " 0 | \n",
165 | "
\n",
166 | " \n",
167 | "
\n",
168 | "
"
169 | ],
170 | "text/plain": [
171 | " id name host_id \\\n",
172 | "0 2539 Clean & quiet apt home by the park 2787 \n",
173 | "1 2595 Skylit Midtown Castle 2845 \n",
174 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n",
175 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n",
176 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n",
177 | "\n",
178 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
179 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n",
180 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
181 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n",
182 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
183 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n",
184 | "\n",
185 | " room_type price minimum_nights number_of_reviews last_review \\\n",
186 | "0 Private room 149 1 9 19-10-2018 \n",
187 | "1 Entire home/apt 225 1 45 21-05-2019 \n",
188 | "2 Private room 150 3 0 NaN \n",
189 | "3 Entire home/apt 89 1 270 05-07-2019 \n",
190 | "4 Entire home/apt 80 10 9 19-11-2018 \n",
191 | "\n",
192 | " reviews_per_month calculated_host_listings_count availability_365 \n",
193 | "0 0.21 6 365 \n",
194 | "1 0.38 2 355 \n",
195 | "2 NaN 1 365 \n",
196 | "3 4.64 1 194 \n",
197 | "4 0.10 1 0 "
198 | ]
199 | },
200 | "execution_count": 3,
201 | "metadata": {},
202 | "output_type": "execute_result"
203 | }
204 | ],
205 | "source": [
206 | "df.head()"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 4,
212 | "id": "94df84f5",
213 | "metadata": {},
214 | "outputs": [],
215 | "source": [
216 | "df[\"id\"]=df[\"id\"].astype(str)\n",
217 | "df[\"host_id\"]=df[\"host_id\"].astype(str)\n",
218 | "df[\"latitude\"]=df[\"latitude\"].astype(str)\n",
219 | "df[\"longitude\"]=df[\"longitude\"].astype(str)"
220 | ]
221 | },
222 | {
223 | "cell_type": "markdown",
224 | "id": "56db9221",
225 | "metadata": {},
226 | "source": [
227 | "How does the data look mathematically?"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 5,
233 | "id": "95b30309",
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "data": {
238 | "text/html": [
239 | "\n",
240 | "\n",
253 | "
\n",
254 | " \n",
255 | " \n",
256 | " | \n",
257 | " price | \n",
258 | " minimum_nights | \n",
259 | " number_of_reviews | \n",
260 | " reviews_per_month | \n",
261 | " calculated_host_listings_count | \n",
262 | " availability_365 | \n",
263 | "
\n",
264 | " \n",
265 | " \n",
266 | " \n",
267 | " count | \n",
268 | " 48906.000000 | \n",
269 | " 48906.000000 | \n",
270 | " 48906.000000 | \n",
271 | " 38854.000000 | \n",
272 | " 48906.000000 | \n",
273 | " 48906.000000 | \n",
274 | "
\n",
275 | " \n",
276 | " mean | \n",
277 | " 152.711324 | \n",
278 | " 7.031612 | \n",
279 | " 23.300454 | \n",
280 | " 1.373151 | \n",
281 | " 7.142702 | \n",
282 | " 112.782031 | \n",
283 | "
\n",
284 | " \n",
285 | " std | \n",
286 | " 240.128713 | \n",
287 | " 20.512489 | \n",
288 | " 44.607175 | \n",
289 | " 1.680270 | \n",
290 | " 32.948926 | \n",
291 | " 131.620370 | \n",
292 | "
\n",
293 | " \n",
294 | " min | \n",
295 | " 0.000000 | \n",
296 | " 1.000000 | \n",
297 | " 0.000000 | \n",
298 | " 0.010000 | \n",
299 | " 1.000000 | \n",
300 | " 0.000000 | \n",
301 | "
\n",
302 | " \n",
303 | " 25% | \n",
304 | " 69.000000 | \n",
305 | " 1.000000 | \n",
306 | " 1.000000 | \n",
307 | " 0.190000 | \n",
308 | " 1.000000 | \n",
309 | " 0.000000 | \n",
310 | "
\n",
311 | " \n",
312 | " 50% | \n",
313 | " 106.000000 | \n",
314 | " 3.000000 | \n",
315 | " 5.000000 | \n",
316 | " 0.720000 | \n",
317 | " 1.000000 | \n",
318 | " 45.000000 | \n",
319 | "
\n",
320 | " \n",
321 | " 75% | \n",
322 | " 175.000000 | \n",
323 | " 5.000000 | \n",
324 | " 24.000000 | \n",
325 | " 2.020000 | \n",
326 | " 2.000000 | \n",
327 | " 227.000000 | \n",
328 | "
\n",
329 | " \n",
330 | " max | \n",
331 | " 10000.000000 | \n",
332 | " 1250.000000 | \n",
333 | " 629.000000 | \n",
334 | " 58.500000 | \n",
335 | " 327.000000 | \n",
336 | " 365.000000 | \n",
337 | "
\n",
338 | " \n",
339 | "
\n",
340 | "
"
341 | ],
342 | "text/plain": [
343 | " price minimum_nights number_of_reviews reviews_per_month \\\n",
344 | "count 48906.000000 48906.000000 48906.000000 38854.000000 \n",
345 | "mean 152.711324 7.031612 23.300454 1.373151 \n",
346 | "std 240.128713 20.512489 44.607175 1.680270 \n",
347 | "min 0.000000 1.000000 0.000000 0.010000 \n",
348 | "25% 69.000000 1.000000 1.000000 0.190000 \n",
349 | "50% 106.000000 3.000000 5.000000 0.720000 \n",
350 | "75% 175.000000 5.000000 24.000000 2.020000 \n",
351 | "max 10000.000000 1250.000000 629.000000 58.500000 \n",
352 | "\n",
353 | " calculated_host_listings_count availability_365 \n",
354 | "count 48906.000000 48906.000000 \n",
355 | "mean 7.142702 112.782031 \n",
356 | "std 32.948926 131.620370 \n",
357 | "min 1.000000 0.000000 \n",
358 | "25% 1.000000 0.000000 \n",
359 | "50% 1.000000 45.000000 \n",
360 | "75% 2.000000 227.000000 \n",
361 | "max 327.000000 365.000000 "
362 | ]
363 | },
364 | "execution_count": 5,
365 | "metadata": {},
366 | "output_type": "execute_result"
367 | }
368 | ],
369 | "source": [
370 | "df.describe()"
371 | ]
372 | },
373 | {
374 | "cell_type": "markdown",
375 | "id": "138bafa9",
376 | "metadata": {},
377 | "source": [
378 | "The minimum nights for listings range from 1 to 1250."
379 | ]
380 | },
381 | {
382 | "cell_type": "markdown",
383 | "id": "a6b70275",
384 | "metadata": {},
385 | "source": [
386 | "# Categorical Data"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": 6,
392 | "id": "74b3cb63",
393 | "metadata": {},
394 | "outputs": [
395 | {
396 | "data": {
397 | "text/plain": [
398 | "id 48895\n",
399 | "name 47896\n",
400 | "host_id 37457\n",
401 | "host_name 11452\n",
402 | "neighbourhood_group 5\n",
403 | "neighbourhood 221\n",
404 | "latitude 19048\n",
405 | "longitude 14718\n",
406 | "room_type 3\n",
407 | "price 674\n",
408 | "minimum_nights 109\n",
409 | "number_of_reviews 394\n",
410 | "last_review 1764\n",
411 | "reviews_per_month 937\n",
412 | "calculated_host_listings_count 47\n",
413 | "availability_365 366\n",
414 | "dtype: int64"
415 | ]
416 | },
417 | "execution_count": 6,
418 | "metadata": {},
419 | "output_type": "execute_result"
420 | }
421 | ],
422 | "source": [
423 | "df.nunique()"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 7,
429 | "id": "56c9d1dc",
430 | "metadata": {},
431 | "outputs": [
432 | {
433 | "data": {
434 | "text/plain": [
435 | "Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood_group',\n",
436 | " 'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',\n",
437 | " 'minimum_nights', 'number_of_reviews', 'last_review',\n",
438 | " 'reviews_per_month', 'calculated_host_listings_count',\n",
439 | " 'availability_365'],\n",
440 | " dtype='object')"
441 | ]
442 | },
443 | "execution_count": 7,
444 | "metadata": {},
445 | "output_type": "execute_result"
446 | }
447 | ],
448 | "source": [
449 | "df.columns"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": 8,
455 | "id": "fa8aa028",
456 | "metadata": {},
457 | "outputs": [
458 | {
459 | "data": {
460 | "text/plain": [
461 | "Entire home/apt 25414\n",
462 | "Private room 22332\n",
463 | "Shared room 1160\n",
464 | "Name: room_type, dtype: int64"
465 | ]
466 | },
467 | "execution_count": 8,
468 | "metadata": {},
469 | "output_type": "execute_result"
470 | }
471 | ],
472 | "source": [
473 | "df[\"room_type\"].value_counts()"
474 | ]
475 | },
476 | {
477 | "cell_type": "code",
478 | "execution_count": 9,
479 | "id": "3401f55e",
480 | "metadata": {},
481 | "outputs": [
482 | {
483 | "data": {
484 | "text/plain": [
485 | "Entire home/apt 0.519650\n",
486 | "Private room 0.456631\n",
487 | "Shared room 0.023719\n",
488 | "Name: room_type, dtype: float64"
489 | ]
490 | },
491 | "execution_count": 9,
492 | "metadata": {},
493 | "output_type": "execute_result"
494 | }
495 | ],
496 | "source": [
497 | "df[\"room_type\"].value_counts(normalize = True)"
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": 10,
503 | "id": "5b722d04",
504 | "metadata": {},
505 | "outputs": [
506 | {
507 | "data": {
508 | "text/plain": [
509 | "Manhattan 21669\n",
510 | "Brooklyn 20107\n",
511 | "Queens 5666\n",
512 | "Bronx 1091\n",
513 | "Staten Island 373\n",
514 | "Name: neighbourhood_group, dtype: int64"
515 | ]
516 | },
517 | "execution_count": 10,
518 | "metadata": {},
519 | "output_type": "execute_result"
520 | }
521 | ],
522 | "source": [
523 | "df[\"neighbourhood_group\"].value_counts()"
524 | ]
525 | },
526 | {
527 | "cell_type": "markdown",
528 | "id": "388dfebe",
529 | "metadata": {},
530 | "source": [
531 | "# Numerical Data"
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "execution_count": 11,
537 | "id": "c45b4650",
538 | "metadata": {},
539 | "outputs": [
540 | {
541 | "data": {
542 | "text/plain": [
543 | "(-10.001, 2000.0] 48820\n",
544 | "(2000.0, 4000.0] 54\n",
545 | "(4000.0, 6000.0] 16\n",
546 | "(6000.0, 8000.0] 9\n",
547 | "(8000.0, 10000.0] 7\n",
548 | "Name: price, dtype: int64"
549 | ]
550 | },
551 | "execution_count": 11,
552 | "metadata": {},
553 | "output_type": "execute_result"
554 | }
555 | ],
556 | "source": [
557 | "df[\"price\"].value_counts(bins = 5)"
558 | ]
559 | },
560 | {
561 | "cell_type": "code",
562 | "execution_count": 12,
563 | "id": "7f3ce5a3",
564 | "metadata": {
565 | "scrolled": false
566 | },
567 | "outputs": [
568 | {
569 | "data": {
570 | "text/plain": [
571 | "(50.0, 100.0] 17373\n",
572 | "(100.0, 200.0] 16588\n",
573 | "(200.0, 500.0] 7340\n",
574 | "(0.0, 50.0] 6550\n",
575 | "(500.0, 800.0] 624\n",
576 | "(800.0, 2000.0] 334\n",
577 | "(2000.0, 4000.0] 54\n",
578 | "(4000.0, 10000.0] 32\n",
579 | "(-10.001, 0.0] 11\n",
580 | "Name: price, dtype: int64"
581 | ]
582 | },
583 | "execution_count": 12,
584 | "metadata": {},
585 | "output_type": "execute_result"
586 | }
587 | ],
588 | "source": [
589 | "bins = [-10,0, 50,100, 200,500,800,2000,4000,10000]\n",
590 | "df[\"price\"].value_counts(bins = bins)"
591 | ]
592 | },
593 | {
594 | "cell_type": "markdown",
595 | "id": "993f989b",
596 | "metadata": {},
597 | "source": [
598 | "Binning values with `value_counts(bins=...)` is mainly helpful in small datasets."
599 | ]
600 | },
601 | {
602 | "cell_type": "markdown",
603 | "id": "c76d8cea",
604 | "metadata": {},
605 | "source": [
606 | "## Measures of central tendency"
607 | ]
608 | },
609 | {
610 | "cell_type": "code",
611 | "execution_count": 13,
612 | "id": "329245f5",
613 | "metadata": {
614 | "scrolled": true
615 | },
616 | "outputs": [
617 | {
618 | "data": {
619 | "text/plain": [
620 | "152.71132376395533"
621 | ]
622 | },
623 | "execution_count": 13,
624 | "metadata": {},
625 | "output_type": "execute_result"
626 | }
627 | ],
628 | "source": [
629 | "df[\"price\"].mean()"
630 | ]
631 | },
632 | {
633 | "cell_type": "code",
634 | "execution_count": 14,
635 | "id": "b48226ef",
636 | "metadata": {},
637 | "outputs": [
638 | {
639 | "data": {
640 | "text/plain": [
641 | "106.0"
642 | ]
643 | },
644 | "execution_count": 14,
645 | "metadata": {},
646 | "output_type": "execute_result"
647 | }
648 | ],
649 | "source": [
650 | "df[\"price\"].median()"
651 | ]
652 | },
653 | {
654 | "cell_type": "code",
655 | "execution_count": 15,
656 | "id": "00bae1ee",
657 | "metadata": {},
658 | "outputs": [
659 | {
660 | "data": {
661 | "text/plain": [
662 | "240.1287131622509"
663 | ]
664 | },
665 | "execution_count": 15,
666 | "metadata": {},
667 | "output_type": "execute_result"
668 | }
669 | ],
670 | "source": [
671 | "df[\"price\"].std()"
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "execution_count": 16,
677 | "id": "33404a5d",
678 | "metadata": {},
679 | "outputs": [
680 | {
681 | "data": {
682 | "text/plain": [
683 | "7.031611663190611"
684 | ]
685 | },
686 | "execution_count": 16,
687 | "metadata": {},
688 | "output_type": "execute_result"
689 | }
690 | ],
691 | "source": [
692 | "df[\"minimum_nights\"].mean()"
693 | ]
694 | },
695 | {
696 | "cell_type": "code",
697 | "execution_count": 17,
698 | "id": "a45e32fe",
699 | "metadata": {},
700 | "outputs": [
701 | {
702 | "data": {
703 | "text/plain": [
704 | "3.0"
705 | ]
706 | },
707 | "execution_count": 17,
708 | "metadata": {},
709 | "output_type": "execute_result"
710 | }
711 | ],
712 | "source": [
713 | "df[\"minimum_nights\"].median()"
714 | ]
715 | },
716 | {
717 | "cell_type": "markdown",
718 | "id": "ce2e309d",
719 | "metadata": {},
720 | "source": [
721 | "## Measures of Shape (Skewness and Kurtosis)"
722 | ]
723 | },
724 | {
725 | "cell_type": "code",
726 | "execution_count": 18,
727 | "id": "12649a50",
728 | "metadata": {},
729 | "outputs": [
730 | {
731 | "data": {
732 | "text/plain": [
733 | "19.120831694826197"
734 | ]
735 | },
736 | "execution_count": 18,
737 | "metadata": {},
738 | "output_type": "execute_result"
739 | }
740 | ],
741 | "source": [
742 | "df[\"price\"].skew()"
743 | ]
744 | },
745 | {
746 | "cell_type": "code",
747 | "execution_count": 19,
748 | "id": "0e856dc2",
749 | "metadata": {},
750 | "outputs": [
751 | {
752 | "data": {
753 | "text/plain": [
754 | "585.7930484394186"
755 | ]
756 | },
757 | "execution_count": 19,
758 | "metadata": {},
759 | "output_type": "execute_result"
760 | }
761 | ],
762 | "source": [
763 | "df[\"price\"].kurt()"
764 | ]
765 | },
766 | {
767 | "cell_type": "markdown",
768 | "id": "8e4ad1db",
769 | "metadata": {},
770 | "source": [
771 | "How many listings have availability throughout the year (365 days)?"
772 | ]
773 | },
774 | {
775 | "cell_type": "code",
776 | "execution_count": 20,
777 | "id": "e1cc1b65",
778 | "metadata": {},
779 | "outputs": [
780 | {
781 | "data": {
782 | "text/plain": [
783 | "1295"
784 | ]
785 | },
786 | "execution_count": 20,
787 | "metadata": {},
788 | "output_type": "execute_result"
789 | }
790 | ],
791 | "source": [
792 | "df[df[\"availability_365\"]==365].shape[0]"
793 | ]
794 | },
795 | {
796 | "cell_type": "code",
797 | "execution_count": 21,
798 | "id": "87d7ac64",
799 | "metadata": {},
800 | "outputs": [
801 | {
802 | "name": "stderr",
803 | "output_type": "stream",
804 | "text": [
805 | "C:\\Users\\GFG19189\\AppData\\Local\\Temp\\ipykernel_1056\\1134722465.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n",
806 | " df.corr()\n"
807 | ]
808 | },
809 | {
810 | "data": {
811 | "text/html": [
812 | "\n",
813 | "\n",
826 | "
\n",
827 | " \n",
828 | " \n",
829 | " | \n",
830 | " price | \n",
831 | " minimum_nights | \n",
832 | " number_of_reviews | \n",
833 | " reviews_per_month | \n",
834 | " calculated_host_listings_count | \n",
835 | " availability_365 | \n",
836 | "
\n",
837 | " \n",
838 | " \n",
839 | " \n",
840 | " price | \n",
841 | " 1.000000 | \n",
842 | " 0.042771 | \n",
843 | " -0.048014 | \n",
844 | " -0.030608 | \n",
845 | " 0.057478 | \n",
846 | " 0.081817 | \n",
847 | "
\n",
848 | " \n",
849 | " minimum_nights | \n",
850 | " 0.042771 | \n",
851 | " 1.000000 | \n",
852 | " -0.080093 | \n",
853 | " -0.121772 | \n",
854 | " 0.127917 | \n",
855 | " 0.144146 | \n",
856 | "
\n",
857 | " \n",
858 | " number_of_reviews | \n",
859 | " -0.048014 | \n",
860 | " -0.080093 | \n",
861 | " 1.000000 | \n",
862 | " 0.549291 | \n",
863 | " -0.072375 | \n",
864 | " 0.172002 | \n",
865 | "
\n",
866 | " \n",
867 | " reviews_per_month | \n",
868 | " -0.030608 | \n",
869 | " -0.121772 | \n",
870 | " 0.549291 | \n",
871 | " 1.000000 | \n",
872 | " -0.009414 | \n",
873 | " 0.185818 | \n",
874 | "
\n",
875 | " \n",
876 | " calculated_host_listings_count | \n",
877 | " 0.057478 | \n",
878 | " 0.127917 | \n",
879 | " -0.072375 | \n",
880 | " -0.009414 | \n",
881 | " 1.000000 | \n",
882 | " 0.225680 | \n",
883 | "
\n",
884 | " \n",
885 | " availability_365 | \n",
886 | " 0.081817 | \n",
887 | " 0.144146 | \n",
888 | " 0.172002 | \n",
889 | " 0.185818 | \n",
890 | " 0.225680 | \n",
891 | " 1.000000 | \n",
892 | "
\n",
893 | " \n",
894 | "
\n",
895 | "
"
896 | ],
897 | "text/plain": [
898 | " price minimum_nights number_of_reviews \\\n",
899 | "price 1.000000 0.042771 -0.048014 \n",
900 | "minimum_nights 0.042771 1.000000 -0.080093 \n",
901 | "number_of_reviews -0.048014 -0.080093 1.000000 \n",
902 | "reviews_per_month -0.030608 -0.121772 0.549291 \n",
903 | "calculated_host_listings_count 0.057478 0.127917 -0.072375 \n",
904 | "availability_365 0.081817 0.144146 0.172002 \n",
905 | "\n",
906 | " reviews_per_month \\\n",
907 | "price -0.030608 \n",
908 | "minimum_nights -0.121772 \n",
909 | "number_of_reviews 0.549291 \n",
910 | "reviews_per_month 1.000000 \n",
911 | "calculated_host_listings_count -0.009414 \n",
912 | "availability_365 0.185818 \n",
913 | "\n",
914 | " calculated_host_listings_count \\\n",
915 | "price 0.057478 \n",
916 | "minimum_nights 0.127917 \n",
917 | "number_of_reviews -0.072375 \n",
918 | "reviews_per_month -0.009414 \n",
919 | "calculated_host_listings_count 1.000000 \n",
920 | "availability_365 0.225680 \n",
921 | "\n",
922 | " availability_365 \n",
923 | "price 0.081817 \n",
924 | "minimum_nights 0.144146 \n",
925 | "number_of_reviews 0.172002 \n",
926 | "reviews_per_month 0.185818 \n",
927 | "calculated_host_listings_count 0.225680 \n",
928 | "availability_365 1.000000 "
929 | ]
930 | },
931 | "execution_count": 21,
932 | "metadata": {},
933 | "output_type": "execute_result"
934 | }
935 | ],
936 | "source": [
937 | "df.corr()"
938 | ]
939 | }
940 | ],
941 | "metadata": {
942 | "kernelspec": {
943 | "display_name": "Python 3 (ipykernel)",
944 | "language": "python",
945 | "name": "python3"
946 | },
947 | "language_info": {
948 | "codemirror_mode": {
949 | "name": "ipython",
950 | "version": 3
951 | },
952 | "file_extension": ".py",
953 | "mimetype": "text/x-python",
954 | "name": "python",
955 | "nbconvert_exporter": "python",
956 | "pygments_lexer": "ipython3",
957 | "version": "3.11.1"
958 | }
959 | },
960 | "nbformat": 4,
961 | "nbformat_minor": 5
962 | }
963 |
--------------------------------------------------------------------------------
/2 Understanding your data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 29,
6 | "id": "8eb8b9b5",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd\n",
11 | "import seaborn as sns"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 30,
17 | "id": "ec399f59",
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "df = pd.read_csv(\"AB_NYC_2019.csv\")"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "id": "c7f041e4",
27 | "metadata": {},
28 | "source": [
29 | "## 1. Dimension of data?"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 31,
35 | "id": "ab628eec",
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/plain": [
41 | "(48906, 16)"
42 | ]
43 | },
44 | "execution_count": 31,
45 | "metadata": {},
46 | "output_type": "execute_result"
47 | }
48 | ],
49 | "source": [
50 | "df.shape"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "id": "bae2d9b7",
56 | "metadata": {},
57 | "source": [
58 | "## 2. What does the data look like?"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 32,
64 | "id": "a3bff2be",
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "data": {
69 | "text/html": [
70 | "\n",
71 | "\n",
84 | "
\n",
85 | " \n",
86 | " \n",
87 | " | \n",
88 | " id | \n",
89 | " name | \n",
90 | " host_id | \n",
91 | " host_name | \n",
92 | " neighbourhood_group | \n",
93 | " neighbourhood | \n",
94 | " latitude | \n",
95 | " longitude | \n",
96 | " room_type | \n",
97 | " price | \n",
98 | " minimum_nights | \n",
99 | " number_of_reviews | \n",
100 | " last_review | \n",
101 | " reviews_per_month | \n",
102 | " calculated_host_listings_count | \n",
103 | " availability_365 | \n",
104 | "
\n",
105 | " \n",
106 | " \n",
107 | " \n",
108 | " 0 | \n",
109 | " 2539 | \n",
110 | " Clean & quiet apt home by the park | \n",
111 | " 2787 | \n",
112 | " John | \n",
113 | " Brooklyn | \n",
114 | " Kensington | \n",
115 | " 40.64749 | \n",
116 | " -73.97237 | \n",
117 | " Private room | \n",
118 | " 149 | \n",
119 | " 1 | \n",
120 | " 9 | \n",
121 | " 19-10-2018 | \n",
122 | " 0.21 | \n",
123 | " 6 | \n",
124 | " 365 | \n",
125 | "
\n",
126 | " \n",
127 | " 1 | \n",
128 | " 2595 | \n",
129 | " Skylit Midtown Castle | \n",
130 | " 2845 | \n",
131 | " Jennifer | \n",
132 | " Manhattan | \n",
133 | " Midtown | \n",
134 | " 40.75362 | \n",
135 | " -73.98377 | \n",
136 | " Entire home/apt | \n",
137 | " 225 | \n",
138 | " 1 | \n",
139 | " 45 | \n",
140 | " 21-05-2019 | \n",
141 | " 0.38 | \n",
142 | " 2 | \n",
143 | " 355 | \n",
144 | "
\n",
145 | " \n",
146 | " 2 | \n",
147 | " 3647 | \n",
148 | " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
149 | " 4632 | \n",
150 | " Elisabeth | \n",
151 | " Manhattan | \n",
152 | " Harlem | \n",
153 | " 40.80902 | \n",
154 | " -73.94190 | \n",
155 | " Private room | \n",
156 | " 150 | \n",
157 | " 3 | \n",
158 | " 0 | \n",
159 | " NaN | \n",
160 | " NaN | \n",
161 | " 1 | \n",
162 | " 365 | \n",
163 | "
\n",
164 | " \n",
165 | " 3 | \n",
166 | " 3831 | \n",
167 | " Cozy Entire Floor of Brownstone | \n",
168 | " 4869 | \n",
169 | " LisaRoxanne | \n",
170 | " Brooklyn | \n",
171 | " Clinton Hill | \n",
172 | " 40.68514 | \n",
173 | " -73.95976 | \n",
174 | " Entire home/apt | \n",
175 | " 89 | \n",
176 | " 1 | \n",
177 | " 270 | \n",
178 | " 05-07-2019 | \n",
179 | " 4.64 | \n",
180 | " 1 | \n",
181 | " 194 | \n",
182 | "
\n",
183 | " \n",
184 | " 4 | \n",
185 | " 5022 | \n",
186 | " Entire Apt: Spacious Studio/Loft by central park | \n",
187 | " 7192 | \n",
188 | " Laura | \n",
189 | " Manhattan | \n",
190 | " East Harlem | \n",
191 | " 40.79851 | \n",
192 | " -73.94399 | \n",
193 | " Entire home/apt | \n",
194 | " 80 | \n",
195 | " 10 | \n",
196 | " 9 | \n",
197 | " 19-11-2018 | \n",
198 | " 0.10 | \n",
199 | " 1 | \n",
200 | " 0 | \n",
201 | "
\n",
202 | " \n",
203 | "
\n",
204 | "
"
205 | ],
206 | "text/plain": [
207 | " id name host_id \\\n",
208 | "0 2539 Clean & quiet apt home by the park 2787 \n",
209 | "1 2595 Skylit Midtown Castle 2845 \n",
210 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n",
211 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n",
212 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n",
213 | "\n",
214 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
215 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n",
216 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
217 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n",
218 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
219 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n",
220 | "\n",
221 | " room_type price minimum_nights number_of_reviews last_review \\\n",
222 | "0 Private room 149 1 9 19-10-2018 \n",
223 | "1 Entire home/apt 225 1 45 21-05-2019 \n",
224 | "2 Private room 150 3 0 NaN \n",
225 | "3 Entire home/apt 89 1 270 05-07-2019 \n",
226 | "4 Entire home/apt 80 10 9 19-11-2018 \n",
227 | "\n",
228 | " reviews_per_month calculated_host_listings_count availability_365 \n",
229 | "0 0.21 6 365 \n",
230 | "1 0.38 2 355 \n",
231 | "2 NaN 1 365 \n",
232 | "3 4.64 1 194 \n",
233 | "4 0.10 1 0 "
234 | ]
235 | },
236 | "execution_count": 32,
237 | "metadata": {},
238 | "output_type": "execute_result"
239 | }
240 | ],
241 | "source": [
242 | "df.head()"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 33,
248 | "id": "29978697",
249 | "metadata": {},
250 | "outputs": [
251 | {
252 | "data": {
253 | "text/html": [
254 | "\n",
255 | "\n",
268 | "
\n",
269 | " \n",
270 | " \n",
271 | " | \n",
272 | " id | \n",
273 | " name | \n",
274 | " host_id | \n",
275 | " host_name | \n",
276 | " neighbourhood_group | \n",
277 | " neighbourhood | \n",
278 | " latitude | \n",
279 | " longitude | \n",
280 | " room_type | \n",
281 | " price | \n",
282 | " minimum_nights | \n",
283 | " number_of_reviews | \n",
284 | " last_review | \n",
285 | " reviews_per_month | \n",
286 | " calculated_host_listings_count | \n",
287 | " availability_365 | \n",
288 | "
\n",
289 | " \n",
290 | " \n",
291 | " \n",
292 | " 48901 | \n",
293 | " 5441 | \n",
294 | " Central Manhattan/near Broadway | \n",
295 | " 7989 | \n",
296 | " Kate | \n",
297 | " Manhattan | \n",
298 | " Hell's Kitchen | \n",
299 | " 40.76076 | \n",
300 | " -73.98867 | \n",
301 | " Private room | \n",
302 | " 85 | \n",
303 | " 2 | \n",
304 | " 188 | \n",
305 | " 23-06-2019 | \n",
306 | " 1.50 | \n",
307 | " 1 | \n",
308 | " 39 | \n",
309 | "
\n",
310 | " \n",
311 | " 48902 | \n",
312 | " 5803 | \n",
313 | " Lovely Room 1, Garden, Best Area, Legal rental | \n",
314 | " 9744 | \n",
315 | " Laurie | \n",
316 | " Brooklyn | \n",
317 | " South Slope | \n",
318 | " 40.66829 | \n",
319 | " -73.98779 | \n",
320 | " Private room | \n",
321 | " 89 | \n",
322 | " 4 | \n",
323 | " 167 | \n",
324 | " 24-06-2019 | \n",
325 | " 1.34 | \n",
326 | " 3 | \n",
327 | " 314 | \n",
328 | "
\n",
329 | " \n",
330 | " 48903 | \n",
331 | " 6021 | \n",
332 | " Wonderful Guest Bedroom in Manhattan for SINGLES | \n",
333 | " 11528 | \n",
334 | " Claudio | \n",
335 | " Manhattan | \n",
336 | " Upper West Side | \n",
337 | " 40.79826 | \n",
338 | " -73.96113 | \n",
339 | " Private room | \n",
340 | " 85 | \n",
341 | " 2 | \n",
342 | " 113 | \n",
343 | " 05-07-2019 | \n",
344 | " 0.91 | \n",
345 | " 1 | \n",
346 | " 333 | \n",
347 | "
\n",
348 | " \n",
349 | " 48904 | \n",
350 | " 6090 | \n",
351 | " West Village Nest - Superhost | \n",
352 | " 11975 | \n",
353 | " Alina | \n",
354 | " Manhattan | \n",
355 | " West Village | \n",
356 | " 40.73530 | \n",
357 | " -74.00525 | \n",
358 | " Entire home/apt | \n",
359 | " 120 | \n",
360 | " 90 | \n",
361 | " 27 | \n",
362 | " 31-10-2018 | \n",
363 | " 0.22 | \n",
364 | " 1 | \n",
365 | " 0 | \n",
366 | "
\n",
367 | " \n",
368 | " 48905 | \n",
369 | " 6848 | \n",
370 | " Only 2 stops to Manhattan studio | \n",
371 | " 15991 | \n",
372 | " Allen & Irina | \n",
373 | " Brooklyn | \n",
374 | " Williamsburg | \n",
375 | " 40.70837 | \n",
376 | " -73.95352 | \n",
377 | " Entire home/apt | \n",
378 | " 140 | \n",
379 | " 2 | \n",
380 | " 148 | \n",
381 | " 29-06-2019 | \n",
382 | " 1.20 | \n",
383 | " 1 | \n",
384 | " 46 | \n",
385 | "
\n",
386 | " \n",
387 | "
\n",
388 | "
"
389 | ],
390 | "text/plain": [
391 | " id name host_id \\\n",
392 | "48901 5441 Central Manhattan/near Broadway 7989 \n",
393 | "48902 5803 Lovely Room 1, Garden, Best Area, Legal rental 9744 \n",
394 | "48903 6021 Wonderful Guest Bedroom in Manhattan for SINGLES 11528 \n",
395 | "48904 6090 West Village Nest - Superhost 11975 \n",
396 | "48905 6848 Only 2 stops to Manhattan studio 15991 \n",
397 | "\n",
398 | " host_name neighbourhood_group neighbourhood latitude \\\n",
399 | "48901 Kate Manhattan Hell's Kitchen 40.76076 \n",
400 | "48902 Laurie Brooklyn South Slope 40.66829 \n",
401 | "48903 Claudio Manhattan Upper West Side 40.79826 \n",
402 | "48904 Alina Manhattan West Village 40.73530 \n",
403 | "48905 Allen & Irina Brooklyn Williamsburg 40.70837 \n",
404 | "\n",
405 | " longitude room_type price minimum_nights number_of_reviews \\\n",
406 | "48901 -73.98867 Private room 85 2 188 \n",
407 | "48902 -73.98779 Private room 89 4 167 \n",
408 | "48903 -73.96113 Private room 85 2 113 \n",
409 | "48904 -74.00525 Entire home/apt 120 90 27 \n",
410 | "48905 -73.95352 Entire home/apt 140 2 148 \n",
411 | "\n",
412 | " last_review reviews_per_month calculated_host_listings_count \\\n",
413 | "48901 23-06-2019 1.50 1 \n",
414 | "48902 24-06-2019 1.34 3 \n",
415 | "48903 05-07-2019 0.91 1 \n",
416 | "48904 31-10-2018 0.22 1 \n",
417 | "48905 29-06-2019 1.20 1 \n",
418 | "\n",
419 | " availability_365 \n",
420 | "48901 39 \n",
421 | "48902 314 \n",
422 | "48903 333 \n",
423 | "48904 0 \n",
424 | "48905 46 "
425 | ]
426 | },
427 | "execution_count": 33,
428 | "metadata": {},
429 | "output_type": "execute_result"
430 | }
431 | ],
432 | "source": [
433 | "df.tail()"
434 | ]
435 | },
436 | {
437 | "cell_type": "code",
438 | "execution_count": 34,
439 | "id": "e1ee689c",
440 | "metadata": {},
441 | "outputs": [
442 | {
443 | "data": {
444 | "text/html": [
445 | "\n",
446 | "\n",
459 | "
\n",
460 | " \n",
461 | " \n",
462 | " | \n",
463 | " id | \n",
464 | " name | \n",
465 | " host_id | \n",
466 | " host_name | \n",
467 | " neighbourhood_group | \n",
468 | " neighbourhood | \n",
469 | " latitude | \n",
470 | " longitude | \n",
471 | " room_type | \n",
472 | " price | \n",
473 | " minimum_nights | \n",
474 | " number_of_reviews | \n",
475 | " last_review | \n",
476 | " reviews_per_month | \n",
477 | " calculated_host_listings_count | \n",
478 | " availability_365 | \n",
479 | "
\n",
480 | " \n",
481 | " \n",
482 | " \n",
483 | " 7024 | \n",
484 | " 5050156 | \n",
485 | " Wake Up In The City That Never Sleeps | \n",
486 | " 193488 | \n",
487 | " Jane | \n",
488 | " Manhattan | \n",
489 | " East Village | \n",
490 | " 40.73273 | \n",
491 | " -73.98969 | \n",
492 | " Private room | \n",
493 | " 135 | \n",
494 | " 1 | \n",
495 | " 14 | \n",
496 | " 26-05-2019 | \n",
497 | " 0.28 | \n",
498 | " 2 | \n",
499 | " 332 | \n",
500 | "
\n",
501 | " \n",
502 | " 3221 | \n",
503 | " 1923153 | \n",
504 | " Cozy Vintage Inspired East Village | \n",
505 | " 9965028 | \n",
506 | " Sasha | \n",
507 | " Manhattan | \n",
508 | " East Village | \n",
509 | " 40.72709 | \n",
510 | " -73.98442 | \n",
511 | " Entire home/apt | \n",
512 | " 450 | \n",
513 | " 2 | \n",
514 | " 18 | \n",
515 | " 15-01-2017 | \n",
516 | " 0.28 | \n",
517 | " 1 | \n",
518 | " 363 | \n",
519 | "
\n",
520 | " \n",
521 | " 18637 | \n",
522 | " 14741623 | \n",
523 | " Cozy Private Bedroom and Bathroom in Brooklyn | \n",
524 | " 39181402 | \n",
525 | " Sarah | \n",
526 | " Brooklyn | \n",
527 | " Bedford-Stuyvesant | \n",
528 | " 40.68179 | \n",
529 | " -73.95252 | \n",
530 | " Private room | \n",
531 | " 48 | \n",
532 | " 3 | \n",
533 | " 67 | \n",
534 | " 22-06-2019 | \n",
535 | " 1.95 | \n",
536 | " 1 | \n",
537 | " 70 | \n",
538 | "
\n",
539 | " \n",
540 | " 15805 | \n",
541 | " 12786165 | \n",
542 | " Cozy Private room in Chelsea ! | \n",
543 | " 69598657 | \n",
544 | " Vh | \n",
545 | " Manhattan | \n",
546 | " Chelsea | \n",
547 | " 40.75094 | \n",
548 | " -73.99748 | \n",
549 | " Private room | \n",
550 | " 75 | \n",
551 | " 2 | \n",
552 | " 115 | \n",
553 | " 23-06-2019 | \n",
554 | " 2.99 | \n",
555 | " 1 | \n",
556 | " 37 | \n",
557 | "
\n",
558 | " \n",
559 | " 24028 | \n",
560 | " 19368455 | \n",
561 | " Chelsea Gem | \n",
562 | " 70154608 | \n",
563 | " Donna | \n",
564 | " Manhattan | \n",
565 | " Chelsea | \n",
566 | " 40.74625 | \n",
567 | " -73.99911 | \n",
568 | " Entire home/apt | \n",
569 | " 185 | \n",
570 | " 5 | \n",
571 | " 17 | \n",
572 | " 01-07-2019 | \n",
573 | " 0.88 | \n",
574 | " 1 | \n",
575 | " 15 | \n",
576 | "
\n",
577 | " \n",
578 | "
\n",
579 | "
"
580 | ],
581 | "text/plain": [
582 | " id name host_id \\\n",
583 | "7024 5050156 Wake Up In The City That Never Sleeps 193488 \n",
584 | "3221 1923153 Cozy Vintage Inspired East Village 9965028 \n",
585 | "18637 14741623 Cozy Private Bedroom and Bathroom in Brooklyn 39181402 \n",
586 | "15805 12786165 Cozy Private room in Chelsea ! 69598657 \n",
587 | "24028 19368455 Chelsea Gem 70154608 \n",
588 | "\n",
589 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
590 | "7024 Jane Manhattan East Village 40.73273 -73.98969 \n",
591 | "3221 Sasha Manhattan East Village 40.72709 -73.98442 \n",
592 | "18637 Sarah Brooklyn Bedford-Stuyvesant 40.68179 -73.95252 \n",
593 | "15805 Vh Manhattan Chelsea 40.75094 -73.99748 \n",
594 | "24028 Donna Manhattan Chelsea 40.74625 -73.99911 \n",
595 | "\n",
596 | " room_type price minimum_nights number_of_reviews last_review \\\n",
597 | "7024 Private room 135 1 14 26-05-2019 \n",
598 | "3221 Entire home/apt 450 2 18 15-01-2017 \n",
599 | "18637 Private room 48 3 67 22-06-2019 \n",
600 | "15805 Private room 75 2 115 23-06-2019 \n",
601 | "24028 Entire home/apt 185 5 17 01-07-2019 \n",
602 | "\n",
603 | " reviews_per_month calculated_host_listings_count availability_365 \n",
604 | "7024 0.28 2 332 \n",
605 | "3221 0.28 1 363 \n",
606 | "18637 1.95 1 70 \n",
607 | "15805 2.99 1 37 \n",
608 | "24028 0.88 1 15 "
609 | ]
610 | },
611 | "execution_count": 34,
612 | "metadata": {},
613 | "output_type": "execute_result"
614 | }
615 | ],
616 | "source": [
617 | "df.sample(5)"
618 | ]
619 | },
620 | {
621 | "cell_type": "markdown",
622 | "id": "14ded357",
623 | "metadata": {},
624 | "source": [
625 | "## 3. What is the datatype of each column?"
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": 36,
631 | "id": "3b6ba267",
632 | "metadata": {},
633 | "outputs": [
634 | {
635 | "name": "stdout",
636 | "output_type": "stream",
637 | "text": [
638 | "\n",
639 | "RangeIndex: 48906 entries, 0 to 48905\n",
640 | "Data columns (total 16 columns):\n",
641 | " # Column Non-Null Count Dtype \n",
642 | "--- ------ -------------- ----- \n",
643 | " 0 id 48906 non-null int64 \n",
644 | " 1 name 48890 non-null object \n",
645 | " 2 host_id 48906 non-null int64 \n",
646 | " 3 host_name 48885 non-null object \n",
647 | " 4 neighbourhood_group 48906 non-null object \n",
648 | " 5 neighbourhood 48906 non-null object \n",
649 | " 6 latitude 48906 non-null float64\n",
650 | " 7 longitude 48906 non-null float64\n",
651 | " 8 room_type 48906 non-null object \n",
652 | " 9 price 48906 non-null int64 \n",
653 | " 10 minimum_nights 48906 non-null int64 \n",
654 | " 11 number_of_reviews 48906 non-null int64 \n",
655 | " 12 last_review 38854 non-null object \n",
656 | " 13 reviews_per_month 38854 non-null float64\n",
657 | " 14 calculated_host_listings_count 48906 non-null int64 \n",
658 | " 15 availability_365 48906 non-null int64 \n",
659 | "dtypes: float64(3), int64(7), object(6)\n",
660 | "memory usage: 6.0+ MB\n"
661 | ]
662 | }
663 | ],
664 | "source": [
665 | "df.info()"
666 | ]
667 | },
668 | {
669 | "cell_type": "markdown",
670 | "id": "5f2c538a",
671 | "metadata": {},
672 | "source": [
673 | "## 4. Are there any missing values?"
674 | ]
675 | },
676 | {
677 | "cell_type": "code",
678 | "execution_count": 37,
679 | "id": "fe257b4f",
680 | "metadata": {},
681 | "outputs": [
682 | {
683 | "data": {
684 | "text/plain": [
685 | "id 0\n",
686 | "name 16\n",
687 | "host_id 0\n",
688 | "host_name 21\n",
689 | "neighbourhood_group 0\n",
690 | "neighbourhood 0\n",
691 | "latitude 0\n",
692 | "longitude 0\n",
693 | "room_type 0\n",
694 | "price 0\n",
695 | "minimum_nights 0\n",
696 | "number_of_reviews 0\n",
697 | "last_review 10052\n",
698 | "reviews_per_month 10052\n",
699 | "calculated_host_listings_count 0\n",
700 | "availability_365 0\n",
701 | "dtype: int64"
702 | ]
703 | },
704 | "execution_count": 37,
705 | "metadata": {},
706 | "output_type": "execute_result"
707 | }
708 | ],
709 | "source": [
710 | "df.isna().sum()"
711 | ]
712 | },
713 | {
714 | "cell_type": "markdown",
715 | "id": "b8b5f3a1",
716 | "metadata": {},
717 | "source": [
718 | "## 5. What does the data look like mathematically?"
719 | ]
720 | },
721 | {
722 | "cell_type": "code",
723 | "execution_count": 38,
724 | "id": "fa4bc8f7",
725 | "metadata": {},
726 | "outputs": [
727 | {
728 | "data": {
729 | "text/html": [
730 | "\n",
731 | "\n",
744 | "
\n",
745 | " \n",
746 | " \n",
747 | " | \n",
748 | " id | \n",
749 | " host_id | \n",
750 | " latitude | \n",
751 | " longitude | \n",
752 | " price | \n",
753 | " minimum_nights | \n",
754 | " number_of_reviews | \n",
755 | " reviews_per_month | \n",
756 | " calculated_host_listings_count | \n",
757 | " availability_365 | \n",
758 | "
\n",
759 | " \n",
760 | " \n",
761 | " \n",
762 | " count | \n",
763 | " 4.890600e+04 | \n",
764 | " 4.890600e+04 | \n",
765 | " 48906.000000 | \n",
766 | " 48906.000000 | \n",
767 | " 48906.000000 | \n",
768 | " 48906.000000 | \n",
769 | " 48906.000000 | \n",
770 | " 38854.000000 | \n",
771 | " 48906.000000 | \n",
772 | " 48906.000000 | \n",
773 | "
\n",
774 | " \n",
775 | " mean | \n",
776 | " 1.901287e+07 | \n",
777 | " 6.760480e+07 | \n",
778 | " 40.728952 | \n",
779 | " -73.952175 | \n",
780 | " 152.711324 | \n",
781 | " 7.031612 | \n",
782 | " 23.300454 | \n",
783 | " 1.373151 | \n",
784 | " 7.142702 | \n",
785 | " 112.782031 | \n",
786 | "
\n",
787 | " \n",
788 | " std | \n",
789 | " 1.098557e+07 | \n",
790 | " 7.860866e+07 | \n",
791 | " 0.054529 | \n",
792 | " 0.046154 | \n",
793 | " 240.128713 | \n",
794 | " 20.512489 | \n",
795 | " 44.607175 | \n",
796 | " 1.680270 | \n",
797 | " 32.948926 | \n",
798 | " 131.620370 | \n",
799 | "
\n",
800 | " \n",
801 | " min | \n",
802 | " 2.539000e+03 | \n",
803 | " 2.438000e+03 | \n",
804 | " 40.499790 | \n",
805 | " -74.244420 | \n",
806 | " 0.000000 | \n",
807 | " 1.000000 | \n",
808 | " 0.000000 | \n",
809 | " 0.010000 | \n",
810 | " 1.000000 | \n",
811 | " 0.000000 | \n",
812 | "
\n",
813 | " \n",
814 | " 25% | \n",
815 | " 9.464662e+06 | \n",
816 | " 7.809567e+06 | \n",
817 | " 40.690100 | \n",
818 | " -73.983080 | \n",
819 | " 69.000000 | \n",
820 | " 1.000000 | \n",
821 | " 1.000000 | \n",
822 | " 0.190000 | \n",
823 | " 1.000000 | \n",
824 | " 0.000000 | \n",
825 | "
\n",
826 | " \n",
827 | " 50% | \n",
828 | " 1.967545e+07 | \n",
829 | " 3.078463e+07 | \n",
830 | " 40.723080 | \n",
831 | " -73.955685 | \n",
832 | " 106.000000 | \n",
833 | " 3.000000 | \n",
834 | " 5.000000 | \n",
835 | " 0.720000 | \n",
836 | " 1.000000 | \n",
837 | " 45.000000 | \n",
838 | "
\n",
839 | " \n",
840 | " 75% | \n",
841 | " 2.915085e+07 | \n",
842 | " 1.074344e+08 | \n",
843 | " 40.763120 | \n",
844 | " -73.936283 | \n",
845 | " 175.000000 | \n",
846 | " 5.000000 | \n",
847 | " 24.000000 | \n",
848 | " 2.020000 | \n",
849 | " 2.000000 | \n",
850 | " 227.000000 | \n",
851 | "
\n",
852 | " \n",
853 | " max | \n",
854 | " 3.648724e+07 | \n",
855 | " 2.743213e+08 | \n",
856 | " 40.913060 | \n",
857 | " -73.712990 | \n",
858 | " 10000.000000 | \n",
859 | " 1250.000000 | \n",
860 | " 629.000000 | \n",
861 | " 58.500000 | \n",
862 | " 327.000000 | \n",
863 | " 365.000000 | \n",
864 | "
\n",
865 | " \n",
866 | "
\n",
867 | "
"
868 | ],
869 | "text/plain": [
870 | " id host_id latitude longitude price \\\n",
871 | "count 4.890600e+04 4.890600e+04 48906.000000 48906.000000 48906.000000 \n",
872 | "mean 1.901287e+07 6.760480e+07 40.728952 -73.952175 152.711324 \n",
873 | "std 1.098557e+07 7.860866e+07 0.054529 0.046154 240.128713 \n",
874 | "min 2.539000e+03 2.438000e+03 40.499790 -74.244420 0.000000 \n",
875 | "25% 9.464662e+06 7.809567e+06 40.690100 -73.983080 69.000000 \n",
876 | "50% 1.967545e+07 3.078463e+07 40.723080 -73.955685 106.000000 \n",
877 | "75% 2.915085e+07 1.074344e+08 40.763120 -73.936283 175.000000 \n",
878 | "max 3.648724e+07 2.743213e+08 40.913060 -73.712990 10000.000000 \n",
879 | "\n",
880 | " minimum_nights number_of_reviews reviews_per_month \\\n",
881 | "count 48906.000000 48906.000000 38854.000000 \n",
882 | "mean 7.031612 23.300454 1.373151 \n",
883 | "std 20.512489 44.607175 1.680270 \n",
884 | "min 1.000000 0.000000 0.010000 \n",
885 | "25% 1.000000 1.000000 0.190000 \n",
886 | "50% 3.000000 5.000000 0.720000 \n",
887 | "75% 5.000000 24.000000 2.020000 \n",
888 | "max 1250.000000 629.000000 58.500000 \n",
889 | "\n",
890 | " calculated_host_listings_count availability_365 \n",
891 | "count 48906.000000 48906.000000 \n",
892 | "mean 7.142702 112.782031 \n",
893 | "std 32.948926 131.620370 \n",
894 | "min 1.000000 0.000000 \n",
895 | "25% 1.000000 0.000000 \n",
896 | "50% 1.000000 45.000000 \n",
897 | "75% 2.000000 227.000000 \n",
898 | "max 327.000000 365.000000 "
899 | ]
900 | },
901 | "execution_count": 38,
902 | "metadata": {},
903 | "output_type": "execute_result"
904 | }
905 | ],
906 | "source": [
907 | "df.describe()"
908 | ]
909 | },
910 | {
911 | "cell_type": "markdown",
912 | "id": "290855e4",
913 | "metadata": {},
914 | "source": [
915 | "## 6. Are there any duplicate values?"
916 | ]
917 | },
918 | {
919 | "cell_type": "code",
920 | "execution_count": 39,
921 | "id": "0298e5a3",
922 | "metadata": {},
923 | "outputs": [
924 | {
925 | "data": {
926 | "text/plain": [
927 | "11"
928 | ]
929 | },
930 | "execution_count": 39,
931 | "metadata": {},
932 | "output_type": "execute_result"
933 | }
934 | ],
935 | "source": [
936 | "df.duplicated().sum()"
937 | ]
938 | },
939 | {
940 | "cell_type": "markdown",
941 | "id": "59b998f0",
942 | "metadata": {},
943 | "source": [
944 | "## 7. How many unique values are there in each column?"
945 | ]
946 | },
947 | {
948 | "cell_type": "code",
949 | "execution_count": 40,
950 | "id": "6541d846",
951 | "metadata": {},
952 | "outputs": [
953 | {
954 | "data": {
955 | "text/plain": [
956 | "id 48895\n",
957 | "name 47896\n",
958 | "host_id 37457\n",
959 | "host_name 11452\n",
960 | "neighbourhood_group 5\n",
961 | "neighbourhood 221\n",
962 | "latitude 19048\n",
963 | "longitude 14718\n",
964 | "room_type 3\n",
965 | "price 674\n",
966 | "minimum_nights 109\n",
967 | "number_of_reviews 394\n",
968 | "last_review 1764\n",
969 | "reviews_per_month 937\n",
970 | "calculated_host_listings_count 47\n",
971 | "availability_365 366\n",
972 | "dtype: int64"
973 | ]
974 | },
975 | "execution_count": 40,
976 | "metadata": {},
977 | "output_type": "execute_result"
978 | }
979 | ],
980 | "source": [
981 | "df.nunique()"
982 | ]
983 | },
984 | {
985 | "cell_type": "code",
986 | "execution_count": 41,
987 | "id": "ce2dc51b",
988 | "metadata": {},
989 | "outputs": [
990 | {
991 | "data": {
992 | "text/plain": [
993 | "array(['Brooklyn', 'Manhattan', 'Queens', 'Staten Island', 'Bronx'],\n",
994 | " dtype=object)"
995 | ]
996 | },
997 | "execution_count": 41,
998 | "metadata": {},
999 | "output_type": "execute_result"
1000 | }
1001 | ],
1002 | "source": [
1003 | "df[\"neighbourhood_group\"].unique()"
1004 | ]
1005 | },
1006 | {
1007 | "cell_type": "markdown",
1008 | "id": "61865104",
1009 | "metadata": {},
1010 | "source": [
1011 | "## 8. Are there any outliers in the numerical columns?"
1012 | ]
1013 | },
1014 | {
1015 | "cell_type": "code",
1016 | "execution_count": 42,
1017 | "id": "03a7266d",
1018 | "metadata": {},
1019 | "outputs": [
1020 | {
1021 | "data": {
1022 | "text/plain": [
1023 | ""
1024 | ]
1025 | },
1026 | "execution_count": 42,
1027 | "metadata": {},
1028 | "output_type": "execute_result"
1029 | },
1030 | {
1031 | "data": {
1032 | "image/png": "\n",
1033 | "text/plain": [
1034 | ""
1035 | ]
1036 | },
1037 | "metadata": {},
1038 | "output_type": "display_data"
1039 | }
1040 | ],
1041 | "source": [
1042 | "sns.boxplot(df[\"price\"])"
1043 | ]
1044 | },
1045 | {
1046 | "cell_type": "code",
1047 | "execution_count": 43,
1048 | "id": "35d534e9",
1049 | "metadata": {},
1050 | "outputs": [
1051 | {
1052 | "data": {
1053 | "text/plain": [
1054 | ""
1055 | ]
1056 | },
1057 | "execution_count": 43,
1058 | "metadata": {},
1059 | "output_type": "execute_result"
1060 | },
1061 | {
1062 | "data": {
1063 | "image/png": "\n",
1064 | "text/plain": [
1065 | ""
1066 | ]
1067 | },
1068 | "metadata": {},
1069 | "output_type": "display_data"
1070 | }
1071 | ],
1072 | "source": [
1073 | "sns.boxplot(df[\"availability_365\"])"
1074 | ]
1075 | }
1076 | ],
1077 | "metadata": {
1078 | "kernelspec": {
1079 | "display_name": "Python 3 (ipykernel)",
1080 | "language": "python",
1081 | "name": "python3"
1082 | },
1083 | "language_info": {
1084 | "codemirror_mode": {
1085 | "name": "ipython",
1086 | "version": 3
1087 | },
1088 | "file_extension": ".py",
1089 | "mimetype": "text/x-python",
1090 | "name": "python",
1091 | "nbconvert_exporter": "python",
1092 | "pygments_lexer": "ipython3",
1093 | "version": "3.11.1"
1094 | }
1095 | },
1096 | "nbformat": 4,
1097 | "nbformat_minor": 5
1098 | }
1099 |
--------------------------------------------------------------------------------
/3 Missing values.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "bb76a3b8",
6 | "metadata": {},
7 | "source": [
8 | "In data-related research, it is very important to handle missing data, either by deleting it or by imputation (filling in the missing values with an estimate)."
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "2dbb8dd1",
14 | "metadata": {},
15 | "source": [
16 | "Different Methods of Dealing With Missing Data\n",
17 | "1. Deleting the column with missing data\n",
18 | "2. Deleting the row with missing data\n",
19 | "3. Filling the Missing Values – Imputation\n",
20 | " \n",
21 | " (i) Numerical data - use mean\n",
22 | " \n",
23 | " (ii) categorical data \n",
24 | " - use mode\n",
25 | " - assign the NaN values their own category\n",
26 | " \n",
27 | " \n",
28 | "4. Advanced Imputation"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "id": "e9dcf94b",
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "import pandas as pd"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 3,
44 | "id": "d2efcf45",
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "data = pd.read_csv(\"AB_NYC_2019.csv\")"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 178,
54 | "id": "63308fe4",
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "data": {
59 | "text/html": [
60 | "\n",
61 | "\n",
74 | "
\n",
75 | " \n",
76 | " \n",
77 | " | \n",
78 | " id | \n",
79 | " name | \n",
80 | " host_id | \n",
81 | " host_name | \n",
82 | " neighbourhood_group | \n",
83 | " neighbourhood | \n",
84 | " latitude | \n",
85 | " longitude | \n",
86 | " room_type | \n",
87 | " price | \n",
88 | " minimum_nights | \n",
89 | " number_of_reviews | \n",
90 | " last_review | \n",
91 | " reviews_per_month | \n",
92 | " calculated_host_listings_count | \n",
93 | " availability_365 | \n",
94 | "
\n",
95 | " \n",
96 | " \n",
97 | " \n",
98 | " 0 | \n",
99 | " 2539 | \n",
100 | " Clean & quiet apt home by the park | \n",
101 | " 2787 | \n",
102 | " John | \n",
103 | " Brooklyn | \n",
104 | " Kensington | \n",
105 | " 40.64749 | \n",
106 | " -73.97237 | \n",
107 | " Private room | \n",
108 | " 149 | \n",
109 | " 1 | \n",
110 | " 9 | \n",
111 | " 2018-10-19 | \n",
112 | " 0.21 | \n",
113 | " 6 | \n",
114 | " 365 | \n",
115 | "
\n",
116 | " \n",
117 | " 1 | \n",
118 | " 2595 | \n",
119 | " Skylit Midtown Castle | \n",
120 | " 2845 | \n",
121 | " Jennifer | \n",
122 | " Manhattan | \n",
123 | " Midtown | \n",
124 | " 40.75362 | \n",
125 | " -73.98377 | \n",
126 | " Entire home/apt | \n",
127 | " 225 | \n",
128 | " 1 | \n",
129 | " 45 | \n",
130 | " 2019-05-21 | \n",
131 | " 0.38 | \n",
132 | " 2 | \n",
133 | " 355 | \n",
134 | "
\n",
135 | " \n",
136 | " 2 | \n",
137 | " 3647 | \n",
138 | " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
139 | " 4632 | \n",
140 | " Elisabeth | \n",
141 | " Manhattan | \n",
142 | " Harlem | \n",
143 | " 40.80902 | \n",
144 | " -73.94190 | \n",
145 | " Private room | \n",
146 | " 150 | \n",
147 | " 3 | \n",
148 | " 0 | \n",
149 | " NaN | \n",
150 | " NaN | \n",
151 | " 1 | \n",
152 | " 365 | \n",
153 | "
\n",
154 | " \n",
155 | " 3 | \n",
156 | " 3831 | \n",
157 | " Cozy Entire Floor of Brownstone | \n",
158 | " 4869 | \n",
159 | " LisaRoxanne | \n",
160 | " Brooklyn | \n",
161 | " Clinton Hill | \n",
162 | " 40.68514 | \n",
163 | " -73.95976 | \n",
164 | " Entire home/apt | \n",
165 | " 89 | \n",
166 | " 1 | \n",
167 | " 270 | \n",
168 | " 2019-07-05 | \n",
169 | " 4.64 | \n",
170 | " 1 | \n",
171 | " 194 | \n",
172 | "
\n",
173 | " \n",
174 | " 4 | \n",
175 | " 5022 | \n",
176 | " Entire Apt: Spacious Studio/Loft by central park | \n",
177 | " 7192 | \n",
178 | " Laura | \n",
179 | " Manhattan | \n",
180 | " East Harlem | \n",
181 | " 40.79851 | \n",
182 | " -73.94399 | \n",
183 | " Entire home/apt | \n",
184 | " 80 | \n",
185 | " 10 | \n",
186 | " 9 | \n",
187 | " 2018-11-19 | \n",
188 | " 0.10 | \n",
189 | " 1 | \n",
190 | " 0 | \n",
191 | "
\n",
192 | " \n",
193 | "
\n",
194 | "
"
195 | ],
196 | "text/plain": [
197 | " id name host_id \\\n",
198 | "0 2539 Clean & quiet apt home by the park 2787 \n",
199 | "1 2595 Skylit Midtown Castle 2845 \n",
200 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n",
201 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n",
202 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n",
203 | "\n",
204 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
205 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n",
206 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
207 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n",
208 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
209 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n",
210 | "\n",
211 | " room_type price minimum_nights number_of_reviews last_review \\\n",
212 | "0 Private room 149 1 9 2018-10-19 \n",
213 | "1 Entire home/apt 225 1 45 2019-05-21 \n",
214 | "2 Private room 150 3 0 NaN \n",
215 | "3 Entire home/apt 89 1 270 2019-07-05 \n",
216 | "4 Entire home/apt 80 10 9 2018-11-19 \n",
217 | "\n",
218 | " reviews_per_month calculated_host_listings_count availability_365 \n",
219 | "0 0.21 6 365 \n",
220 | "1 0.38 2 355 \n",
221 | "2 NaN 1 365 \n",
222 | "3 4.64 1 194 \n",
223 | "4 0.10 1 0 "
224 | ]
225 | },
226 | "execution_count": 178,
227 | "metadata": {},
228 | "output_type": "execute_result"
229 | }
230 | ],
231 | "source": [
232 | "data.head()"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 5,
238 | "id": "312e5057",
239 | "metadata": {},
240 | "outputs": [
241 | {
242 | "data": {
243 | "text/plain": [
244 | "(48915, 16)"
245 | ]
246 | },
247 | "execution_count": 5,
248 | "metadata": {},
249 | "output_type": "execute_result"
250 | }
251 | ],
252 | "source": [
253 | "data.shape"
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": 4,
259 | "id": "899d0abc",
260 | "metadata": {
261 | "scrolled": true
262 | },
263 | "outputs": [
264 | {
265 | "data": {
266 | "text/plain": [
267 | "id 0\n",
268 | "name 16\n",
269 | "host_id 0\n",
270 | "host_name 21\n",
271 | "neighbourhood_group 0\n",
272 | "neighbourhood 0\n",
273 | "latitude 0\n",
274 | "longitude 0\n",
275 | "room_type 0\n",
276 | "price 0\n",
277 | "minimum_nights 0\n",
278 | "number_of_reviews 0\n",
279 | "last_review 10052\n",
280 | "reviews_per_month 10052\n",
281 | "calculated_host_listings_count 0\n",
282 | "availability_365 0\n",
283 | "dtype: int64"
284 | ]
285 | },
286 | "execution_count": 4,
287 | "metadata": {},
288 | "output_type": "execute_result"
289 | }
290 | ],
291 | "source": [
292 | "data.isna().sum()"
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "id": "835ab45b",
298 | "metadata": {},
299 | "source": [
300 | "### Removing column"
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 6,
306 | "id": "6ff99b23",
307 | "metadata": {},
308 | "outputs": [],
309 | "source": [
310 | "df1 = data.drop(\"last_review\",axis =1)"
311 | ]
312 | },
313 | {
314 | "cell_type": "code",
315 | "execution_count": 7,
316 | "id": "c19a92e7",
317 | "metadata": {},
318 | "outputs": [
319 | {
320 | "data": {
321 | "text/plain": [
322 | "id 0\n",
323 | "name 16\n",
324 | "host_id 0\n",
325 | "host_name 21\n",
326 | "neighbourhood_group 0\n",
327 | "neighbourhood 0\n",
328 | "latitude 0\n",
329 | "longitude 0\n",
330 | "room_type 0\n",
331 | "price 0\n",
332 | "minimum_nights 0\n",
333 | "number_of_reviews 0\n",
334 | "reviews_per_month 10052\n",
335 | "calculated_host_listings_count 0\n",
336 | "availability_365 0\n",
337 | "dtype: int64"
338 | ]
339 | },
340 | "execution_count": 7,
341 | "metadata": {},
342 | "output_type": "execute_result"
343 | }
344 | ],
345 | "source": [
346 | "df1.isna().sum()"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": 184,
352 | "id": "7678f06e",
353 | "metadata": {},
354 | "outputs": [
355 | {
356 | "data": {
357 | "text/plain": [
358 | "(48895, 15)"
359 | ]
360 | },
361 | "execution_count": 184,
362 | "metadata": {},
363 | "output_type": "execute_result"
364 | }
365 | ],
366 | "source": [
367 | "df1.shape"
368 | ]
369 | },
370 | {
371 | "cell_type": "markdown",
372 | "id": "e872761c",
373 | "metadata": {},
374 | "source": [
375 | "### Removing rows"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": 185,
381 | "id": "c73417f6",
382 | "metadata": {},
383 | "outputs": [],
384 | "source": [
385 | "df2 = data.dropna()"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 186,
391 | "id": "6fae1f74",
392 | "metadata": {},
393 | "outputs": [
394 | {
395 | "data": {
396 | "text/plain": [
397 | "(38821, 16)"
398 | ]
399 | },
400 | "execution_count": 186,
401 | "metadata": {},
402 | "output_type": "execute_result"
403 | }
404 | ],
405 | "source": [
406 | "df2.shape"
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": 187,
412 | "id": "1c655dc5",
413 | "metadata": {},
414 | "outputs": [
415 | {
416 | "data": {
417 | "text/plain": [
418 | "id 0\n",
419 | "name 0\n",
420 | "host_id 0\n",
421 | "host_name 0\n",
422 | "neighbourhood_group 0\n",
423 | "neighbourhood 0\n",
424 | "latitude 0\n",
425 | "longitude 0\n",
426 | "room_type 0\n",
427 | "price 0\n",
428 | "minimum_nights 0\n",
429 | "number_of_reviews 0\n",
430 | "last_review 0\n",
431 | "reviews_per_month 0\n",
432 | "calculated_host_listings_count 0\n",
433 | "availability_365 0\n",
434 | "dtype: int64"
435 | ]
436 | },
437 | "execution_count": 187,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | }
441 | ],
442 | "source": [
443 | "df2.isna().sum()"
444 | ]
445 | },
446 | {
447 | "cell_type": "markdown",
448 | "id": "25afd242",
449 | "metadata": {},
450 | "source": [
451 | "### Filling the missing values - Imputation"
452 | ]
453 | },
454 | {
455 | "cell_type": "code",
456 | "execution_count": 188,
457 | "id": "427c815c",
458 | "metadata": {},
459 | "outputs": [],
460 | "source": [
461 | "df3 = pd.read_csv(\"AB_NYC_2019.csv\")"
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "execution_count": 189,
467 | "id": "9a34dc54",
468 | "metadata": {},
469 | "outputs": [
470 | {
471 | "data": {
472 | "text/plain": [
473 | "(48895, 16)"
474 | ]
475 | },
476 | "execution_count": 189,
477 | "metadata": {},
478 | "output_type": "execute_result"
479 | }
480 | ],
481 | "source": [
482 | "df3.shape"
483 | ]
484 | },
485 | {
486 | "cell_type": "code",
487 | "execution_count": 190,
488 | "id": "a32e8937",
489 | "metadata": {},
490 | "outputs": [
491 | {
492 | "data": {
493 | "text/html": [
494 | "\n",
495 | "\n",
508 | "
\n",
509 | " \n",
510 | " \n",
511 | " | \n",
512 | " id | \n",
513 | " name | \n",
514 | " host_id | \n",
515 | " host_name | \n",
516 | " neighbourhood_group | \n",
517 | " neighbourhood | \n",
518 | " latitude | \n",
519 | " longitude | \n",
520 | " room_type | \n",
521 | " price | \n",
522 | " minimum_nights | \n",
523 | " number_of_reviews | \n",
524 | " last_review | \n",
525 | " reviews_per_month | \n",
526 | " calculated_host_listings_count | \n",
527 | " availability_365 | \n",
528 | "
\n",
529 | " \n",
530 | " \n",
531 | " \n",
532 | " 0 | \n",
533 | " 2539 | \n",
534 | " Clean & quiet apt home by the park | \n",
535 | " 2787 | \n",
536 | " John | \n",
537 | " Brooklyn | \n",
538 | " Kensington | \n",
539 | " 40.64749 | \n",
540 | " -73.97237 | \n",
541 | " Private room | \n",
542 | " 149 | \n",
543 | " 1 | \n",
544 | " 9 | \n",
545 | " 2018-10-19 | \n",
546 | " 0.21 | \n",
547 | " 6 | \n",
548 | " 365 | \n",
549 | "
\n",
550 | " \n",
551 | " 1 | \n",
552 | " 2595 | \n",
553 | " Skylit Midtown Castle | \n",
554 | " 2845 | \n",
555 | " Jennifer | \n",
556 | " Manhattan | \n",
557 | " Midtown | \n",
558 | " 40.75362 | \n",
559 | " -73.98377 | \n",
560 | " Entire home/apt | \n",
561 | " 225 | \n",
562 | " 1 | \n",
563 | " 45 | \n",
564 | " 2019-05-21 | \n",
565 | " 0.38 | \n",
566 | " 2 | \n",
567 | " 355 | \n",
568 | "
\n",
569 | " \n",
570 | " 2 | \n",
571 | " 3647 | \n",
572 | " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
573 | " 4632 | \n",
574 | " Elisabeth | \n",
575 | " Manhattan | \n",
576 | " Harlem | \n",
577 | " 40.80902 | \n",
578 | " -73.94190 | \n",
579 | " Private room | \n",
580 | " 150 | \n",
581 | " 3 | \n",
582 | " 0 | \n",
583 | " NaN | \n",
584 | " NaN | \n",
585 | " 1 | \n",
586 | " 365 | \n",
587 | "
\n",
588 | " \n",
589 | " 3 | \n",
590 | " 3831 | \n",
591 | " Cozy Entire Floor of Brownstone | \n",
592 | " 4869 | \n",
593 | " LisaRoxanne | \n",
594 | " Brooklyn | \n",
595 | " Clinton Hill | \n",
596 | " 40.68514 | \n",
597 | " -73.95976 | \n",
598 | " Entire home/apt | \n",
599 | " 89 | \n",
600 | " 1 | \n",
601 | " 270 | \n",
602 | " 2019-07-05 | \n",
603 | " 4.64 | \n",
604 | " 1 | \n",
605 | " 194 | \n",
606 | "
\n",
607 | " \n",
608 | " 4 | \n",
609 | " 5022 | \n",
610 | " Entire Apt: Spacious Studio/Loft by central park | \n",
611 | " 7192 | \n",
612 | " Laura | \n",
613 | " Manhattan | \n",
614 | " East Harlem | \n",
615 | " 40.79851 | \n",
616 | " -73.94399 | \n",
617 | " Entire home/apt | \n",
618 | " 80 | \n",
619 | " 10 | \n",
620 | " 9 | \n",
621 | " 2018-11-19 | \n",
622 | " 0.10 | \n",
623 | " 1 | \n",
624 | " 0 | \n",
625 | "
\n",
626 | " \n",
627 | "
\n",
628 | "
"
629 | ],
630 | "text/plain": [
631 | " id name host_id \\\n",
632 | "0 2539 Clean & quiet apt home by the park 2787 \n",
633 | "1 2595 Skylit Midtown Castle 2845 \n",
634 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n",
635 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n",
636 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n",
637 | "\n",
638 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
639 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n",
640 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
641 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n",
642 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
643 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n",
644 | "\n",
645 | " room_type price minimum_nights number_of_reviews last_review \\\n",
646 | "0 Private room 149 1 9 2018-10-19 \n",
647 | "1 Entire home/apt 225 1 45 2019-05-21 \n",
648 | "2 Private room 150 3 0 NaN \n",
649 | "3 Entire home/apt 89 1 270 2019-07-05 \n",
650 | "4 Entire home/apt 80 10 9 2018-11-19 \n",
651 | "\n",
652 | " reviews_per_month calculated_host_listings_count availability_365 \n",
653 | "0 0.21 6 365 \n",
654 | "1 0.38 2 355 \n",
655 | "2 NaN 1 365 \n",
656 | "3 4.64 1 194 \n",
657 | "4 0.10 1 0 "
658 | ]
659 | },
660 | "execution_count": 190,
661 | "metadata": {},
662 | "output_type": "execute_result"
663 | }
664 | ],
665 | "source": [
666 | "df3.head()"
667 | ]
668 | },
669 | {
670 | "cell_type": "code",
671 | "execution_count": 191,
672 | "id": "413c8f0f",
673 | "metadata": {},
674 | "outputs": [
675 | {
676 | "data": {
677 | "text/plain": [
678 | "id 0\n",
679 | "name 16\n",
680 | "host_id 0\n",
681 | "host_name 21\n",
682 | "neighbourhood_group 0\n",
683 | "neighbourhood 0\n",
684 | "latitude 0\n",
685 | "longitude 0\n",
686 | "room_type 0\n",
687 | "price 0\n",
688 | "minimum_nights 0\n",
689 | "number_of_reviews 0\n",
690 | "last_review 10052\n",
691 | "reviews_per_month 10052\n",
692 | "calculated_host_listings_count 0\n",
693 | "availability_365 0\n",
694 | "dtype: int64"
695 | ]
696 | },
697 | "execution_count": 191,
698 | "metadata": {},
699 | "output_type": "execute_result"
700 | }
701 | ],
702 | "source": [
703 | "df3.isna().sum()"
704 | ]
705 | },
706 | {
707 | "cell_type": "markdown",
708 | "id": "c509b61c",
709 | "metadata": {},
710 | "source": [
711 | "#### Numeric data (Use Mean)"
712 | ]
713 | },
714 | {
715 | "cell_type": "code",
716 | "execution_count": 192,
717 | "id": "3c56ea41",
718 | "metadata": {},
719 | "outputs": [],
720 | "source": [
721 | "mean_value = df3['reviews_per_month'].mean()\n",
722 | "df3['reviews_per_month'].fillna(mean_value, inplace=True)"
723 | ]
724 | },
725 | {
726 | "cell_type": "code",
727 | "execution_count": 193,
728 | "id": "a391aa62",
729 | "metadata": {},
730 | "outputs": [
731 | {
732 | "data": {
733 | "text/html": [
734 | "\n",
735 | "\n",
748 | "
\n",
749 | " \n",
750 | " \n",
751 | " | \n",
752 | " id | \n",
753 | " name | \n",
754 | " host_id | \n",
755 | " host_name | \n",
756 | " neighbourhood_group | \n",
757 | " neighbourhood | \n",
758 | " latitude | \n",
759 | " longitude | \n",
760 | " room_type | \n",
761 | " price | \n",
762 | " minimum_nights | \n",
763 | " number_of_reviews | \n",
764 | " last_review | \n",
765 | " reviews_per_month | \n",
766 | " calculated_host_listings_count | \n",
767 | " availability_365 | \n",
768 | "
\n",
769 | " \n",
770 | " \n",
771 | " \n",
772 | " 0 | \n",
773 | " 2539 | \n",
774 | " Clean & quiet apt home by the park | \n",
775 | " 2787 | \n",
776 | " John | \n",
777 | " Brooklyn | \n",
778 | " Kensington | \n",
779 | " 40.64749 | \n",
780 | " -73.97237 | \n",
781 | " Private room | \n",
782 | " 149 | \n",
783 | " 1 | \n",
784 | " 9 | \n",
785 | " 2018-10-19 | \n",
786 | " 0.210000 | \n",
787 | " 6 | \n",
788 | " 365 | \n",
789 | "
\n",
790 | " \n",
791 | " 1 | \n",
792 | " 2595 | \n",
793 | " Skylit Midtown Castle | \n",
794 | " 2845 | \n",
795 | " Jennifer | \n",
796 | " Manhattan | \n",
797 | " Midtown | \n",
798 | " 40.75362 | \n",
799 | " -73.98377 | \n",
800 | " Entire home/apt | \n",
801 | " 225 | \n",
802 | " 1 | \n",
803 | " 45 | \n",
804 | " 2019-05-21 | \n",
805 | " 0.380000 | \n",
806 | " 2 | \n",
807 | " 355 | \n",
808 | "
\n",
809 | " \n",
810 | " 2 | \n",
811 | " 3647 | \n",
812 | " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
813 | " 4632 | \n",
814 | " Elisabeth | \n",
815 | " Manhattan | \n",
816 | " Harlem | \n",
817 | " 40.80902 | \n",
818 | " -73.94190 | \n",
819 | " Private room | \n",
820 | " 150 | \n",
821 | " 3 | \n",
822 | " 0 | \n",
823 | " NaN | \n",
824 | " 1.373221 | \n",
825 | " 1 | \n",
826 | " 365 | \n",
827 | "
\n",
828 | " \n",
829 | " 3 | \n",
830 | " 3831 | \n",
831 | " Cozy Entire Floor of Brownstone | \n",
832 | " 4869 | \n",
833 | " LisaRoxanne | \n",
834 | " Brooklyn | \n",
835 | " Clinton Hill | \n",
836 | " 40.68514 | \n",
837 | " -73.95976 | \n",
838 | " Entire home/apt | \n",
839 | " 89 | \n",
840 | " 1 | \n",
841 | " 270 | \n",
842 | " 2019-07-05 | \n",
843 | " 4.640000 | \n",
844 | " 1 | \n",
845 | " 194 | \n",
846 | "
\n",
847 | " \n",
848 | " 4 | \n",
849 | " 5022 | \n",
850 | " Entire Apt: Spacious Studio/Loft by central park | \n",
851 | " 7192 | \n",
852 | " Laura | \n",
853 | " Manhattan | \n",
854 | " East Harlem | \n",
855 | " 40.79851 | \n",
856 | " -73.94399 | \n",
857 | " Entire home/apt | \n",
858 | " 80 | \n",
859 | " 10 | \n",
860 | " 9 | \n",
861 | " 2018-11-19 | \n",
862 | " 0.100000 | \n",
863 | " 1 | \n",
864 | " 0 | \n",
865 | "
\n",
866 | " \n",
867 | "
\n",
868 | "
"
869 | ],
870 | "text/plain": [
871 | " id name host_id \\\n",
872 | "0 2539 Clean & quiet apt home by the park 2787 \n",
873 | "1 2595 Skylit Midtown Castle 2845 \n",
874 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n",
875 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n",
876 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n",
877 | "\n",
878 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
879 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n",
880 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
881 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n",
882 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
883 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n",
884 | "\n",
885 | " room_type price minimum_nights number_of_reviews last_review \\\n",
886 | "0 Private room 149 1 9 2018-10-19 \n",
887 | "1 Entire home/apt 225 1 45 2019-05-21 \n",
888 | "2 Private room 150 3 0 NaN \n",
889 | "3 Entire home/apt 89 1 270 2019-07-05 \n",
890 | "4 Entire home/apt 80 10 9 2018-11-19 \n",
891 | "\n",
892 | " reviews_per_month calculated_host_listings_count availability_365 \n",
893 | "0 0.210000 6 365 \n",
894 | "1 0.380000 2 355 \n",
895 | "2 1.373221 1 365 \n",
896 | "3 4.640000 1 194 \n",
897 | "4 0.100000 1 0 "
898 | ]
899 | },
900 | "execution_count": 193,
901 | "metadata": {},
902 | "output_type": "execute_result"
903 | }
904 | ],
905 | "source": [
906 | "df3.head()"
907 | ]
908 | },
909 | {
910 | "cell_type": "code",
911 | "execution_count": 194,
912 | "id": "4d16c6c0",
913 | "metadata": {
914 | "scrolled": false
915 | },
916 | "outputs": [
917 | {
918 | "data": {
919 | "text/html": [
920 | "\n",
921 | "\n",
934 | "
\n",
935 | " \n",
936 | " \n",
937 | " | \n",
938 | " id | \n",
939 | " name | \n",
940 | " host_id | \n",
941 | " host_name | \n",
942 | " neighbourhood_group | \n",
943 | " neighbourhood | \n",
944 | " latitude | \n",
945 | " longitude | \n",
946 | " room_type | \n",
947 | " price | \n",
948 | " minimum_nights | \n",
949 | " number_of_reviews | \n",
950 | " last_review | \n",
951 | " reviews_per_month | \n",
952 | " calculated_host_listings_count | \n",
953 | " availability_365 | \n",
954 | "
\n",
955 | " \n",
956 | " \n",
957 | " \n",
958 | " 2 | \n",
959 | " 3647 | \n",
960 | " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
961 | " 4632 | \n",
962 | " Elisabeth | \n",
963 | " Manhattan | \n",
964 | " Harlem | \n",
965 | " 40.80902 | \n",
966 | " -73.94190 | \n",
967 | " Private room | \n",
968 | " 150 | \n",
969 | " 3 | \n",
970 | " 0 | \n",
971 | " NaN | \n",
972 | " 1.373221 | \n",
973 | " 1 | \n",
974 | " 365 | \n",
975 | "
\n",
976 | " \n",
977 | " 19 | \n",
978 | " 7750 | \n",
979 | " Huge 2 BR Upper East Cental Park | \n",
980 | " 17985 | \n",
981 | " Sing | \n",
982 | " Manhattan | \n",
983 | " East Harlem | \n",
984 | " 40.79685 | \n",
985 | " -73.94872 | \n",
986 | " Entire home/apt | \n",
987 | " 190 | \n",
988 | " 7 | \n",
989 | " 0 | \n",
990 | " NaN | \n",
991 | " 1.373221 | \n",
992 | " 2 | \n",
993 | " 249 | \n",
994 | "
\n",
995 | " \n",
996 | "
\n",
997 | "
"
998 | ],
999 | "text/plain": [
1000 | " id name host_id host_name \\\n",
1001 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 Elisabeth \n",
1002 | "19 7750 Huge 2 BR Upper East Cental Park 17985 Sing \n",
1003 | "\n",
1004 | " neighbourhood_group neighbourhood latitude longitude room_type \\\n",
1005 | "2 Manhattan Harlem 40.80902 -73.94190 Private room \n",
1006 | "19 Manhattan East Harlem 40.79685 -73.94872 Entire home/apt \n",
1007 | "\n",
1008 | " price minimum_nights number_of_reviews last_review reviews_per_month \\\n",
1009 | "2 150 3 0 NaN 1.373221 \n",
1010 | "19 190 7 0 NaN 1.373221 \n",
1011 | "\n",
1012 | " calculated_host_listings_count availability_365 \n",
1013 | "2 1 365 \n",
1014 | "19 2 249 "
1015 | ]
1016 | },
1017 | "execution_count": 194,
1018 | "metadata": {},
1019 | "output_type": "execute_result"
1020 | }
1021 | ],
1022 | "source": [
1023 | "df3[df3[\"number_of_reviews\"]==0].head(2)"
1024 | ]
1025 | },
1026 | {
1027 | "cell_type": "code",
1028 | "execution_count": 195,
1029 | "id": "033155b6",
1030 | "metadata": {},
1031 | "outputs": [
1032 | {
1033 | "data": {
1034 | "text/plain": [
1035 | "id 0\n",
1036 | "name 16\n",
1037 | "host_id 0\n",
1038 | "host_name 21\n",
1039 | "neighbourhood_group 0\n",
1040 | "neighbourhood 0\n",
1041 | "latitude 0\n",
1042 | "longitude 0\n",
1043 | "room_type 0\n",
1044 | "price 0\n",
1045 | "minimum_nights 0\n",
1046 | "number_of_reviews 0\n",
1047 | "last_review 10052\n",
1048 | "reviews_per_month 0\n",
1049 | "calculated_host_listings_count 0\n",
1050 | "availability_365 0\n",
1051 | "dtype: int64"
1052 | ]
1053 | },
1054 | "execution_count": 195,
1055 | "metadata": {},
1056 | "output_type": "execute_result"
1057 | }
1058 | ],
1059 | "source": [
1060 | "df3.isna().sum()"
1061 | ]
1062 | },
1063 | {
1064 | "cell_type": "markdown",
1065 | "id": "a0819870",
1066 | "metadata": {},
1067 | "source": [
1068 | "#### Categorical data (use the mode value)"
1069 | ]
1070 | },
1071 | {
1072 | "cell_type": "code",
1073 | "execution_count": 196,
1074 | "id": "7b666009",
1075 | "metadata": {},
1076 | "outputs": [
1077 | {
1078 | "data": {
1079 | "text/plain": [
1080 | "0 2018-10-19\n",
1081 | "1 2019-05-21\n",
1082 | "2 2019-06-23\n",
1083 | "3 2019-07-05\n",
1084 | "4 2018-11-19\n",
1085 | " ... \n",
1086 | "48890 2019-06-23\n",
1087 | "48891 2019-06-23\n",
1088 | "48892 2019-06-23\n",
1089 | "48893 2019-06-23\n",
1090 | "48894 2019-06-23\n",
1091 | "Name: last_review, Length: 48895, dtype: object"
1092 | ]
1093 | },
1094 | "execution_count": 196,
1095 | "metadata": {},
1096 | "output_type": "execute_result"
1097 | }
1098 | ],
1099 | "source": [
1100 | "df3[\"last_review\"].fillna(df3[\"last_review\"].value_counts().index[0])"
1101 | ]
1102 | },
1103 | {
1104 | "cell_type": "markdown",
1105 | "id": "ddb737e0",
1106 | "metadata": {},
1107 | "source": [
1108 | "#### Categorical data (use a new category)"
1109 | ]
1110 | },
1111 | {
1112 | "cell_type": "code",
1113 | "execution_count": 197,
1114 | "id": "5c4904a4",
1115 | "metadata": {},
1116 | "outputs": [],
1117 | "source": [
1118 | "df3[\"last_review\"].fillna(\"Not Reviewed\", inplace = True)"
1119 | ]
1120 | },
1121 | {
1122 | "cell_type": "code",
1123 | "execution_count": 198,
1124 | "id": "15dfab92",
1125 | "metadata": {},
1126 | "outputs": [
1127 | {
1128 | "data": {
1129 | "text/html": [
1130 | "\n",
1131 | "\n",
1144 | "
\n",
1145 | " \n",
1146 | " \n",
1147 | " | \n",
1148 | " id | \n",
1149 | " name | \n",
1150 | " host_id | \n",
1151 | " host_name | \n",
1152 | " neighbourhood_group | \n",
1153 | " neighbourhood | \n",
1154 | " latitude | \n",
1155 | " longitude | \n",
1156 | " room_type | \n",
1157 | " price | \n",
1158 | " minimum_nights | \n",
1159 | " number_of_reviews | \n",
1160 | " last_review | \n",
1161 | " reviews_per_month | \n",
1162 | " calculated_host_listings_count | \n",
1163 | " availability_365 | \n",
1164 | "
\n",
1165 | " \n",
1166 | " \n",
1167 | " \n",
1168 | " 0 | \n",
1169 | " 2539 | \n",
1170 | " Clean & quiet apt home by the park | \n",
1171 | " 2787 | \n",
1172 | " John | \n",
1173 | " Brooklyn | \n",
1174 | " Kensington | \n",
1175 | " 40.64749 | \n",
1176 | " -73.97237 | \n",
1177 | " Private room | \n",
1178 | " 149 | \n",
1179 | " 1 | \n",
1180 | " 9 | \n",
1181 | " 2018-10-19 | \n",
1182 | " 0.210000 | \n",
1183 | " 6 | \n",
1184 | " 365 | \n",
1185 | "
\n",
1186 | " \n",
1187 | " 1 | \n",
1188 | " 2595 | \n",
1189 | " Skylit Midtown Castle | \n",
1190 | " 2845 | \n",
1191 | " Jennifer | \n",
1192 | " Manhattan | \n",
1193 | " Midtown | \n",
1194 | " 40.75362 | \n",
1195 | " -73.98377 | \n",
1196 | " Entire home/apt | \n",
1197 | " 225 | \n",
1198 | " 1 | \n",
1199 | " 45 | \n",
1200 | " 2019-05-21 | \n",
1201 | " 0.380000 | \n",
1202 | " 2 | \n",
1203 | " 355 | \n",
1204 | "
\n",
1205 | " \n",
1206 | " 2 | \n",
1207 | " 3647 | \n",
1208 | " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
1209 | " 4632 | \n",
1210 | " Elisabeth | \n",
1211 | " Manhattan | \n",
1212 | " Harlem | \n",
1213 | " 40.80902 | \n",
1214 | " -73.94190 | \n",
1215 | " Private room | \n",
1216 | " 150 | \n",
1217 | " 3 | \n",
1218 | " 0 | \n",
1219 | " Not Reviewed | \n",
1220 | " 1.373221 | \n",
1221 | " 1 | \n",
1222 | " 365 | \n",
1223 | "
\n",
1224 | " \n",
1225 | " 3 | \n",
1226 | " 3831 | \n",
1227 | " Cozy Entire Floor of Brownstone | \n",
1228 | " 4869 | \n",
1229 | " LisaRoxanne | \n",
1230 | " Brooklyn | \n",
1231 | " Clinton Hill | \n",
1232 | " 40.68514 | \n",
1233 | " -73.95976 | \n",
1234 | " Entire home/apt | \n",
1235 | " 89 | \n",
1236 | " 1 | \n",
1237 | " 270 | \n",
1238 | " 2019-07-05 | \n",
1239 | " 4.640000 | \n",
1240 | " 1 | \n",
1241 | " 194 | \n",
1242 | "
\n",
1243 | " \n",
1244 | " 4 | \n",
1245 | " 5022 | \n",
1246 | " Entire Apt: Spacious Studio/Loft by central park | \n",
1247 | " 7192 | \n",
1248 | " Laura | \n",
1249 | " Manhattan | \n",
1250 | " East Harlem | \n",
1251 | " 40.79851 | \n",
1252 | " -73.94399 | \n",
1253 | " Entire home/apt | \n",
1254 | " 80 | \n",
1255 | " 10 | \n",
1256 | " 9 | \n",
1257 | " 2018-11-19 | \n",
1258 | " 0.100000 | \n",
1259 | " 1 | \n",
1260 | " 0 | \n",
1261 | "
\n",
1262 | " \n",
1263 | "
\n",
1264 | "
"
1265 | ],
1266 | "text/plain": [
1267 | " id name host_id \\\n",
1268 | "0 2539 Clean & quiet apt home by the park 2787 \n",
1269 | "1 2595 Skylit Midtown Castle 2845 \n",
1270 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n",
1271 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n",
1272 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n",
1273 | "\n",
1274 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
1275 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n",
1276 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
1277 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n",
1278 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
1279 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n",
1280 | "\n",
1281 | " room_type price minimum_nights number_of_reviews last_review \\\n",
1282 | "0 Private room 149 1 9 2018-10-19 \n",
1283 | "1 Entire home/apt 225 1 45 2019-05-21 \n",
1284 | "2 Private room 150 3 0 Not Reviewed \n",
1285 | "3 Entire home/apt 89 1 270 2019-07-05 \n",
1286 | "4 Entire home/apt 80 10 9 2018-11-19 \n",
1287 | "\n",
1288 | " reviews_per_month calculated_host_listings_count availability_365 \n",
1289 | "0 0.210000 6 365 \n",
1290 | "1 0.380000 2 355 \n",
1291 | "2 1.373221 1 365 \n",
1292 | "3 4.640000 1 194 \n",
1293 | "4 0.100000 1 0 "
1294 | ]
1295 | },
1296 | "execution_count": 198,
1297 | "metadata": {},
1298 | "output_type": "execute_result"
1299 | }
1300 | ],
1301 | "source": [
1302 | "df3.head()"
1303 | ]
1304 | },
1305 | {
1306 | "cell_type": "code",
1307 | "execution_count": 199,
1308 | "id": "ec36e3a0",
1309 | "metadata": {},
1310 | "outputs": [
1311 | {
1312 | "data": {
1313 | "text/plain": [
1314 | "id 0\n",
1315 | "name 16\n",
1316 | "host_id 0\n",
1317 | "host_name 21\n",
1318 | "neighbourhood_group 0\n",
1319 | "neighbourhood 0\n",
1320 | "latitude 0\n",
1321 | "longitude 0\n",
1322 | "room_type 0\n",
1323 | "price 0\n",
1324 | "minimum_nights 0\n",
1325 | "number_of_reviews 0\n",
1326 | "last_review 0\n",
1327 | "reviews_per_month 0\n",
1328 | "calculated_host_listings_count 0\n",
1329 | "availability_365 0\n",
1330 | "dtype: int64"
1331 | ]
1332 | },
1333 | "execution_count": 199,
1334 | "metadata": {},
1335 | "output_type": "execute_result"
1336 | }
1337 | ],
1338 | "source": [
1339 | "df3.isna().sum()"
1340 | ]
1341 | },
1342 | {
1343 | "cell_type": "code",
1344 | "execution_count": 200,
1345 | "id": "e73131e5",
1346 | "metadata": {},
1347 | "outputs": [],
1348 | "source": [
1349 | "df3[\"host_name\"].fillna(\"Unknown\", inplace = True)"
1350 | ]
1351 | },
1352 | {
1353 | "cell_type": "code",
1354 | "execution_count": 201,
1355 | "id": "4b4b6401",
1356 | "metadata": {},
1357 | "outputs": [
1358 | {
1359 | "data": {
1360 | "text/plain": [
1361 | "id 0\n",
1362 | "name 16\n",
1363 | "host_id 0\n",
1364 | "host_name 0\n",
1365 | "neighbourhood_group 0\n",
1366 | "neighbourhood 0\n",
1367 | "latitude 0\n",
1368 | "longitude 0\n",
1369 | "room_type 0\n",
1370 | "price 0\n",
1371 | "minimum_nights 0\n",
1372 | "number_of_reviews 0\n",
1373 | "last_review 0\n",
1374 | "reviews_per_month 0\n",
1375 | "calculated_host_listings_count 0\n",
1376 | "availability_365 0\n",
1377 | "dtype: int64"
1378 | ]
1379 | },
1380 | "execution_count": 201,
1381 | "metadata": {},
1382 | "output_type": "execute_result"
1383 | }
1384 | ],
1385 | "source": [
1386 | "df3.isna().sum()"
1387 | ]
1388 | },
1389 | {
1390 | "cell_type": "code",
1391 | "execution_count": 202,
1392 | "id": "2578e4a2",
1393 | "metadata": {},
1394 | "outputs": [],
1395 | "source": [
1396 | "df3[\"name\"].fillna(method = \"pad\", inplace = True)"
1397 | ]
1398 | },
1399 | {
1400 | "cell_type": "code",
1401 | "execution_count": 203,
1402 | "id": "65efa95f",
1403 | "metadata": {},
1404 | "outputs": [
1405 | {
1406 | "data": {
1407 | "text/plain": [
1408 | "id 0\n",
1409 | "name 0\n",
1410 | "host_id 0\n",
1411 | "host_name 0\n",
1412 | "neighbourhood_group 0\n",
1413 | "neighbourhood 0\n",
1414 | "latitude 0\n",
1415 | "longitude 0\n",
1416 | "room_type 0\n",
1417 | "price 0\n",
1418 | "minimum_nights 0\n",
1419 | "number_of_reviews 0\n",
1420 | "last_review 0\n",
1421 | "reviews_per_month 0\n",
1422 | "calculated_host_listings_count 0\n",
1423 | "availability_365 0\n",
1424 | "dtype: int64"
1425 | ]
1426 | },
1427 | "execution_count": 203,
1428 | "metadata": {},
1429 | "output_type": "execute_result"
1430 | }
1431 | ],
1432 | "source": [
1433 | "df3.isna().sum()"
1434 | ]
1435 | },
1436 | {
1437 | "cell_type": "code",
1438 | "execution_count": 204,
1439 | "id": "d8622bff",
1440 | "metadata": {
1441 | "scrolled": true
1442 | },
1443 | "outputs": [
1444 | {
1445 | "data": {
1446 | "text/html": [
1447 | "\n",
1448 | "\n",
1461 | "
\n",
1462 | " \n",
1463 | " \n",
1464 | " | \n",
1465 | " id | \n",
1466 | " name | \n",
1467 | " host_id | \n",
1468 | " host_name | \n",
1469 | " neighbourhood_group | \n",
1470 | " neighbourhood | \n",
1471 | " latitude | \n",
1472 | " longitude | \n",
1473 | " room_type | \n",
1474 | " price | \n",
1475 | " minimum_nights | \n",
1476 | " number_of_reviews | \n",
1477 | " last_review | \n",
1478 | " reviews_per_month | \n",
1479 | " calculated_host_listings_count | \n",
1480 | " availability_365 | \n",
1481 | "
\n",
1482 | " \n",
1483 | " \n",
1484 | " \n",
1485 | " 0 | \n",
1486 | " 2539 | \n",
1487 | " Clean & quiet apt home by the park | \n",
1488 | " 2787 | \n",
1489 | " John | \n",
1490 | " Brooklyn | \n",
1491 | " Kensington | \n",
1492 | " 40.64749 | \n",
1493 | " -73.97237 | \n",
1494 | " Private room | \n",
1495 | " 149 | \n",
1496 | " 1 | \n",
1497 | " 9 | \n",
1498 | " 2018-10-19 | \n",
1499 | " 0.210000 | \n",
1500 | " 6 | \n",
1501 | " 365 | \n",
1502 | "
\n",
1503 | " \n",
1504 | " 1 | \n",
1505 | " 2595 | \n",
1506 | " Skylit Midtown Castle | \n",
1507 | " 2845 | \n",
1508 | " Jennifer | \n",
1509 | " Manhattan | \n",
1510 | " Midtown | \n",
1511 | " 40.75362 | \n",
1512 | " -73.98377 | \n",
1513 | " Entire home/apt | \n",
1514 | " 225 | \n",
1515 | " 1 | \n",
1516 | " 45 | \n",
1517 | " 2019-05-21 | \n",
1518 | " 0.380000 | \n",
1519 | " 2 | \n",
1520 | " 355 | \n",
1521 | "
\n",
1522 | " \n",
1523 | " 2 | \n",
1524 | " 3647 | \n",
1525 | " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
1526 | " 4632 | \n",
1527 | " Elisabeth | \n",
1528 | " Manhattan | \n",
1529 | " Harlem | \n",
1530 | " 40.80902 | \n",
1531 | " -73.94190 | \n",
1532 | " Private room | \n",
1533 | " 150 | \n",
1534 | " 3 | \n",
1535 | " 0 | \n",
1536 | " Not Reviewed | \n",
1537 | " 1.373221 | \n",
1538 | " 1 | \n",
1539 | " 365 | \n",
1540 | "
\n",
1541 | " \n",
1542 | " 3 | \n",
1543 | " 3831 | \n",
1544 | " Cozy Entire Floor of Brownstone | \n",
1545 | " 4869 | \n",
1546 | " LisaRoxanne | \n",
1547 | " Brooklyn | \n",
1548 | " Clinton Hill | \n",
1549 | " 40.68514 | \n",
1550 | " -73.95976 | \n",
1551 | " Entire home/apt | \n",
1552 | " 89 | \n",
1553 | " 1 | \n",
1554 | " 270 | \n",
1555 | " 2019-07-05 | \n",
1556 | " 4.640000 | \n",
1557 | " 1 | \n",
1558 | " 194 | \n",
1559 | "
\n",
1560 | " \n",
1561 | " 4 | \n",
1562 | " 5022 | \n",
1563 | " Entire Apt: Spacious Studio/Loft by central park | \n",
1564 | " 7192 | \n",
1565 | " Laura | \n",
1566 | " Manhattan | \n",
1567 | " East Harlem | \n",
1568 | " 40.79851 | \n",
1569 | " -73.94399 | \n",
1570 | " Entire home/apt | \n",
1571 | " 80 | \n",
1572 | " 10 | \n",
1573 | " 9 | \n",
1574 | " 2018-11-19 | \n",
1575 | " 0.100000 | \n",
1576 | " 1 | \n",
1577 | " 0 | \n",
1578 | "
\n",
1579 | " \n",
1580 | "
\n",
1581 | "
"
1582 | ],
1583 | "text/plain": [
1584 | " id name host_id \\\n",
1585 | "0 2539 Clean & quiet apt home by the park 2787 \n",
1586 | "1 2595 Skylit Midtown Castle 2845 \n",
1587 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n",
1588 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n",
1589 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n",
1590 | "\n",
1591 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
1592 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n",
1593 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
1594 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n",
1595 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
1596 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n",
1597 | "\n",
1598 | " room_type price minimum_nights number_of_reviews last_review \\\n",
1599 | "0 Private room 149 1 9 2018-10-19 \n",
1600 | "1 Entire home/apt 225 1 45 2019-05-21 \n",
1601 | "2 Private room 150 3 0 Not Reviewed \n",
1602 | "3 Entire home/apt 89 1 270 2019-07-05 \n",
1603 | "4 Entire home/apt 80 10 9 2018-11-19 \n",
1604 | "\n",
1605 | " reviews_per_month calculated_host_listings_count availability_365 \n",
1606 | "0 0.210000 6 365 \n",
1607 | "1 0.380000 2 355 \n",
1608 | "2 1.373221 1 365 \n",
1609 | "3 4.640000 1 194 \n",
1610 | "4 0.100000 1 0 "
1611 | ]
1612 | },
1613 | "execution_count": 204,
1614 | "metadata": {},
1615 | "output_type": "execute_result"
1616 | }
1617 | ],
1618 | "source": [
1619 | "df3.head()"
1620 | ]
1621 | },
1622 | {
1623 | "cell_type": "markdown",
1624 | "id": "de6d9cb6",
1625 | "metadata": {},
1626 | "source": [
1627 | "### Advanced Imputation"
1628 | ]
1629 | },
1630 | {
1631 | "cell_type": "code",
1632 | "execution_count": 205,
1633 | "id": "c4e67106",
1634 | "metadata": {},
1635 | "outputs": [],
1636 | "source": [
1637 | "df = data"
1638 | ]
1639 | },
1640 | {
1641 | "cell_type": "code",
1642 | "execution_count": 207,
1643 | "id": "43a21a00",
1644 | "metadata": {},
1645 | "outputs": [
1646 | {
1647 | "data": {
1648 | "text/plain": [
1649 | "id 0\n",
1650 | "name 16\n",
1651 | "host_id 0\n",
1652 | "host_name 21\n",
1653 | "neighbourhood_group 0\n",
1654 | "neighbourhood 0\n",
1655 | "latitude 0\n",
1656 | "longitude 0\n",
1657 | "room_type 0\n",
1658 | "price 0\n",
1659 | "minimum_nights 0\n",
1660 | "number_of_reviews 0\n",
1661 | "last_review 10052\n",
1662 | "reviews_per_month 10052\n",
1663 | "calculated_host_listings_count 0\n",
1664 | "availability_365 0\n",
1665 | "dtype: int64"
1666 | ]
1667 | },
1668 | "execution_count": 207,
1669 | "metadata": {},
1670 | "output_type": "execute_result"
1671 | }
1672 | ],
1673 | "source": [
1674 | "df.isna().sum()"
1675 | ]
1676 | },
1677 | {
1678 | "cell_type": "code",
1679 | "execution_count": 209,
1680 | "id": "a4df50d0",
1681 | "metadata": {},
1682 | "outputs": [],
1683 | "source": [
1684 | "df[\"reviews_per_month\"].interpolate(inplace = True)"
1685 | ]
1686 | },
1687 | {
1688 | "cell_type": "code",
1689 | "execution_count": 211,
1690 | "id": "55a6d283",
1691 | "metadata": {},
1692 | "outputs": [
1693 | {
1694 | "data": {
1695 | "text/html": [
1696 | "\n",
1697 | "\n",
1710 | "
\n",
1711 | " \n",
1712 | " \n",
1713 | " | \n",
1714 | " id | \n",
1715 | " name | \n",
1716 | " host_id | \n",
1717 | " host_name | \n",
1718 | " neighbourhood_group | \n",
1719 | " neighbourhood | \n",
1720 | " latitude | \n",
1721 | " longitude | \n",
1722 | " room_type | \n",
1723 | " price | \n",
1724 | " minimum_nights | \n",
1725 | " number_of_reviews | \n",
1726 | " last_review | \n",
1727 | " reviews_per_month | \n",
1728 | " calculated_host_listings_count | \n",
1729 | " availability_365 | \n",
1730 | "
\n",
1731 | " \n",
1732 | " \n",
1733 | " \n",
1734 | " 0 | \n",
1735 | " 2539 | \n",
1736 | " Clean & quiet apt home by the park | \n",
1737 | " 2787 | \n",
1738 | " John | \n",
1739 | " Brooklyn | \n",
1740 | " Kensington | \n",
1741 | " 40.64749 | \n",
1742 | " -73.97237 | \n",
1743 | " Private room | \n",
1744 | " 149 | \n",
1745 | " 1 | \n",
1746 | " 9 | \n",
1747 | " 2018-10-19 | \n",
1748 | " 0.21 | \n",
1749 | " 6 | \n",
1750 | " 365 | \n",
1751 | "
\n",
1752 | " \n",
1753 | " 1 | \n",
1754 | " 2595 | \n",
1755 | " Skylit Midtown Castle | \n",
1756 | " 2845 | \n",
1757 | " Jennifer | \n",
1758 | " Manhattan | \n",
1759 | " Midtown | \n",
1760 | " 40.75362 | \n",
1761 | " -73.98377 | \n",
1762 | " Entire home/apt | \n",
1763 | " 225 | \n",
1764 | " 1 | \n",
1765 | " 45 | \n",
1766 | " 2019-05-21 | \n",
1767 | " 0.38 | \n",
1768 | " 2 | \n",
1769 | " 355 | \n",
1770 | "
\n",
1771 | " \n",
1772 | " 2 | \n",
1773 | " 3647 | \n",
1774 | " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
1775 | " 4632 | \n",
1776 | " Elisabeth | \n",
1777 | " Manhattan | \n",
1778 | " Harlem | \n",
1779 | " 40.80902 | \n",
1780 | " -73.94190 | \n",
1781 | " Private room | \n",
1782 | " 150 | \n",
1783 | " 3 | \n",
1784 | " 0 | \n",
1785 | " NaN | \n",
1786 | " 2.51 | \n",
1787 | " 1 | \n",
1788 | " 365 | \n",
1789 | "
\n",
1790 | " \n",
1791 | " 3 | \n",
1792 | " 3831 | \n",
1793 | " Cozy Entire Floor of Brownstone | \n",
1794 | " 4869 | \n",
1795 | " LisaRoxanne | \n",
1796 | " Brooklyn | \n",
1797 | " Clinton Hill | \n",
1798 | " 40.68514 | \n",
1799 | " -73.95976 | \n",
1800 | " Entire home/apt | \n",
1801 | " 89 | \n",
1802 | " 1 | \n",
1803 | " 270 | \n",
1804 | " 2019-07-05 | \n",
1805 | " 4.64 | \n",
1806 | " 1 | \n",
1807 | " 194 | \n",
1808 | "
\n",
1809 | " \n",
1810 | " 4 | \n",
1811 | " 5022 | \n",
1812 | " Entire Apt: Spacious Studio/Loft by central park | \n",
1813 | " 7192 | \n",
1814 | " Laura | \n",
1815 | " Manhattan | \n",
1816 | " East Harlem | \n",
1817 | " 40.79851 | \n",
1818 | " -73.94399 | \n",
1819 | " Entire home/apt | \n",
1820 | " 80 | \n",
1821 | " 10 | \n",
1822 | " 9 | \n",
1823 | " 2018-11-19 | \n",
1824 | " 0.10 | \n",
1825 | " 1 | \n",
1826 | " 0 | \n",
1827 | "
\n",
1828 | " \n",
1829 | "
\n",
1830 | "
"
1831 | ],
1832 | "text/plain": [
1833 | " id name host_id \\\n",
1834 | "0 2539 Clean & quiet apt home by the park 2787 \n",
1835 | "1 2595 Skylit Midtown Castle 2845 \n",
1836 | "2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632 \n",
1837 | "3 3831 Cozy Entire Floor of Brownstone 4869 \n",
1838 | "4 5022 Entire Apt: Spacious Studio/Loft by central park 7192 \n",
1839 | "\n",
1840 | " host_name neighbourhood_group neighbourhood latitude longitude \\\n",
1841 | "0 John Brooklyn Kensington 40.64749 -73.97237 \n",
1842 | "1 Jennifer Manhattan Midtown 40.75362 -73.98377 \n",
1843 | "2 Elisabeth Manhattan Harlem 40.80902 -73.94190 \n",
1844 | "3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976 \n",
1845 | "4 Laura Manhattan East Harlem 40.79851 -73.94399 \n",
1846 | "\n",
1847 | " room_type price minimum_nights number_of_reviews last_review \\\n",
1848 | "0 Private room 149 1 9 2018-10-19 \n",
1849 | "1 Entire home/apt 225 1 45 2019-05-21 \n",
1850 | "2 Private room 150 3 0 NaN \n",
1851 | "3 Entire home/apt 89 1 270 2019-07-05 \n",
1852 | "4 Entire home/apt 80 10 9 2018-11-19 \n",
1853 | "\n",
1854 | " reviews_per_month calculated_host_listings_count availability_365 \n",
1855 | "0 0.21 6 365 \n",
1856 | "1 0.38 2 355 \n",
1857 | "2 2.51 1 365 \n",
1858 | "3 4.64 1 194 \n",
1859 | "4 0.10 1 0 "
1860 | ]
1861 | },
1862 | "execution_count": 211,
1863 | "metadata": {},
1864 | "output_type": "execute_result"
1865 | }
1866 | ],
1867 | "source": [
1868 | "df.head()"
1869 | ]
1870 | },
1871 | {
1872 | "cell_type": "markdown",
1873 | "id": "332397ae",
1874 | "metadata": {},
1875 | "source": [
1876 | "### Handling Duplicate Data"
1877 | ]
1878 | },
1879 | {
1880 | "cell_type": "code",
1881 | "execution_count": 174,
1882 | "id": "d6f88ece",
1883 | "metadata": {},
1884 | "outputs": [
1885 | {
1886 | "data": {
1887 | "text/plain": [
1888 | "0"
1889 | ]
1890 | },
1891 | "execution_count": 174,
1892 | "metadata": {},
1893 | "output_type": "execute_result"
1894 | }
1895 | ],
1896 | "source": [
1897 | "data.duplicated().sum()"
1898 | ]
1899 | },
1900 | {
1901 | "cell_type": "code",
1902 | "execution_count": 212,
1903 | "id": "32248270",
1904 | "metadata": {},
1905 | "outputs": [],
1906 | "source": [
1907 | "data.drop_duplicates(inplace = True)"
1908 | ]
1909 | }
1910 | ],
1911 | "metadata": {
1912 | "kernelspec": {
1913 | "display_name": "Python 3 (ipykernel)",
1914 | "language": "python",
1915 | "name": "python3"
1916 | },
1917 | "language_info": {
1918 | "codemirror_mode": {
1919 | "name": "ipython",
1920 | "version": 3
1921 | },
1922 | "file_extension": ".py",
1923 | "mimetype": "text/x-python",
1924 | "name": "python",
1925 | "nbconvert_exporter": "python",
1926 | "pygments_lexer": "ipython3",
1927 | "version": "3.11.1"
1928 | }
1929 | },
1930 | "nbformat": 4,
1931 | "nbformat_minor": 5
1932 | }
1933 |
--------------------------------------------------------------------------------
/9 Correction of datatype.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "6332b62b",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "id": "06f8ba29",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "df = pd.read_csv(\"AB_NYC_2019.csv\")"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 4,
26 | "id": "5d1321df",
27 | "metadata": {},
28 | "outputs": [
29 | {
30 | "data": {
31 | "text/html": [
32 | "\n",
33 | "\n",
46 | "
\n",
47 | " \n",
48 | " \n",
49 | " | \n",
50 | " id | \n",
51 | " name | \n",
52 | " host_id | \n",
53 | " host_name | \n",
54 | " neighbourhood_group | \n",
55 | " neighbourhood | \n",
56 | " latitude | \n",
57 | " longitude | \n",
58 | " room_type | \n",
59 | " price | \n",
60 | " minimum_nights | \n",
61 | " number_of_reviews | \n",
62 | " last_review | \n",
63 | " reviews_per_month | \n",
64 | " calculated_host_listings_count | \n",
65 | " availability_365 | \n",
66 | "
\n",
67 | " \n",
68 | " \n",
69 | " \n",
70 | " 0 | \n",
71 | " 2539 | \n",
72 | " Clean & quiet apt home by the park | \n",
73 | " 2787 | \n",
74 | " John | \n",
75 | " Brooklyn | \n",
76 | " Kensington | \n",
77 | " 40.64749 | \n",
78 | " -73.97237 | \n",
79 | " Private room | \n",
80 | " 149 | \n",
81 | " 1 | \n",
82 | " 9 | \n",
83 | " 19-10-2018 | \n",
84 | " 0.21 | \n",
85 | " 6 | \n",
86 | " 365 | \n",
87 | "
\n",
88 | " \n",
89 | " 1 | \n",
90 | " 2595 | \n",
91 | " Skylit Midtown Castle | \n",
92 | " 2845 | \n",
93 | " Jennifer | \n",
94 | " Manhattan | \n",
95 | " Midtown | \n",
96 | " 40.75362 | \n",
97 | " -73.98377 | \n",
98 | " Entire home/apt | \n",
99 | " 225 | \n",
100 | " 1 | \n",
101 | " 45 | \n",
102 | " 21-05-2019 | \n",
103 | " 0.38 | \n",
104 | " 2 | \n",
105 | " 355 | \n",
106 | "
\n",
107 | " \n",
108 | "
\n",
109 | "
"
110 | ],
111 | "text/plain": [
112 | " id name host_id host_name \\\n",
113 | "0 2539 Clean & quiet apt home by the park 2787 John \n",
114 | "1 2595 Skylit Midtown Castle 2845 Jennifer \n",
115 | "\n",
116 | " neighbourhood_group neighbourhood latitude longitude room_type \\\n",
117 | "0 Brooklyn Kensington 40.64749 -73.97237 Private room \n",
118 | "1 Manhattan Midtown 40.75362 -73.98377 Entire home/apt \n",
119 | "\n",
120 | " price minimum_nights number_of_reviews last_review reviews_per_month \\\n",
121 | "0 149 1 9 19-10-2018 0.21 \n",
122 | "1 225 1 45 21-05-2019 0.38 \n",
123 | "\n",
124 | " calculated_host_listings_count availability_365 \n",
125 | "0 6 365 \n",
126 | "1 2 355 "
127 | ]
128 | },
129 | "execution_count": 4,
130 | "metadata": {},
131 | "output_type": "execute_result"
132 | }
133 | ],
134 | "source": [
135 | "df.head(2)"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 3,
141 | "id": "2fc99302",
142 | "metadata": {},
143 | "outputs": [
144 | {
145 | "name": "stdout",
146 | "output_type": "stream",
147 | "text": [
148 | "\n",
149 | "RangeIndex: 48906 entries, 0 to 48905\n",
150 | "Data columns (total 16 columns):\n",
151 | " # Column Non-Null Count Dtype \n",
152 | "--- ------ -------------- ----- \n",
153 | " 0 id 48906 non-null int64 \n",
154 | " 1 name 48890 non-null object \n",
155 | " 2 host_id 48906 non-null int64 \n",
156 | " 3 host_name 48885 non-null object \n",
157 | " 4 neighbourhood_group 48906 non-null object \n",
158 | " 5 neighbourhood 48906 non-null object \n",
159 | " 6 latitude 48906 non-null float64\n",
160 | " 7 longitude 48906 non-null float64\n",
161 | " 8 room_type 48906 non-null object \n",
162 | " 9 price 48906 non-null int64 \n",
163 | " 10 minimum_nights 48906 non-null int64 \n",
164 | " 11 number_of_reviews 48906 non-null int64 \n",
165 | " 12 last_review 38854 non-null object \n",
166 | " 13 reviews_per_month 38854 non-null float64\n",
167 | " 14 calculated_host_listings_count 48906 non-null int64 \n",
168 | " 15 availability_365 48906 non-null int64 \n",
169 | "dtypes: float64(3), int64(7), object(6)\n",
170 | "memory usage: 6.0+ MB\n"
171 | ]
172 | }
173 | ],
174 | "source": [
175 | "df.info()"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 9,
181 | "id": "bff805e2",
182 | "metadata": {},
183 | "outputs": [],
184 | "source": [
185 | "df[\"id\"] = df[\"id\"].astype(str)"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 18,
191 | "id": "e30541cc",
192 | "metadata": {},
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/plain": [
197 | "dtype('O')"
198 | ]
199 | },
200 | "execution_count": 18,
201 | "metadata": {},
202 | "output_type": "execute_result"
203 | }
204 | ],
205 | "source": [
206 | "df[\"id\"].dtype"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 19,
212 | "id": "0ff5acec",
213 | "metadata": {},
214 | "outputs": [],
215 | "source": [
216 | "df[\"host_id\"] = df[\"host_id\"].astype(str)"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 20,
222 | "id": "8dd6d327",
223 | "metadata": {},
224 | "outputs": [
225 | {
226 | "name": "stderr",
227 | "output_type": "stream",
228 | "text": [
229 | "C:\\Users\\GFG19189\\AppData\\Local\\Temp\\ipykernel_3288\\3465608367.py:1: UserWarning: Parsing dates in DD/MM/YYYY format when dayfirst=False (the default) was specified. This may lead to inconsistently parsed dates! Specify a format to ensure consistent parsing.\n",
230 | " df[\"last_review\"] = pd.to_datetime(df[\"last_review\"])\n"
231 | ]
232 | }
233 | ],
234 | "source": [
235 | "df[\"last_review\"] = pd.to_datetime(df[\"last_review\"], dayfirst=True)  # values are DD-MM-YYYY (e.g. 19-10-2018); parse day-first so ambiguous dates are not read month-first"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 22,
241 | "id": "8bfd344f",
242 | "metadata": {},
243 | "outputs": [
244 | {
245 | "name": "stdout",
246 | "output_type": "stream",
247 | "text": [
248 | "\n",
249 | "RangeIndex: 48906 entries, 0 to 48905\n",
250 | "Data columns (total 16 columns):\n",
251 | " # Column Non-Null Count Dtype \n",
252 | "--- ------ -------------- ----- \n",
253 | " 0 id 48906 non-null object \n",
254 | " 1 name 48890 non-null object \n",
255 | " 2 host_id 48906 non-null object \n",
256 | " 3 host_name 48885 non-null object \n",
257 | " 4 neighbourhood_group 48906 non-null object \n",
258 | " 5 neighbourhood 48906 non-null object \n",
259 | " 6 latitude 48906 non-null float64 \n",
260 | " 7 longitude 48906 non-null float64 \n",
261 | " 8 room_type 48906 non-null object \n",
262 | " 9 price 48906 non-null int64 \n",
263 | " 10 minimum_nights 48906 non-null int64 \n",
264 | " 11 number_of_reviews 48906 non-null int64 \n",
265 | " 12 last_review 38854 non-null datetime64[ns]\n",
266 | " 13 reviews_per_month 38854 non-null float64 \n",
267 | " 14 calculated_host_listings_count 48906 non-null int64 \n",
268 | " 15 availability_365 48906 non-null int64 \n",
269 | "dtypes: datetime64[ns](1), float64(3), int64(5), object(7)\n",
270 | "memory usage: 6.0+ MB\n"
271 | ]
272 | }
273 | ],
274 | "source": [
275 | "df.info()"
276 | ]
277 | }
278 | ],
279 | "metadata": {
280 | "kernelspec": {
281 | "display_name": "Python 3 (ipykernel)",
282 | "language": "python",
283 | "name": "python3"
284 | },
285 | "language_info": {
286 | "codemirror_mode": {
287 | "name": "ipython",
288 | "version": 3
289 | },
290 | "file_extension": ".py",
291 | "mimetype": "text/x-python",
292 | "name": "python",
293 | "nbconvert_exporter": "python",
294 | "pygments_lexer": "ipython3",
295 | "version": "3.11.1"
296 | }
297 | },
298 | "nbformat": 4,
299 | "nbformat_minor": 5
300 | }
301 |
--------------------------------------------------------------------------------
/EDA 5 (Outliers).pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sakshisinghal936/eda_data_science_course/80bc747c0d9ad6cfb2535812458a184d29d0d61d/EDA 5 (Outliers).pptx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Introduction to Exploratory Data Analysis (EDA) Course
2 |
3 | ## Overview
4 |
5 | Welcome to the "Introduction to Exploratory Data Analysis (EDA)" course! This course will help you to effectively analyze and gain insights from various datasets.
6 |
7 | ## Course Content
8 |
9 | This course consists of the following modules:
10 |
11 | 1. **Understanding your data:** Before starting any analysis, we first get familiar with the dataset we are working with.
12 |
13 | 2. **Dealing with Missing Values:** Learn how to identify and handle missing data in a dataset using appropriate techniques.
14 |
15 | 3. **Dealing with Duplicate Data:** Explore methods to detect and manage duplicate records in your data.
16 |
17 | 4. **Dealing with Outliers:** Discover how to identify and handle outliers that can impact your analysis.
18 |
19 | 5. **Correction of Data Types:** Learn how to convert columns to the data types that suit the data and the analysis you want to perform.
20 |
21 | 6. **Univariate Analysis:** Learn how to perform univariate analysis to examine the distribution and characteristics of individual variables.
22 |
23 | 7. **Univariate Analysis Visualization:** Discover visualization techniques for univariate analysis to gain deeper insights into data patterns.
24 | 
25 | 8. **Bivariate Analysis:** Understand how to perform bivariate analysis to explore relationships between two variables.
26 |
27 | ## Code Files
28 |
29 | The code files for each module are provided in this repository. These files contain hands-on examples and implementations of the concepts covered in the course. You can use these code files to practice and reinforce your understanding of EDA techniques.
30 |
31 | ## Requirements
32 |
33 | Before you begin the course, ensure you have the following:
34 |
35 | - Basic knowledge of the Python programming language (for running the code examples).
36 | - Familiarity with data handling concepts and basic statistical methods.
37 |
38 | ## How to Use the Code Files
39 |
40 | 1. Clone or download this repository to your local machine.
41 | 2. Open the code files in your preferred Python IDE or Jupyter Notebook.
42 | 3. Experiment with the code and datasets to enhance your understanding.
43 |
--------------------------------------------------------------------------------
/scholarship.csv:
--------------------------------------------------------------------------------
1 | semester_percentage,scholarship_exam_marks,got_scholarship
2 | 71.9,26,1
3 | 74.6,38,1
4 | 75.4,40,1
5 | 64.2,8,1
6 | 72.3,17,0
7 | 73,23,1
8 | 66.9,11,0
9 | 71.2,39,1
10 | 64.5,38,0
11 | 77.5,94,1
12 | 68.2,16,1
13 | 63.8,7,1
14 | 65.8,16,1
15 | 56.8,26,0
16 | 79.1,43,0
17 | 71,21,0
18 | 65.3,19,0
19 | 75.6,22,1
20 | 69.3,27,0
21 | 76.3,29,0
22 | 66.9,47,0
23 | 74.3,33,1
24 | 67.6,54,1
25 | 60.5,11,0
26 | 64.4,11,0
27 | 62.8,58,1
28 | 74.5,8,1
29 | 65.3,46,0
30 | 72.3,19,0
31 | 65.1,15,1
32 | 74.6,16,0
33 | 76.6,44,0
34 | 59.1,11,1
35 | 62.3,27,0
36 | 81.5,9,0
37 | 74.8,12,0
38 | 68.5,16,1
39 | 85.1,9,1
40 | 65.8,20,1
41 | 72.5,17,0
42 | 66,86,1
43 | 67,38,0
44 | 74.6,71,1
45 | 78.5,63,0
46 | 78.8,55,0
47 | 69.2,10,1
48 | 73,15,0
49 | 69.2,46,0
50 | 62.9,42,0
51 | 82.3,28,1
52 | 62,18,1
53 | 62.4,28,1
54 | 75.3,27,1
55 | 66.9,33,1
56 | 84.2,36,1
57 | 81.2,15,0
58 | 69.3,15,1
59 | 77.3,24,0
60 | 73.4,43,0
61 | 77,32,1
62 | 79.4,31,1
63 | 75.1,86,0
64 | 76.1,22,0
65 | 62.2,26,0
66 | 69.8,52,1
67 | 62.7,28,1
68 | 66.7,31,1
69 | 80.4,18,1
70 | 65.8,30,0
71 | 74.1,44,0
72 | 69.8,17,1
73 | 61.3,20,1
74 | 79,19,0
75 | 63.9,12,1
76 | 61.6,3,1
77 | 67.7,56,1
78 | 83.5,12,0
79 | 71,28,1
80 | 82,12,0
81 | 76.3,19,1
82 | 68.7,46,1
83 | 71.2,44,1
84 | 72.2,12,0
85 | 73.8,20,1
86 | 56.5,35,1
87 | 60.3,28,0
88 | 71.9,22,1
89 | 69.6,19,1
90 | 67.8,39,0
91 | 65.6,17,0
92 | 76.1,15,1
93 | 74.2,6,1
94 | 64.8,63,0
95 | 70,30,0
96 | 71.5,21,1
97 | 68.9,35,1
98 | 70.9,28,1
99 | 73.3,52,1
100 | 81.2,76,0
101 | 74.6,23,0
102 | 65.6,46,0
103 | 64.8,26,1
104 | 59.2,22,1
105 | 64.4,18,1
106 | 68.2,47,1
107 | 67.2,12,1
108 | 65.9,22,0
109 | 73.2,9,0
110 | 59.4,10,0
111 | 72.3,21,1
112 | 69.6,31,0
113 | 64.8,33,0
114 | 72.3,15,0
115 | 72.4,10,0
116 | 63,77,1
117 | 72.8,37,0
118 | 71.6,21,0
119 | 71.1,0,1
120 | 69,54,0
121 | 70,39,0
122 | 69.9,46,1
123 | 59.1,27,0
124 | 59.7,51,0
125 | 62.5,52,0
126 | 62,30,0
127 | 74.6,55,1
128 | 71.6,37,0
129 | 69.5,23,1
130 | 67.6,25,0
131 | 68.1,39,1
132 | 59.1,53,0
133 | 81.9,19,1
134 | 77.1,18,0
135 | 76.5,64,1
136 | 63.3,93,0
137 | 66.3,47,0
138 | 72.8,15,0
139 | 56.9,53,0
140 | 75.3,8,1
141 | 80.9,5,1
142 | 68.5,22,1
143 | 71.9,18,0
144 | 76.6,41,1
145 | 63.2,33,1
146 | 66.7,17,0
147 | 70.8,28,0
148 | 67.5,22,1
149 | 66.8,11,0
150 | 68.6,49,1
151 | 68.1,58,0
152 | 85.3,22,0
153 | 72.9,16,1
154 | 77.8,56,0
155 | 62.9,10,1
156 | 75.7,54,1
157 | 78.4,12,0
158 | 64,34,0
159 | 70.7,81,1
160 | 57.4,50,0
161 | 67,22,0
162 | 73.6,10,1
163 | 62.6,52,1
164 | 78,90,0
165 | 67.7,42,1
166 | 62.9,34,1
167 | 75.3,7,0
168 | 77.6,59,0
169 | 66.5,48,1
170 | 65.1,37,1
171 | 61.9,16,1
172 | 61.7,37,0
173 | 62,38,1
174 | 72.4,43,1
175 | 67.1,48,0
176 | 73.1,21,1
177 | 75.7,26,1
178 | 77.2,30,1
179 | 62.2,40,1
180 | 75.1,39,0
181 | 64,45,1
182 | 71.8,29,0
183 | 73,42,0
184 | 68.7,76,0
185 | 61.1,18,1
186 | 77.8,33,0
187 | 73,31,0
188 | 69.7,63,1
189 | 66.3,10,1
190 | 59.2,55,1
191 | 66.3,9,0
192 | 75.4,23,0
193 | 66.3,8,0
194 | 66.7,38,0
195 | 63.1,14,0
196 | 81.9,33,0
197 | 67.1,44,0
198 | 72.8,34,1
199 | 67.5,27,0
200 | 82.9,44,1
201 | 74.3,25,1
202 | 57.7,22,0
203 | 66.7,15,0
204 | 65.1,48,1
205 | 79.3,52,0
206 | 70.1,5,1
207 | 69.9,47,1
208 | 75,25,0
209 | 70.5,16,0
210 | 72.5,12,1
211 | 68.8,35,0
212 | 65.8,19,1
213 | 73.6,14,0
214 | 62.3,57,0
215 | 66.2,19,1
216 | 74.2,60,1
217 | 77.2,22,0
218 | 54.2,36,1
219 | 69.2,16,0
220 | 66.2,52,0
221 | 54.8,3,0
222 | 77.5,16,1
223 | 67.8,24,1
224 | 69.7,78,0
225 | 61.5,11,1
226 | 71.5,19,0
227 | 73.6,13,1
228 | 67.9,21,1
229 | 70.3,19,0
230 | 76.1,15,0
231 | 66.8,13,0
232 | 71.2,14,1
233 | 64,35,0
234 | 77.7,34,0
235 | 72.5,41,0
236 | 80.9,36,0
237 | 72,52,1
238 | 77,36,1
239 | 71.4,52,1
240 | 74.1,78,0
241 | 67.1,16,1
242 | 72.5,38,0
243 | 72.2,18,0
244 | 59.9,44,0
245 | 67.8,55,0
246 | 76.8,44,0
247 | 67.8,62,0
248 | 76.7,26,0
249 | 62.5,49,0
250 | 69,44,0
251 | 74.6,52,0
252 | 74,13,1
253 | 64.6,45,0
254 | 62.4,54,1
255 | 69.8,76,0
256 | 67,64,0
257 | 77.6,10,0
258 | 73.7,48,1
259 | 57.4,76,0
260 | 61.2,6,1
261 | 71.6,10,0
262 | 56.6,57,1
263 | 71.7,11,1
264 | 71.9,10,1
265 | 81.1,50,1
266 | 78.2,28,1
267 | 69.4,57,0
268 | 77.3,50,1
269 | 60,65,0
270 | 73.8,21,0
271 | 64.7,16,0
272 | 68,16,1
273 | 76.4,21,1
274 | 62.3,56,1
275 | 70,9,0
276 | 71.3,4,1
277 | 80.3,39,0
278 | 69.6,46,0
279 | 64.5,15,1
280 | 52.3,17,1
281 | 84.9,40,1
282 | 66.2,55,0
283 | 76.8,22,1
284 | 64.3,44,1
285 | 70.9,87,0
286 | 69.9,62,1
287 | 74.2,57,1
288 | 62.9,58,1
289 | 62.4,24,1
290 | 70.7,21,0
291 | 67.3,9,1
292 | 83.8,87,0
293 | 74.6,14,1
294 | 58.2,41,0
295 | 69.7,11,1
296 | 61.2,17,0
297 | 60.2,64,0
298 | 69.9,19,0
299 | 70.3,32,1
300 | 61.6,48,0
301 | 67.5,15,1
302 | 73.6,52,1
303 | 78.4,5,1
304 | 79.5,34,0
305 | 73.5,56,0
306 | 71.2,33,1
307 | 66.7,32,1
308 | 68.6,19,0
309 | 74.7,28,1
310 | 72,47,1
311 | 74.6,31,1
312 | 55.6,50,0
313 | 69.7,87,1
314 | 70.2,55,1
315 | 65.1,18,1
316 | 74.3,14,1
317 | 79.6,37,0
318 | 70.7,38,1
319 | 74.7,19,0
320 | 67.4,60,0
321 | 75.3,13,0
322 | 71.8,51,1
323 | 68.2,47,0
324 | 67.7,38,1
325 | 66,38,0
326 | 66.4,90,0
327 | 66.5,13,0
328 | 75.4,13,0
329 | 82.1,56,0
330 | 74,28,1
331 | 73,35,0
332 | 66.8,16,1
333 | 61.2,22,1
334 | 70.3,39,0
335 | 67.4,20,1
336 | 65.6,19,1
337 | 69,17,1
338 | 79.3,9,1
339 | 69.5,68,0
340 | 74.5,46,1
341 | 73.2,18,1
342 | 74.5,51,0
343 | 74.1,8,1
344 | 63.7,23,0
345 | 72,70,1
346 | 72,26,1
347 | 62.6,13,1
348 | 73.1,33,0
349 | 72.4,34,1
350 | 61.3,17,1
351 | 81.2,16,0
352 | 72.1,70,0
353 | 69.2,10,0
354 | 65.1,4,1
355 | 68.2,28,1
356 | 67.3,23,1
357 | 64.2,18,1
358 | 59.1,50,1
359 | 79.2,42,1
360 | 67.4,52,0
361 | 76.1,40,0
362 | 72.4,33,0
363 | 63.7,37,1
364 | 72.5,21,1
365 | 76.1,13,0
366 | 68.7,9,1
367 | 70.7,25,0
368 | 67.8,42,0
369 | 64.1,30,0
370 | 70.9,32,0
371 | 66.9,36,1
372 | 57,79,1
373 | 59,29,0
374 | 59.5,8,0
375 | 55.8,41,0
376 | 73.4,41,0
377 | 73.1,31,0
378 | 62.5,60,0
379 | 67,47,1
380 | 72.7,25,1
381 | 73.7,65,0
382 | 71.2,31,1
383 | 61.7,53,0
384 | 75.5,61,0
385 | 72.6,4,1
386 | 70.3,33,0
387 | 73.3,21,0
388 | 76.3,43,0
389 | 72.5,7,1
390 | 72.8,23,0
391 | 59,16,1
392 | 66.3,16,1
393 | 77.7,66,1
394 | 67.6,35,0
395 | 66,18,0
396 | 79.4,18,1
397 | 72.9,11,0
398 | 72.8,10,1
399 | 75.5,63,0
400 | 75.5,48,0
401 | 57.4,43,1
402 | 60,33,1
403 | 67.4,22,0
404 | 60.8,34,1
405 | 67.1,83,0
406 | 80.2,35,1
407 | 66,32,1
408 | 71.4,17,1
409 | 82.3,20,1
410 | 78,24,1
411 | 69.2,24,0
412 | 73.6,37,1
413 | 60.4,66,0
414 | 65,45,1
415 | 66.5,28,1
416 | 73,36,1
417 | 63.7,46,1
418 | 75.1,55,1
419 | 69,19,0
420 | 63,29,0
421 | 62.2,46,0
422 | 75.9,49,0
423 | 62.9,46,0
424 | 70.9,19,0
425 | 82.7,26,0
426 | 69.1,30,0
427 | 74.5,15,0
428 | 72.6,21,0
429 | 61.7,39,1
430 | 78.9,46,0
431 | 73.5,55,0
432 | 72.2,19,1
433 | 76.4,21,0
434 | 66.7,26,1
435 | 57.7,11,1
436 | 70.1,5,0
437 | 74.5,37,0
438 | 63.1,19,0
439 | 67.3,22,0
440 | 73.1,7,0
441 | 66.7,10,1
442 | 85.6,22,0
443 | 78,15,1
444 | 70.8,48,1
445 | 69.2,77,0
446 | 66.6,14,1
447 | 55.6,10,0
448 | 66,23,1
449 | 65,17,1
450 | 63.2,31,0
451 | 62.1,37,0
452 | 64.4,31,1
453 | 59.4,31,0
454 | 65.9,42,1
455 | 73.1,24,1
456 | 71.3,53,0
457 | 69.2,43,0
458 | 67.6,12,1
459 | 65.8,20,0
460 | 71.3,47,0
461 | 66.2,79,1
462 | 69.1,4,1
463 | 73.9,12,0
464 | 67.1,9,1
465 | 68.2,49,1
466 | 62.8,37,1
467 | 76.5,49,0
468 | 64.5,16,0
469 | 66.5,7,0
470 | 67.3,70,0
471 | 58.6,46,1
472 | 64.6,28,0
473 | 54.8,18,0
474 | 61.9,38,1
475 | 63.8,25,0
476 | 71.1,25,1
477 | 73.8,10,0
478 | 68.3,34,0
479 | 57.8,18,0
480 | 73.2,36,1
481 | 63.8,33,0
482 | 68,37,0
483 | 73.3,32,0
484 | 75.6,8,0
485 | 79.3,25,0
486 | 58.1,18,0
487 | 49.2,44,1
488 | 76.7,65,1
489 | 69.5,13,0
490 | 67.5,43,0
491 | 72.4,50,0
492 | 73.4,32,0
493 | 77.7,49,1
494 | 85,21,1
495 | 68,18,0
496 | 68.3,51,0
497 | 69.5,64,1
498 | 75.2,35,1
499 | 68,37,1
500 | 68.9,14,0
501 | 64.2,31,0
502 | 60.7,24,1
503 | 65.2,31,0
504 | 71.8,50,1
505 | 69.7,10,1
506 | 72.4,25,0
507 | 64.8,18,1
508 | 78.4,8,1
509 | 67.7,75,1
510 | 64.1,14,1
511 | 78.3,48,0
512 | 68.4,34,1
513 | 75.3,50,0
514 | 70.8,9,1
515 | 64.3,46,0
516 | 73.6,15,0
517 | 77.5,44,0
518 | 68.3,28,0
519 | 63.7,38,1
520 | 75.3,22,0
521 | 66,8,1
522 | 73.9,20,1
523 | 57.2,37,1
524 | 67.1,63,1
525 | 61.3,19,0
526 | 68.5,21,0
527 | 63.3,17,0
528 | 71.2,34,0
529 | 77.2,37,0
530 | 69,38,1
531 | 71.9,34,0
532 | 62.8,18,1
533 | 70,81,1
534 | 69,20,0
535 | 71.9,15,0
536 | 60.5,45,1
537 | 64.9,62,0
538 | 68.7,48,1
539 | 71.3,9,1
540 | 75.8,27,0
541 | 67.9,14,0
542 | 63.7,63,0
543 | 61.1,18,1
544 | 70.6,22,0
545 | 74.8,16,0
546 | 71.1,14,1
547 | 63.5,17,1
548 | 67.5,18,1
549 | 73.3,39,0
550 | 74.8,30,1
551 | 73.7,79,1
552 | 66.2,38,1
553 | 65.5,66,1
554 | 64,24,0
555 | 74.6,17,0
556 | 68.4,9,1
557 | 64.6,32,0
558 | 67,15,1
559 | 64.7,25,0
560 | 72.8,14,0
561 | 66.1,58,1
562 | 71.1,67,0
563 | 70.7,58,0
564 | 60.6,23,0
565 | 68.1,14,1
566 | 71.7,34,1
567 | 66.1,54,1
568 | 60.3,25,0
569 | 61.8,36,1
570 | 81.3,4,0
571 | 70.8,30,0
572 | 70.4,18,0
573 | 64.7,66,1
574 | 76.6,37,1
575 | 72.3,54,1
576 | 65.4,33,0
577 | 75.5,16,0
578 | 61.2,8,1
579 | 68.5,24,1
580 | 75.2,47,0
581 | 71.3,13,0
582 | 79,63,1
583 | 65.7,25,0
584 | 70.5,27,1
585 | 71,68,0
586 | 65.7,60,1
587 | 78.5,49,1
588 | 65.8,15,0
589 | 75.8,14,1
590 | 63.8,25,0
591 | 66,16,0
592 | 66.8,22,1
593 | 78.9,35,0
594 | 70.9,27,1
595 | 65.5,62,0
596 | 69.5,64,0
597 | 61.3,7,1
598 | 84.8,25,1
599 | 78.5,32,0
600 | 75.9,27,1
601 | 70.2,55,0
602 | 62.4,38,1
603 | 68.9,17,1
604 | 71.6,32,0
605 | 65.5,54,1
606 | 68.9,40,1
607 | 66.4,42,0
608 | 67.9,24,0
609 | 77.6,49,1
610 | 73.3,16,1
611 | 65.5,4,0
612 | 73.2,61,1
613 | 72,19,1
614 | 61.4,60,1
615 | 66.6,55,1
616 | 80.5,41,0
617 | 65.1,12,0
618 | 70.9,33,1
619 | 70.8,22,0
620 | 67,22,0
621 | 68.1,43,0
622 | 70.7,18,1
623 | 78.5,28,1
624 | 74,19,1
625 | 73.5,14,1
626 | 73.5,80,1
627 | 76,30,1
628 | 76.3,19,1
629 | 63.1,30,1
630 | 71.2,9,0
631 | 81.5,11,1
632 | 65.6,96,1
633 | 72.6,16,0
634 | 63.7,13,0
635 | 66.3,39,1
636 | 76.9,39,0
637 | 66.9,10,0
638 | 63.9,43,1
639 | 72.5,74,0
640 | 74.6,74,1
641 | 64.2,43,1
642 | 63.6,61,1
643 | 74.9,22,0
644 | 78.9,39,1
645 | 75.9,28,0
646 | 76.5,29,1
647 | 67.5,36,0
648 | 73.8,45,1
649 | 66.9,4,1
650 | 71.5,24,0
651 | 76,65,1
652 | 61.9,27,0
653 | 65.7,39,1
654 | 76,43,1
655 | 69.3,48,0
656 | 70.3,76,0
657 | 73.6,34,0
658 | 73.9,72,1
659 | 76,23,0
660 | 75.7,9,0
661 | 66.8,37,0
662 | 59.7,26,1
663 | 57.2,47,0
664 | 73.8,15,1
665 | 70.5,31,1
666 | 72.3,34,0
667 | 61.6,10,0
668 | 76.4,27,1
669 | 71.7,54,0
670 | 72.4,47,0
671 | 69.4,8,1
672 | 75.1,17,1
673 | 74.5,31,0
674 | 77.5,13,0
675 | 68.7,20,0
676 | 67.3,31,1
677 | 61,20,0
678 | 67.3,21,1
679 | 67.5,24,0
680 | 66.5,34,1
681 | 67,35,0
682 | 73.5,15,0
683 | 67.8,10,1
684 | 63.4,17,1
685 | 65,69,0
686 | 80.2,67,0
687 | 60.5,87,1
688 | 71.5,62,1
689 | 68.1,23,0
690 | 65.9,36,0
691 | 80.2,67,0
692 | 70.1,26,0
693 | 73.2,14,0
694 | 69.5,43,0
695 | 66.4,42,1
696 | 71.2,51,0
697 | 67.5,11,1
698 | 69.4,20,0
699 | 72.3,49,1
700 | 72.7,16,0
701 | 66.4,20,0
702 | 68.2,16,1
703 | 67.4,2,1
704 | 62.4,39,0
705 | 70.4,41,0
706 | 69.1,45,1
707 | 65.5,35,1
708 | 71.1,13,1
709 | 68.6,11,1
710 | 79.1,50,0
711 | 69.7,42,1
712 | 75.6,6,1
713 | 69.7,30,1
714 | 63.5,8,1
715 | 78,24,0
716 | 70.6,19,1
717 | 74.1,17,1
718 | 70.6,19,1
719 | 64.9,21,0
720 | 71.2,36,1
721 | 71.7,26,0
722 | 68.7,43,0
723 | 69.8,11,0
724 | 68.6,29,0
725 | 73.5,10,1
726 | 79.5,21,0
727 | 63.4,10,0
728 | 79.3,25,0
729 | 70,30,0
730 | 66.3,23,0
731 | 67.9,58,1
732 | 61.4,90,1
733 | 67.4,5,0
734 | 72.6,55,1
735 | 70.7,10,0
736 | 67.7,50,1
737 | 72.3,39,1
738 | 68.6,24,1
739 | 65.2,12,1
740 | 70.4,17,1
741 | 63.4,39,0
742 | 62.1,74,1
743 | 75.8,66,0
744 | 67.9,29,0
745 | 76.2,15,1
746 | 74.9,58,0
747 | 68.2,13,1
748 | 67.2,25,1
749 | 62.6,18,0
750 | 70.2,9,0
751 | 73.1,61,1
752 | 64.3,8,1
753 | 66.9,20,1
754 | 73.7,35,0
755 | 68.7,42,0
756 | 79.1,17,0
757 | 69.4,26,0
758 | 74.1,30,1
759 | 68.4,15,0
760 | 65.5,29,0
761 | 65.5,22,1
762 | 71.3,48,1
763 | 80.2,6,0
764 | 67.9,14,1
765 | 71.6,59,1
766 | 64.7,70,1
767 | 65.5,28,0
768 | 71.7,34,0
769 | 73.3,31,0
770 | 65.5,63,1
771 | 66.8,18,1
772 | 73.3,67,1
773 | 73.1,86,1
774 | 66.3,26,0
775 | 80.6,79,0
776 | 69.3,69,0
777 | 67.6,24,0
778 | 70.8,30,0
779 | 76.7,23,0
780 | 69.8,34,0
781 | 63.9,32,0
782 | 56.2,31,0
783 | 71.7,49,1
784 | 76.5,41,0
785 | 62.3,24,1
786 | 61.9,19,0
787 | 60.7,29,1
788 | 75,10,1
789 | 70.2,9,0
790 | 65.4,19,0
791 | 64.6,74,0
792 | 74.8,47,0
793 | 76.5,12,1
794 | 79.2,32,0
795 | 72.3,9,0
796 | 62.4,17,1
797 | 70,9,1
798 | 60.2,60,0
799 | 81.4,20,1
800 | 58.6,56,0
801 | 68.9,53,1
802 | 65,28,0
803 | 63.1,14,0
804 | 61,16,0
805 | 71.9,20,1
806 | 64.9,81,0
807 | 62.3,13,0
808 | 63.9,15,0
809 | 71.6,19,1
810 | 76.8,31,0
811 | 63.9,22,1
812 | 71.4,21,1
813 | 80.4,14,0
814 | 59.9,23,0
815 | 74.3,52,1
816 | 77.8,17,1
817 | 52.7,43,0
818 | 71.9,23,1
819 | 78.4,26,0
820 | 55.2,62,0
821 | 69.3,10,0
822 | 62.7,16,0
823 | 61,9,0
824 | 73.3,32,1
825 | 66.7,18,0
826 | 74.8,27,1
827 | 63,23,0
828 | 70.5,32,0
829 | 64.6,21,0
830 | 64.4,63,1
831 | 68.7,37,0
832 | 64.6,28,0
833 | 68.1,55,1
834 | 69.1,59,0
835 | 72.9,23,1
836 | 62.2,52,1
837 | 66.7,65,1
838 | 70.9,33,1
839 | 66.5,24,0
840 | 68.4,32,0
841 | 64.4,24,1
842 | 68.9,22,1
843 | 74.5,27,1
844 | 73.3,14,1
845 | 63.1,60,0
846 | 76.2,18,1
847 | 59.8,36,0
848 | 69.9,97,0
849 | 72,54,1
850 | 72,27,1
851 | 78.9,8,1
852 | 83.9,37,1
853 | 62.9,17,0
854 | 65.4,43,1
855 | 77.4,22,0
856 | 58.3,68,1
857 | 78.9,9,1
858 | 75.2,6,1
859 | 57.2,64,1
860 | 75.1,26,0
861 | 65.2,22,0
862 | 75.1,21,0
863 | 63.2,28,0
864 | 60.4,37,1
865 | 80.4,2,1
866 | 63.2,5,0
867 | 67.7,16,0
868 | 72.9,35,0
869 | 80.4,26,0
870 | 73.3,36,0
871 | 55.1,58,1
872 | 66.7,15,1
873 | 65.6,60,0
874 | 69.5,54,0
875 | 79,42,1
876 | 66,24,0
877 | 57.3,16,1
878 | 63.3,20,0
879 | 63.2,9,0
880 | 69.8,34,0
881 | 68.3,31,0
882 | 64,13,0
883 | 77.2,42,0
884 | 72.5,35,1
885 | 79.8,6,1
886 | 71.1,4,1
887 | 71.3,16,1
888 | 72.2,17,1
889 | 71.9,17,1
890 | 76.2,23,1
891 | 73.9,38,0
892 | 77.3,30,0
893 | 74.2,26,1
894 | 59.6,18,1
895 | 70.7,32,1
896 | 65,16,0
897 | 78.2,35,1
898 | 57.3,38,0
899 | 71.3,35,1
900 | 71.4,18,1
901 | 76.2,8,1
902 | 60.8,19,1
903 | 71,50,1
904 | 70.3,42,1
905 | 75.6,28,1
906 | 73.6,32,0
907 | 70.8,29,0
908 | 75.6,34,1
909 | 70.3,44,1
910 | 60.6,17,1
911 | 65.7,11,1
912 | 78.2,29,1
913 | 74.5,36,1
914 | 74.8,38,1
915 | 67.8,7,1
916 | 69.7,25,0
917 | 68.8,34,0
918 | 68.8,11,1
919 | 59.5,100,0
920 | 70.9,9,0
921 | 65.9,21,0
922 | 79.7,52,1
923 | 69.1,38,1
924 | 76.8,23,1
925 | 65.2,10,0
926 | 68.5,23,1
927 | 69.1,56,1
928 | 70.2,14,0
929 | 73.7,52,1
930 | 66.3,32,0
931 | 68.9,25,0
932 | 69.4,7,1
933 | 71.9,33,0
934 | 68.9,71,0
935 | 60.1,9,0
936 | 71.5,9,0
937 | 81.2,37,1
938 | 73.2,20,0
939 | 64.2,34,0
940 | 62.7,24,0
941 | 69.8,41,0
942 | 61.6,38,0
943 | 71.8,8,0
944 | 70.3,6,0
945 | 73.6,2,0
946 | 72.1,21,1
947 | 67.1,26,1
948 | 79.5,12,0
949 | 64.9,70,0
950 | 63.8,15,0
951 | 79.4,11,0
952 | 66.5,33,1
953 | 75.2,48,1
954 | 77.1,25,1
955 | 69.1,44,0
956 | 80,39,1
957 | 72,26,1
958 | 68.6,21,0
959 | 65.1,27,1
960 | 74.3,11,0
961 | 67.6,27,1
962 | 68.1,10,1
963 | 76.5,9,0
964 | 70.9,46,1
965 | 67.4,8,0
966 | 80.9,54,1
967 | 73.7,52,1
968 | 62.4,72,1
969 | 73.5,59,0
970 | 68.4,26,0
971 | 68.6,41,0
972 | 72.8,42,1
973 | 66.9,18,1
974 | 70.2,18,0
975 | 61,37,1
976 | 64.2,38,0
977 | 84.3,25,1
978 | 69.6,48,0
979 | 71.8,49,0
980 | 67.8,12,0
981 | 70,13,1
982 | 61.8,27,0
983 | 80.6,37,0
984 | 68.7,22,0
985 | 68.1,11,1
986 | 68.9,16,1
987 | 71,26,0
988 | 70.9,19,1
989 | 67.7,62,0
990 | 56.4,37,1
991 | 62.3,31,0
992 | 61.7,33,1
993 | 70.4,57,0
994 | 62.6,12,0
995 | 67.3,21,1
996 | 64.8,63,0
997 | 88.7,44,1
998 | 91.2,65,1
999 | 48.9,34,0
1000 | 86.2,46,1
1001 | 49,10,1
1002 |
--------------------------------------------------------------------------------