├── crime.csv ├── data-exploration-exercise.ipynb └── data-exploration-with-solution.ipynb /crime.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mochen862/explore-data-python/25e0ead56adf9aacf9517bd554320b04cbae82c7/crime.csv -------------------------------------------------------------------------------- /data-exploration-exercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "id": "96009edc", 7 | "metadata": {}, 8 | "source": [ 9 | "### Import all the necessary libraries" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "id": "eb61f6f6", 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "import seaborn as sns\n", 23 | "from encodings.aliases import aliases # Python has a file containing a dictionary of encoding names and associated aliases\n", 24 | "\n", 25 | "# the matplotlib plots will appear directly below the cell in which the plot function was called.\n", 26 | "%matplotlib inline" 27 | ] 28 | }, 29 | { 30 | "attachments": {}, 31 | "cell_type": "markdown", 32 | "id": "a1013c3a", 33 | "metadata": {}, 34 | "source": [ 35 | "### Read in the csv files and remove duplicates" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "id": "a454bb34", 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "successful iso8859_15\n", 49 | "successful cp864\n", 50 | "successful big5hkscs\n", 51 | "successful mac_latin2\n", 52 | "successful cp1252\n", 53 | "successful cp500\n", 54 | "successful iso8859_13\n", 55 | "successful iso8859_10\n", 56 | "successful koi8_r\n", 57 | "successful cp1251\n", 58 | "successful iso8859_11\n", 59 | "successful 
mac_turkish\n", 60 | "successful utf_16_be\n", 61 | "successful mac_iceland\n", 62 | "successful utf_16_le\n", 63 | "successful cp1254\n", 64 | "successful cp860\n", 65 | "successful cp1256\n", 66 | "successful cp1140\n", 67 | "successful mac_roman\n", 68 | "successful mac_cyrillic\n", 69 | "successful iso8859_6\n", 70 | "successful cp850\n", 71 | "successful iso8859_9\n", 72 | "successful cp949\n", 73 | "successful cp862\n", 74 | "successful latin_1\n", 75 | "successful cp866\n", 76 | "successful iso8859_5\n", 77 | "successful cp1125\n", 78 | "successful cp1258\n", 79 | "successful cp1253\n", 80 | "successful cp1257\n", 81 | "successful cp869\n", 82 | "successful kz1048\n", 83 | "successful iso8859_3\n", 84 | "successful cp932\n", 85 | "successful hp_roman8\n", 86 | "successful cp865\n", 87 | "successful cp1250\n", 88 | "successful iso8859_8\n", 89 | "successful cp037\n", 90 | "successful cp855\n", 91 | "successful cp273\n", 92 | "successful cp861\n", 93 | "successful cp858\n", 94 | "successful iso8859_14\n", 95 | "successful mac_greek\n", 96 | "successful cp775\n", 97 | "successful iso8859_16\n", 98 | "successful mbcs\n", 99 | "successful iso8859_7\n", 100 | "successful gbk\n", 101 | "successful ptcp154\n", 102 | "successful cp863\n", 103 | "successful iso8859_4\n", 104 | "successful iso8859_2\n", 105 | "successful cp1255\n", 106 | "successful gb18030\n", 107 | "successful cp437\n", 108 | "successful cp1026\n", 109 | "successful cp852\n", 110 | "successful cp857\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "# Find encodings that can read the file ('successful' only means read_csv raised no error, not that the text decoded correctly)\n", 116 | "\n", 117 | "# Set of every codec name listed in Python's encoding alias table\n", 118 | "alias_values = set(aliases.values())\n", 119 | "\n", 120 | "for encoding in sorted(alias_values): # sorted, because set iteration order can change between runs\n", 121 | "    try:\n", 122 | "        df = pd.read_csv(\"crime.csv\", nrows=10, encoding=encoding) # read in only 10 lines for faster read\n", 123 | "    except Exception: # not a bare except, so a kernel interrupt (KeyboardInterrupt) still stops the loop\n", 124 | "        continue\n", 125 | "    print('successful', encoding)" 126 | ] 127 | }, 128 | { 129 | 
"cell_type": "code", 130 | "execution_count": 3, 131 | "id": "20ee872c", 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "# Read in the full crime.csv file with ISO-8859-11, one of the encodings reported as successful above (dates stay as strings here and are converted with pd.to_datetime further down)\n", 136 | "crime = pd.read_csv(\"crime.csv\", encoding=\"ISO-8859-11\")" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 4, 142 | "id": "6b786b8c", 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/html": [ 148 | "
\n", 149 | "\n", 162 | "\n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLongLocation
0I182070945619LarcenyLARCENY ALL OTHERSD14808NaN2018-09-02 13:00:0020189Sunday13Part OneLINCOLN ST42.357791-71.139371(42.35779134, -71.13937053)
1I1820709431402VandalismVANDALISMC11347NaN2018-08-21 00:00:0020188Tuesday0Part TwoHECLA ST42.306821-71.060300(42.30682138, -71.06030035)
2I1820709413410TowedTOWED MOTOR VEHICLED4151NaN2018-09-03 19:27:0020189Monday19Part ThreeCAZENOVE ST42.346589-71.072429(42.34658879, -71.07242943)
3I1820709403114Investigate PropertyINVESTIGATE PROPERTYD4272NaN2018-09-03 21:16:0020189Monday21Part ThreeNEWCOMB ST42.334182-71.078664(42.33418175, -71.07866441)
4I1820709383114Investigate PropertyINVESTIGATE PROPERTYB3421NaN2018-09-03 21:05:0020189Monday21Part ThreeDELHI ST42.275365-71.090361(42.27536542, -71.09036101)
\n", 288 | "
" 289 | ], 290 | "text/plain": [ 291 | " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP OFFENSE_DESCRIPTION \\\n", 292 | "0 I182070945 619 Larceny LARCENY ALL OTHERS \n", 293 | "1 I182070943 1402 Vandalism VANDALISM \n", 294 | "2 I182070941 3410 Towed TOWED MOTOR VEHICLE \n", 295 | "3 I182070940 3114 Investigate Property INVESTIGATE PROPERTY \n", 296 | "4 I182070938 3114 Investigate Property INVESTIGATE PROPERTY \n", 297 | "\n", 298 | " DISTRICT REPORTING_AREA SHOOTING OCCURRED_ON_DATE YEAR MONTH \\\n", 299 | "0 D14 808 NaN 2018-09-02 13:00:00 2018 9 \n", 300 | "1 C11 347 NaN 2018-08-21 00:00:00 2018 8 \n", 301 | "2 D4 151 NaN 2018-09-03 19:27:00 2018 9 \n", 302 | "3 D4 272 NaN 2018-09-03 21:16:00 2018 9 \n", 303 | "4 B3 421 NaN 2018-09-03 21:05:00 2018 9 \n", 304 | "\n", 305 | " DAY_OF_WEEK HOUR UCR_PART STREET Lat Long \\\n", 306 | "0 Sunday 13 Part One LINCOLN ST 42.357791 -71.139371 \n", 307 | "1 Tuesday 0 Part Two HECLA ST 42.306821 -71.060300 \n", 308 | "2 Monday 19 Part Three CAZENOVE ST 42.346589 -71.072429 \n", 309 | "3 Monday 21 Part Three NEWCOMB ST 42.334182 -71.078664 \n", 310 | "4 Monday 21 Part Three DELHI ST 42.275365 -71.090361 \n", 311 | "\n", 312 | " Location \n", 313 | "0 (42.35779134, -71.13937053) \n", 314 | "1 (42.30682138, -71.06030035) \n", 315 | "2 (42.34658879, -71.07242943) \n", 316 | "3 (42.33418175, -71.07866441) \n", 317 | "4 (42.27536542, -71.09036101) " 318 | ] 319 | }, 320 | "execution_count": 4, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "crime.head() # Quick check on how the dataset looks like" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 5, 332 | "id": "78dac45e", 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/plain": [ 338 | "(319073, 17)" 339 | ] 340 | }, 341 | "execution_count": 5, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "crime.shape # Checking 
the shape of the data. It has 319,073 rows and 17 columns" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 6, 353 | "id": "93f474f9", 354 | "metadata": {}, 355 | "outputs": [ 356 | { 357 | "data": { 358 | "text/plain": [ 359 | "23" 360 | ] 361 | }, 362 | "execution_count": 6, 363 | "metadata": {}, 364 | "output_type": "execute_result" 365 | } 366 | ], 367 | "source": [ 368 | "crime.duplicated().sum() # Counting the duplicate rows" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 7, 374 | "id": "148a2f42", 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "crime.drop_duplicates(inplace=True) # Dropping the duplicate rows" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 8, 384 | "id": "1c0279ac", 385 | "metadata": {}, 386 | "outputs": [ 387 | { 388 | "data": { 389 | "text/plain": [ 390 | "(319050, 17)" 391 | ] 392 | }, 393 | "execution_count": 8, 394 | "metadata": {}, 395 | "output_type": "execute_result" 396 | } 397 | ], 398 | "source": [ 399 | "crime.shape # Checking the shape again to see if dropping the duplicate rows worked" 400 | ] 401 | }, 402 | { 403 | "attachments": {}, 404 | "cell_type": "markdown", 405 | "id": "f0823dc0", 406 | "metadata": {}, 407 | "source": [ 408 | "### Explore the dataset" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 9, 414 | "id": "5ad86c4d", 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/html": [ 420 | "
\n", 421 | "\n", 434 | "\n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLongLocation
0I182070945619LarcenyLARCENY ALL OTHERSD14808NaN2018-09-02 13:00:0020189Sunday13Part OneLINCOLN ST42.357791-71.139371(42.35779134, -71.13937053)
1I1820709431402VandalismVANDALISMC11347NaN2018-08-21 00:00:0020188Tuesday0Part TwoHECLA ST42.306821-71.060300(42.30682138, -71.06030035)
2I1820709413410TowedTOWED MOTOR VEHICLED4151NaN2018-09-03 19:27:0020189Monday19Part ThreeCAZENOVE ST42.346589-71.072429(42.34658879, -71.07242943)
3I1820709403114Investigate PropertyINVESTIGATE PROPERTYD4272NaN2018-09-03 21:16:0020189Monday21Part ThreeNEWCOMB ST42.334182-71.078664(42.33418175, -71.07866441)
4I1820709383114Investigate PropertyINVESTIGATE PROPERTYB3421NaN2018-09-03 21:05:0020189Monday21Part ThreeDELHI ST42.275365-71.090361(42.27536542, -71.09036101)
\n", 560 | "
" 561 | ], 562 | "text/plain": [ 563 | " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP OFFENSE_DESCRIPTION \\\n", 564 | "0 I182070945 619 Larceny LARCENY ALL OTHERS \n", 565 | "1 I182070943 1402 Vandalism VANDALISM \n", 566 | "2 I182070941 3410 Towed TOWED MOTOR VEHICLE \n", 567 | "3 I182070940 3114 Investigate Property INVESTIGATE PROPERTY \n", 568 | "4 I182070938 3114 Investigate Property INVESTIGATE PROPERTY \n", 569 | "\n", 570 | " DISTRICT REPORTING_AREA SHOOTING OCCURRED_ON_DATE YEAR MONTH \\\n", 571 | "0 D14 808 NaN 2018-09-02 13:00:00 2018 9 \n", 572 | "1 C11 347 NaN 2018-08-21 00:00:00 2018 8 \n", 573 | "2 D4 151 NaN 2018-09-03 19:27:00 2018 9 \n", 574 | "3 D4 272 NaN 2018-09-03 21:16:00 2018 9 \n", 575 | "4 B3 421 NaN 2018-09-03 21:05:00 2018 9 \n", 576 | "\n", 577 | " DAY_OF_WEEK HOUR UCR_PART STREET Lat Long \\\n", 578 | "0 Sunday 13 Part One LINCOLN ST 42.357791 -71.139371 \n", 579 | "1 Tuesday 0 Part Two HECLA ST 42.306821 -71.060300 \n", 580 | "2 Monday 19 Part Three CAZENOVE ST 42.346589 -71.072429 \n", 581 | "3 Monday 21 Part Three NEWCOMB ST 42.334182 -71.078664 \n", 582 | "4 Monday 21 Part Three DELHI ST 42.275365 -71.090361 \n", 583 | "\n", 584 | " Location \n", 585 | "0 (42.35779134, -71.13937053) \n", 586 | "1 (42.30682138, -71.06030035) \n", 587 | "2 (42.34658879, -71.07242943) \n", 588 | "3 (42.33418175, -71.07866441) \n", 589 | "4 (42.27536542, -71.09036101) " 590 | ] 591 | }, 592 | "execution_count": 9, 593 | "metadata": {}, 594 | "output_type": "execute_result" 595 | } 596 | ], 597 | "source": [ 598 | "crime.head() # Quick check of the beginning of the dataframe" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 10, 604 | "id": "9f2eca52", 605 | "metadata": {}, 606 | "outputs": [ 607 | { 608 | "data": { 609 | "text/html": [ 610 | "
\n", 611 | "\n", 624 | "\n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLongLocation
319068I050310906-003125Warrant ArrestsWARRANT ARRESTD4285NaN2016-06-05 17:25:0020166Sunday17Part ThreeCOVENTRY ST42.336951-71.085748(42.33695098, -71.08574813)
319069I030217815-08111HomicideMURDER, NON-NEGLIGIENT MANSLAUGHTERE18520NaN2015-07-09 13:38:0020157Thursday13Part OneRIVER ST42.255926-71.123172(42.25592648, -71.12317207)
319070I030217815-083125Warrant ArrestsWARRANT ARRESTE18520NaN2015-07-09 13:38:0020157Thursday13Part ThreeRIVER ST42.255926-71.123172(42.25592648, -71.12317207)
319071I010370257-003125Warrant ArrestsWARRANT ARRESTE13569NaN2016-05-31 19:35:0020165Tuesday19Part ThreeNEW WASHINGTON ST42.302333-71.111565(42.30233307, -71.11156487)
3190721420525503125Warrant ArrestsWARRANT ARRESTD4903NaN2015-06-22 00:12:0020156Monday0Part ThreeWASHINGTON ST42.333839-71.080290(42.33383935, -71.08029038)
\n", 750 | "
" 751 | ], 752 | "text/plain": [ 753 | " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP \\\n", 754 | "319068 I050310906-00 3125 Warrant Arrests \n", 755 | "319069 I030217815-08 111 Homicide \n", 756 | "319070 I030217815-08 3125 Warrant Arrests \n", 757 | "319071 I010370257-00 3125 Warrant Arrests \n", 758 | "319072 142052550 3125 Warrant Arrests \n", 759 | "\n", 760 | " OFFENSE_DESCRIPTION DISTRICT REPORTING_AREA SHOOTING \\\n", 761 | "319068 WARRANT ARREST D4 285 NaN \n", 762 | "319069 MURDER, NON-NEGLIGIENT MANSLAUGHTER E18 520 NaN \n", 763 | "319070 WARRANT ARREST E18 520 NaN \n", 764 | "319071 WARRANT ARREST E13 569 NaN \n", 765 | "319072 WARRANT ARREST D4 903 NaN \n", 766 | "\n", 767 | " OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK HOUR UCR_PART \\\n", 768 | "319068 2016-06-05 17:25:00 2016 6 Sunday 17 Part Three \n", 769 | "319069 2015-07-09 13:38:00 2015 7 Thursday 13 Part One \n", 770 | "319070 2015-07-09 13:38:00 2015 7 Thursday 13 Part Three \n", 771 | "319071 2016-05-31 19:35:00 2016 5 Tuesday 19 Part Three \n", 772 | "319072 2015-06-22 00:12:00 2015 6 Monday 0 Part Three \n", 773 | "\n", 774 | " STREET Lat Long Location \n", 775 | "319068 COVENTRY ST 42.336951 -71.085748 (42.33695098, -71.08574813) \n", 776 | "319069 RIVER ST 42.255926 -71.123172 (42.25592648, -71.12317207) \n", 777 | "319070 RIVER ST 42.255926 -71.123172 (42.25592648, -71.12317207) \n", 778 | "319071 NEW WASHINGTON ST 42.302333 -71.111565 (42.30233307, -71.11156487) \n", 779 | "319072 WASHINGTON ST 42.333839 -71.080290 (42.33383935, -71.08029038) " 780 | ] 781 | }, 782 | "execution_count": 10, 783 | "metadata": {}, 784 | "output_type": "execute_result" 785 | } 786 | ], 787 | "source": [ 788 | "crime.tail() # Quick check of the end of the dataframe" 789 | ] 790 | }, 791 | { 792 | "cell_type": "code", 793 | "execution_count": 11, 794 | "id": "33f581e1", 795 | "metadata": {}, 796 | "outputs": [], 797 | "source": [ 798 | "pd.options.display.max_rows = 10" 799 | ] 800 | }, 801 | { 802 | 
"cell_type": "code", 803 | "execution_count": 12, 804 | "id": "9ba67772", 805 | "metadata": {}, 806 | "outputs": [ 807 | { 808 | "data": { 809 | "text/html": [ 810 | "
\n", 811 | "\n", 824 | "\n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " 
\n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | "
INCIDENT_NUMBEROFFENSE_CODEOFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGOCCURRED_ON_DATEYEARMONTHDAY_OF_WEEKHOURUCR_PARTSTREETLatLongLocation
0I182070945619LarcenyLARCENY ALL OTHERSD14808NaN2018-09-02 13:00:0020189Sunday13Part OneLINCOLN ST42.357791-71.139371(42.35779134, -71.13937053)
1I1820709431402VandalismVANDALISMC11347NaN2018-08-21 00:00:0020188Tuesday0Part TwoHECLA ST42.306821-71.060300(42.30682138, -71.06030035)
2I1820709413410TowedTOWED MOTOR VEHICLED4151NaN2018-09-03 19:27:0020189Monday19Part ThreeCAZENOVE ST42.346589-71.072429(42.34658879, -71.07242943)
3I1820709403114Investigate PropertyINVESTIGATE PROPERTYD4272NaN2018-09-03 21:16:0020189Monday21Part ThreeNEWCOMB ST42.334182-71.078664(42.33418175, -71.07866441)
4I1820709383114Investigate PropertyINVESTIGATE PROPERTYB3421NaN2018-09-03 21:05:0020189Monday21Part ThreeDELHI ST42.275365-71.090361(42.27536542, -71.09036101)
......................................................
319068I050310906-003125Warrant ArrestsWARRANT ARRESTD4285NaN2016-06-05 17:25:0020166Sunday17Part ThreeCOVENTRY ST42.336951-71.085748(42.33695098, -71.08574813)
319069I030217815-08111HomicideMURDER, NON-NEGLIGIENT MANSLAUGHTERE18520NaN2015-07-09 13:38:0020157Thursday13Part OneRIVER ST42.255926-71.123172(42.25592648, -71.12317207)
319070I030217815-083125Warrant ArrestsWARRANT ARRESTE18520NaN2015-07-09 13:38:0020157Thursday13Part ThreeRIVER ST42.255926-71.123172(42.25592648, -71.12317207)
319071I010370257-003125Warrant ArrestsWARRANT ARRESTE13569NaN2016-05-31 19:35:0020165Tuesday19Part ThreeNEW WASHINGTON ST42.302333-71.111565(42.30233307, -71.11156487)
3190721420525503125Warrant ArrestsWARRANT ARRESTD4903NaN2015-06-22 00:12:0020156Monday0Part ThreeWASHINGTON ST42.333839-71.080290(42.33383935, -71.08029038)
\n", 1070 | "

319050 rows × 17 columns

\n", 1071 | "
" 1072 | ], 1073 | "text/plain": [ 1074 | " INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP \\\n", 1075 | "0 I182070945 619 Larceny \n", 1076 | "1 I182070943 1402 Vandalism \n", 1077 | "2 I182070941 3410 Towed \n", 1078 | "3 I182070940 3114 Investigate Property \n", 1079 | "4 I182070938 3114 Investigate Property \n", 1080 | "... ... ... ... \n", 1081 | "319068 I050310906-00 3125 Warrant Arrests \n", 1082 | "319069 I030217815-08 111 Homicide \n", 1083 | "319070 I030217815-08 3125 Warrant Arrests \n", 1084 | "319071 I010370257-00 3125 Warrant Arrests \n", 1085 | "319072 142052550 3125 Warrant Arrests \n", 1086 | "\n", 1087 | " OFFENSE_DESCRIPTION DISTRICT REPORTING_AREA SHOOTING \\\n", 1088 | "0 LARCENY ALL OTHERS D14 808 NaN \n", 1089 | "1 VANDALISM C11 347 NaN \n", 1090 | "2 TOWED MOTOR VEHICLE D4 151 NaN \n", 1091 | "3 INVESTIGATE PROPERTY D4 272 NaN \n", 1092 | "4 INVESTIGATE PROPERTY B3 421 NaN \n", 1093 | "... ... ... ... ... \n", 1094 | "319068 WARRANT ARREST D4 285 NaN \n", 1095 | "319069 MURDER, NON-NEGLIGIENT MANSLAUGHTER E18 520 NaN \n", 1096 | "319070 WARRANT ARREST E18 520 NaN \n", 1097 | "319071 WARRANT ARREST E13 569 NaN \n", 1098 | "319072 WARRANT ARREST D4 903 NaN \n", 1099 | "\n", 1100 | " OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK HOUR UCR_PART \\\n", 1101 | "0 2018-09-02 13:00:00 2018 9 Sunday 13 Part One \n", 1102 | "1 2018-08-21 00:00:00 2018 8 Tuesday 0 Part Two \n", 1103 | "2 2018-09-03 19:27:00 2018 9 Monday 19 Part Three \n", 1104 | "3 2018-09-03 21:16:00 2018 9 Monday 21 Part Three \n", 1105 | "4 2018-09-03 21:05:00 2018 9 Monday 21 Part Three \n", 1106 | "... ... ... ... ... ... ... 
\n", 1107 | "319068 2016-06-05 17:25:00 2016 6 Sunday 17 Part Three \n", 1108 | "319069 2015-07-09 13:38:00 2015 7 Thursday 13 Part One \n", 1109 | "319070 2015-07-09 13:38:00 2015 7 Thursday 13 Part Three \n", 1110 | "319071 2016-05-31 19:35:00 2016 5 Tuesday 19 Part Three \n", 1111 | "319072 2015-06-22 00:12:00 2015 6 Monday 0 Part Three \n", 1112 | "\n", 1113 | " STREET Lat Long Location \n", 1114 | "0 LINCOLN ST 42.357791 -71.139371 (42.35779134, -71.13937053) \n", 1115 | "1 HECLA ST 42.306821 -71.060300 (42.30682138, -71.06030035) \n", 1116 | "2 CAZENOVE ST 42.346589 -71.072429 (42.34658879, -71.07242943) \n", 1117 | "3 NEWCOMB ST 42.334182 -71.078664 (42.33418175, -71.07866441) \n", 1118 | "4 DELHI ST 42.275365 -71.090361 (42.27536542, -71.09036101) \n", 1119 | "... ... ... ... ... \n", 1120 | "319068 COVENTRY ST 42.336951 -71.085748 (42.33695098, -71.08574813) \n", 1121 | "319069 RIVER ST 42.255926 -71.123172 (42.25592648, -71.12317207) \n", 1122 | "319070 RIVER ST 42.255926 -71.123172 (42.25592648, -71.12317207) \n", 1123 | "319071 NEW WASHINGTON ST 42.302333 -71.111565 (42.30233307, -71.11156487) \n", 1124 | "319072 WASHINGTON ST 42.333839 -71.080290 (42.33383935, -71.08029038) \n", 1125 | "\n", 1126 | "[319050 rows x 17 columns]" 1127 | ] 1128 | }, 1129 | "execution_count": 12, 1130 | "metadata": {}, 1131 | "output_type": "execute_result" 1132 | } 1133 | ], 1134 | "source": [ 1135 | "crime" 1136 | ] 1137 | }, 1138 | { 1139 | "cell_type": "code", 1140 | "execution_count": 13, 1141 | "id": "4413e055", 1142 | "metadata": { 1143 | "scrolled": true 1144 | }, 1145 | "outputs": [ 1146 | { 1147 | "name": "stdout", 1148 | "output_type": "stream", 1149 | "text": [ 1150 | "\n", 1151 | "Int64Index: 319050 entries, 0 to 319072\n", 1152 | "Data columns (total 17 columns):\n", 1153 | " # Column Non-Null Count Dtype \n", 1154 | "--- ------ -------------- ----- \n", 1155 | " 0 INCIDENT_NUMBER 319050 non-null object \n", 1156 | " 1 OFFENSE_CODE 319050 non-null int64 \n", 
1157 | " 2 OFFENSE_CODE_GROUP 319050 non-null object \n", 1158 | " 3 OFFENSE_DESCRIPTION 319050 non-null object \n", 1159 | " 4 DISTRICT 317285 non-null object \n", 1160 | " 5 REPORTING_AREA 319050 non-null object \n", 1161 | " 6 SHOOTING 1019 non-null object \n", 1162 | " 7 OCCURRED_ON_DATE 319050 non-null object \n", 1163 | " 8 YEAR 319050 non-null int64 \n", 1164 | " 9 MONTH 319050 non-null int64 \n", 1165 | " 10 DAY_OF_WEEK 319050 non-null object \n", 1166 | " 11 HOUR 319050 non-null int64 \n", 1167 | " 12 UCR_PART 318960 non-null object \n", 1168 | " 13 STREET 308179 non-null object \n", 1169 | " 14 Lat 299052 non-null float64\n", 1170 | " 15 Long 299052 non-null float64\n", 1171 | " 16 Location 319050 non-null object \n", 1172 | "dtypes: float64(2), int64(4), object(11)\n", 1173 | "memory usage: 43.8+ MB\n" 1174 | ] 1175 | } 1176 | ], 1177 | "source": [ 1178 | "crime.info() # Summary information about the dataframe" 1179 | ] 1180 | }, 1181 | { 1182 | "cell_type": "code", 1183 | "execution_count": 14, 1184 | "id": "b73455f0", 1185 | "metadata": {}, 1186 | "outputs": [], 1187 | "source": [ 1188 | "# Changing the data type from object to datetime for the OCCURRED_ON_DATE column\n", 1189 | "crime.OCCURRED_ON_DATE = pd.to_datetime(crime.OCCURRED_ON_DATE)" 1190 | ] 1191 | }, 1192 | { 1193 | "cell_type": "code", 1194 | "execution_count": 15, 1195 | "id": "f36de48b", 1196 | "metadata": {}, 1197 | "outputs": [ 1198 | { 1199 | "name": "stdout", 1200 | "output_type": "stream", 1201 | "text": [ 1202 | "\n", 1203 | "Int64Index: 319050 entries, 0 to 319072\n", 1204 | "Data columns (total 17 columns):\n", 1205 | " # Column Non-Null Count Dtype \n", 1206 | "--- ------ -------------- ----- \n", 1207 | " 0 INCIDENT_NUMBER 319050 non-null object \n", 1208 | " 1 OFFENSE_CODE 319050 non-null int64 \n", 1209 | " 2 OFFENSE_CODE_GROUP 319050 non-null object \n", 1210 | " 3 OFFENSE_DESCRIPTION 319050 non-null object \n", 1211 | " 4 DISTRICT 317285 non-null object \n", 1212 | " 5 
REPORTING_AREA 319050 non-null object \n", 1213 | " 6 SHOOTING 1019 non-null object \n", 1214 | " 7 OCCURRED_ON_DATE 319050 non-null datetime64[ns]\n", 1215 | " 8 YEAR 319050 non-null int64 \n", 1216 | " 9 MONTH 319050 non-null int64 \n", 1217 | " 10 DAY_OF_WEEK 319050 non-null object \n", 1218 | " 11 HOUR 319050 non-null int64 \n", 1219 | " 12 UCR_PART 318960 non-null object \n", 1220 | " 13 STREET 308179 non-null object \n", 1221 | " 14 Lat 299052 non-null float64 \n", 1222 | " 15 Long 299052 non-null float64 \n", 1223 | " 16 Location 319050 non-null object \n", 1224 | "dtypes: datetime64[ns](1), float64(2), int64(4), object(10)\n", 1225 | "memory usage: 43.8+ MB\n" 1226 | ] 1227 | } 1228 | ], 1229 | "source": [ 1230 | "# Check if it worked\n", 1231 | "crime.info()" 1232 | ] 1233 | }, 1234 | { 1235 | "cell_type": "code", 1236 | "execution_count": 16, 1237 | "id": "8e6d7208", 1238 | "metadata": {}, 1239 | "outputs": [ 1240 | { 1241 | "data": { 1242 | "text/plain": [ 1243 | "0 2018\n", 1244 | "1 2018\n", 1245 | "2 2018\n", 1246 | "3 2018\n", 1247 | "4 2018\n", 1248 | " ... 
\n", 1249 | "319068 2016\n", 1250 | "319069 2015\n", 1251 | "319070 2015\n", 1252 | "319071 2016\n", 1253 | "319072 2015\n", 1254 | "Name: OCCURRED_ON_DATE, Length: 319050, dtype: int64" 1255 | ] 1256 | }, 1257 | "execution_count": 16, 1258 | "metadata": {}, 1259 | "output_type": "execute_result" 1260 | } 1261 | ], 1262 | "source": [ 1263 | "# easily extract datetime information from the OCCURRED_ON_DATE column\n", 1264 | "crime.OCCURRED_ON_DATE.dt.year" 1265 | ] 1266 | }, 1267 | { 1268 | "cell_type": "code", 1269 | "execution_count": 17, 1270 | "id": "a16d5dd8", 1271 | "metadata": {}, 1272 | "outputs": [ 1273 | { 1274 | "data": { 1275 | "text/plain": [ 1276 | "0 9\n", 1277 | "1 8\n", 1278 | "2 9\n", 1279 | "3 9\n", 1280 | "4 9\n", 1281 | " ..\n", 1282 | "319068 6\n", 1283 | "319069 7\n", 1284 | "319070 7\n", 1285 | "319071 5\n", 1286 | "319072 6\n", 1287 | "Name: OCCURRED_ON_DATE, Length: 319050, dtype: int64" 1288 | ] 1289 | }, 1290 | "execution_count": 17, 1291 | "metadata": {}, 1292 | "output_type": "execute_result" 1293 | } 1294 | ], 1295 | "source": [ 1296 | "crime.OCCURRED_ON_DATE.dt.month" 1297 | ] 1298 | }, 1299 | { 1300 | "cell_type": "code", 1301 | "execution_count": 18, 1302 | "id": "260527b7", 1303 | "metadata": {}, 1304 | "outputs": [ 1305 | { 1306 | "name": "stderr", 1307 | "output_type": "stream", 1308 | "text": [ 1309 | "C:\\Users\\moche\\AppData\\Local\\Temp\\ipykernel_124320\\2588699237.py:1: FutureWarning: Series.dt.weekofyear and Series.dt.week have been deprecated. 
# %% cell 260527b7
# ISO week number of every incident.
# Series.dt.week / Series.dt.weekofyear are deprecated (this cell used to emit a
# FutureWarning) and no longer exist in pandas 2.x; isocalendar() is the
# replacement the warning itself recommends. The week numbers are the same;
# only the dtype of the result differs (UInt32 instead of int64).
crime.OCCURRED_ON_DATE.dt.isocalendar().week

# %% cell 5e6502e9
# Hour of day (0-23) of every incident.
crime.OCCURRED_ON_DATE.dt.hour

# %% cell 0ef6d738
# Minute within the hour of every incident.
crime.OCCURRED_ON_DATE.dt.minute
"cell_type": "code", 1405 | "execution_count": 21, 1406 | "id": "c03863c6", 1407 | "metadata": { 1408 | "scrolled": true 1409 | }, 1410 | "outputs": [ 1411 | { 1412 | "data": { 1413 | "text/html": [ 1414 | "
\n", 1415 | "\n", 1428 | "\n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | "
OFFENSE_CODEYEARMONTHHOURLatLong
count319050.000000319050.000000319050.000000319050.000000299052.000000299052.000000
mean2317.5169572016.5606746.60962213.11817642.214373-70.908260
std1185.3089210.9963123.2736776.2942582.1598453.493746
min111.0000002015.0000001.0000000.000000-1.000000-71.178674
25%1001.0000002016.0000004.0000009.00000042.297438-71.097135
50%2907.0000002017.0000007.00000014.00000042.325538-71.077524
75%3201.0000002017.0000009.00000018.00000042.348624-71.062467
max3831.0000002018.00000012.00000023.00000042.395042-1.000000
\n", 1515 | "
# %% cell c03863c6
# NOTE(review): in the stored output Lat has min = -1 and Long has max = -1,
# far away from their quartiles (about 42.3 and -71.1). These look like
# placeholder coordinates rather than real positions -- confirm before using
# Lat/Long in any analysis.
crime.describe() # summary information on the numeric columns
\n", 1558 | "\n", 1571 | "\n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | "
INCIDENT_NUMBEROFFENSE_CODE_GROUPOFFENSE_DESCRIPTIONDISTRICTREPORTING_AREASHOOTINGDAY_OF_WEEKUCR_PARTSTREETLocation
count3190503190503190503172853190501019319050318960308179319050
unique2825176724412879174465718194
topI162030584Motor Vehicle Accident ResponseSICK/INJURED/MEDICAL - PERSONB2YFridayPart ThreeWASHINGTON ST(0.00000000, 0.00000000)
freq13371321878349940202501019484891585371419219998
\n", 1642 | "
# %% cell 6dc94fdd
# For each text column: non-null count, number of distinct values, the most
# frequent value ("top") and how often it occurs ("freq").
# NOTE(review): per the stored output, SHOOTING only ever holds 'Y' (1019
# non-null rows), and the most frequent Location is "(0.00000000, 0.00000000)"
# (19998 rows) -- presumably a stand-in for missing coordinates; verify.
crime.describe(include='object') # summary information on the non-numeric columns

# %% cell 0e30db63
# Names of all columns in the dataset.
crime.columns
# %% cell 9772d819
# Columns that contain at least one missing value.
# isnull().any() gives one boolean per column. The previous
# np.sum(crime.isnull()) relied on DataFrame.sum(axis=None) reducing
# column-wise, which pandas has deprecated in favour of a full reduction
# to a single scalar.
crime.columns[crime.isnull().any()]

# %% cell 58e96c07
# Checking for columns with missing values
crime.columns[crime.isnull().any()]

# %% cell 906c0994
# Checking for columns with no missing values
crime.columns[~crime.isnull().any()]
# %% cell 887b29bf
# Report how many distinct (non-null) values every column holds.
for column_name, n_unique in crime.nunique().items():
    print(f"{column_name} has {n_unique} unique values")
# %% cell 5ce85da1
# What are the most common crimes in terms of offense group?
# value_counts() sorts from most to least frequent.
crime['OFFENSE_CODE_GROUP'].value_counts()

# %% cell da8409f8
# Only the ten most frequent offense groups.
crime['OFFENSE_CODE_GROUP'].value_counts().head(10)

# %% cell dfdc9687
# Total number of rows (recorded incidents) in the dataset.
len(crime)
"Larceny 0.081288\n", 1910 | "Medical Assistance 0.073782\n", 1911 | "Investigate Person 0.058765\n", 1912 | "Other 0.056646\n", 1913 | "Drug Violation 0.051857\n", 1914 | "Simple Assault 0.049604\n", 1915 | "Vandalism 0.048312\n", 1916 | "Verbal Disputes 0.041056\n", 1917 | "Towed 0.035377\n", 1918 | "Name: OFFENSE_CODE_GROUP, dtype: float64" 1919 | ] 1920 | }, 1921 | "metadata": {}, 1922 | "output_type": "display_data" 1923 | }, 1924 | { 1925 | "data": { 1926 | "image/png": "", 1927 | "text/plain": [ 1928 | "
# %% cell 0ba23067
offense_group_vals = crime.OFFENSE_CODE_GROUP.value_counts()[:10]

# Share of all recorded incidents, expressed in percent so that the numbers
# agree with the "as % of all crimes" title (the plain ratio is a 0-1 fraction).
offense_group_pct = offense_group_vals / crime.shape[0] * 100

display(offense_group_pct)

# Creating a bar chart of the Top 10 offense groups
ax = offense_group_pct.plot(kind='bar')
ax.set_ylabel('% of all crimes')
plt.title('Top 10 Offense Groups (as % of all crimes)');

# %% cell e15c0db4
# What are the least common offense groups?
# NOTE(review): the stored outputs list both "Investigate Person" and
# "INVESTIGATE PERSON" as separate groups, so some groups may be split by
# inconsistent capitalisation -- worth normalising before drawing conclusions.
crime.OFFENSE_CODE_GROUP.value_counts().sort_values(ascending=True)[:10]

# %% cell d269207d
# Question 1 - What are the most common offense descriptions?
# Hint - Use value_counts() to print out the values

# %% cell da299784
# Question 2 - Now try and create a bar chart of the Top 10 Offense Descriptions as a % of total crimes
# %% cell f79b9fbf
# Number of recorded incidents per calendar year. Selecting the column before
# counting gives the same Series without counting every other column as well.
crime.groupby('YEAR')['INCIDENT_NUMBER'].count()
# %% cell 004ed113
# In which year were the most crimes committed?
# NOTE(review): the month-by-year table further down in this notebook has no
# counts before June 2015 or after September 2018, so 2015 and 2018 appear to
# be partial years -- keep that in mind when comparing the bars.
crime.groupby('YEAR')['INCIDENT_NUMBER'].count().plot.bar();
plt.title('Number of crimes');

# %% cell 88ba7a14
# Question 3 - Are there more crimes committed on specific days?
# %% cell 880134ba
# Are there more crimes during specific hours?
# One bar per hour of the day, height = number of recorded incidents.
crime.groupby('HOUR')['INCIDENT_NUMBER'].count().plot.bar();

# %% cell 8731c724
# On what days and during which hours are the most crimes committed?
\n", 2101 | "\n", 2114 | "\n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | " \n", 2244 | " \n", 2245 | " \n", 2246 | " \n", 2247 | " \n", 2248 | " \n", 2249 | "
DAY_OF_WEEKFridayMondaySaturdaySundayThursdayTuesdayWednesday
HOUR
02161200026122400203918971997
1127510581855204310771017942
295284618271855774641798
35325839571119526460412
4441386672704436399370
........................
192564260623012114251027682724
202307231921312109234923692265
212089200320771902207019252043
222160163421131728179517571738
231936124319511380149212961298
\n", 2250 | "

24 rows × 7 columns

\n", 2251 | "
# %% cell 6b1da8af
# Incident counts with one row per hour of day and one column per weekday.
crime.groupby(['HOUR', 'DAY_OF_WEEK'])['INCIDENT_NUMBER'].count().unstack()

# %% cell a968bcce
# Keep the same hour x weekday table for the heatmap below.
week_and_hour = (
    crime.groupby(['HOUR', 'DAY_OF_WEEK'])['INCIDENT_NUMBER']
    .count()
    .unstack()
)

# %% cell 1470a2a8
# unstack() orders the weekday columns alphabetically; put them in calendar order.
week_and_hour = week_and_hour.loc[
    :, ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
]
# %% cell 7053f34d
# Heatmap of incident counts: hours on the rows, weekdays on the columns,
# coloured with seaborn's cubehelix colormap.
sns.heatmap(
    data=week_and_hour,
    cmap=sns.cubehelix_palette(as_cmap=True),
);

# %% cell bb5d8f47
# In which months were the number of crimes below average? 
# In which months on average did the most crimes occur?
2435 | " \n", 2436 | " \n", 2437 | " \n", 2438 | " \n", 2439 | " \n", 2440 | " \n", 2441 | " \n", 2442 | " \n", 2443 | " \n", 2444 | " \n", 2445 | " \n", 2446 | " \n", 2447 | " \n", 2448 | " \n", 2449 | " \n", 2450 | " \n", 2451 | " \n", 2452 | " \n", 2453 | " \n", 2454 | " \n", 2455 | " \n", 2456 | " \n", 2457 | " \n", 2458 | "
YEAR2015201620172018
MONTH    
1nan7835.0000007991.0000007782.000000
2nan7307.0000007408.0000006937.000000
3nan8199.0000008179.0000007768.000000
4nan8101.0000008069.0000007916.000000
5nan8578.0000008715.0000008906.000000
64188.0000008558.0000008985.0000008834.000000
78322.0000008618.0000009075.0000008538.000000
88340.0000008938.0000009206.0000008337.000000
98411.0000008521.0000008940.000000667.000000
108305.0000008582.0000008846.000000nan
117818.0000007922.0000007935.000000nan
127987.0000007951.0000007535.000000nan
# %% cell e43ec5b7
# If a month's count is less than the average crime count per month,
# highlight the value in blue.

# Incidents per (year, month). Computed once and reused for both the average
# and the MONTH x YEAR table below.
monthly_counts = crime.groupby(['YEAR', 'MONTH'])['INCIDENT_NUMBER'].count()

avg_crime = monthly_counts.mean()
print("The average number of crimes is " + str(avg_crime))

# Rows = month, columns = year; year/month combinations without data become NaN.
year_and_month = monthly_counts.unstack('YEAR')

def style_negative(v, props=''):
    """Return the CSS in `props` when `v` is below the monthly average.

    Parameters
    ----------
    v : scalar
        One cell of the styled table (a monthly incident count, or NaN).
    props : str
        CSS declaration(s) to apply to below-average cells.

    Returns
    -------
    str or None
        `props` if `v < avg_crime`, otherwise None (cell left unstyled).
        NaN compares False, so empty months are never highlighted.
    """
    return props if v < avg_crime else None

# The former second rule (20% opacity for values between -0.3 and 0.3) could
# never match a monthly incident count, so it has been dropped.
# Styler.applymap is named Styler.map from pandas 2.1 onwards; applymap is kept
# here because it is what this notebook's pandas version provides.
s2 = year_and_month.style.applymap(style_negative, props='color:blue;')
s2
" \n", 2567 | " \n", 2568 | " \n", 2569 | " \n", 2570 | " \n", 2571 | " \n", 2572 | " \n", 2573 | " \n", 2574 | " \n", 2575 | " \n", 2576 | " \n", 2577 | " \n", 2578 | " \n", 2579 | " \n", 2580 | " \n", 2581 | " \n", 2582 | " \n", 2583 | " \n", 2584 | " \n", 2585 | " \n", 2586 | " \n", 2587 | " \n", 2588 | " \n", 2589 | " \n", 2590 | " \n", 2591 | " \n", 2592 | " \n", 2593 | " \n", 2594 | " \n", 2595 | " \n", 2596 | " \n", 2597 | " \n", 2598 | " \n", 2599 | " \n", 2600 | " \n", 2601 | " \n", 2602 | " \n", 2603 | " \n", 2604 | " \n", 2605 | " \n", 2606 | " \n", 2607 | "
YEAR2015201620172018
MONTH    
1nan7835.0000007991.0000007782.000000
2nan7307.0000007408.0000006937.000000
3nan8199.0000008179.0000007768.000000
4nan8101.0000008069.0000007916.000000
5nan8578.0000008715.0000008906.000000
64188.0000008558.0000008985.0000008834.000000
78322.0000008618.0000009075.0000008538.000000
88340.0000008938.0000009206.0000008337.000000
98411.0000008521.0000008940.000000667.000000
108305.0000008582.0000008846.000000nan
117818.0000007922.0000007935.000000nan
127987.0000007951.0000007535.000000nan
# %% cell 4e9cfcfc
# Use apply to highlight the maximum in a column in darkgreen

def highlight_max(s, props=''):
    """Return one CSS string per entry of `s`: `props` at the maximum, '' elsewhere.

    The maximum is taken with NaN values ignored, so columns that contain
    empty months still get their largest count highlighted.
    """
    column_max = np.nanmax(s.values)
    return np.where(s == column_max, props, '')

# axis=0 hands the function one column (one year) at a time.
s2.apply(highlight_max, props='color:white;background-color:darkgreen', axis=0)

# %% cell 3d7bb38f
# Question 4 - In which districts were the most crimes committed on a yearly basis?
# Try and use everything you've learned in this video to answer this question.
# Feel free to use functions, tables and other visuals.