├── Cleaning.ipynb ├── Drawing-Conclusions-pynb.txt ├── Histogram-Practice.ipynb ├── Pandas_Functions_Formulae.xlsx ├── README.md ├── appending.ipynb ├── appending_rename.ipynb ├── assessing.ipynb ├── assessing_case2.ipynb ├── assessing_quiz.html ├── assessing_quiz.ipynb ├── cleaning_column_labels.ipynb ├── cleaning_practice.ipynb ├── conclusions-quiz-solutions.ipynb.txt ├── conclusions_groupby.ipynb ├── conclusions_query.ipynb ├── conclusions_quiz.ipynb ├── drawing_conclusions_Fuel.ipynb ├── eda_visuals.ipynb ├── eda_visuals_practise_functions.ipynb ├── exploring_visuals.ipynb ├── fix_datatypes_air_pollution.ipynb ├── fix_datatypes_cyl.ipynb ├── matplotlib_example.ipynb ├── plots-pandas.ipynb ├── plotting_type_quality.ipynb ├── query_filter.ipynb ├── reading_csv.ipynb ├── visuals_quiz.ipynb ├── wine_visualizations.ipynb ├── winequality-red.csv ├── winequality-white.csv └── winequality_edited.csv /Cleaning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#import packages\n", 12 | "import pandas as pd\n", 13 | "\n", 14 | "#read csv document\n", 15 | "df = pd.read_csv('ChicagoResults.csv')" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "\n", 28 | "RangeIndex: 271 entries, 0 to 270\n", 29 | "Data columns (total 7 columns):\n", 30 | "year 271 non-null int64\n", 31 | "city 271 non-null object\n", 32 | "country 271 non-null object\n", 33 | "avg_temp 271 non-null float64\n", 34 | "sevenDayMA 265 non-null float64\n", 35 | "FiveYearMA 267 non-null float64\n", 36 | "TenYearMA 262 non-null float64\n", 37 | "dtypes: float64(4), int64(1), object(2)\n", 38 | "memory usage: 14.9+ KB\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | 
"df.info()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "mean_FiveYearMA = df['FiveYearMA'].mean()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 5, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "0 9.893401\n", 64 | "1 9.893401\n", 65 | "2 9.893401\n", 66 | "3 9.893401\n", 67 | "4 3.794000\n", 68 | "5 2.706000\n", 69 | "6 0.360000\n", 70 | "7 2.098000\n", 71 | "8 4.336000\n", 72 | "9 5.236000\n", 73 | "10 7.244000\n", 74 | "11 9.372000\n", 75 | "12 8.756000\n", 76 | "13 8.748000\n", 77 | "14 9.748000\n", 78 | "15 9.462000\n", 79 | "16 9.252000\n", 80 | "17 9.380000\n", 81 | "18 9.230000\n", 82 | "19 9.294000\n", 83 | "20 9.116000\n", 84 | "21 9.324000\n", 85 | "22 9.670000\n", 86 | "23 9.688000\n", 87 | "24 9.606000\n", 88 | "25 9.852000\n", 89 | "26 9.684000\n", 90 | "27 9.660000\n", 91 | "28 9.650000\n", 92 | "29 9.822000\n", 93 | " ... 
\n", 94 | "241 10.436000\n", 95 | "242 10.448000\n", 96 | "243 10.504000\n", 97 | "244 10.914000\n", 98 | "245 10.898000\n", 99 | "246 10.726000\n", 100 | "247 11.004000\n", 101 | "248 11.156000\n", 102 | "249 10.796000\n", 103 | "250 10.660000\n", 104 | "251 10.812000\n", 105 | "252 10.626000\n", 106 | "253 10.222000\n", 107 | "254 10.202000\n", 108 | "255 10.728000\n", 109 | "256 10.960000\n", 110 | "257 11.064000\n", 111 | "258 11.492000\n", 112 | "259 11.770000\n", 113 | "260 11.304000\n", 114 | "261 11.148000\n", 115 | "262 11.246000\n", 116 | "263 11.280000\n", 117 | "264 11.264000\n", 118 | "265 11.216000\n", 119 | "266 11.088000\n", 120 | "267 11.136000\n", 121 | "268 11.004000\n", 122 | "269 11.278000\n", 123 | "270 11.548000\n", 124 | "Name: FiveYearMA, Length: 271, dtype: float64" 125 | ] 126 | }, 127 | "execution_count": 5, 128 | "metadata": {}, 129 | "output_type": "execute_result" 130 | } 131 | ], 132 | "source": [ 133 | "df['FiveYearMA'].fillna(mean_FiveYearMA)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 7, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/html": [ 144 | "
\n", 145 | "\n", 158 | "\n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | "
yearcitycountryavg_tempsevenDayMAFiveYearMATenYearMA
01743ChicagoUnited States5.44NaNNaNNaN
11744ChicagoUnited States11.73NaNNaNNaN
21745ChicagoUnited States1.80NaNNaNNaN
31746ChicagoUnited States0.00NaNNaNNaN
41747ChicagoUnited States0.00NaN3.794NaN
51748ChicagoUnited States0.00NaN2.706NaN
61749ChicagoUnited States0.002.7100000.360NaN
71750ChicagoUnited States10.493.4314292.098NaN
81751ChicagoUnited States11.193.3542864.336NaN
91752ChicagoUnited States4.503.7400005.2364.515
101753ChicagoUnited States10.045.1742867.2444.975
\n", 284 | "
" 285 | ], 286 | "text/plain": [ 287 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 288 | "0 1743 Chicago United States 5.44 NaN NaN NaN\n", 289 | "1 1744 Chicago United States 11.73 NaN NaN NaN\n", 290 | "2 1745 Chicago United States 1.80 NaN NaN NaN\n", 291 | "3 1746 Chicago United States 0.00 NaN NaN NaN\n", 292 | "4 1747 Chicago United States 0.00 NaN 3.794 NaN\n", 293 | "5 1748 Chicago United States 0.00 NaN 2.706 NaN\n", 294 | "6 1749 Chicago United States 0.00 2.710000 0.360 NaN\n", 295 | "7 1750 Chicago United States 10.49 3.431429 2.098 NaN\n", 296 | "8 1751 Chicago United States 11.19 3.354286 4.336 NaN\n", 297 | "9 1752 Chicago United States 4.50 3.740000 5.236 4.515\n", 298 | "10 1753 Chicago United States 10.04 5.174286 7.244 4.975" 299 | ] 300 | }, 301 | "execution_count": 7, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "df.head(11)" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 8, 313 | "metadata": { 314 | "collapsed": true 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "df['FiveYearMA'] = df['FiveYearMA'].fillna(mean_FiveYearMA)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 9, 324 | "metadata": { 325 | "scrolled": true 326 | }, 327 | "outputs": [ 328 | { 329 | "data": { 330 | "text/html": [ 331 | "
\n", 332 | "\n", 345 | "\n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | "
yearcitycountryavg_tempsevenDayMAFiveYearMATenYearMA
01743ChicagoUnited States5.44NaN9.893401NaN
11744ChicagoUnited States11.73NaN9.893401NaN
21745ChicagoUnited States1.80NaN9.893401NaN
31746ChicagoUnited States0.00NaN9.893401NaN
41747ChicagoUnited States0.00NaN3.794000NaN
51748ChicagoUnited States0.00NaN2.706000NaN
61749ChicagoUnited States0.002.7100000.360000NaN
71750ChicagoUnited States10.493.4314292.098000NaN
81751ChicagoUnited States11.193.3542864.336000NaN
91752ChicagoUnited States4.503.7400005.2360004.515
\n", 461 | "
" 462 | ], 463 | "text/plain": [ 464 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 465 | "0 1743 Chicago United States 5.44 NaN 9.893401 NaN\n", 466 | "1 1744 Chicago United States 11.73 NaN 9.893401 NaN\n", 467 | "2 1745 Chicago United States 1.80 NaN 9.893401 NaN\n", 468 | "3 1746 Chicago United States 0.00 NaN 9.893401 NaN\n", 469 | "4 1747 Chicago United States 0.00 NaN 3.794000 NaN\n", 470 | "5 1748 Chicago United States 0.00 NaN 2.706000 NaN\n", 471 | "6 1749 Chicago United States 0.00 2.710000 0.360000 NaN\n", 472 | "7 1750 Chicago United States 10.49 3.431429 2.098000 NaN\n", 473 | "8 1751 Chicago United States 11.19 3.354286 4.336000 NaN\n", 474 | "9 1752 Chicago United States 4.50 3.740000 5.236000 4.515" 475 | ] 476 | }, 477 | "execution_count": 9, 478 | "metadata": {}, 479 | "output_type": "execute_result" 480 | } 481 | ], 482 | "source": [ 483 | "df.head(10)" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 11, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "mean_TenYearMA = df['TenYearMA'].mean()" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": 12, 498 | "metadata": { 499 | "collapsed": true 500 | }, 501 | "outputs": [], 502 | "source": [ 503 | "df['TenYearMA'].fillna(mean_TenYearMA, inplace = True)" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 13, 509 | "metadata": {}, 510 | "outputs": [ 511 | { 512 | "data": { 513 | "text/html": [ 514 | "
\n", 515 | "\n", 528 | "\n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | "
yearcitycountryavg_tempsevenDayMAFiveYearMATenYearMA
01743ChicagoUnited States5.44NaN9.8934019.949863
11744ChicagoUnited States11.73NaN9.8934019.949863
21745ChicagoUnited States1.80NaN9.8934019.949863
31746ChicagoUnited States0.00NaN9.8934019.949863
41747ChicagoUnited States0.00NaN3.7940009.949863
51748ChicagoUnited States0.00NaN2.7060009.949863
61749ChicagoUnited States0.002.7100000.3600009.949863
71750ChicagoUnited States10.493.4314292.0980009.949863
81751ChicagoUnited States11.193.3542864.3360009.949863
91752ChicagoUnited States4.503.7400005.2360004.515000
\n", 644 | "
" 645 | ], 646 | "text/plain": [ 647 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 648 | "0 1743 Chicago United States 5.44 NaN 9.893401 9.949863\n", 649 | "1 1744 Chicago United States 11.73 NaN 9.893401 9.949863\n", 650 | "2 1745 Chicago United States 1.80 NaN 9.893401 9.949863\n", 651 | "3 1746 Chicago United States 0.00 NaN 9.893401 9.949863\n", 652 | "4 1747 Chicago United States 0.00 NaN 3.794000 9.949863\n", 653 | "5 1748 Chicago United States 0.00 NaN 2.706000 9.949863\n", 654 | "6 1749 Chicago United States 0.00 2.710000 0.360000 9.949863\n", 655 | "7 1750 Chicago United States 10.49 3.431429 2.098000 9.949863\n", 656 | "8 1751 Chicago United States 11.19 3.354286 4.336000 9.949863\n", 657 | "9 1752 Chicago United States 4.50 3.740000 5.236000 4.515000" 658 | ] 659 | }, 660 | "execution_count": 13, 661 | "metadata": {}, 662 | "output_type": "execute_result" 663 | } 664 | ], 665 | "source": [ 666 | "df.head(10)" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": 14, 672 | "metadata": {}, 673 | "outputs": [ 674 | { 675 | "data": { 676 | "text/plain": [ 677 | "0 False\n", 678 | "1 False\n", 679 | "2 False\n", 680 | "3 False\n", 681 | "4 False\n", 682 | "5 False\n", 683 | "6 False\n", 684 | "7 False\n", 685 | "8 False\n", 686 | "9 False\n", 687 | "10 False\n", 688 | "11 False\n", 689 | "12 False\n", 690 | "13 False\n", 691 | "14 False\n", 692 | "15 False\n", 693 | "16 False\n", 694 | "17 False\n", 695 | "18 False\n", 696 | "19 False\n", 697 | "20 False\n", 698 | "21 False\n", 699 | "22 False\n", 700 | "23 False\n", 701 | "24 False\n", 702 | "25 False\n", 703 | "26 False\n", 704 | "27 False\n", 705 | "28 False\n", 706 | "29 False\n", 707 | " ... 
\n", 708 | "241 False\n", 709 | "242 False\n", 710 | "243 False\n", 711 | "244 False\n", 712 | "245 False\n", 713 | "246 False\n", 714 | "247 False\n", 715 | "248 False\n", 716 | "249 False\n", 717 | "250 False\n", 718 | "251 False\n", 719 | "252 False\n", 720 | "253 False\n", 721 | "254 False\n", 722 | "255 False\n", 723 | "256 False\n", 724 | "257 False\n", 725 | "258 False\n", 726 | "259 False\n", 727 | "260 False\n", 728 | "261 False\n", 729 | "262 False\n", 730 | "263 False\n", 731 | "264 False\n", 732 | "265 False\n", 733 | "266 False\n", 734 | "267 False\n", 735 | "268 False\n", 736 | "269 False\n", 737 | "270 False\n", 738 | "Length: 271, dtype: bool" 739 | ] 740 | }, 741 | "execution_count": 14, 742 | "metadata": {}, 743 | "output_type": "execute_result" 744 | } 745 | ], 746 | "source": [ 747 | "df.duplicated()" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": 15, 753 | "metadata": {}, 754 | "outputs": [ 755 | { 756 | "data": { 757 | "text/plain": [ 758 | "0" 759 | ] 760 | }, 761 | "execution_count": 15, 762 | "metadata": {}, 763 | "output_type": "execute_result" 764 | } 765 | ], 766 | "source": [ 767 | "sum(df.duplicated())" 768 | ] 769 | }, 770 | { 771 | "cell_type": "code", 772 | "execution_count": 16, 773 | "metadata": { 774 | "collapsed": true 775 | }, 776 | "outputs": [], 777 | "source": [ 778 | "df.drop_duplicates(inplace = True)" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 18, 784 | "metadata": {}, 785 | "outputs": [ 786 | { 787 | "name": "stdout", 788 | "output_type": "stream", 789 | "text": [ 790 | "\n", 791 | "Int64Index: 271 entries, 0 to 270\n", 792 | "Data columns (total 7 columns):\n", 793 | "year 271 non-null int64\n", 794 | "city 271 non-null object\n", 795 | "country 271 non-null object\n", 796 | "avg_temp 271 non-null float64\n", 797 | "sevenDayMA 265 non-null float64\n", 798 | "FiveYearMA 271 non-null float64\n", 799 | "TenYearMA 271 non-null float64\n", 800 | "dtypes: 
float64(4), int64(1), object(2)\n", 801 | "memory usage: 16.9+ KB\n" 802 | ] 803 | } 804 | ], 805 | "source": [ 806 | "df.info(0)" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": 19, 812 | "metadata": { 813 | "collapsed": true 814 | }, 815 | "outputs": [], 816 | "source": [ 817 | "#df['timestamp'] = pd.to_datetime(df['timestamp'])" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": null, 823 | "metadata": { 824 | "collapsed": true 825 | }, 826 | "outputs": [], 827 | "source": [] 828 | } 829 | ], 830 | "metadata": { 831 | "kernelspec": { 832 | "display_name": "Python 3", 833 | "language": "python", 834 | "name": "python3" 835 | }, 836 | "language_info": { 837 | "codemirror_mode": { 838 | "name": "ipython", 839 | "version": 3 840 | }, 841 | "file_extension": ".py", 842 | "mimetype": "text/x-python", 843 | "name": "python", 844 | "nbconvert_exporter": "python", 845 | "pygments_lexer": "ipython3", 846 | "version": "3.6.2" 847 | } 848 | }, 849 | "nbformat": 4, 850 | "nbformat_minor": 2 851 | } 852 | -------------------------------------------------------------------------------- /Drawing-Conclusions-pynb.txt: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | -------------------------------------------------------------------------------- /Pandas_Functions_Formulae.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nirupamaprv/Data-Analysis/05892f4058faf9ccc6aa90995e90d4ae7bd0bf7c/Pandas_Functions_Formulae.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | Practice Exercise from Data Analysis course of Udacity DAND 3 | 4 | - .ipynb file contains code with Markdown cells from Jupyter Notebook. 
5 | - Exercises solved independently to answer the assignment questions. 6 | - .html file is the .ipynb notebook converted to a web version for easy viewing 7 | - .csv files contain data on which data analysis was conducted 8 | 9 | ## Datasets and Project Summary 10 | ### Chicago temperature set 11 | - Cleaning.ipynb - Data cleaning practice 12 | - Histogram-Practice.ipynb - Practicing creating plots 13 | - plots-pandas.ipynb - computing values for plots 14 | 15 | ### Wine Dataset 16 | - appending.ipynb - Appending data from different datasets 17 | - appending_rename.ipynb - Appending, renaming and saving data from different datasets 18 | - assessing_quiz.ipynb - answering Quiz using pandas 19 | - conclusions_groupby.ipynb - using groupby function to analyze quality, ratings and other Questions. 20 | - conclusions_query.ipynb - drawing conclusions to Qs on ratings 21 | - eda_visuals.ipynb - Addressing Qs on wine dataset using different plots 22 | - eda_visuals_practise_functions.ipynb - Addressing additional Qs using different plots; here, varying colors are used to differentiate groups 23 | - plotting_type_quality.ipynb - Creating plots with matplotlib for ratings 24 | - wine_visualizations.ipynb - Use Matplotlib to create bar charts that visualize the conclusions made with groupby and queries 25 | 26 | ### Cancer Dataset 27 | - assessing.ipynb - inspecting datasets, data types, selecting different ranges 28 | - cleaning_practice.ipynb - practicing data wrangling 29 | 30 | 31 | ### Auto Dataset for 2008 and 2018 models 32 | - assessing_case2.ipynb - answering Quiz using pandas 33 | - cleaning_column_labels.ipynb - data wrangling 34 | - drawing_conclusions_Fuel.ipynb - Making inferences and comparisons on fuel efficiency, improvements, classes, etc. 
and visualizing using histograms and pie charts 35 | - exploring_visuals.ipynb - Making inferences and comparisons using visualizations 36 | - fix_datatypes_air_pollution.ipynb - Data Wrangling 37 | - fix_datatypes_cyl.ipynb - Datatypes transformation 38 | - query_filter.ipynb - Data Wrangling 39 | 40 | ### Other Datasets 41 | - matplotlib_example.ipynb - Practicing bar charts 42 | - conclusions_quiz.ipynb - **Store Sales Dataset** - Analyzing sales figures and periods to determine performance and revenue 43 | - reading_csv.ipynb - **Student Scores Data** - Reading, writing and inspecting values 44 | - visuals_quiz.ipynb - **Powerplant Data** - creating plots using matplotlib and answering Quiz questions 45 | 46 | 47 | -------------------------------------------------------------------------------- /assessing_case2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Assessing\n", 10 | "Use the space below to explore `all_alpha_08.csv` and `all_alpha_18.csv` to answer the quiz questions below." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 28, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "\n", 23 | "RangeIndex: 1611 entries, 0 to 1610\n", 24 | "Data columns (total 18 columns):\n", 25 | "Model 1611 non-null object\n", 26 | "Displ 1609 non-null float64\n", 27 | "Cyl 1609 non-null float64\n", 28 | "Trans 1611 non-null object\n", 29 | "Drive 1611 non-null object\n", 30 | "Fuel 1611 non-null object\n", 31 | "Cert Region 1611 non-null object\n", 32 | "Stnd 1611 non-null object\n", 33 | "Stnd Description 1611 non-null object\n", 34 | "Underhood ID 1611 non-null object\n", 35 | "Veh Class 1611 non-null object\n", 36 | "Air Pollution Score 1611 non-null int64\n", 37 | "City MPG 1611 non-null object\n", 38 | "Hwy MPG 1611 non-null object\n", 39 | "Cmb MPG 1611 non-null object\n", 40 | "Greenhouse Gas Score 1611 non-null int64\n", 41 | "SmartWay 1611 non-null object\n", 42 | "Comb CO2 1611 non-null object\n", 43 | "dtypes: float64(2), int64(2), object(14)\n", 44 | "memory usage: 226.6+ KB\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "import pandas as pd\n", 50 | "\n", 51 | "df_18 = pd.read_csv('all_alpha_18.csv')\n", 52 | "df_18.info()\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 29, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "\n", 65 | "RangeIndex: 2404 entries, 0 to 2403\n", 66 | "Data columns (total 18 columns):\n", 67 | "Model 2404 non-null object\n", 68 | "Displ 2404 non-null float64\n", 69 | "Cyl 2205 non-null object\n", 70 | "Trans 2205 non-null object\n", 71 | "Drive 2311 non-null object\n", 72 | "Fuel 2404 non-null object\n", 73 | "Sales Area 2404 non-null object\n", 74 | "Stnd 2404 non-null object\n", 75 | "Underhood ID 2404 non-null object\n", 76 | "Veh Class 2404 non-null object\n", 77 | "Air Pollution Score 2404 non-null object\n", 78 | "FE Calc 
Appr 2205 non-null object\n", 79 | "City MPG 2205 non-null object\n", 80 | "Hwy MPG 2205 non-null object\n", 81 | "Cmb MPG 2205 non-null object\n", 82 | "Unadj Cmb MPG 2205 non-null float64\n", 83 | "Greenhouse Gas Score 2205 non-null object\n", 84 | "SmartWay 2404 non-null object\n", 85 | "dtypes: float64(2), object(16)\n", 86 | "memory usage: 338.1+ KB\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "df_08 = pd.read_csv('all_alpha_08.csv')\n", 92 | "df_08.info()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 30, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/plain": [ 103 | "0" 104 | ] 105 | }, 106 | "execution_count": 30, 107 | "metadata": {}, 108 | "output_type": "execute_result" 109 | } 110 | ], 111 | "source": [ 112 | "sum(df_18.duplicated())" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 31, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "25" 124 | ] 125 | }, 126 | "execution_count": 31, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "sum(df_08.duplicated())" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 32, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "Gasoline 1492\n", 144 | "Ethanol/Gas 55\n", 145 | "Diesel 38\n", 146 | "Gasoline/Electricity 24\n", 147 | "Electricity 2\n", 148 | "Name: Fuel, dtype: int64" 149 | ] 150 | }, 151 | "execution_count": 32, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "df_18.Fuel.value_counts()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 33, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "Gasoline 2318\n", 169 | "ethanol/gas 72\n", 170 | "diesel 11\n", 171 | "CNG 2\n", 172 | "ethanol 1\n", 173 | "Name: Fuel, dtype: int64" 174 | ] 175 | 
}, 176 | "execution_count": 33, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "df_08.Fuel.value_counts()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 34, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "1611" 194 | ] 195 | }, 196 | "execution_count": 34, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "sum(df_18.Model.value_counts())" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 35, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/html": [ 213 | "
\n", 214 | "\n", 227 | "\n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | "
DisplCylAir Pollution ScoreGreenhouse Gas Score
count1609.0000001609.0000001611.0000001611.000000
mean3.0556875.4791803.9584114.711359
std1.3445741.7491211.8243031.657429
min1.2000003.0000001.0000001.000000
25%2.0000004.0000003.0000004.000000
50%3.0000006.0000003.0000005.000000
75%3.6000006.0000005.0000006.000000
max8.00000016.00000010.00000010.000000
\n", 296 | "
" 297 | ], 298 | "text/plain": [ 299 | " Displ Cyl Air Pollution Score Greenhouse Gas Score\n", 300 | "count 1609.000000 1609.000000 1611.000000 1611.000000\n", 301 | "mean 3.055687 5.479180 3.958411 4.711359\n", 302 | "std 1.344574 1.749121 1.824303 1.657429\n", 303 | "min 1.200000 3.000000 1.000000 1.000000\n", 304 | "25% 2.000000 4.000000 3.000000 4.000000\n", 305 | "50% 3.000000 6.000000 3.000000 5.000000\n", 306 | "75% 3.600000 6.000000 5.000000 6.000000\n", 307 | "max 8.000000 16.000000 10.000000 10.000000" 308 | ] 309 | }, 310 | "execution_count": 35, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "df_18.describe()" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 36, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/html": [ 327 | "
\n", 328 | "\n", 341 | "\n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | "
ModelDisplCylTransDriveFuelCert RegionStndStnd DescriptionUnderhood IDVeh ClassAir Pollution ScoreCity MPGHwy MPGCmb MPGGreenhouse Gas ScoreSmartWayComb CO2
0ACURA RDX3.56.0SemiAuto-62WDGasolineFAT3B125Federal Tier 3 Bin 125JHNXT03.5GV3small SUV32028235No386
1ACURA RDX3.56.0SemiAuto-62WDGasolineCAU2California LEV-II ULEVJHNXT03.5GV3small SUV32028235No386
2ACURA RDX3.56.0SemiAuto-64WDGasolineFAT3B125Federal Tier 3 Bin 125JHNXT03.5GV3small SUV31927224No402
3ACURA RDX3.56.0SemiAuto-64WDGasolineCAU2California LEV-II ULEVJHNXT03.5GV3small SUV31927224No402
4ACURA TLX2.44.0AMS-82WDGasolineCAL3ULEV125California LEV-III ULEV125JHNXV02.4WH3small car32333276No330
\n", 473 | "
" 474 | ], 475 | "text/plain": [ 476 | " Model Displ Cyl Trans Drive Fuel Cert Region Stnd \\\n", 477 | "0 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline FA T3B125 \n", 478 | "1 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline CA U2 \n", 479 | "2 ACURA RDX 3.5 6.0 SemiAuto-6 4WD Gasoline FA T3B125 \n", 480 | "3 ACURA RDX 3.5 6.0 SemiAuto-6 4WD Gasoline CA U2 \n", 481 | "4 ACURA TLX 2.4 4.0 AMS-8 2WD Gasoline CA L3ULEV125 \n", 482 | "\n", 483 | " Stnd Description Underhood ID Veh Class Air Pollution Score \\\n", 484 | "0 Federal Tier 3 Bin 125 JHNXT03.5GV3 small SUV 3 \n", 485 | "1 California LEV-II ULEV JHNXT03.5GV3 small SUV 3 \n", 486 | "2 Federal Tier 3 Bin 125 JHNXT03.5GV3 small SUV 3 \n", 487 | "3 California LEV-II ULEV JHNXT03.5GV3 small SUV 3 \n", 488 | "4 California LEV-III ULEV125 JHNXV02.4WH3 small car 3 \n", 489 | "\n", 490 | " City MPG Hwy MPG Cmb MPG Greenhouse Gas Score SmartWay Comb CO2 \n", 491 | "0 20 28 23 5 No 386 \n", 492 | "1 20 28 23 5 No 386 \n", 493 | "2 19 27 22 4 No 402 \n", 494 | "3 19 27 22 4 No 402 \n", 495 | "4 23 33 27 6 No 330 " 496 | ] 497 | }, 498 | "execution_count": 36, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "df_18.head()" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 37, 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "data": { 514 | "text/html": [ 515 | "
\n", 516 | "\n", 529 | "\n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | "
DisplUnadj Cmb MPG
count2404.0000002205.000000
mean3.74891823.916104
std1.3357856.366170
min1.30000010.018400
25%2.50000019.113900
50%3.50000023.921300
75%4.80000027.869300
max8.40000065.777800
\n", 580 | "
" 581 | ], 582 | "text/plain": [ 583 | " Displ Unadj Cmb MPG\n", 584 | "count 2404.000000 2205.000000\n", 585 | "mean 3.748918 23.916104\n", 586 | "std 1.335785 6.366170\n", 587 | "min 1.300000 10.018400\n", 588 | "25% 2.500000 19.113900\n", 589 | "50% 3.500000 23.921300\n", 590 | "75% 4.800000 27.869300\n", 591 | "max 8.400000 65.777800" 592 | ] 593 | }, 594 | "execution_count": 37, 595 | "metadata": {}, 596 | "output_type": "execute_result" 597 | } 598 | ], 599 | "source": [ 600 | "df_08.describe()\n" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": 38, 606 | "metadata": {}, 607 | "outputs": [ 608 | { 609 | "data": { 610 | "text/plain": [ 611 | "2205" 612 | ] 613 | }, 614 | "execution_count": 38, 615 | "metadata": {}, 616 | "output_type": "execute_result" 617 | } 618 | ], 619 | "source": [ 620 | "sum(df_08.Cyl.value_counts())" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 39, 626 | "metadata": {}, 627 | "outputs": [ 628 | { 629 | "data": { 630 | "text/plain": [ 631 | "Model 436\n", 632 | "Displ 47\n", 633 | "Cyl 8\n", 634 | "Trans 14\n", 635 | "Drive 2\n", 636 | "Fuel 5\n", 637 | "Sales Area 3\n", 638 | "Stnd 12\n", 639 | "Underhood ID 343\n", 640 | "Veh Class 9\n", 641 | "Air Pollution Score 13\n", 642 | "FE Calc Appr 2\n", 643 | "City MPG 39\n", 644 | "Hwy MPG 43\n", 645 | "Cmb MPG 38\n", 646 | "Unadj Cmb MPG 721\n", 647 | "Greenhouse Gas Score 20\n", 648 | "SmartWay 2\n", 649 | "dtype: int64" 650 | ] 651 | }, 652 | "execution_count": 39, 653 | "metadata": {}, 654 | "output_type": "execute_result" 655 | } 656 | ], 657 | "source": [ 658 | "df_08.nunique()" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": 40, 664 | "metadata": {}, 665 | "outputs": [ 666 | { 667 | "data": { 668 | "text/plain": [ 669 | "Model 367\n", 670 | "Displ 36\n", 671 | "Cyl 7\n", 672 | "Trans 26\n", 673 | "Drive 2\n", 674 | "Fuel 5\n", 675 | "Cert Region 2\n", 676 | "Stnd 19\n", 677 | "Stnd Description 
19\n", 678 | "Underhood ID 230\n", 679 | "Veh Class 9\n", 680 | "Air Pollution Score 6\n", 681 | "City MPG 58\n", 682 | "Hwy MPG 62\n", 683 | "Cmb MPG 57\n", 684 | "Greenhouse Gas Score 10\n", 685 | "SmartWay 3\n", 686 | "Comb CO2 299\n", 687 | "dtype: int64" 688 | ] 689 | }, 690 | "execution_count": 40, 691 | "metadata": {}, 692 | "output_type": "execute_result" 693 | } 694 | ], 695 | "source": [ 696 | "df_18.nunique()" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": null, 702 | "metadata": { 703 | "collapsed": true 704 | }, 705 | "outputs": [], 706 | "source": [] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": null, 711 | "metadata": { 712 | "collapsed": true 713 | }, 714 | "outputs": [], 715 | "source": [] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": { 721 | "collapsed": true 722 | }, 723 | "outputs": [], 724 | "source": [] 725 | } 726 | ], 727 | "metadata": { 728 | "kernelspec": { 729 | "display_name": "Python 3", 730 | "language": "python", 731 | "name": "python3" 732 | }, 733 | "language_info": { 734 | "codemirror_mode": { 735 | "name": "ipython", 736 | "version": 3 737 | }, 738 | "file_extension": ".py", 739 | "mimetype": "text/x-python", 740 | "name": "python", 741 | "nbconvert_exporter": "python", 742 | "pygments_lexer": "ipython3", 743 | "version": "3.6.1" 744 | } 745 | }, 746 | "nbformat": 4, 747 | "nbformat_minor": 2 748 | } 749 | -------------------------------------------------------------------------------- /assessing_quiz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Assessing\n", 8 | "Use the space below to explore `winequality-red.csv` and `winequality-white.csv` to answer the quiz questions below.\n", 9 | "\n", 10 | "Assessing Data\n", 11 | "Using Pandas, explore winequality-red.csv and winequality-white.csv in 
the Jupyter notebook below to answer quiz questions below the notebook about these characteristics of the datasets:\n", 12 | "\n", 13 | "number of samples in each dataset\n", 14 | "number of columns in each dataset\n", 15 | "features with missing values\n", 16 | "duplicate rows in the white wine dataset\n", 17 | "number of unique values for quality in each dataset\n", 18 | "mean density of the red wine dataset\n", 19 | "This data was originally taken from here:\n", 20 | "https://archive.ics.uci.edu/ml/datasets/Wine+Quality" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import pandas as pd\n", 32 | "# matplotlib inline\n", 33 | "\n", 34 | "df_red = pd.read_csv('winequality-red.csv', sep = ';')\n", 35 | "df_white = pd.read_csv('winequality-white.csv',sep = ';')" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "\n", 48 | "RangeIndex: 1599 entries, 0 to 1598\n", 49 | "Data columns (total 12 columns):\n", 50 | "fixed acidity 1599 non-null float64\n", 51 | "volatile acidity 1599 non-null float64\n", 52 | "citric acid 1599 non-null float64\n", 53 | "residual sugar 1599 non-null float64\n", 54 | "chlorides 1599 non-null float64\n", 55 | "free sulfur dioxide 1599 non-null float64\n", 56 | "total sulfur dioxide 1599 non-null float64\n", 57 | "density 1599 non-null float64\n", 58 | "pH 1599 non-null float64\n", 59 | "sulphates 1599 non-null float64\n", 60 | "alcohol 1599 non-null float64\n", 61 | "quality 1599 non-null int64\n", 62 | "dtypes: float64(11), int64(1)\n", 63 | "memory usage: 150.0 KB\n", 64 | "\n", 65 | "RangeIndex: 4898 entries, 0 to 4897\n", 66 | "Data columns (total 12 columns):\n", 67 | "fixed acidity 4898 non-null float64\n", 68 | "volatile acidity 4898 non-null float64\n", 69 | "citric acid 
4898 non-null float64\n", 70 | "residual sugar 4898 non-null float64\n", 71 | "chlorides 4898 non-null float64\n", 72 | "free sulfur dioxide 4898 non-null float64\n", 73 | "total sulfur dioxide 4898 non-null float64\n", 74 | "density 4898 non-null float64\n", 75 | "pH 4898 non-null float64\n", 76 | "sulphates 4898 non-null float64\n", 77 | "alcohol 4898 non-null float64\n", 78 | "quality 4898 non-null int64\n", 79 | "dtypes: float64(11), int64(1)\n", 80 | "memory usage: 459.3 KB\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "df_red.info()\n", 86 | "df_white.info()" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 3, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/html": [ 97 | "
\n", 98 | "\n", 111 | "\n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
07.40.700.001.90.07611.034.00.99783.510.569.45
17.80.880.002.60.09825.067.00.99683.200.689.85
27.80.760.042.30.09215.054.00.99703.260.659.85
311.20.280.561.90.07517.060.00.99803.160.589.86
47.40.700.001.90.07611.034.00.99783.510.569.45
\n", 207 | "
" 208 | ], 209 | "text/plain": [ 210 | " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", 211 | "0 7.4 0.70 0.00 1.9 0.076 \n", 212 | "1 7.8 0.88 0.00 2.6 0.098 \n", 213 | "2 7.8 0.76 0.04 2.3 0.092 \n", 214 | "3 11.2 0.28 0.56 1.9 0.075 \n", 215 | "4 7.4 0.70 0.00 1.9 0.076 \n", 216 | "\n", 217 | " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", 218 | "0 11.0 34.0 0.9978 3.51 0.56 \n", 219 | "1 25.0 67.0 0.9968 3.20 0.68 \n", 220 | "2 15.0 54.0 0.9970 3.26 0.65 \n", 221 | "3 17.0 60.0 0.9980 3.16 0.58 \n", 222 | "4 11.0 34.0 0.9978 3.51 0.56 \n", 223 | "\n", 224 | " alcohol quality \n", 225 | "0 9.4 5 \n", 226 | "1 9.8 5 \n", 227 | "2 9.8 5 \n", 228 | "3 9.8 6 \n", 229 | "4 9.4 5 " 230 | ] 231 | }, 232 | "execution_count": 3, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "df_red.head()" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 4, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/plain": [ 249 | "240" 250 | ] 251 | }, 252 | "execution_count": 4, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "#number of duplicate rows\n", 259 | "sum(df_red.duplicated())" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 5, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "937" 271 | ] 272 | }, 273 | "execution_count": 5, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "#number of duplicate rows\n", 280 | "sum(df_white.duplicated())" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 7, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "name": "stdout", 290 | "output_type": "stream", 291 | "text": [ 292 | "Unique values of quality for red wine are 6\n", 293 | "unique values of quality for white wine are 7\n" 294 | ] 
295 | } 296 | ], 297 | "source": [ 298 | "#number of unique values of quality\n", 299 | "print(\"Unique values of quality for red wine are {}\".format(len(df_red.quality.unique())))\n", 300 | "print(\"unique values of quality for white wine are {}\".format(len(df_white.quality.unique())))" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 8, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "0.9967466791744833" 312 | ] 313 | }, 314 | "execution_count": 8, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "df_red['density'].mean()" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "# Questions and Answers to Quiz\n", 328 | "\n", 329 | "Answers derived by code calculations provided above\n", 330 | "\n", 331 | "- How many samples of red wine are there? 1599\n", 332 | "- How many samples of white wine are there? 4898\n", 333 | "- How many columns are in each dataset? 12\n", 334 | "- Which features have missing values? None of these features have missing values\n", 335 | "- How many duplicate rows are in the white wine dataset? 937\n", 336 | "- Are duplicate rows in these datasets significant/ need to be dropped? Not necessarily\n", 337 | "- How many unique values of quality are in the red wine dataset? 6\n", 338 | "- How many unique values of quality are in the white wine dataset? 7\n", 339 | "- What is the mean density in the red wine dataset? 
0.996747\n", 340 | "\n", 341 | "\n" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "collapsed": true 349 | }, 350 | "outputs": [], 351 | "source": [] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "Python 3", 357 | "language": "python", 358 | "name": "python3" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.6.2" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 2 375 | } 376 | -------------------------------------------------------------------------------- /cleaning_column_labels.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Cleaning Column Labels\n", 8 | "Use `all_alpha_08.csv` and `all_alpha_18.csv`" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "import pandas as pd" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "# load datasets\n", 31 | "\n", 32 | "df_08 = pd.read_csv('all_alpha_08.csv')\n", 33 | "df_18 = pd.read_csv('all_alpha_18.csv')" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/html": [ 44 | "
\n", 45 | "\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | "
ModelDisplCylTransDriveFuelSales AreaStndUnderhood IDVeh ClassAir Pollution ScoreFE Calc ApprCity MPGHwy MPGCmb MPGUnadj Cmb MPGGreenhouse Gas ScoreSmartWay
0ACURA MDX3.7(6 cyl)Auto-S54WDGasolineCAU28HNXT03.7PKRSUV7Drv15201722.05274no
\n", 106 | "
" 107 | ], 108 | "text/plain": [ 109 | " Model Displ Cyl Trans Drive Fuel Sales Area Stnd \\\n", 110 | "0 ACURA MDX 3.7 (6 cyl) Auto-S5 4WD Gasoline CA U2 \n", 111 | "\n", 112 | " Underhood ID Veh Class Air Pollution Score FE Calc Appr City MPG Hwy MPG \\\n", 113 | "0 8HNXT03.7PKR SUV 7 Drv 15 20 \n", 114 | "\n", 115 | " Cmb MPG Unadj Cmb MPG Greenhouse Gas Score SmartWay \n", 116 | "0 17 22.0527 4 no " 117 | ] 118 | }, 119 | "execution_count": 3, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "# view 2008 dataset\n", 126 | "df_08.head(1)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 4, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/html": [ 137 | "
\n", 138 | "\n", 151 | "\n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | "
ModelDisplCylTransDriveFuelCert RegionStndStnd DescriptionUnderhood IDVeh ClassAir Pollution ScoreCity MPGHwy MPGCmb MPGGreenhouse Gas ScoreSmartWayComb CO2
0ACURA RDX3.56.0SemiAuto-62WDGasolineFAT3B125Federal Tier 3 Bin 125JHNXT03.5GV3small SUV32028235No386
\n", 199 | "
" 200 | ], 201 | "text/plain": [ 202 | " Model Displ Cyl Trans Drive Fuel Cert Region Stnd \\\n", 203 | "0 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline FA T3B125 \n", 204 | "\n", 205 | " Stnd Description Underhood ID Veh Class Air Pollution Score \\\n", 206 | "0 Federal Tier 3 Bin 125 JHNXT03.5GV3 small SUV 3 \n", 207 | "\n", 208 | " City MPG Hwy MPG Cmb MPG Greenhouse Gas Score SmartWay Comb CO2 \n", 209 | "0 20 28 23 5 No 386 " 210 | ] 211 | }, 212 | "execution_count": 4, 213 | "metadata": {}, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "# view 2018 dataset\n", 219 | "df_18.head(1)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "### Drop Extraneous Columns" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 5, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/html": [ 237 | "
\n", 238 | "\n", 251 | "\n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | "
ModelDisplCylTransDriveFuelSales AreaVeh ClassAir Pollution ScoreCity MPGHwy MPGCmb MPGGreenhouse Gas ScoreSmartWay
0ACURA MDX3.7(6 cyl)Auto-S54WDGasolineCASUV71520174no
\n", 291 | "
" 292 | ], 293 | "text/plain": [ 294 | " Model Displ Cyl Trans Drive Fuel Sales Area Veh Class \\\n", 295 | "0 ACURA MDX 3.7 (6 cyl) Auto-S5 4WD Gasoline CA SUV \n", 296 | "\n", 297 | " Air Pollution Score City MPG Hwy MPG Cmb MPG Greenhouse Gas Score SmartWay \n", 298 | "0 7 15 20 17 4 no " 299 | ] 300 | }, 301 | "execution_count": 5, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "# drop columns from 2008 dataset\n", 308 | "df_08.drop(['Stnd', 'Underhood ID', 'FE Calc Appr', 'Unadj Cmb MPG'], axis=1, inplace=True)\n", 309 | "\n", 310 | "# confirm changes\n", 311 | "df_08.head(1)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 6, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "data": { 321 | "text/html": [ 322 | "
\n", 323 | "\n", 336 | "\n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | "
ModelDisplCylTransDriveFuelCert RegionVeh ClassAir Pollution ScoreCity MPGHwy MPGCmb MPGGreenhouse Gas ScoreSmartWay
0ACURA RDX3.56.0SemiAuto-62WDGasolineFAsmall SUV32028235No
1ACURA RDX3.56.0SemiAuto-62WDGasolineCAsmall SUV32028235No
2ACURA RDX3.56.0SemiAuto-64WDGasolineFAsmall SUV31927224No
3ACURA RDX3.56.0SemiAuto-64WDGasolineCAsmall SUV31927224No
4ACURA TLX2.44.0AMS-82WDGasolineCAsmall car32333276No
\n", 444 | "
" 445 | ], 446 | "text/plain": [ 447 | " Model Displ Cyl Trans Drive Fuel Cert Region Veh Class \\\n", 448 | "0 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline FA small SUV \n", 449 | "1 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline CA small SUV \n", 450 | "2 ACURA RDX 3.5 6.0 SemiAuto-6 4WD Gasoline FA small SUV \n", 451 | "3 ACURA RDX 3.5 6.0 SemiAuto-6 4WD Gasoline CA small SUV \n", 452 | "4 ACURA TLX 2.4 4.0 AMS-8 2WD Gasoline CA small car \n", 453 | "\n", 454 | " Air Pollution Score City MPG Hwy MPG Cmb MPG Greenhouse Gas Score SmartWay \n", 455 | "0 3 20 28 23 5 No \n", 456 | "1 3 20 28 23 5 No \n", 457 | "2 3 19 27 22 4 No \n", 458 | "3 3 19 27 22 4 No \n", 459 | "4 3 23 33 27 6 No " 460 | ] 461 | }, 462 | "execution_count": 6, 463 | "metadata": {}, 464 | "output_type": "execute_result" 465 | } 466 | ], 467 | "source": [ 468 | "# drop columns from 2018 dataset\n", 469 | "df_18.drop(['Stnd', 'Stnd Description', 'Underhood ID', 'Comb CO2'], axis=1, inplace=True)\n", 470 | "\n", 471 | "# confirm changes\n", 472 | "df_18.head()" 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": {}, 478 | "source": [ 479 | "### Rename Columns" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 17, 485 | "metadata": { 486 | "scrolled": false 487 | }, 488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/html": [ 492 | "
\n", 493 | "\n", 506 | "\n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | "
modeldisplcyltransdrivefuelCert Regionveh_classair_pollution_scorecity_mpghwy_mpgcmb_mpggreenhouse_gas_scoresmartway
0ACURA MDX3.7(6 cyl)Auto-S54WDGasolineCASUV71520174no
\n", 546 | "
" 547 | ], 548 | "text/plain": [ 549 | " model displ cyl trans drive fuel Cert Region veh_class \\\n", 550 | "0 ACURA MDX 3.7 (6 cyl) Auto-S5 4WD Gasoline CA SUV \n", 551 | "\n", 552 | " air_pollution_score city_mpg hwy_mpg cmb_mpg greenhouse_gas_score smartway \n", 553 | "0 7 15 20 17 4 no " 554 | ] 555 | }, 556 | "execution_count": 17, 557 | "metadata": {}, 558 | "output_type": "execute_result" 559 | } 560 | ], 561 | "source": [ 562 | "# rename Sales Area to Cert Region\n", 563 | "names_cols = list(df_08.columns.values)\n", 564 | "names_cols[6] = 'Cert Region'\n", 565 | "df_08.columns = names_cols\n", 566 | "\n", 567 | "# confirm changes\n", 568 | "df_08.head(1)" 569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": 18, 574 | "metadata": {}, 575 | "outputs": [ 576 | { 577 | "data": { 578 | "text/html": [ 579 | "
\n", 580 | "\n", 593 | "\n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | "
modeldisplcyltransdrivefuelcert_regionveh_classair_pollution_scorecity_mpghwy_mpgcmb_mpggreenhouse_gas_scoresmartway
0ACURA MDX3.7(6 cyl)Auto-S54WDGasolineCASUV71520174no
\n", 633 | "
" 634 | ], 635 | "text/plain": [ 636 | " model displ cyl trans drive fuel cert_region veh_class \\\n", 637 | "0 ACURA MDX 3.7 (6 cyl) Auto-S5 4WD Gasoline CA SUV \n", 638 | "\n", 639 | " air_pollution_score city_mpg hwy_mpg cmb_mpg greenhouse_gas_score smartway \n", 640 | "0 7 15 20 17 4 no " 641 | ] 642 | }, 643 | "execution_count": 18, 644 | "metadata": {}, 645 | "output_type": "execute_result" 646 | } 647 | ], 648 | "source": [ 649 | "# replace spaces with underscores and lowercase labels for 2008 dataset\n", 650 | "df_08.rename(columns=lambda x: x.strip().lower().replace(\" \", \"_\"), inplace=True)\n", 651 | "\n", 652 | "# confirm changes\n", 653 | "df_08.head(1)" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 19, 659 | "metadata": {}, 660 | "outputs": [ 661 | { 662 | "data": { 663 | "text/html": [ 664 | "
\n", 665 | "\n", 678 | "\n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | "
modeldisplcyltransdrivefuelcert_regionveh_classair_pollution_scorecity_mpghwy_mpgcmb_mpggreenhouse_gas_scoresmartway
0ACURA RDX3.56.0SemiAuto-62WDGasolineFAsmall SUV32028235No
\n", 718 | "
" 719 | ], 720 | "text/plain": [ 721 | " model displ cyl trans drive fuel cert_region veh_class \\\n", 722 | "0 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline FA small SUV \n", 723 | "\n", 724 | " air_pollution_score city_mpg hwy_mpg cmb_mpg greenhouse_gas_score smartway \n", 725 | "0 3 20 28 23 5 No " 726 | ] 727 | }, 728 | "execution_count": 19, 729 | "metadata": {}, 730 | "output_type": "execute_result" 731 | } 732 | ], 733 | "source": [ 734 | "# replace spaces with underscores and lowercase labels for 2018 dataset\n", 735 | "df_18.rename(columns=lambda x: x.strip().lower().replace(\" \", \"_\"), inplace=True)\n", 736 | "\n", 737 | "# confirm changes\n", 738 | "df_18.head(1)" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": 20, 744 | "metadata": {}, 745 | "outputs": [ 746 | { 747 | "data": { 748 | "text/plain": [ 749 | "array([ True, True, True, True, True, True, True, True, True,\n", 750 | " True, True, True, True, True], dtype=bool)" 751 | ] 752 | }, 753 | "execution_count": 20, 754 | "metadata": {}, 755 | "output_type": "execute_result" 756 | } 757 | ], 758 | "source": [ 759 | "# confirm column labels for 2008 and 2018 datasets are identical\n", 760 | "df_08.columns == df_18.columns" 761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": 21, 766 | "metadata": {}, 767 | "outputs": [ 768 | { 769 | "data": { 770 | "text/plain": [ 771 | "True" 772 | ] 773 | }, 774 | "execution_count": 21, 775 | "metadata": {}, 776 | "output_type": "execute_result" 777 | } 778 | ], 779 | "source": [ 780 | "# make sure they're all identical like this\n", 781 | "(df_08.columns == df_18.columns).all()" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": 22, 787 | "metadata": { 788 | "collapsed": true 789 | }, 790 | "outputs": [], 791 | "source": [ 792 | "# save new datasets for next section\n", 793 | "df_08.to_csv('data_08.csv', index=False)\n", 794 | "df_18.to_csv('data_18.csv', index=False)" 795 | ] 796 | }, 797 
| { 798 | "cell_type": "code", 799 | "execution_count": null, 800 | "metadata": { 801 | "collapsed": true 802 | }, 803 | "outputs": [], 804 | "source": [] 805 | } 806 | ], 807 | "metadata": { 808 | "kernelspec": { 809 | "display_name": "Python 3", 810 | "language": "python", 811 | "name": "python3" 812 | }, 813 | "language_info": { 814 | "codemirror_mode": { 815 | "name": "ipython", 816 | "version": 3 817 | }, 818 | "file_extension": ".py", 819 | "mimetype": "text/x-python", 820 | "name": "python", 821 | "nbconvert_exporter": "python", 822 | "pygments_lexer": "ipython3", 823 | "version": "3.6.1" 824 | } 825 | }, 826 | "nbformat": 4, 827 | "nbformat_minor": 2 828 | } 829 | -------------------------------------------------------------------------------- /cleaning_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Cleaning Practice\n", 8 | "Let's first practice handling missing values and duplicate data with `cancer_data_means.csv`." 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 2, 14 | "metadata": {}, 15 | "outputs": [ 16 | { 17 | "name": "stdout", 18 | "output_type": "stream", 19 | "text": [ 20 | "\n", 21 | "RangeIndex: 569 entries, 0 to 568\n", 22 | "Data columns (total 11 columns):\n", 23 | "id 569 non-null int64\n", 24 | "diagnosis 569 non-null object\n", 25 | "radius_mean 569 non-null float64\n", 26 | "texture_mean 548 non-null float64\n", 27 | "perimeter_mean 569 non-null float64\n", 28 | "area_mean 569 non-null float64\n", 29 | "smoothness_mean 521 non-null float64\n", 30 | "compactness_mean 569 non-null float64\n", 31 | "concavity_mean 569 non-null float64\n", 32 | "concave_points_mean 569 non-null float64\n", 33 | "symmetry_mean 504 non-null float64\n", 34 | "dtypes: float64(9), int64(1), object(1)\n", 35 | "memory usage: 49.0+ KB\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "# import pandas and load cancer data\n", 41 | "import pandas as pd\n", 42 | "\n", 43 | "df_cancer = pd.read_csv('cancer_data_means.csv')\n", 44 | "\n", 45 | "# check which columns have missing values with info()\n", 46 | "df_cancer.info()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "\n", 59 | "RangeIndex: 569 entries, 0 to 568\n", 60 | "Data columns (total 11 columns):\n", 61 | "id 569 non-null int64\n", 62 | "diagnosis 569 non-null object\n", 63 | "radius_mean 569 non-null float64\n", 64 | "texture_mean 569 non-null float64\n", 65 | "perimeter_mean 569 non-null float64\n", 66 | "area_mean 569 non-null float64\n", 67 | "smoothness_mean 521 non-null float64\n", 68 | "compactness_mean 569 non-null float64\n", 69 | "concavity_mean 569 non-null float64\n", 70 | "concave_points_mean 569 non-null float64\n", 71 | "symmetry_mean 504 non-null float64\n", 72 | "dtypes: float64(9), int64(1), object(1)\n", 73 | "memory usage: 49.0+ KB\n" 74 | ] 75 | 
} 76 | ], 77 | "source": [ 78 | "# use means to fill in missing values\n", 79 | "mean_texture = df_cancer['texture_mean'].mean()\n", 80 | "df_cancer['texture_mean'].fillna(mean_texture, inplace = True)\n", 81 | "\n", 82 | "# confirm your correction with info()\n", 83 | "df_cancer.info(0)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 4, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "\n", 96 | "RangeIndex: 569 entries, 0 to 568\n", 97 | "Data columns (total 11 columns):\n", 98 | "id 569 non-null int64\n", 99 | "diagnosis 569 non-null object\n", 100 | "radius_mean 569 non-null float64\n", 101 | "texture_mean 569 non-null float64\n", 102 | "perimeter_mean 569 non-null float64\n", 103 | "area_mean 569 non-null float64\n", 104 | "smoothness_mean 569 non-null float64\n", 105 | "compactness_mean 569 non-null float64\n", 106 | "concavity_mean 569 non-null float64\n", 107 | "concave_points_mean 569 non-null float64\n", 108 | "symmetry_mean 504 non-null float64\n", 109 | "dtypes: float64(9), int64(1), object(1)\n", 110 | "memory usage: 49.0+ KB\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "# use means to fill in missing values\n", 116 | "#convert for smoothness_mean\n", 117 | "mean_smoothness = df_cancer['smoothness_mean'].mean()\n", 118 | "df_cancer['smoothness_mean'].fillna(mean_smoothness, inplace = True)\n", 119 | "\n", 120 | "# confirm your correction with info()\n", 121 | "df_cancer.info(0)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 5, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "\n", 134 | "RangeIndex: 569 entries, 0 to 568\n", 135 | "Data columns (total 11 columns):\n", 136 | "id 569 non-null int64\n", 137 | "diagnosis 569 non-null object\n", 138 | "radius_mean 569 non-null float64\n", 139 | "texture_mean 569 non-null float64\n", 140 | 
"perimeter_mean 569 non-null float64\n", 141 | "area_mean 569 non-null float64\n", 142 | "smoothness_mean 569 non-null float64\n", 143 | "compactness_mean 569 non-null float64\n", 144 | "concavity_mean 569 non-null float64\n", 145 | "concave_points_mean 569 non-null float64\n", 146 | "symmetry_mean 569 non-null float64\n", 147 | "dtypes: float64(9), int64(1), object(1)\n", 148 | "memory usage: 49.0+ KB\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "# use means to fill in missing values\n", 154 | "#convert for symmetry_mean\n", 155 | "mean_symmetry = df_cancer['symmetry_mean'].mean()\n", 156 | "df_cancer['symmetry_mean'].fillna(mean_symmetry, inplace = True)\n", 157 | "\n", 158 | "# confirm your correction with info()\n", 159 | "df_cancer.info(0)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 8, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "5" 171 | ] 172 | }, 173 | "execution_count": 8, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "# check for duplicates in the data\n", 180 | "sum(df_cancer.duplicated())" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 10, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "# drop duplicates\n", 190 | "df_cancer.drop_duplicates(inplace = True)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 11, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "data": { 200 | "text/plain": [ 201 | "0" 202 | ] 203 | }, 204 | "execution_count": 11, 205 | "metadata": {}, 206 | "output_type": "execute_result" 207 | } 208 | ], 209 | "source": [ 210 | "# confirm correction by rechecking for duplicates in the data\n", 211 | "sum(df_cancer.duplicated())" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## Renaming Columns\n", 219 | "Since we also previously changed our dataset to only 
include means of tumor features, the \"_mean\" at the end of each feature seems unnecessary. It just takes extra time to type in our analysis later. Let's come up with a list of new labels to assign to our columns." 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 13, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "['id',\n", 231 | " 'diagnosis',\n", 232 | " 'radius',\n", 233 | " 'texture',\n", 234 | " 'perimeter',\n", 235 | " 'area',\n", 236 | " 'smoothness',\n", 237 | " 'compactness',\n", 238 | " 'concavity',\n", 239 | " 'concave_points',\n", 240 | " 'symmetry']" 241 | ] 242 | }, 243 | "execution_count": 13, 244 | "metadata": {}, 245 | "output_type": "execute_result" 246 | } 247 | ], 248 | "source": [ 249 | "# remove \"_mean\" from column names\n", 250 | "new_labels = []\n", 251 | "for col in df_cancer.columns:\n", 252 | " if '_mean' in col:\n", 253 | " new_labels.append(col[:-5]) # drop the trailing '_mean' (last 5 characters)\n", 254 | " else:\n", 255 | " new_labels.append(col)\n", 256 | "\n", 257 | "# new labels for our columns\n", 258 | "new_labels" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 14, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/html": [ 269 | "
\n", 270 | "\n", 283 | "\n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | "
iddiagnosisradiustextureperimeterareasmoothnesscompactnessconcavityconcave_pointssymmetry
0842302M17.9919.293431122.801001.00.1184000.277600.30010.147100.2419
1842517M20.5717.770000132.901326.00.0847400.078640.08690.070170.1812
284300903M19.6921.250000130.001203.00.1096000.159900.19740.127900.2069
384348301M11.4220.38000077.58386.10.0960870.283900.24140.105200.2597
484358402M20.2914.340000135.101297.00.1003000.132800.19800.104300.1809
\n", 373 | "
" 374 | ], 375 | "text/plain": [ 376 | " id diagnosis radius texture perimeter area smoothness \\\n", 377 | "0 842302 M 17.99 19.293431 122.80 1001.0 0.118400 \n", 378 | "1 842517 M 20.57 17.770000 132.90 1326.0 0.084740 \n", 379 | "2 84300903 M 19.69 21.250000 130.00 1203.0 0.109600 \n", 380 | "3 84348301 M 11.42 20.380000 77.58 386.1 0.096087 \n", 381 | "4 84358402 M 20.29 14.340000 135.10 1297.0 0.100300 \n", 382 | "\n", 383 | " compactness concavity concave_points symmetry \n", 384 | "0 0.27760 0.3001 0.14710 0.2419 \n", 385 | "1 0.07864 0.0869 0.07017 0.1812 \n", 386 | "2 0.15990 0.1974 0.12790 0.2069 \n", 387 | "3 0.28390 0.2414 0.10520 0.2597 \n", 388 | "4 0.13280 0.1980 0.10430 0.1809 " 389 | ] 390 | }, 391 | "execution_count": 14, 392 | "metadata": {}, 393 | "output_type": "execute_result" 394 | } 395 | ], 396 | "source": [ 397 | "# assign new labels to columns in dataframe\n", 398 | "df_cancer.columns = new_labels\n", 399 | "\n", 400 | "# display first few rows of dataframe to confirm changes\n", 401 | "df_cancer.head()" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 16, 407 | "metadata": { 408 | "collapsed": true 409 | }, 410 | "outputs": [], 411 | "source": [ 412 | "# save this for later\n", 413 | "df_cancer.to_csv('cancer_data_edited.csv', index=False)" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "metadata": { 420 | "collapsed": true 421 | }, 422 | "outputs": [], 423 | "source": [] 424 | } 425 | ], 426 | "metadata": { 427 | "kernelspec": { 428 | "display_name": "Python 3", 429 | "language": "python", 430 | "name": "python3" 431 | }, 432 | "language_info": { 433 | "codemirror_mode": { 434 | "name": "ipython", 435 | "version": 3 436 | }, 437 | "file_extension": ".py", 438 | "mimetype": "text/x-python", 439 | "name": "python", 440 | "nbconvert_exporter": "python", 441 | "pygments_lexer": "ipython3", 442 | "version": "3.6.1" 443 | } 444 | }, 445 | "nbformat": 4, 446 | 
"nbformat_minor": 2 447 | } 448 | -------------------------------------------------------------------------------- /conclusions_groupby.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Drawing Conclusions Using Groupby" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "#Import packages\n", 19 | "import pandas as pd\n", 20 | "import numpy as np\n", 21 | "from matplotlib import pyplot as plt\n", 22 | "import matplotlib\n", 23 | "% matplotlib inline" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | "\n", 36 | "RangeIndex: 6497 entries, 0 to 6496\n", 37 | "Data columns (total 13 columns):\n", 38 | "fixed_acidity 6497 non-null float64\n", 39 | "volatile_acidity 6497 non-null float64\n", 40 | "citric_acid 6497 non-null float64\n", 41 | "residual_sugar 6497 non-null float64\n", 42 | "chlorides 6497 non-null float64\n", 43 | "free_sulfur_dioxide 6497 non-null float64\n", 44 | "total_sulfur_dioxide 6497 non-null float64\n", 45 | "density 6497 non-null float64\n", 46 | "pH 6497 non-null float64\n", 47 | "sulphates 6497 non-null float64\n", 48 | "alcohol 6497 non-null float64\n", 49 | "quality 6497 non-null int64\n", 50 | "color 6497 non-null object\n", 51 | "dtypes: float64(11), int64(1), object(1)\n", 52 | "memory usage: 659.9+ KB\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "# Load `winequality_edited.csv`\n", 58 | "wine_df = pd.read_csv('winequality_edited.csv')\n", 59 | "wine_df.info()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "collapsed": true 66 | }, 67 | "source": [ 68 | "### Is a certain type of wine associated with higher quality?" 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "color\n", 80 | "red 5.636023\n", 81 | "white 5.877909\n", 82 | "Name: quality, dtype: float64" 83 | ] 84 | }, 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "# Find the mean quality of each wine type (red and white) with groupby\n", 92 | "wine_df.groupby('color')['quality'].mean()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### What level of acidity receives the highest average rating?" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 7, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/html": [ 110 | "
\n", 111 | "\n", 124 | "\n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | "
fixed_acidityvolatile_aciditycitric_acidresidual_sugarchloridesfree_sulfur_dioxidetotal_sulfur_dioxidedensitypHsulphatesalcoholquality
count6497.0000006497.0000006497.0000006497.0000006497.0000006497.0000006497.0000006497.0000006497.0000006497.0000006497.0000006497.000000
mean7.2153070.3396660.3186335.4432350.05603430.525319115.7445740.9946973.2185010.53126810.4918015.818378
std1.2964340.1646360.1453184.7578040.03503417.74940056.5218550.0029990.1607870.1488061.1927120.873255
min3.8000000.0800000.0000000.6000000.0090001.0000006.0000000.9871102.7200000.2200008.0000003.000000
25%6.4000000.2300000.2500001.8000000.03800017.00000077.0000000.9923403.1100000.4300009.5000005.000000
50%7.0000000.2900000.3100003.0000000.04700029.000000118.0000000.9948903.2100000.51000010.3000006.000000
75%7.7000000.4000000.3900008.1000000.06500041.000000156.0000000.9969903.3200000.60000011.3000006.000000
max15.9000001.5800001.66000065.8000000.611000289.000000440.0000001.0389804.0100002.00000014.9000009.000000
\n", 265 | "
" 266 | ], 267 | "text/plain": [ 268 | " fixed_acidity volatile_acidity citric_acid residual_sugar \\\n", 269 | "count 6497.000000 6497.000000 6497.000000 6497.000000 \n", 270 | "mean 7.215307 0.339666 0.318633 5.443235 \n", 271 | "std 1.296434 0.164636 0.145318 4.757804 \n", 272 | "min 3.800000 0.080000 0.000000 0.600000 \n", 273 | "25% 6.400000 0.230000 0.250000 1.800000 \n", 274 | "50% 7.000000 0.290000 0.310000 3.000000 \n", 275 | "75% 7.700000 0.400000 0.390000 8.100000 \n", 276 | "max 15.900000 1.580000 1.660000 65.800000 \n", 277 | "\n", 278 | " chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n", 279 | "count 6497.000000 6497.000000 6497.000000 6497.000000 \n", 280 | "mean 0.056034 30.525319 115.744574 0.994697 \n", 281 | "std 0.035034 17.749400 56.521855 0.002999 \n", 282 | "min 0.009000 1.000000 6.000000 0.987110 \n", 283 | "25% 0.038000 17.000000 77.000000 0.992340 \n", 284 | "50% 0.047000 29.000000 118.000000 0.994890 \n", 285 | "75% 0.065000 41.000000 156.000000 0.996990 \n", 286 | "max 0.611000 289.000000 440.000000 1.038980 \n", 287 | "\n", 288 | " pH sulphates alcohol quality \n", 289 | "count 6497.000000 6497.000000 6497.000000 6497.000000 \n", 290 | "mean 3.218501 0.531268 10.491801 5.818378 \n", 291 | "std 0.160787 0.148806 1.192712 0.873255 \n", 292 | "min 2.720000 0.220000 8.000000 3.000000 \n", 293 | "25% 3.110000 0.430000 9.500000 5.000000 \n", 294 | "50% 3.210000 0.510000 10.300000 6.000000 \n", 295 | "75% 3.320000 0.600000 11.300000 6.000000 \n", 296 | "max 4.010000 2.000000 14.900000 9.000000 " 297 | ] 298 | }, 299 | "execution_count": 7, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "# View the min, 25%, 50%, 75%, max pH values with Pandas describe\n", 306 | "# High: Lowest 25% of pH values\n", 307 | "# Moderately High: 25% - 50% of pH values\n", 308 | "# Medium: 50% - 75% of pH values\n", 309 | "# Low: 75% - max pH value\n", 310 | "wine_df.describe()" 311 | ] 312 | }, 
313 | { 314 | "cell_type": "code", 315 | "execution_count": 8, 316 | "metadata": { 317 | "collapsed": true 318 | }, 319 | "outputs": [], 320 | "source": [ 321 | "# Bin edges that will be used to \"cut\" the data into groups\n", 322 | "bin_edges = [ 2.72, 3.11,3.21 , 3.32,4.01 ] # Fill in this list with five values you just found" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 9, 328 | "metadata": { 329 | "collapsed": true 330 | }, 331 | "outputs": [], 332 | "source": [ 333 | "# Labels for the four acidity level groups\n", 334 | "bin_names = [ 'High','Moderately High' ,'Meduim' ,'Low' ] # Name each acidity level category" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 10, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/html": [ 345 | "
\n", 346 | "\n", 359 | "\n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | "
fixed_acidityvolatile_aciditycitric_acidresidual_sugarchloridesfree_sulfur_dioxidetotal_sulfur_dioxidedensitypHsulphatesalcoholqualitycoloracidity_levels
07.40.700.001.90.07611.034.00.99783.510.569.45redLow
17.80.880.002.60.09825.067.00.99683.200.689.85redModerately High
27.80.760.042.30.09215.054.00.99703.260.659.85redMeduim
311.20.280.561.90.07517.060.00.99803.160.589.86redModerately High
47.40.700.001.90.07611.034.00.99783.510.569.45redLow
\n", 467 | "
" 468 | ], 469 | "text/plain": [ 470 | " fixed_acidity volatile_acidity citric_acid residual_sugar chlorides \\\n", 471 | "0 7.4 0.70 0.00 1.9 0.076 \n", 472 | "1 7.8 0.88 0.00 2.6 0.098 \n", 473 | "2 7.8 0.76 0.04 2.3 0.092 \n", 474 | "3 11.2 0.28 0.56 1.9 0.075 \n", 475 | "4 7.4 0.70 0.00 1.9 0.076 \n", 476 | "\n", 477 | " free_sulfur_dioxide total_sulfur_dioxide density pH sulphates \\\n", 478 | "0 11.0 34.0 0.9978 3.51 0.56 \n", 479 | "1 25.0 67.0 0.9968 3.20 0.68 \n", 480 | "2 15.0 54.0 0.9970 3.26 0.65 \n", 481 | "3 17.0 60.0 0.9980 3.16 0.58 \n", 482 | "4 11.0 34.0 0.9978 3.51 0.56 \n", 483 | "\n", 484 | " alcohol quality color acidity_levels \n", 485 | "0 9.4 5 red Low \n", 486 | "1 9.8 5 red Moderately High \n", 487 | "2 9.8 5 red Meduim \n", 488 | "3 9.8 6 red Moderately High \n", 489 | "4 9.4 5 red Low " 490 | ] 491 | }, 492 | "execution_count": 10, 493 | "metadata": {}, 494 | "output_type": "execute_result" 495 | } 496 | ], 497 | "source": [ 498 | "# Creates acidity_levels column\n", 499 | "wine_df['acidity_levels'] = pd.cut(wine_df['pH'], bin_edges, labels=bin_names)\n", 500 | "\n", 501 | "# Checks for successful creation of this column\n", 502 | "wine_df.head()" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": 11, 508 | "metadata": {}, 509 | "outputs": [ 510 | { 511 | "data": { 512 | "text/html": [ 513 | "
\n", 514 | "\n", 527 | "\n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | "
fixed_acidityvolatile_aciditycitric_acidresidual_sugarchloridesfree_sulfur_dioxidetotal_sulfur_dioxidedensitypHsulphatesalcoholquality
acidity_levels
High7.5439140.2946830.3707927.0888760.05513133.179965129.8974960.9947083.0290620.50393710.3302085.783343
Moderately High7.3650640.3185510.3405485.9319840.05466633.229154126.8158860.9946973.1648330.50930010.3910735.784540
Meduim7.1435660.3467510.3135854.7211590.05571528.983995111.1821380.9944763.2670100.54128710.6103695.850832
Low6.7699490.4038150.2439013.8489830.05877726.32751093.2449170.9948993.4333480.57413610.6560575.859593
\n", 623 | "
" 624 | ], 625 | "text/plain": [ 626 | " fixed_acidity volatile_acidity citric_acid residual_sugar \\\n", 627 | "acidity_levels \n", 628 | "High 7.543914 0.294683 0.370792 7.088876 \n", 629 | "Moderately High 7.365064 0.318551 0.340548 5.931984 \n", 630 | "Meduim 7.143566 0.346751 0.313585 4.721159 \n", 631 | "Low 6.769949 0.403815 0.243901 3.848983 \n", 632 | "\n", 633 | " chlorides free_sulfur_dioxide total_sulfur_dioxide \\\n", 634 | "acidity_levels \n", 635 | "High 0.055131 33.179965 129.897496 \n", 636 | "Moderately High 0.054666 33.229154 126.815886 \n", 637 | "Meduim 0.055715 28.983995 111.182138 \n", 638 | "Low 0.058777 26.327510 93.244917 \n", 639 | "\n", 640 | " density pH sulphates alcohol quality \n", 641 | "acidity_levels \n", 642 | "High 0.994708 3.029062 0.503937 10.330208 5.783343 \n", 643 | "Moderately High 0.994697 3.164833 0.509300 10.391073 5.784540 \n", 644 | "Meduim 0.994476 3.267010 0.541287 10.610369 5.850832 \n", 645 | "Low 0.994899 3.433348 0.574136 10.656057 5.859593 " 646 | ] 647 | }, 648 | "execution_count": 11, 649 | "metadata": {}, 650 | "output_type": "execute_result" 651 | } 652 | ], 653 | "source": [ 654 | "# Find the mean quality of each acidity level with groupby\n", 655 | "wine_df.groupby('acidity_levels').mean()" 656 | ] 657 | }, 658 | { 659 | "cell_type": "code", 660 | "execution_count": 13, 661 | "metadata": {}, 662 | "outputs": [], 663 | "source": [ 664 | "# Save changes for the next section\n", 665 | "wine_df.to_csv('winequality_edited.csv', index=False)" 666 | ] 667 | }, 668 | { 669 | "cell_type": "markdown", 670 | "metadata": {}, 671 | "source": [ 672 | "## QUIZ Q&A" 673 | ] 674 | }, 675 | { 676 | "cell_type": "markdown", 677 | "metadata": {}, 678 | "source": [ 679 | "### Is the mean quality of red wine greater than, less than, or equal to that of white wine?\n", 680 | "Less" 681 | ] 682 | }, 683 | { 684 | "cell_type": "markdown", 685 | "metadata": {}, 686 | "source": [ 687 | "### What level of acidity receives the 
highest average rating?\n", 688 | "Low acidity " 689 | ] 690 | }, 691 | { 692 | "cell_type": "code", 693 | "execution_count": null, 694 | "metadata": { 695 | "collapsed": true 696 | }, 697 | "outputs": [], 698 | "source": [] 699 | } 700 | ], 701 | "metadata": { 702 | "kernelspec": { 703 | "display_name": "Python 3", 704 | "language": "python", 705 | "name": "python3" 706 | }, 707 | "language_info": { 708 | "codemirror_mode": { 709 | "name": "ipython", 710 | "version": 3 711 | }, 712 | "file_extension": ".py", 713 | "mimetype": "text/x-python", 714 | "name": "python", 715 | "nbconvert_exporter": "python", 716 | "pygments_lexer": "ipython3", 717 | "version": "3.6.1" 718 | } 719 | }, 720 | "nbformat": 4, 721 | "nbformat_minor": 2 722 | } 723 | -------------------------------------------------------------------------------- /conclusions_query.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Drawing Conclusions Using Query" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "# Load `winequality_edited.csv`\n", 30 | "\n", 31 | "df = pd.read_csv('winequality_edited.csv')" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 11, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "name": "stdout", 41 | "output_type": "stream", 42 | "text": [ 43 | "\n", 44 | "RangeIndex: 6497 entries, 0 to 6496\n", 45 | "Data columns (total 14 columns):\n", 46 | "fixed_acidity 6497 non-null float64\n", 47 | "volatile_acidity 6497 non-null float64\n", 48 | "citric_acid 6497 non-null float64\n", 49 | "residual_sugar 6497 non-null float64\n", 
50 | "chlorides 6497 non-null float64\n", 51 | "free_sulfur_dioxide 6497 non-null float64\n", 52 | "total_sulfur_dioxide 6497 non-null float64\n", 53 | "density 6497 non-null float64\n", 54 | "pH 6497 non-null float64\n", 55 | "sulphates 6497 non-null float64\n", 56 | "alcohol 6497 non-null float64\n", 57 | "quality 6497 non-null int64\n", 58 | "color 6497 non-null object\n", 59 | "acidity_levels 6496 non-null object\n", 60 | "dtypes: float64(11), int64(1), object(2)\n", 61 | "memory usage: 710.7+ KB\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "df.info()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "source": [ 75 | "### Do wines with higher alcoholic content receive better ratings?" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 3, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "10.3" 87 | ] 88 | }, 89 | "execution_count": 3, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "# get the median amount of alcohol content\n", 96 | "df['alcohol'].median()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "True" 108 | ] 109 | }, 110 | "execution_count": 5, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "# select samples with alcohol content less than the median\n", 117 | "low_alcohol = df.query('alcohol < 10.3')\n", 118 | "\n", 119 | "# select samples with alcohol content greater than or equal to the median\n", 120 | "high_alcohol = df.query('alcohol >= 10.3')\n", 121 | "\n", 122 | "# ensure these queries included each sample exactly once\n", 123 | "num_samples = df.shape[0]\n", 124 | "num_samples == low_alcohol['quality'].count() + high_alcohol['quality'].count() # should be True" 125 | ] 126 | }, 127 | { 128 | "cell_type": 
"code", 129 | "execution_count": 9, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "5.475920679886686" 136 | ] 137 | }, 138 | "execution_count": 9, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "# get mean quality rating for the low alcohol and high alcohol groups\n", 145 | "low_alcohol['quality'].mean()\n" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 10, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "6.1460843373493974" 157 | ] 158 | }, 159 | "execution_count": 10, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "high_alcohol['quality'].mean()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "#### QUESTION 1 OF 2\n", 173 | "\n", 174 | "*Do wines with higher alcoholic content generally receive better ratings?*\n", 175 | "Yes" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "### Do sweeter wines receive better ratings?" 
183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 13, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "3.0" 194 | ] 195 | }, 196 | "execution_count": 13, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "# get the median amount of residual sugar\n", 203 | "df['residual_sugar'].median()" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 14, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/plain": [ 214 | "True" 215 | ] 216 | }, 217 | "execution_count": 14, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "# select samples with residual sugar less than the median\n", 224 | "low_sugar = df.query('residual_sugar < 3.0')\n", 225 | "\n", 226 | "# select samples with residual sugar greater than or equal to the median\n", 227 | "high_sugar = df.query('residual_sugar >= 3.0')\n", 228 | "\n", 229 | "# ensure these queries included each sample exactly once\n", 230 | "num_samples == low_sugar['quality'].count() + high_sugar['quality'].count() # should be True" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 15, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "5.8088007437248219" 242 | ] 243 | }, 244 | "execution_count": 15, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "# get mean quality rating for the low sugar groups\n", 251 | "low_sugar['quality'].mean()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 16, 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/plain": [ 262 | "5.8278287461773699" 263 | ] 264 | }, 265 | "execution_count": 16, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "# get mean quality rating 
for the high sugar groups\n", 272 | "high_sugar['quality'].mean()" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "#### QUESTION 2 OF 2\n", 280 | "\n", 281 | "*Do sweeter wines generally receive higher ratings?*\n", 282 | "Yes" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": { 289 | "collapsed": true 290 | }, 291 | "outputs": [], 292 | "source": [] 293 | } 294 | ], 295 | "metadata": { 296 | "kernelspec": { 297 | "display_name": "Python 3", 298 | "language": "python", 299 | "name": "python3" 300 | }, 301 | "language_info": { 302 | "codemirror_mode": { 303 | "name": "ipython", 304 | "version": 3 305 | }, 306 | "file_extension": ".py", 307 | "mimetype": "text/x-python", 308 | "name": "python", 309 | "nbconvert_exporter": "python", 310 | "pygments_lexer": "ipython3", 311 | "version": "3.6.1" 312 | } 313 | }, 314 | "nbformat": 4, 315 | "nbformat_minor": 2 316 | } 317 | -------------------------------------------------------------------------------- /conclusions_quiz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Drawing Conclusions Quiz\n", 8 | "Use the space below to explore `store_data.csv` to answer the quiz questions below." 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 8, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "# imports and load data\n", 20 | "import pandas as pd\n", 21 | "import numpy as np\n", 22 | "from datetime import datetime\n", 23 | "% matplotlib inline\n", 24 | "\n", 25 | "df_store = pd.read_csv('store_data.csv')\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "\n", 38 | "RangeIndex: 200 entries, 0 to 199\n", 39 | "Data columns (total 6 columns):\n", 40 | "week 200 non-null object\n", 41 | "storeA 200 non-null int64\n", 42 | "storeB 200 non-null int64\n", 43 | "storeC 200 non-null int64\n", 44 | "storeD 200 non-null int64\n", 45 | "storeE 200 non-null int64\n", 46 | "dtypes: int64(5), object(1)\n", 47 | "memory usage: 9.5+ KB\n" 48 | ] 49 | }, 50 | { 51 | "data": { 52 | "text/html": [ 53 | "
\n", 54 | "\n", 67 | "\n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | "
weekstoreAstoreBstoreCstoreDstoreE
02014-05-0426438257389362311294
12014-05-1164445736563470922907
22014-05-1896462552425354474736
32014-05-2559601074082646063949
42014-06-0174127374320839853023
\n", 127 | "
" 128 | ], 129 | "text/plain": [ 130 | " week storeA storeB storeC storeD storeE\n", 131 | "0 2014-05-04 2643 8257 3893 6231 1294\n", 132 | "1 2014-05-11 6444 5736 5634 7092 2907\n", 133 | "2 2014-05-18 9646 2552 4253 5447 4736\n", 134 | "3 2014-05-25 5960 10740 8264 6063 949\n", 135 | "4 2014-06-01 7412 7374 3208 3985 3023" 136 | ] 137 | }, 138 | "execution_count": 2, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "# explore data\n", 145 | "df_store.info()\n", 146 | "\n", 147 | "df_store.head()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 3, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "Total sales is 5115145\n", 160 | "Max total sales is 1351342\n", 161 | "store with max total sales is storeB\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "# total sales for the last month\n", 167 | "#first make new column with total values for all stores on that date\n", 168 | "df_store['total'] = df_store['storeA'] + df_store['storeB'] + df_store['storeC'] + df_store['storeD'] +df_store['storeE'] \n", 169 | "#get total sales\n", 170 | "total_sales = sum(df_store['total'])\n", 171 | "print('Total sales is {}'.format(total_sales))\n", 172 | "list_store_names = ['storeA', 'storeB', 'storeC', 'storeD', 'storeE']\n", 173 | "\n", 174 | "list_total = []\n", 175 | "sum_A = sum(df_store['storeA'])\n", 176 | "sum_B = sum(df_store['storeB'])\n", 177 | "sum_C = sum(df_store['storeC'])\n", 178 | "sum_D = sum(df_store['storeD'])\n", 179 | "sum_E = sum(df_store['storeE'])\n", 180 | "list_total = [sum_A, sum_B, sum_C, sum_D, sum_E]\n", 181 | "max_tot = max(list_total)\n", 182 | "#compute max value and return position\n", 183 | "pos = list_total.index(max_tot)\n", 184 | "print(\"Max total sales is {}\".format(max_tot))\n", 185 | "print(\"store with max total sales is {}\".format(list_store_names[pos]))\n" 186 | ] 187 | }, 188 
| { 189 | "cell_type": "code", 190 | "execution_count": 44, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "Store with max sales in last month is storeA\n" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "#calculate sales for final month\n", 203 | "\n", 204 | "#find time range\n", 205 | "#df_store[180:] #inspect last months to determine span\n", 206 | "\n", 207 | "df_span_last = df_store[(df_store['week'] > '2018-02-04') & (df_store['week'] < '2018-02-25')]\n", 208 | "#print(df_span_last) #checkpoint\n", 209 | "a3 = sum(df_span_last['storeA'])\n", 210 | "b3 = sum(df_span_last['storeB'])\n", 211 | "c3 = sum(df_span_last['storeC'])\n", 212 | "d3 = sum(df_span_last['storeD'])\n", 213 | "e3 = sum(df_span_last['storeE'])\n", 214 | "last = [a3,b3,c3,d3,e3]\n", 215 | "max_last = max(last)\n", 216 | "#print(last, max_last) #checkpoint\n", 217 | "store_max_last = list_store_names[last.index(max_last)]\n", 218 | "print('Store with max sales in last month is {}'.format(store_max_last))\n", 219 | "\n", 220 | "\n" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 4, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "store with highest average sales [using lists] is storeB\n", 233 | "store with highest average sales [using dicts] is storeB\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "# average sales\n", 239 | "avg_A = df_store['storeA'].mean()\n", 240 | "avg_B = df_store['storeB'].mean()\n", 241 | "avg_C = df_store['storeC'].mean()\n", 242 | "avg_D = df_store['storeD'].mean()\n", 243 | "avg_E = df_store['storeE'].mean()\n", 244 | "\n", 245 | "#compute store with highest average sales using lists\n", 246 | "list_avg = [avg_A, avg_B, avg_C, avg_D, avg_E]\n", 247 | "list_store_names = ['storeA', 'storeB', 'storeC', 'storeD', 'storeE']\n", 248 | "#print(list_avg) #check-point to verify contents 
as five averages\n", 249 | "\n", 250 | "max_avg = max(list_avg)\n", 251 | "#compute max value and return position\n", 252 | "pos = list_avg.index(max_avg)\n", 253 | "#use the position value to select store\n", 254 | "print(\"store with highest average sales [using lists] is {}\".format(list_store_names[pos]))\n", 255 | "\n", 256 | "#compute store with highest average sales using dicts\n", 257 | "dict_avg = {'storeA' : avg_A, 'storeB' : avg_B, 'storeC' : avg_C, \n", 258 | " 'storeD' : avg_D, 'storeE' : avg_E}\n", 259 | "dict_max = max(dict_avg, key=dict_avg.get)\n", 260 | "print(\"store with highest average sales [using dicts] is {}\".format(dict_max))\n" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 5, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | " week storeA storeB storeC storeD storeE total\n", 273 | "97 2016-03-13 2054 1390 5112 5513 2536 16605\n", 274 | " storeA storeB storeC storeD storeE\n", 275 | "97 2054 1390 5112 5513 2536\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "# sales on march 13, 2016\n", 281 | "\n", 282 | "# calculate sales on week of March 13th, 2016\n", 283 | "sale = df_store.loc[df_store['week'] == '2016-03-13']\n", 284 | "print(sale) #checkpoint to verify record returned\n", 285 | "\n", 286 | "del sale['week']\n", 287 | "del sale['total']\n", 288 | "print(sale)\n" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 6, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "name": "stdout", 298 | "output_type": "stream", 299 | "text": [ 300 | " week storeA storeB storeC storeD storeE total\n", 301 | "9 2014-07-06 8567 3228 927 3277 168 16167\n" 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "# worst week for store C\n", 307 | "sale_storeC = []\n", 308 | "sale_storeC = df_store['storeC']\n", 309 | "date_storeC = df_store['week']\n", 310 | "\n", 311 | "minC = min(sale_storeC)\n", 312 | "\n", 313 | 
"\n", 314 | "print(df_store.loc[df_store['storeC'] == minC])" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 39, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "2014-05-04\n", 327 | "2018-02-25\n", 328 | "[74852, 68640, 56848, 63367, 24636] 74852\n", 329 | "Store with max sales in last 3 month period is storeA\n" 330 | ] 331 | } 332 | ], 333 | "source": [ 334 | "# total sales during most recent 3 month period\n", 335 | "\n", 336 | "#find time range\n", 337 | "dates_ranges = df_store['week']\n", 338 | "print(min(dates_ranges))\n", 339 | "print(max(dates_ranges))\n", 340 | "#df_store[180:] #inspect last months to determine span\n", 341 | "\n", 342 | "#df_span = df_store[(df_store['week'] > '2017-09-03') & (df_store['week'] < '2017-12-17')]\n", 343 | "#print(df_span)\n", 344 | "\n", 345 | "\n", 346 | "df_span_last3 = df_store[(df_store['week'] > '2017-12-03') & (df_store['week'] < '2018-02-25')]\n", 347 | "#print(df_span_last3) #checkpoint\n", 348 | "a3 = sum(df_span_last3['storeA'])\n", 349 | "b3 = sum(df_span_last3['storeB'])\n", 350 | "c3 = sum(df_span_last3['storeC'])\n", 351 | "d3 = sum(df_span_last3['storeD'])\n", 352 | "e3 = sum(df_span_last3['storeE'])\n", 353 | "last3 = [a3,b3,c3,d3,e3]\n", 354 | "max_last3 = max(last3)\n", 355 | "#print(last3, max_last3) #checkpoint\n", 356 | "store_max_last3 = list_store_names[last3.index(max_last3)]\n", 357 | "print('Store with max sales in last 3 month period is {}'.format(store_max_last3))\n", 358 | "\n", 359 | "\n", 360 | "#df_new=df_store[df_store['week'] == '2018-02-25']\n", 361 | "#print(df_new)\n" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": { 368 | "collapsed": true 369 | }, 370 | "outputs": [], 371 | "source": [] 372 | } 373 | ], 374 | "metadata": { 375 | "kernelspec": { 376 | "display_name": "Python 3", 377 | "language": "python", 378 | "name": 
"python3" 379 | }, 380 | "language_info": { 381 | "codemirror_mode": { 382 | "name": "ipython", 383 | "version": 3 384 | }, 385 | "file_extension": ".py", 386 | "mimetype": "text/x-python", 387 | "name": "python", 388 | "nbconvert_exporter": "python", 389 | "pygments_lexer": "ipython3", 390 | "version": "3.6.1" 391 | } 392 | }, 393 | "nbformat": 4, 394 | "nbformat_minor": 2 395 | } 396 | -------------------------------------------------------------------------------- /fix_datatypes_cyl.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Fixing `cyl` Data Type\n", 8 | "- 2008: extract int from string\n", 9 | "- 2018: convert float to int" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 21, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 22, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "# load datasets\n", 32 | "\n", 33 | "df_08 = pd.read_csv('data_08.csv')\n", 34 | "df_18 = pd.read_csv('data_18.csv')" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 23, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/plain": [ 45 | "6 409\n", 46 | "4 283\n", 47 | "8 199\n", 48 | "5 48\n", 49 | "12 30\n", 50 | "10 14\n", 51 | "2 2\n", 52 | "16 1\n", 53 | "Name: cyl, dtype: int64" 54 | ] 55 | }, 56 | "execution_count": 23, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "# check value counts for the 2008 cyl column\n", 63 | "df_08['cyl'].value_counts()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Read [this](https://stackoverflow.com/questions/35376387/extract-int-from-string-in-pandas) to help you 
extract ints from strings in Pandas for the next step." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 24, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "ename": "AttributeError", 80 | "evalue": "Can only use .str accessor with string values, which use np.object_ dtype in pandas", 81 | "output_type": "error", 82 | "traceback": [ 83 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 84 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 85 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Extract int from strings in the 2008 cyl column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf_08\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cyl'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_08\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cyl'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextract\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'(\\d+)'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 86 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 3075\u001b[0m if (name in self._internal_names_set or name in self._metadata or\n\u001b[1;32m 3076\u001b[0m name in self._accessors):\n\u001b[0;32m-> 3077\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3078\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3079\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 87 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36m__get__\u001b[0;34m(self, instance, owner)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;31m# this ensures that Series.str. is well defined\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maccessor_cls\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconstruct_accessor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minstance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__set__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minstance\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 88 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/strings.py\u001b[0m in \u001b[0;36m_make_str_accessor\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1907\u001b[0m \u001b[0;31m# (instead of test for object dtype), but that isn't practical for\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1908\u001b[0m \u001b[0;31m# performance reasons until we have a str dtype (GH 9343)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1909\u001b[0;31m raise AttributeError(\"Can only use .str accessor with string \"\n\u001b[0m\u001b[1;32m 1910\u001b[0m 
\u001b[0;34m\"values, which use np.object_ dtype in \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1911\u001b[0m \"pandas\")\n", 89 | "\u001b[0;31mAttributeError\u001b[0m: Can only use .str accessor with string values, which use np.object_ dtype in pandas" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "# Extract int from strings in the 2008 cyl column\n", 95 | "df_08['cyl'] = df_08['cyl'].str.extract('(\\d+)').astype(int)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 25, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "6 409\n", 107 | "4 283\n", 108 | "8 199\n", 109 | "5 48\n", 110 | "12 30\n", 111 | "10 14\n", 112 | "2 2\n", 113 | "16 1\n", 114 | "Name: cyl, dtype: int64" 115 | ] 116 | }, 117 | "execution_count": 25, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "# Check value counts for 2008 cyl column again to confirm the change\n", 124 | "df_08['cyl'].value_counts()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 26, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "# convert 2018 cyl column to int\n", 136 | "#df_18['cyl'] = int(df_18['cyl'])\n", 137 | "df_18.cyl = df_18.cyl.astype(int)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 27, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "4 365\n", 149 | "6 246\n", 150 | "8 153\n", 151 | "3 18\n", 152 | "12 9\n", 153 | "5 2\n", 154 | "16 1\n", 155 | "Name: cyl, dtype: int64" 156 | ] 157 | }, 158 | "execution_count": 27, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "# Check value counts for 2018 cyl column again to confirm the change\n", 165 | "df_18['cyl'].value_counts()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 28, 171 | "metadata": { 172 
| "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "df_08.to_csv('data_08.csv', index=False)\n", 177 | "df_18.to_csv('data_18.csv', index=False)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "collapsed": true 185 | }, 186 | "outputs": [], 187 | "source": [] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3", 193 | "language": "python", 194 | "name": "python3" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.6.1" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 2 211 | } 212 | -------------------------------------------------------------------------------- /matplotlib_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Creating a Bar Chart Using Matplotlib" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import matplotlib.pyplot as plt\n", 21 | "%matplotlib inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "There are two required arguments in pyplot's `bar` function: the x-coordinates of the bars, and the heights of the bars." 
29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAECRJREFUeJzt3W+snnV9x/H3ZxTU+K/8ObCmLVZj\ns8mWAV1DakiMs2bhz2JJBglmkUq6NNnYpnHJ1vlgxmUP8IlsbAumE7dinEJQR4foxgrE7AHoQRHB\n6jgSRk/K6FGg6pga3HcPzq/z2J72XOdf757f3q/kzn1dv+t73/f3l6v93Ne5zn3dJ1WFJKlfPzfq\nBiRJy8ugl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHVu1agbADjnnHNqw4YNo25D\nklaUhx9++DtVNTZX3SkR9Bs2bGB8fHzUbUjSipLkP4bUeepGkjpn0EtS5wx6SeqcQS9JnTPoJalz\nBr0kdc6gl6TOGfSS1DmDXpI6d0pcGav/Xzbs+tyoW+jWUzdeOeoWdAryiF6SOmfQS1LnBgV9ktVJ\n7kzyzST7k7w5yVlJ7k3yRLs/s9Umyc1JJpI8mmTT8k5BknQiQ4/o/xL4QlX9InAhsB/YBeyrqo3A\nvrYOcDmwsd12ArcsaceSpHmZM+iTvAZ4C3ArQFX9uKpeALYBe1rZHuCqtrwNuK2mPQisTrJmyTuX\nJA0y5Ij+DcAU8HdJvprko0leCZxXVc8AtPtzW/1a4MCMx0+2sZ+RZGeS8STjU1NTi5qEJOn4hgT9\nKmATcEtVXQz8Fz89TTObzDJWxwxU7a6qzVW1eWxszj+QIklaoCFBPwlMVtVDbf1OpoP/2SOnZNr9\noRn162c8fh1wcGnalSTN15xBX1X/CRxI8gttaCvwDWAvsL2NbQfuast7gevap2+2AIePnOKRJJ18\nQ6+M/X3gE0nOAJ4Ermf6TeKOJDuAp4FrWu09wBXABPBiq5UkjcigoK+qR4DNs2zaOkttATcssi9J\n0hLxylhJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0k\ndc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1Ln\nBgV9kqeSfD3JI0nG29hZSe5N8kS7P7ONJ8nNSSaSPJpk03JOQJJ0YvM5ov+1qrqoqja39V3Avqra\nCOxr6wCXAxvbbSdwy1I1K0mav8WcutkG7GnLe4CrZozfVtMeBFYnWbOI15EkLcLQoC/gX5I8nGRn\nGzuvqp4BaPfntvG1wIEZj51sY5KkEVg1sO7SqjqY5Fzg3iTfPEFtZhmrY4qm3zB2Apx//vkD25Ak\nzdegI/qqOtjuDwGfBS4Bnj1ySqbdH2rlk8D6GQ9fBxyc5Tl3V9Xmqto8Nja28BlIkk5ozqBP8sok\nrz6yDPw68BiwF9jeyrYDd7XlvcB17dM3W4DDR07xSJJOviGnbs4DPpvkSP0/VNUXknwZuCPJDuBp\n4JpWfw9wBTABvAhcv+RdS5IGmzPoq+pJ4MJZxr8LbJ1lvIAblqQ7SdKieWWsJHXOoJekzhn0ktQ5\ng16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPo\nJalzBr0kdc6gl6TOGfSS1DmDXpI6N+cfB5ekDbs+N+oWuvXUjVc
u+2t4RC9JnTPoJalzBr0kdc6g\nl6TODQ76JKcl+WqSu9v665M8lOSJJLcnOaONv6ytT7TtG5andUnSEPM5on8PsH/G+oeAm6pqI/A8\nsKON7wCer6o3Aje1OknSiAwK+iTrgCuBj7b1AG8D7mwle4Cr2vK2tk7bvrXVS5JGYOgR/V8AfwT8\nT1s/G3ihql5q65PA2ra8FjgA0LYfbvU/I8nOJONJxqemphbYviRpLnMGfZLfAA5V1cMzh2cprQHb\nfjpQtbuqNlfV5rGxsUHNSpLmb8iVsZcC70hyBfBy4DVMH+GvTrKqHbWvAw62+klgPTCZZBXwWuC5\nJe9ckjTInEf0VfUnVbWuqjYA1wL3VdVvAfcDV7ey7cBdbXlvW6dtv6+qjjmilySdHIv5HP0fA+9L\nMsH0Ofhb2/itwNlt/H3ArsW1KElajHl9qVlVPQA80JafBC6ZpeaHwDVL0JskaQl4Zawkdc6gl6TO\nGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxB\nL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzcwZ9kpcn+VKSryV5\nPMkH2/jrkzyU5Ikktyc5o42/rK1PtO0blncKkqQTGXJE/yPgbVV1IXARcFmSLcCHgJuqaiPwPLCj\n1e8Anq+qNwI3tTpJ0ojMGfQ17Qdt9fR2K+BtwJ1tfA9wVVve1tZp27cmyZJ1LEmal0Hn6JOcluQR\n4BBwL/Bt4IWqeqmVTAJr2/Ja4ABA234YOHspm5YkDTco6KvqJ1V1EbAOuAR402xl7X62o/c6eiDJ\nziTjScanpqaG9itJmqd5feqmql4AHgC2AKuTrGqb1gEH2/IksB6gbX8t8Nwsz7W7qjZX1eaxsbGF\ndS9JmtOQT92MJVndll8BvB3YD9wPXN3KtgN3teW9bZ22/b6qOuaIXpJ0cqyau4Q1wJ4kpzH9xnBH\nVd2d5BvAp5L8OfBV4NZWfyvw8SQTTB/JX7sMfUuSBpoz6KvqUeDiWcafZPp8/dHjPwSuWZLuJEmL\n5pWxktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6Seqc\nQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknq3Jx/HPxUt2HX50bdQreeuvHKUbcg\naQl4RC9JnTPoJalzBr0kdW7OoE+yPsn9SfYneTzJe9r4WUnuTfJEuz+zjSfJzUkmkjyaZNNyT0KS\ndHxDjuhfAv6wqt4EbAFuSHIBsAvYV1UbgX1tHeByYGO77QRuWfKuJUmDzRn0VfVMVX2lLX8f2A+s\nBbYBe1rZHuCqtrwNuK2mPQisTrJmyTuXJA0yr3P0STYAFwMPAedV1TMw/WYAnNvK1gIHZjxsso1J\nkkZgcNAneRXwaeC9VfW9E5XOMlazPN/OJONJxqempoa2IUmap0FBn+R0pkP+E1X1mTb87JFTMu3+\nUBufBNbPePg64ODRz1lVu6tqc1VtHhsbW2j/kqQ5DPnUTYBbgf1V9eEZm/YC29vyduCuGePXtU/f\nbAEOHznFI0k6+YZ8BcKlwLuAryd5pI29H7gRuCPJDuBp4Jq27R7gCmACeBG4fkk7liTNy5xBX1X/\nxuzn3QG2zlJfwA2L7EuStES8MlaSOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNe\nkjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWp\ncwa9JHXOoJekzhn0ktQ5g16SOjdn0Cf5WJJDSR6bMXZWknuTPNHuz2zjSXJzkokkjybZtJzNS5Lm\nNuSI/u+By44a2wXsq6qNwL6
2DnA5sLHddgK3LE2bkqSFmjPoq+qLwHNHDW8D9rTlPcBVM8Zvq2kP\nAquTrFmqZiVJ87fQc/TnVdUzAO3+3Da+Fjgwo26yjR0jyc4k40nGp6amFtiGJGkuS/3L2MwyVrMV\nVtXuqtpcVZvHxsaWuA1J0hELDfpnj5ySafeH2vgksH5G3Trg4MLbkyQt1kKDfi+wvS1vB+6aMX5d\n+/TNFuDwkVM8kqTRWDVXQZJPAm8FzkkyCXwAuBG4I8kO4GngmlZ+D3AFMAG8CFy/DD1LkuZhzqCv\nqnceZ9PWWWoLuGGxTUmSlo5XxkpS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BL\nUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1\nzqCXpM4Z9JLUOYNekjq3LEGf5LIk30oykWTXcryGJGmYJQ/6JKcBfwNcDlwAvDPJBUv9OpKkYZbj\niP4SYKKqnqyqHwOfArYtw+tIkgZYjqBfCxyYsT7ZxiRJI7BqGZ4zs4zVMUXJTmBnW/1Bkm/N2HwO\n8J1l6O1UsGLmlg/Nq3zFzGueVtS83GfACpvXIvfZ64Y8aDmCfhJYP2N9HXDw6KKq2g3snu0JkoxX\n1eZl6G3kep2b81p5ep1br/OChc9tOU7dfBnYmOT1Sc4ArgX2LsPrSJIGWPIj+qp6KcnvAf8MnAZ8\nrKoeX+rXkSQNsxynbqiqe4B7FvEUs57S6USvc3NeK0+vc+t1XrDAuaXqmN+TSpI64lcgSFLnRhr0\nc31VQpJ3J5lK8ki7/fYo+pyvJB9LcijJY8fZniQ3t3k/mmTTye5xIQbM661JDs/YX396sntciCTr\nk9yfZH+Sx5O8Z5aalbrPhsxtxe23JC9P8qUkX2vz+uAsNS9LcnvbZw8l2XDyO52fgfOafy5W1Uhu\nTP+i9tvAG4AzgK8BFxxV827gr0fV4yLm9hZgE/DYcbZfAXye6WsOtgAPjbrnJZrXW4G7R93nAua1\nBtjUll8N/Pss/xZX6j4bMrcVt9/afnhVWz4deAjYclTN7wIfacvXArePuu8lmte8c3GUR/TdflVC\nVX0ReO4EJduA22rag8DqJGtOTncLN2BeK1JVPVNVX2nL3wf2c+zV3Ct1nw2Z24rT9sMP2urp7Xb0\nLxy3AXva8p3A1iSzXdB5yhg4r3kbZdAP/aqE32w/Kt+ZZP0s21einr8m4s3tx87PJ/mlUTczX+3H\n+4uZPpKaacXvsxPMDVbgfktyWpJHgEPAvVV13H1WVS8Bh4GzT26X8zdgXjDPXBxl0A/5qoR/AjZU\n1a8A/8pP351XukFfE7ECfQV4XVVdCPwV8I8j7mdekrwK+DTw3qr63tGbZ3nIitlnc8xtRe63qvpJ\nVV3E9NX3lyT55aNKVuQ+GzCveefiKIN+zq9KqKrvVtWP2urfAr96knpbboO+JmKlqarvHfmxs6av\npTg9yTkjbmuQJKczHYSfqKrPzFKyYvfZXHNbyfsNoKpeAB4ALjtq0//tsySrgNeygk49Hm9eC8nF\nUQb9nF+VcNQ50HcwfX6xB3uB69onObYAh6vqmVE3tVhJfv7IOdAklzD97+u7o+1qbq3nW4H9VfXh\n45StyH02ZG4rcb8lGUuyui2/Ang78M2jyvYC29vy1cB91X6beaoaMq+F5OKyXBk7RB3nqxKS/Bkw\nXlV7gT9I8g7gJabfid89qn7nI8knmf4kwzlJJoEPMP1LFarqI0xfNXwFMAG8CFw/mk7nZ8C8rgZ+\nJ8lLwH8D157q/7GaS4F3AV9v50YB3g+cDyt7nzFsbitxv60B9mT6Dx39HHBHVd19VH7cCnw8yQTT\n+XHt6NodbMi85p2LXhkrSZ3zylhJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5
/4X\nZ8Vx7M5o+qYAAAAASUVORK5CYII=\n", 39 | "text/plain": [ 40 | "" 41 | ] 42 | }, 43 | "metadata": {}, 44 | "output_type": "display_data" 45 | } 46 | ], 47 | "source": [ 48 | "plt.bar([1, 2, 3], [224, 620, 425]);" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "You can specify the x tick labels using pyplot's `xticks` function, or by specifying another parameter in the `bar` function. The two cells below accomplish the same thing." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADgNJREFUeJzt3W+MpWdZx/Hvzy4FRGH7Z9o0u1un\nho2AL4Bm0iwpUewSQ1vD9gU1EGM3zSbzpioGE119Y4yalMRQbWKabCi6RRSaCnYDDdosVANJK1Mp\nC2UhHWvtTrZ2p/SPYoOkevli7g3D7umeZ3bO7JnefD/JyfM813Ofc66Tk/3Nvfc5z0yqCklSv35s\n2g1IkjaWQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknq3JZpNwBw8cUX1+zs7LTb\nkKRXlIcffviZqpoZN25TBP3s7CwLCwvTbkOSXlGS/PuQcS7dSFLnDHpJ6pxBL0mdM+glqXMGvSR1\nzqCXpM4Z9JLUOYNekjpn0EtS5zbFlbH60TK7/3PTbqFbT9x6/bRb0CbkjF6SOmfQS1LnBgV9kq1J\n7knyrSRHk7wjyYVJ7k/yWNte0MYmye1JFpMcSXLlxr4ESdKZDJ3R/xnw+ap6E/BW4CiwHzhcVTuB\nw+0Y4FpgZ7vNA3dMtGNJ0pqMDfokrwd+DrgToKq+X1XPA3uAg23YQeCGtr8HuKtWPAhsTXLZxDuX\nJA0yZEb/08Ay8BdJvprko0leB1xaVU8BtO0lbfw24Niq+y+12g9JMp9kIcnC8vLyul6EJOnlDQn6\nLcCVwB1V9Xbgv/nBMs0oGVGr0wpVB6pqrqrmZmbG/oEUSdJZGhL0S8BSVT3Uju9hJfifPrkk07Yn\nVo3fser+24Hjk2lXkrRWY4O+qv4DOJbkZ1ppN/BN4BCwt9X2Ave2/UPATe3bN7uAF04u8UiSzr2h\nV8b+OvCJJOcDjwM3s/JD4u4k+4AngRvb2PuA64BF4MU2VpI0JYOCvqoeAeZGnNo9YmwBt6yzL0nS\nhHhlrCR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6\nZ9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXOD\ngj7JE0m+nuSRJAutdmGS+5M81rYXtHqS3J5kMcmRJFdu5AuQJJ3ZWmb0v1BVb6uquXa8HzhcVTuB\nw+0Y4FpgZ7vNA3dMqllJ0tqtZ+lmD3Cw7R8
EblhVv6tWPAhsTXLZOp5HkrQOQ4O+gH9I8nCS+Va7\ntKqeAmjbS1p9G3Bs1X2XWk2SNAVbBo67uqqOJ7kEuD/Jt84wNiNqddqglR8Y8wCXX375wDYkSWs1\naEZfVcfb9gTwGeAq4OmTSzJte6INXwJ2rLr7duD4iMc8UFVzVTU3MzNz9q9AknRGY4M+yeuS/OTJ\nfeAXgW8Ah4C9bdhe4N62fwi4qX37ZhfwwsklHknSuTdk6eZS4DNJTo7/66r6fJKvAHcn2Qc8CdzY\nxt8HXAcsAi8CN0+8a0nSYGODvqoeB946ov4dYPeIegG3TKQ7SdK6eWWsJHXOoJekzhn0ktQ5g16S\nOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalz\nBr0kdc6gl6TOGfSS1DmDXpI6N/aPg0vS7P7PTbuFbj1x6/Ub/hzO6CWpcwa9JHXOoJekzhn0ktS5\nwUGf5LwkX03y2XZ8RZKHkjyW5FNJzm/1V7fjxXZ+dmNalyQNsZYZ/QeBo6uOPwzcVlU7geeAfa2+\nD3iuqt4I3NbGSZKmZFDQJ9kOXA98tB0HuAa4pw05CNzQ9ve0Y9r53W28JGkKhs7o/xT4beD/2vFF\nwPNV9VI7XgK2tf1twDGAdv6FNv6HJJlPspBkYXl5+SzblySNMzbok/wScKKqHl5dHjG0Bpz7QaHq\nQFXNVdXczMzMoGYlSWs35MrYq4H3JrkOeA3welZm+FuTbGmz9u3A8TZ+CdgBLCXZArwBeHbinUuS\nBhk7o6+q362q7VU1C7wf+EJV/QrwReB9bdhe4N62f6gd085/oapOm9FLks6N9XyP/neADyVZZGUN\n/s5WvxO4qNU/BOxfX4uSpPVY0y81q6oHgAfa/uPAVSPGfA+4cQK9SZImwCtjJalzBr0kdc6gl6TO\nGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxB\nL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnRsb9Elek+Sfk3wtyaNJ/qDV\nr0jyUJLHknwqyfmt/up2vNjOz27sS5AkncmQGf3/ANdU1VuBtwHvSbIL+DBwW1XtBJ4D9rXx+4Dn\nquqNwG1tnCRpSsYGfa34bjt8VbsVcA1wT6sfBG5o+3vaMe387iSZWMeSpDUZtEaf5LwkjwAngPuB\nfwWer6qX2pAlYFvb3wYcA2jnXwAummTTkqThBgV9Vf1vVb0N2A5cBbx51LC2HTV7r1MLSeaTLCRZ\nWF5eHtqvJGmN1vStm6p6HngA2AVsTbKlndoOHG/7S8AOgHb+DcCzIx7rQFXNVdXczMzM2XUvSRpr\nyLduZpJsbfuvBd4NHAW+CLyvDdsL3Nv2D7Vj2vkvVNVpM3pJ0rmxZfwQLgMOJjmPlR8Md1fVZ5N8\nE/hkkj8Cvgrc2cbfCXw8ySIrM/n3b0DfkqSBxgZ9VR0B3j6i/jgr6/Wn1r8H3DiR7iRJ6+aVsZLU\nOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z\n6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6tzYPw6+2c3u/9y0W+jWE7deP+0WJE2AM3pJ\n6pxBL0mdM+glqXNjgz7JjiRfTHI0yaNJPtjqFya5P8ljbXtBqyfJ7UkWkxxJcuVGvwhJ0ssbMqN/\nCfitqnozsAu4JclbgP3A4araCRxuxwDXAjvbbR64Y+JdS5IGGxv0VfVUVf1L2/8v4CiwDdgDHGzD\nDgI3tP09wF214kFga5LLJt65JGmQNa3RJ5kF3g48BFxaVU/Byg8D4JI2bBtwbNXdllpNkjQFg4M+\nyU8Afwv
8ZlX955mGjqjViMebT7KQZGF5eXloG5KkNRoU9ElexUrIf6KqPt3KT59ckmnbE62+BOxY\ndfftwPFTH7OqDlTVXFXNzczMnG3/kqQxhnzrJsCdwNGq+siqU4eAvW1/L3DvqvpN7ds3u4AXTi7x\nSJLOvSG/AuFq4FeBryd5pNV+D7gVuDvJPuBJ4MZ27j7gOmAReBG4eaIdS5LWZGzQV9WXGL3uDrB7\nxPgCbllnX5KkCfHKWEnqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0md\nM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmD\nXpI6Z9BLUucMeknq3NigT/KxJCeSfGNV7cIk9yd5rG0vaPUkuT3JYpIjSa7cyOYlSeMNmdH/JfCe\nU2r7gcNVtRM43I4BrgV2tts8cMdk2pQkna2xQV9V/wQ8e0p5D3Cw7R8EblhVv6tWPAhsTXLZpJqV\nJK3d2a7RX1pVTwG07SWtvg04tmrcUqudJsl8koUkC8vLy2fZhiRpnEl/GJsRtRo1sKoOVNVcVc3N\nzMxMuA1J0klnG/RPn1ySadsTrb4E7Fg1bjtw/OzbkySt19kG/SFgb9vfC9y7qn5T+/bNLuCFk0s8\nkqTp2DJuQJK/Ad4FXJxkCfh94Fbg7iT7gCeBG9vw+4DrgEXgReDmDehZkrQGY4O+qj7wMqd2jxhb\nwC3rbUqSNDleGStJnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9\nJHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS\n5wx6SerchgR9kvck+XaSxST7N+I5JEnDTDzok5wH/DlwLfAW4ANJ3jLp55EkDbMRM/qrgMWqeryq\nvg98EtizAc8jSRpgI4J+G3Bs1fFSq0mSpmDLBjxmRtTqtEHJPDDfDr+b5Nsb0MtmdDHwzLSbGCIf\nnnYHm8Ir5v0C37PmR+k9+6khgzYi6JeAHauOtwPHTx1UVQeAAxvw/JtakoWqmpt2HxrG9+uVx/fs\ndBuxdPMVYGeSK5KcD7wfOLQBzyNJGmDiM/qqeinJrwF/D5wHfKyqHp3080iShtmIpRuq6j7gvo14\n7A78yC1XvcL5fr3y+J6dIlWnfU4qSeqIvwJBkjpn0EsjJJlN8o1p9yFNgkEvSZ0z6M+RJH+X5OEk\nj7aLxbT5bUlyMMmRJPck+fFpN6QzS3JTe7++luTj0+5ns/DD2HMkyYVV9WyS17JyrcHPV9V3pt2X\nRksyC/wb8M6q+nKSjwHfrKo/mWpjellJfhb4NHB1VT1z8t/ctPvaDJzRnzu/keRrwIOsXDm8c8r9\naLxjVfXltv9XwDun2YzGuga4p6qeATDkf2BDvkevH5bkXcC7gXdU1YtJHgBeM9WmNMSp/931v7+b\nW/A9GskZ/bnxBuC5FvJvAnZNuyENcnmSd7T9DwBfmmYzGusw8MtJLoKV5dIp97NpGPTnxudZ+WDv\nCPCHrCzfaPM7Cuxt79uFwB1T7kdn0H7Vyh8D/9iWST8y5ZY2DT+MlaTOOaOXpM4Z9JLUOYNekjpn\n0EtS5wx6SeqcQS9JnTPoJalzBr0kde7/AWlGQDBbdAN6AAAAAElFTkSuQmCC\n", 66 | "text/plain": [ 67 | "" 68 | ] 69 | }, 70 | "metadata": {}, 71 | "output_type": "display_data" 72 | } 73 | ], 74 | "source": [ 75 | "# plot bars\n", 76 | "plt.bar([1, 2, 3], [224, 620, 425])\n", 77 | "\n", 78 | "# 
specify x coordinates of tick labels and their labels\n", 79 | "plt.xticks([1, 2, 3], ['a', 'b', 'c']);" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADgNJREFUeJzt3W+MpWdZx/Hvzy4FRGH7Z9o0u1un\nho2AL4Bm0iwpUewSQ1vD9gU1EGM3zSbzpioGE119Y4yalMRQbWKabCi6RRSaCnYDDdosVANJK1Mp\nC2UhHWvtTrZ2p/SPYoOkevli7g3D7umeZ3bO7JnefD/JyfM813Ofc66Tk/3Nvfc5z0yqCklSv35s\n2g1IkjaWQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknq3JZpNwBw8cUX1+zs7LTb\nkKRXlIcffviZqpoZN25TBP3s7CwLCwvTbkOSXlGS/PuQcS7dSFLnDHpJ6pxBL0mdM+glqXMGvSR1\nzqCXpM4Z9JLUOYNekjpn0EtS5zbFlbH60TK7/3PTbqFbT9x6/bRb0CbkjF6SOmfQS1LnBgV9kq1J\n7knyrSRHk7wjyYVJ7k/yWNte0MYmye1JFpMcSXLlxr4ESdKZDJ3R/xnw+ap6E/BW4CiwHzhcVTuB\nw+0Y4FpgZ7vNA3dMtGNJ0pqMDfokrwd+DrgToKq+X1XPA3uAg23YQeCGtr8HuKtWPAhsTXLZxDuX\nJA0yZEb/08Ay8BdJvprko0leB1xaVU8BtO0lbfw24Niq+y+12g9JMp9kIcnC8vLyul6EJOnlDQn6\nLcCVwB1V9Xbgv/nBMs0oGVGr0wpVB6pqrqrmZmbG/oEUSdJZGhL0S8BSVT3Uju9hJfifPrkk07Yn\nVo3fser+24Hjk2lXkrRWY4O+qv4DOJbkZ1ppN/BN4BCwt9X2Ave2/UPATe3bN7uAF04u8UiSzr2h\nV8b+OvCJJOcDjwM3s/JD4u4k+4AngRvb2PuA64BF4MU2VpI0JYOCvqoeAeZGnNo9YmwBt6yzL0nS\nhHhlrCR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6\nZ9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXOD\ngj7JE0m+nuSRJAutdmGS+5M81rYXtHqS3J5kMcmRJFdu5AuQJJ3ZWmb0v1BVb6uquXa8HzhcVTuB\nw+0Y4FpgZ7vNA3dMqllJ0tqtZ+lmD3Cw7R8EblhVv6tWPAhsTXLZOp5HkrQOQ4O+gH9I8nCS+Va7\ntKqeAmjbS1p9G3Bs1X2XWk2SNAVbBo67uqqOJ7kEuD/Jt84wNiNqddqglR8Y8wCXX375wDYkSWs1\naEZfVcfb9gTwGeAq4OmTSzJte6INXwJ2rLr7duD4iMc8UFVzVTU3MzNz9q9AknRGY4M+yeuS/OTJ\nfeAXgW8Ah4C9bdhe4N62fwi4qX37ZhfwwsklHknSuTdk6eZS4DNJTo7/66r6fJKvAHcn2Qc8CdzY\nxt8HXAcsAi8CN0+8a0nSYGODvqoeB946ov4dYPeIegG3TKQ7SdK6eWWsJHXOoJekzhn0ktQ5g16S\nOmfQS1LnDHpJ6pxBL0mdM+gl
qXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalz\nBr0kdc6gl6TOGfSS1DmDXpI6N/aPg0vS7P7PTbuFbj1x6/Ub/hzO6CWpcwa9JHXOoJekzhn0ktS5\nwUGf5LwkX03y2XZ8RZKHkjyW5FNJzm/1V7fjxXZ+dmNalyQNsZYZ/QeBo6uOPwzcVlU7geeAfa2+\nD3iuqt4I3NbGSZKmZFDQJ9kOXA98tB0HuAa4pw05CNzQ9ve0Y9r53W28JGkKhs7o/xT4beD/2vFF\nwPNV9VI7XgK2tf1twDGAdv6FNv6HJJlPspBkYXl5+SzblySNMzbok/wScKKqHl5dHjG0Bpz7QaHq\nQFXNVdXczMzMoGYlSWs35MrYq4H3JrkOeA3welZm+FuTbGmz9u3A8TZ+CdgBLCXZArwBeHbinUuS\nBhk7o6+q362q7VU1C7wf+EJV/QrwReB9bdhe4N62f6gd085/oapOm9FLks6N9XyP/neADyVZZGUN\n/s5WvxO4qNU/BOxfX4uSpPVY0y81q6oHgAfa/uPAVSPGfA+4cQK9SZImwCtjJalzBr0kdc6gl6TO\nGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxB\nL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnRsb9Elek+Sfk3wtyaNJ/qDV\nr0jyUJLHknwqyfmt/up2vNjOz27sS5AkncmQGf3/ANdU1VuBtwHvSbIL+DBwW1XtBJ4D9rXx+4Dn\nquqNwG1tnCRpSsYGfa34bjt8VbsVcA1wT6sfBG5o+3vaMe387iSZWMeSpDUZtEaf5LwkjwAngPuB\nfwWer6qX2pAlYFvb3wYcA2jnXwAummTTkqThBgV9Vf1vVb0N2A5cBbx51LC2HTV7r1MLSeaTLCRZ\nWF5eHtqvJGmN1vStm6p6HngA2AVsTbKlndoOHG/7S8AOgHb+DcCzIx7rQFXNVdXczMzM2XUvSRpr\nyLduZpJsbfuvBd4NHAW+CLyvDdsL3Nv2D7Vj2vkvVNVpM3pJ0rmxZfwQLgMOJjmPlR8Md1fVZ5N8\nE/hkkj8Cvgrc2cbfCXw8ySIrM/n3b0DfkqSBxgZ9VR0B3j6i/jgr6/Wn1r8H3DiR7iRJ6+aVsZLU\nOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z\n6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6tzYPw6+2c3u/9y0W+jWE7deP+0WJE2AM3pJ\n6pxBL0mdM+glqXNjgz7JjiRfTHI0yaNJPtjqFya5P8ljbXtBqyfJ7UkWkxxJcuVGvwhJ0ssbMqN/\nCfitqnozsAu4JclbgP3A4araCRxuxwDXAjvbbR64Y+JdS5IGGxv0VfVUVf1L2/8v4CiwDdgDHGzD\nDgI3tP09wF214kFga5LLJt65JGmQNa3RJ5kF3g48BFxaVU/Byg8D4JI2bBtwbNXdllpNkjQFg4M+\nyU8Afwv8ZlX955mGjqjViMebT7KQZGF5eXloG5KkNRoU9ElexUrIf6KqPt3KT59ckmnbE62+BOxY\ndfftwPFTH7OqDlTVXFXNzczMnG3/kqQxhnzrJsCdwNGq+siqU4eAvW1/L3DvqvpN7ds3u4AXTi7x\nSJLOvSG/AuFq4FeBryd5pNV+D7gVuDvJPuBJ4MZ27j7gOmAReBG4eaIdS5LWZGzQV9WXGL3uDrB7\nxPgCbllnX5KkCfHKWEnqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0md\nM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1D
mD\nXpI6Z9BLUucMeknq3NigT/KxJCeSfGNV7cIk9yd5rG0vaPUkuT3JYpIjSa7cyOYlSeMNmdH/JfCe\nU2r7gcNVtRM43I4BrgV2tts8cMdk2pQkna2xQV9V/wQ8e0p5D3Cw7R8EblhVv6tWPAhsTXLZpJqV\nJK3d2a7RX1pVTwG07SWtvg04tmrcUqudJsl8koUkC8vLy2fZhiRpnEl/GJsRtRo1sKoOVNVcVc3N\nzMxMuA1J0klnG/RPn1ySadsTrb4E7Fg1bjtw/OzbkySt19kG/SFgb9vfC9y7qn5T+/bNLuCFk0s8\nkqTp2DJuQJK/Ad4FXJxkCfh94Fbg7iT7gCeBG9vw+4DrgEXgReDmDehZkrQGY4O+qj7wMqd2jxhb\nwC3rbUqSNDleGStJnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9\nJHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS\n5wx6SerchgR9kvck+XaSxST7N+I5JEnDTDzok5wH/DlwLfAW4ANJ3jLp55EkDbMRM/qrgMWqeryq\nvg98EtizAc8jSRpgI4J+G3Bs1fFSq0mSpmDLBjxmRtTqtEHJPDDfDr+b5Nsb0MtmdDHwzLSbGCIf\nnnYHm8Ir5v0C37PmR+k9+6khgzYi6JeAHauOtwPHTx1UVQeAAxvw/JtakoWqmpt2HxrG9+uVx/fs\ndBuxdPMVYGeSK5KcD7wfOLQBzyNJGmDiM/qqeinJrwF/D5wHfKyqHp3080iShtmIpRuq6j7gvo14\n7A78yC1XvcL5fr3y+J6dIlWnfU4qSeqIvwJBkjpn0EsjJJlN8o1p9yFNgkEvSZ0z6M+RJH+X5OEk\nj7aLxbT5bUlyMMmRJPck+fFpN6QzS3JTe7++luTj0+5ns/DD2HMkyYVV9WyS17JyrcHPV9V3pt2X\nRksyC/wb8M6q+nKSjwHfrKo/mWpjellJfhb4NHB1VT1z8t/ctPvaDJzRnzu/keRrwIOsXDm8c8r9\naLxjVfXltv9XwDun2YzGuga4p6qeATDkf2BDvkevH5bkXcC7gXdU1YtJHgBeM9WmNMSp/931v7+b\nW/A9GskZ/bnxBuC5FvJvAnZNuyENcnmSd7T9DwBfmmYzGusw8MtJLoKV5dIp97NpGPTnxudZ+WDv\nCPCHrCzfaPM7Cuxt79uFwB1T7kdn0H7Vyh8D/9iWST8y5ZY2DT+MlaTOOaOXpM4Z9JLUOYNekjpn\n0EtS5wx6SeqcQS9JnTPoJalzBr0kde7/AWlGQDBbdAN6AAAAAElFTkSuQmCC\n", 90 | "text/plain": [ 91 | "" 92 | ] 93 | }, 94 | "metadata": {}, 95 | "output_type": "display_data" 96 | } 97 | ], 98 | "source": [ 99 | "# plot bars with x tick labels\n", 100 | "plt.bar([1, 2, 3], [224, 620, 425], tick_label=['a', 'b', 'c']);" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Set the title and label axes like this." 
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "plt.bar([1, 2, 3], [224, 620, 425], tick_label=['a', 'b', 'c'])\n", 119 | "plt.title('Some Title')\n", 120 | "plt.xlabel('Some X Label')\n", 121 | "plt.ylabel('Some Y Label');" 122 | ] 123 | } 124 | ], 125 | "metadata": { 126 | "kernelspec": { 127 | "display_name": "Python 3", 128 | "language": "python", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.6.1" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 2 146 | } 147 | -------------------------------------------------------------------------------- /plots-pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "% matplotlib inline" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/html": [ 23 | "
\n", 24 | "\n", 37 | "\n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | "
yearcitycountryavg_tempsevenDayMAFiveYearMATenYearMA
01743ChicagoUnited States5.44NaNNaNNaN
11744ChicagoUnited States11.73NaNNaNNaN
21745ChicagoUnited States1.80NaNNaNNaN
31746ChicagoUnited States0.00NaNNaNNaN
41747ChicagoUnited States0.00NaN3.794NaN
\n", 103 | "
" 104 | ], 105 | "text/plain": [ 106 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 107 | "0 1743 Chicago United States 5.44 NaN NaN NaN\n", 108 | "1 1744 Chicago United States 11.73 NaN NaN NaN\n", 109 | "2 1745 Chicago United States 1.80 NaN NaN NaN\n", 110 | "3 1746 Chicago United States 0.00 NaN NaN NaN\n", 111 | "4 1747 Chicago United States 0.00 NaN 3.794 NaN" 112 | ] 113 | }, 114 | "execution_count": 2, 115 | "metadata": {}, 116 | "output_type": "execute_result" 117 | } 118 | ], 119 | "source": [ 120 | "df_temp = pd.read_csv('ChicagoResults.csv')\n", 121 | "df_temp.head()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "df_19 = df_temp[df_temp['year'] > 1800]" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 8, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/html": [ 141 | "
\n", 142 | "\n", 155 | "\n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | "
yearcitycountryavg_tempsevenDayMAFiveYearMATenYearMA
581801ChicagoUnited States10.4710.10142910.15610.083
591802ChicagoUnited States10.6610.19428610.35610.187
601803ChicagoUnited States10.4810.27428610.36610.215
611804ChicagoUnited States10.4110.38142910.42610.226
621805ChicagoUnited States10.6310.41000010.53010.288
\n", 221 | "
" 222 | ], 223 | "text/plain": [ 224 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 225 | "58 1801 Chicago United States 10.47 10.101429 10.156 10.083\n", 226 | "59 1802 Chicago United States 10.66 10.194286 10.356 10.187\n", 227 | "60 1803 Chicago United States 10.48 10.274286 10.366 10.215\n", 228 | "61 1804 Chicago United States 10.41 10.381429 10.426 10.226\n", 229 | "62 1805 Chicago United States 10.63 10.410000 10.530 10.288" 230 | ] 231 | }, 232 | "execution_count": 8, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "df_19.head()" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 10, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/html": [ 249 | "
\n", 250 | "\n", 263 | "\n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | "
yearcitycountryavg_tempsevenDayMAFiveYearMATenYearMA
1581901ChicagoUnited States9.8710.08857110.1829.953
1591902ChicagoUnited States10.0410.19142910.18010.036
1601903ChicagoUnited States9.5710.07428610.04610.102
1611904ChicagoUnited States8.569.8614299.7349.879
1621905ChicagoUnited States9.429.7442869.4929.889
\n", 329 | "
" 330 | ], 331 | "text/plain": [ 332 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 333 | "158 1901 Chicago United States 9.87 10.088571 10.182 9.953\n", 334 | "159 1902 Chicago United States 10.04 10.191429 10.180 10.036\n", 335 | "160 1903 Chicago United States 9.57 10.074286 10.046 10.102\n", 336 | "161 1904 Chicago United States 8.56 9.861429 9.734 9.879\n", 337 | "162 1905 Chicago United States 9.42 9.744286 9.492 9.889" 338 | ] 339 | }, 340 | "execution_count": 10, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "mask = df_temp['year'] > 1900\n", 347 | "df_20 = df_temp[mask]\n", 348 | "df_20.head()" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 11, 354 | "metadata": {}, 355 | "outputs": [ 356 | { 357 | "data": { 358 | "text/plain": [ 359 | "count 213.000000\n", 360 | "mean 10.140751\n", 361 | "std 0.892496\n", 362 | "min 7.800000\n", 363 | "25% 9.560000\n", 364 | "50% 10.140000\n", 365 | "75% 10.660000\n", 366 | "max 12.820000\n", 367 | "Name: avg_temp, dtype: float64" 368 | ] 369 | }, 370 | "execution_count": 11, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "df_19['avg_temp'].describe()" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "collapsed": true 384 | }, 385 | "outputs": [], 386 | "source": [] 387 | } 388 | ], 389 | "metadata": { 390 | "kernelspec": { 391 | "display_name": "Python 3", 392 | "language": "python", 393 | "name": "python3" 394 | }, 395 | "language_info": { 396 | "codemirror_mode": { 397 | "name": "ipython", 398 | "version": 3 399 | }, 400 | "file_extension": ".py", 401 | "mimetype": "text/x-python", 402 | "name": "python", 403 | "nbconvert_exporter": "python", 404 | "pygments_lexer": "ipython3", 405 | "version": "3.6.2" 406 | } 407 | }, 408 | "nbformat": 4, 409 | "nbformat_minor": 2 410 | } 411 | 
-------------------------------------------------------------------------------- /plotting_type_quality.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Plotting Wine Type and Quality with Matplotlib" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 14, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "import pandas as pd\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "% matplotlib inline\n", 24 | "import seaborn as sns\n", 25 | "sns.set_style('darkgrid')\n", 26 | "\n", 27 | "wine_df = pd.read_csv('winequality_edited.csv')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Create arrays for red bar heights and white bar heights\n", 35 | "Remember, there's a bar for each combination of color and quality rating. Each bar's height is based on the proportion of samples of that color with that quality rating.\n", 36 | "1. Red bar proportions = counts for each quality rating / total # of red samples\n", 37 | "2. 
White bar proportions = counts for each quality rating / total # of white samples" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 15, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/plain": [ 48 | "color quality\n", 49 | "red 3 10\n", 50 | " 4 53\n", 51 | " 5 681\n", 52 | " 6 638\n", 53 | " 7 199\n", 54 | " 8 18\n", 55 | "white 3 20\n", 56 | " 4 163\n", 57 | " 5 1457\n", 58 | " 6 2198\n", 59 | " 7 880\n", 60 | " 8 175\n", 61 | " 9 5\n", 62 | "Name: pH, dtype: int64" 63 | ] 64 | }, 65 | "execution_count": 15, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "# get counts for each rating and color\n", 72 | "color_counts = wine_df.groupby(['color', 'quality']).count()['pH']\n", 73 | "color_counts" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 16, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "color\n", 85 | "red 1599\n", 86 | "white 4898\n", 87 | "Name: pH, dtype: int64" 88 | ] 89 | }, 90 | "execution_count": 16, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "# get total counts for each color\n", 97 | "color_totals = wine_df.groupby('color').count()['pH']\n", 98 | "color_totals" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 17, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "quality\n", 110 | "3 0.006254\n", 111 | "4 0.033146\n", 112 | "5 0.425891\n", 113 | "6 0.398999\n", 114 | "7 0.124453\n", 115 | "8 0.011257\n", 116 | "Name: pH, dtype: float64" 117 | ] 118 | }, 119 | "execution_count": 17, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "# get proportions by dividing red rating counts by total # of red samples\n", 126 | "red_proportions = color_counts['red'] / color_totals['red']\n", 127 | "red_proportions" 128 | ] 129 | }, 130 | 
{ 131 | "cell_type": "code", 132 | "execution_count": 18, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "quality\n", 139 | "3 0.004083\n", 140 | "4 0.033279\n", 141 | "5 0.297468\n", 142 | "6 0.448755\n", 143 | "7 0.179665\n", 144 | "8 0.035729\n", 145 | "9 0.001021\n", 146 | "Name: pH, dtype: float64" 147 | ] 148 | }, 149 | "execution_count": 18, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "# get proportions by dividing white rating counts by total # of white samples\n", 156 | "white_proportions = color_counts['white'] / color_totals['white']\n", 157 | "white_proportions" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "### Plot proportions on a bar chart\n", 165 | "Set the x coordinate location for each rating group and the width of each bar." 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 19, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "ind = np.arange(len(red_proportions)) # the x locations for the groups\n", 177 | "width = 0.35 # the width of the bars" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "Now let’s create the plot." 
185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 21, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "ename": "ValueError", 194 | "evalue": "shape mismatch: objects cannot be broadcast to a single shape", 195 | "output_type": "error", 196 | "traceback": [ 197 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 198 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 199 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# plot bars\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mred_bars\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mind\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mred_proportions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwidth\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'r'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m.7\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'Red Wine'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mwhite_bars\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mind\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mwidth\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwhite_proportions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwidth\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'w'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m.7\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'White Wine'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# title and labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 200 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/matplotlib/pyplot.py\u001b[0m in \u001b[0;36mbar\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 2625\u001b[0m mplDeprecation)\n\u001b[1;32m 2626\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2627\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2628\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2629\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hold\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwashold\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 201 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/matplotlib/__init__.py\u001b[0m in \u001b[0;36minner\u001b[0;34m(ax, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1708\u001b[0m warnings.warn(msg % (label_namer, func.__name__),\n\u001b[1;32m 1709\u001b[0m RuntimeWarning, stacklevel=2)\n\u001b[0;32m-> 1710\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1711\u001b[0m \u001b[0mpre_doc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1712\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpre_doc\u001b[0m \u001b[0;32mis\u001b[0m 
\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 202 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/matplotlib/axes/_axes.py\u001b[0m in \u001b[0;36mbar\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2079\u001b[0m x, height, width, y, linewidth = np.broadcast_arrays(\n\u001b[1;32m 2080\u001b[0m \u001b[0;31m# Make args iterable too.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2081\u001b[0;31m np.atleast_1d(x), height, width, y, linewidth)\n\u001b[0m\u001b[1;32m 2082\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2083\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0morientation\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'vertical'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 203 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/numpy/lib/stride_tricks.py\u001b[0m in \u001b[0;36mbroadcast_arrays\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_m\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msubok\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msubok\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_m\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 249\u001b[0;31m \u001b[0mshape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_broadcast_shape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 250\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mshape\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0marray\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 204 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/numpy/lib/stride_tricks.py\u001b[0m in \u001b[0;36m_broadcast_shape\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[0;31m# use the old-iterator because np.nditer does not handle size 0 arrays\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0;31m# consistently\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 184\u001b[0;31m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbroadcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 185\u001b[0m \u001b[0;31m# unfortunately, it cannot handle 32 or more arguments directly\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mpos\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m31\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 205 | "\u001b[0;31mValueError\u001b[0m: shape mismatch: objects cannot be broadcast to a single shape" 206 | ] 207 | }, 208 | { 209 | "data": { 210 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXYAAAD1CAYAAABEDd6nAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAGixJREFUeJzt3X9sE+f9B/D35Ywp1CRuEbaj4VhC\nMRudHfhjY6gwEJc5HnhRCjFICDSGlqKtdFCxUe2X3BGpjWir0VCkQEaxqoWh8WP5RnD6glonzGSw\nwcYkL2NSl6rWzERuWYFAShfD4e8fqO78TagvxMb2k/frr5z9Od/zOR9vjgfHj5RKpVIgIiJhlBV6\nAERElFsMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwZgKPQAAGBy8lfdjWCxTMTw8kvfjFBJ7FAN7\nFMOj6HHWrBljPj5p7thNJrnQQ8g79igG9iiGQvY4aYKdiGiyYLATEQmGwU5EJBgGOxGRYBjsRESC\nYbATEQmGwU5EJBgGOxGRYIriN0+JyjesNVQnm2SU39Wz1t3sODLRIRGVLN6xExEJhsFORCQYBjsR\nkWAY7EREgmGwExEJhsFORCQYBjsRkWAY7EREgjEU7NFoFH6/Hz6fD+3t7Q+sO3XqFD7/+c/jL3/5\nS/qx/fv3w+fzwe/34+zZsxMfMRERfaasv3mq6zqam5sRDodht9sRDAahKAqqq6sz6oaHh/HLX/4S\n8+fPTz/W398PVVWhqio0TcOmTZtw+vRpyLL4y2IRERVK1jv2WCwGl8sFp9MJs9mMQCCASCQyqq61\ntRVNTU2YOnVq+rFIJIJAIACz2Qyn0wmXy4VYLJbbDoiIKEPWYNc0DQ6HI71tt9uhaVpGzeXLlzEw\nMIDly5ePe18iIsqtrFMxqVRq1GOSJKV/vnfvHlpaWtDS0jLufT9hsUzN+4reslwGq3V6Xo9RaKXc\no2zw/ZckCVMM1BbjeZCfaTBUJ0kSZo7xZ+f/0/+na6JDKphSvlaNKmSPWYPd4XBgYGAgva1pGmw2\nW3r7o48+wnvvvYdvfvObAIDBwUF897vfRVtbW9Z9PzE8PDKhJoywWqfjxo3beT9OIZVyj0a+sREA\npphk3DHy7Y5FeB4mQ49GlfK1atSj6HHWrBljPp51Ksbr9SIejyORSCCZTEJVVSiKkn5+xowZ+MMf\n/oDu7m50d3djwYIFaGtrg9frhaIoUFUVyWQSiUQC8XgcNTU1ueuKiIhGyXrHbjKZEAqF0NTUBF3X\n0djYCLfbjdbWVng8HtTW1j5wX7fbjRUrVmDlypWQZRmhUIifiCEiyjMpNdZE+CM2OHgr78fgP/2K\nm9GFNgxPUxThQhuToUejSvlaNaqop2KIiKi0MNiJiATDYCciEgyDnYhIMAx2IiLBMNiJiATDYCci\nEgyDnYhIMAx2IiLBMNiJiATDYCciEgyDnYhIMAx2IiLBMNiJiATDYCciEoyhYI9Go/D7/fD5fGhv\nbx/1/OHDh1FfX4+GhgasW7cO/f39AIArV66gpqYGDQ0NaGhoQCgUyu3oiYholKwrKOm6jubmZoTD\nYdjtdgSDQSiKgurq6nRNfX091q1bBwCIRCJoaWnBW2+9BQCoqqpCV1fpLrpLRFRqst6xx2IxuFwu\nOJ1OmM1mBAIBRCKRjBqLxZL++eOPP4YkSbkfKRERGZL1jl3TNDgcjvS23W5HLBYbVXfo0CGEw2Hc\nuXMHb7/9dvrxK1eu4JlnnoHFYsELL7yAL33pSzkaOhERjSVrsI+1JOpYd+Tr16/H+vXrceLECbS1\ntWHXrl2w2Wzo6enBE088gb6+PmzZsgWqqmbc4QOAxTIVJlN+F7mW5TJYrdPzeoxCK+UeZYPvvyRJ\nmGKgthjPw2To0ahSvlaNKmSPWYPd4XBgYGAgva1pGmw22wPrA
4EAfvaznwEAzGYzzGYzAMDj8aCq\nqgoffPABvF5vxj7DwyMPM/Zx4eK5xa3cwOLNwDgWei7C8zAZejSqlK9Vo4p6MWuv14t4PI5EIoFk\nMglVVaEoSkZNPB5P/3zmzBm4XC4AwLVr16Dr9y/QRCKBeDwOp9P5sD0QEZEBWe/YTSYTQqEQmpqa\noOs6Ghsb4Xa70draCo/Hg9raWnR0dOD8+fMwmUwoLy/Hrl27AAAXL17Enj17IMsyZFnGzp07YbVa\n894UEdFkJqXGmkR/xAYHb+X9GPynX3Er37DWUJ3haYqOIxMdUs5Nhh6NKuVr1aiinoohIqLSwmAn\nIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwTDY\niYgEw2AnIhIMg52ISDAMdiIiwRgK9mg0Cr/fD5/Ph/b29lHPHz58GPX19WhoaMC6devQ39+ffm7/\n/v3w+Xzw+/04e/Zs7kZORERjyro0nq7raG5uRjgcht1uRzAYhKIoqK6uTtfU19dj3bp1AIBIJIKW\nlha89dZb6O/vh6qqUFUVmqZh06ZNOH36NGTZ2GrtREQ0flnv2GOxGFwuF5xOJ8xmMwKBACKRSEaN\nxWJJ//zxxx9DkiQA90M+EAjAbDbD6XTC5XIhFovluAUiIvpvWe/YNU2Dw+FIb9vt9jHD+dChQwiH\nw7hz5w7efvvt9L7z58/P2FfTtFH7WixTYTLl9y5elstgtU7P6zEKrZR7lA2+/5IkYYqB2mI8D5Oh\nR6NK+Vo1qpA9Zg32sda6/uSO/L+tX78e69evx4kTJ9DW1oZdu3YZ3nd4eMToeB8aF88tbuUGFm8G\nxrHQcxGeh8nQo1GlfK0aVdSLWTscDgwMDKS3NU2DzWZ7YH0gEMC77777UPsSEdHEZQ12r9eLeDyO\nRCKBZDIJVVWhKEpGTTweT/985swZuFwuAICiKFBVFclkEolEAvF4HDU1NbntgIiIMmSdijGZTAiF\nQmhqaoKu62hsbITb7UZrays8Hg9qa2vR0dGB8+fPw2Qyoby8HLt27QIAuN1urFixAitXroQsywiF\nQvxEDBFRnkmpsSbCH7HBwVt5Pwbn9Ipb+Ya1huoMzz93HJnokHJuMvRoVClfq0YV9Rw7ERGVFgY7\nEZFgGOxERIJhsBMRCYbBTkQkGAY7EZFgGOxERIJhsBMRCYbBTkQkGAY7EZFgGOxERIJhsBMRCYbB\nTkQkGAY7EZFgGOxERILJutAGAESjUbz88su4d+8e1qxZg82bN2c8Hw6HcfToUciyjCeffBKvvPIK\nPve5zwEA5s2bh7lz5wIAKisrsW/fvhy3QERE/y1rsOu6jubmZoTDYdjtdgSDQSiKgurq6nTNvHnz\ncPz4cUybNg2/+tWv8Nprr+GNN94AADz22GPo6urKXwdERJQh61RMLBaDy+WC0+mE2WxGIBBAJBLJ\nqFm0aBGmTZsGAFiwYEHGAtZERPRoZQ12TdPgcDjS23a7HZqmPbD+2LFjWLp0aXp7ZGQEq1evxtq1\na/Huu+9OcLhERJRN1qmYsZZElSRpzNquri709fWho6Mj/VhPTw/sdjsSiQQ2btyIuXPnoqqqKmM/\ni2UqTKb8LnIty2WwWqfn9RiFVso9ygbff0mSMMVAbTGeh8nQo1GlfK0aVcgeswa7w+HImFrRNA02\nm21U3blz57Bv3z50dHTAbDanH7fb7QAAp9OJhQsX4vLly6OCfXh45KEbMIqL5xa3cgOLNwPjWOi5\nCM/DZOjRqFK+Vo0q6sWsvV4v4vE4EokEkskkVFWFoigZNZcvX0YoFEJbWxtmzpyZfnxoaAjJZBIA\ncO3aNVy6dCnjP12JiCj3st6xm0wmhEIhNDU1Qdd1NDY2wu12o7W1FR6PB7W1tXj11Vdx+/ZtbNu2\nDcCnH2t8//338dJLL0GSJ
KRSKTz77LMMdiKiPJNSY02iP2KDg7fyfgz+06+4lW9Ya6jO8DRFx5GJ\nDinnJkOPRpXytWpUUU/FEBFRaWGwExEJhsFORCQYBjsRkWAY7EREgmGwExEJhsFORCQYBjsRkWAY\n7EREgmGwExEJhsFORCQYBjsRkWAY7EREgmGwExEJhsFORCQYQ8EejUbh9/vh8/nQ3t4+6vlwOIyV\nK1eivr4eGzduxD//+c/0c52dnairq0NdXR06OztzN3IiIhpT1mDXdR3Nzc04cOAAVFXFyZMn0d/f\nn1Ezb948HD9+HCdOnIDf78drr70GALhx4wb27t2LI0eO4OjRo9i7dy+Ghoby0wkREQEwEOyxWAwu\nlwtOpxNmsxmBQACRSCSjZtGiRZg2bRoAYMGCBenFr3t7e7F48WJYrVZUVFRg8eLFOHv2bB7aICKi\nT2QNdk3T4HA40tt2ux2apj2w/tixY1i6dOlD7UtERBOXdTHrsZZElSRpzNquri709fWho6NjXPta\nLFNhMslZBzsRslwGq3V6Xo9RaKXco2zw/ZckCVMM1BbjeZgMPRpVyteqUYXsMWuwOxyO9NQKcP8u\n3Gazjao7d+4c9u3bh46ODpjN5vS+Fy5cyNh34cKFo/YdHh55qMGPBxfPLW7lBhZvBsax0HMRnofJ\n0KNRpXytGlXUi1l7vV7E43EkEgkkk0moqgpFUTJqLl++jFAohLa2NsycOTP9+JIlS9Db24uhoSEM\nDQ2ht7cXS5YsmWArRET0WbLesZtMJoRCITQ1NUHXdTQ2NsLtdqO1tRUejwe1tbV49dVXcfv2bWzb\ntg0AUFlZiX379sFqteK5555DMBgEAGzZsgVWqzW/HRERTXJSaqyJ8EdscPBW3o/Bf/oVt/INaw3V\nGZ6m6Dgy0SHl3GTo0ahSvlaNKuqpGCIiKi0MdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwTDYiYgE\nw2AnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhKMoWCPRqPw+/3w\n+Xxob28f9fzFixexatUqPPXUUzh16lTGc/PmzUNDQwMaGhrwne98JzejJiKiB8q6NJ6u62hubkY4\nHIbdbkcwGISiKKiurk7XVFZWoqWlBQcPHhy1/2OPPYaurq7cjpqIiB4oa7DHYjG4XC44nU4AQCAQ\nQCQSyQj22bNnAwDKyjizQ0RUaFmDXdM0OByO9LbdbkcsFjN8gJGREaxevRomkwmbN2/G1772tVE1\nFstUmEyy4dd8GLJcBqt1el6PUWil3KNs8P2XJAlTDNQW43mYDD0aVcrXqlGF7DFrsI+11rUkSYYP\n0NPTA7vdjkQigY0bN2Lu3LmoqqrKqBkeHjH8eg+Li+cWt3IDizcD41jouQjPw2To0ahSvlaNKurF\nrB0OBwYGBtLbmqbBZrMZPrDdbgcAOJ1OLFy4EJcvXza8LxERjV/WYPd6vYjH40gkEkgmk1BVFYqi\nGHrxoaEhJJNJAMC1a9dw6dKljLl5IiLKvaxTMSaTCaFQCE1NTdB1HY2NjXC73WhtbYXH40FtbS1i\nsRief/553Lx5Ez09PXjzzTehqiref/99vPTSS5AkCalUCs8++yyDnYgoz6TUWJPoj9jg4K28H4Nz\nesWtfMNaQ3WG5587jkx0SDk3GXo0qpSvVaOKeo6diIhKC4OdiEgwDHYiIsEw2ImIBMNgJyISDIOd\niEgwDHYiIsFk/QUlIiKjjH5WXzbJhr47p5Q/q19IvGMnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIM\ng52ISDAMdiIiwRgK9mg0Cr/fD5/Ph/b29lHPX7x4EatWrcJTTz2FU6dOZTzX2dmJuro61NXVobOz\nMzejJiKiB8r6C0q6rqO5uRnhcBh2ux3BYBCKomSshFRZWYmWlhYcPHgwY98bN25g7969OH7
8OCRJ\nwurVq6EoCioqKnLfCRERATBwxx6LxeByueB0OmE2mxEIBBCJRDJqZs+ejS984QsoK8t8ud7eXixe\nvBhWqxUVFRVYvHgxzp49m9sOiIgoQ9Zg1zQNDocjvW2326FpmqEXn8i+RET0cLJOxYy1JKokSYZe\n3Oi+FstUmEyyodd8WLJcBqt1el6PUWil3KNs8P2XJAlTDNQW43lgj58q5R6NKuSfx6zB7nA4MDAw\nkN7WNA02m83QizscDly4cCFj34ULF46qGx4eMfR6E8HFc4ubkS+EAsax0HMRngf2+KlS7tGool7M\n2uv1Ih6PI5FIIJlMQlVVKIpi6KBLlixBb28vhoaGMDQ0hN7eXixZsmR8IycionHJesduMpkQCoXQ\n1NQEXdfR2NgIt9uN1tZWeDwe1NbWIhaL4fnnn8fNmzfR09ODN998E6qqwmq14rnnnkMwGAQAbNmy\nBVarNe9NERFNZoa+j33ZsmVYtmxZxmPbtm1L/1xTU4NoNDrmvsFgMB3sRESUf/zNUyIiwTDYiYgE\nw2AnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIi\nwTDYiYgEw2AnIhKMoWCPRqPw+/3w+Xxob28f9XwymcQLL7wAn8+HNWvW4MqVKwCAK1euoKamBg0N\nDWhoaEAoFMrt6ImIaJSsC23ouo7m5maEw2HY7XYEg0EoioLq6up0zdGjR1FeXo533nkHqqri9ddf\nxxtvvAEAqKqqQldXV/46ICKiDFnv2GOxGFwuF5xOJ8xmMwKBACKRSEZNd3c3Vq1aBQDw+/04f/48\nUqlUfkZMRESfKWuwa5oGh8OR3rbb7dA0bVRNZWUlgPtrpM6YMQPXr18HcH865plnnsGGDRvwxz/+\nMZdjJyKiMWSdihnrzluSJEM1NpsNPT09eOKJJ9DX14ctW7ZAVVVYLJaMWotlKkwmebxjHxdZLoPV\nOj2vxyi0Uu5RNvj+S5KEKQZqi/E8sMdPlXKPRhXyz2PWYHc4HBgYGEhva5oGm802qubq1atwOBy4\ne/cubt26BavVCkmSYDabAQAejwdVVVX44IMP4PV6M/YfHh7JRS+fyWqdjhs3buf9OIVUyj2W39UN\n1U0xybhjoPZmEZ4H9vipUu7RqEfx53HWrBljPp51Ksbr9SIejyORSCCZTEJVVSiKklGjKAo6OzsB\nAKdPn8aiRYsgSRKuXbsGXb//5iUSCcTjcTidzon2QkREnyHrHbvJZEIoFEJTUxN0XUdjYyPcbjda\nW1vh8XhQW1uLYDCIHTt2wOfzoaKiArt37wYAXLx4EXv27IEsy5BlGTt37oTVas17U0REk1nWYAeA\nZcuWYdmyZRmPbdu2Lf3z1KlTsWfPnlH7+f1++P3+CQ6RiIjGg795SkQkGAY7EZFgGOxERIIxNMdO\nhVW+Ya2hOtkkG/q42c2OIxMdEhEVMd6xExEJhsFORCQYBjsRkWAY7EREgmGwExEJhsFORCQYBjsR\nkWAY7EREgmGwExEJhsFORCQYfqUAEdE4lMJXfPCOnYhIMIbu2KPRKF5++WXcu3cPa9aswebNmzOe\nTyaTePHFF/HXv/4VVqsVu3fvxuzZswEA+/fvx7Fjx1BWVoaf/vSn+OpXv5rTBkrhb08iokcp6x27\nrutobm7GgQMHoKoqTp48if7+/oyao0ePory8HO+88w6+9a1v4fXXXwcA9Pf3Q1VVqKqKAwcOYOfO\nnek1UImIKD+yBnssFoPL5YLT6YTZbEYgEEAkEsmo6e7uxqpVqwDcXw7v/PnzSKVSiEQiCAQCMJvN\ncDqdcLlciMVi+emEiIgAGJiK0TQNDocjvW2320eFs6ZpqKysvP+CJhNmzJiB69evQ9M0zJ8/P2Nf\nTdNGHWPWrBkP3QBO/6/h0qkGamY9/Ejyhz1mYI/3scc
CKYEes96xp1KpUY9JkmSoxsi+RESUW1mD\n3eFwYGBgIL2taRpsNtuomqtXrwIA7t69i1u3bsFqtRral4iIcitrsHu9XsTjcSQSCSSTSaiqCkVR\nMmoURUFnZycA4PTp01i0aBEkSYKiKFBVFclkEolEAvF4HDU1NfnphIiIABiYYzeZTAiFQmhqaoKu\n62hsbITb7UZrays8Hg9qa2sRDAaxY8cO+Hw+VFRUYPfu3QAAt9uNFStWYOXKlZBlGaFQCLIs572p\n/y/bxzVL3Y9+9COcOXMGM2fOxMmTJws9nJy7evUqXnzxRfz73/9GWVkZ1q5di40bNxZ6WDk1MjKC\n9evXI5lMQtd1+P1+bN26tdDDyotPcsRut2P//v2FHk7OKYqCxx9/HGVlZZBlGb/5zW8e/SBSgrt7\n926qtrY29Y9//CM1MjKSqq+vT/39738v9LBy6sKFC6m+vr5UIBAo9FDyQtO0VF9fXyqVSqVu3bqV\nqqurE+49vHfvXmp4eDiVSqVSyWQyFQwGU3/+858LPKr8OHjwYGr79u2pzZs3F3ooebF8+fLUhx9+\nWNAxCP+bp0Y+rlnqvvzlL6OioqLQw8gbm82GL37xiwAAi8WCOXPmjPnpqlImSRIef/xxAPf/n+ru\n3btCftBgYGAAZ86cQTAYLPRQhCZ8sI/1cU3RQmEyuXLlCv72t79lfIxWFLquo6GhAU8//TSefvpp\nIXt85ZVXsGPHDpSViR093/72t7F69Wr8+te/LsjxxT67MPZxTSoNH330EbZu3Yof//jHsFgshR5O\nzsmyjK6uLvz2t79FLBbDe++9V+gh5VRPTw+efPJJeDyeQg8lrw4fPozOzk784he/wKFDh3Dx4sVH\nPgbhg50fuRTDnTt3sHXrVtTX16Ourq7Qw8mr8vJyfOUrX8HZs2cLPZScunTpErq7u6EoCrZv347f\n//73+MEPflDoYeWc3W4HAMycORM+n68gv20vfLAb+bgmFbdUKoWf/OQnmDNnDjZt2lTo4eTFtWvX\ncPPmTQDAf/7zH5w7dw5z5swp8Khy6/vf/z6i0Si6u7vx85//HIsWLUp/r5Qobt++jeHh4fTPv/vd\n7+B2ux/5OIT/PvYHfVxTJNu3b8eFCxdw/fp1LF26FN/73vewZs2aQg8rZ/70pz+hq6sLc+fORUND\nA4D7PS9btqzAI8udf/3rX/jhD38IXdeRSqXw9a9/HcuXLy/0sGicPvzwQ2zZsgXA/f8z+cY3voGl\nS5c+8nFIqbEmoYmIqGQJPxVDRDTZMNiJiATDYCciEgyDnYhIMAx2IiLBMNiJiATDYCciEgyDnYhI\nMP8HDmMzSpqwHYkAAAAASUVORK5CYII=\n", 211 | "text/plain": [ 212 | "" 213 | ] 214 | }, 215 | "metadata": {}, 216 | "output_type": "display_data" 217 | } 218 | ], 219 | "source": [ 220 | "# plot bars\n", 221 | "red_bars = plt.bar(ind, red_proportions, width, color='r', alpha=.7, label='Red Wine')\n", 222 | "white_bars = plt.bar(ind + width, white_proportions, width, color='w', alpha=.7, label='White Wine')\n", 223 | "\n", 224 | "# title and labels\n", 225 | "plt.ylabel('Proportion')\n", 226 | "plt.xlabel('Quality')\n", 227 | "plt.title('Proportion by Wine Color and Quality')\n", 228 | "locations = ind + width / 2 # xtick 
locations\n", 229 | "labels = ['3', '4', '5', '6', '7', '8', '9'] # xtick labels\n", 230 | "plt.xticks(locations, labels)\n", 231 | "\n", 232 | "# legend\n", 233 | "plt.legend()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "Oh, that didn't work because we're missing a red wine value for the 9 rating. Even though this number is a 0, we need it for our plot. Run the last two cells after running the cell below." 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 22, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "quality\n", 252 | "3 0.006254\n", 253 | "4 0.033146\n", 254 | "5 0.425891\n", 255 | "6 0.398999\n", 256 | "7 0.124453\n", 257 | "8 0.011257\n", 258 | "9 0.000000\n", 259 | "Name: pH, dtype: float64" 260 | ] 261 | }, 262 | "execution_count": 22, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "red_proportions['9'] = 0\n", 269 | "red_proportions" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "collapsed": true 277 | }, 278 | "outputs": [], 279 | "source": [] 280 | } 281 | ], 282 | "metadata": { 283 | "kernelspec": { 284 | "display_name": "Python 3", 285 | "language": "python", 286 | "name": "python3" 287 | }, 288 | "language_info": { 289 | "codemirror_mode": { 290 | "name": "ipython", 291 | "version": 3 292 | }, 293 | "file_extension": ".py", 294 | "mimetype": "text/x-python", 295 | "name": "python", 296 | "nbconvert_exporter": "python", 297 | "pygments_lexer": "ipython3", 298 | "version": "3.6.1" 299 | } 300 | }, 301 | "nbformat": 4, 302 | "nbformat_minor": 2 303 | } 304 | --------------------------------------------------------------------------------