├── Cleaning.ipynb ├── Drawing-Conclusions-pynb.txt ├── Histogram-Practice.ipynb ├── Pandas_Functions_Formulae.xlsx ├── README.md ├── appending.ipynb ├── appending_rename.ipynb ├── assessing.ipynb ├── assessing_case2.ipynb ├── assessing_quiz.html ├── assessing_quiz.ipynb ├── cleaning_column_labels.ipynb ├── cleaning_practice.ipynb ├── conclusions-quiz-solutions.ipynb.txt ├── conclusions_groupby.ipynb ├── conclusions_query.ipynb ├── conclusions_quiz.ipynb ├── drawing_conclusions_Fuel.ipynb ├── eda_visuals.ipynb ├── eda_visuals_practise_functions.ipynb ├── exploring_visuals.ipynb ├── fix_datatypes_air_pollution.ipynb ├── fix_datatypes_cyl.ipynb ├── matplotlib_example.ipynb ├── plots-pandas.ipynb ├── plotting_type_quality.ipynb ├── query_filter.ipynb ├── reading_csv.ipynb ├── visuals_quiz.ipynb ├── wine_visualizations.ipynb ├── winequality-red.csv ├── winequality-white.csv └── winequality_edited.csv /Cleaning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#import packages\n", 12 | "import pandas as pd\n", 13 | "\n", 14 | "#read csv document\n", 15 | "df = pd.read_csv('ChicagoResults.csv')" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "\n", 28 | "RangeIndex: 271 entries, 0 to 270\n", 29 | "Data columns (total 7 columns):\n", 30 | "year 271 non-null int64\n", 31 | "city 271 non-null object\n", 32 | "country 271 non-null object\n", 33 | "avg_temp 271 non-null float64\n", 34 | "sevenDayMA 265 non-null float64\n", 35 | "FiveYearMA 267 non-null float64\n", 36 | "TenYearMA 262 non-null float64\n", 37 | "dtypes: float64(4), int64(1), object(2)\n", 38 | "memory usage: 14.9+ KB\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | 
"df.info()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "mean_FiveYearMA = df['FiveYearMA'].mean()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 5, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "0 9.893401\n", 64 | "1 9.893401\n", 65 | "2 9.893401\n", 66 | "3 9.893401\n", 67 | "4 3.794000\n", 68 | "5 2.706000\n", 69 | "6 0.360000\n", 70 | "7 2.098000\n", 71 | "8 4.336000\n", 72 | "9 5.236000\n", 73 | "10 7.244000\n", 74 | "11 9.372000\n", 75 | "12 8.756000\n", 76 | "13 8.748000\n", 77 | "14 9.748000\n", 78 | "15 9.462000\n", 79 | "16 9.252000\n", 80 | "17 9.380000\n", 81 | "18 9.230000\n", 82 | "19 9.294000\n", 83 | "20 9.116000\n", 84 | "21 9.324000\n", 85 | "22 9.670000\n", 86 | "23 9.688000\n", 87 | "24 9.606000\n", 88 | "25 9.852000\n", 89 | "26 9.684000\n", 90 | "27 9.660000\n", 91 | "28 9.650000\n", 92 | "29 9.822000\n", 93 | " ... 
\n", 94 | "241 10.436000\n", 95 | "242 10.448000\n", 96 | "243 10.504000\n", 97 | "244 10.914000\n", 98 | "245 10.898000\n", 99 | "246 10.726000\n", 100 | "247 11.004000\n", 101 | "248 11.156000\n", 102 | "249 10.796000\n", 103 | "250 10.660000\n", 104 | "251 10.812000\n", 105 | "252 10.626000\n", 106 | "253 10.222000\n", 107 | "254 10.202000\n", 108 | "255 10.728000\n", 109 | "256 10.960000\n", 110 | "257 11.064000\n", 111 | "258 11.492000\n", 112 | "259 11.770000\n", 113 | "260 11.304000\n", 114 | "261 11.148000\n", 115 | "262 11.246000\n", 116 | "263 11.280000\n", 117 | "264 11.264000\n", 118 | "265 11.216000\n", 119 | "266 11.088000\n", 120 | "267 11.136000\n", 121 | "268 11.004000\n", 122 | "269 11.278000\n", 123 | "270 11.548000\n", 124 | "Name: FiveYearMA, Length: 271, dtype: float64" 125 | ] 126 | }, 127 | "execution_count": 5, 128 | "metadata": {}, 129 | "output_type": "execute_result" 130 | } 131 | ], 132 | "source": [ 133 | "df['FiveYearMA'].fillna(mean_FiveYearMA)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 7, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/html": [ 144 | "

\n", 145 | "\n", 158 | "\n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | "

	year	city	country	avg_temp	sevenDayMA	FiveYearMA	TenYearMA
0	1743	Chicago	United States	5.44	NaN	NaN	NaN
1	1744	Chicago	United States	11.73	NaN	NaN	NaN
2	1745	Chicago	United States	1.80	NaN	NaN	NaN
3	1746	Chicago	United States	0.00	NaN	NaN	NaN
4	1747	Chicago	United States	0.00	NaN	3.794	NaN
5	1748	Chicago	United States	0.00	NaN	2.706	NaN
6	1749	Chicago	United States	0.00	2.710000	0.360	NaN
7	1750	Chicago	United States	10.49	3.431429	2.098	NaN
8	1751	Chicago	United States	11.19	3.354286	4.336	NaN
9	1752	Chicago	United States	4.50	3.740000	5.236	4.515
10	1753	Chicago	United States	10.04	5.174286	7.244	4.975

\n", 284 | "

" 285 | ], 286 | "text/plain": [ 287 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 288 | "0 1743 Chicago United States 5.44 NaN NaN NaN\n", 289 | "1 1744 Chicago United States 11.73 NaN NaN NaN\n", 290 | "2 1745 Chicago United States 1.80 NaN NaN NaN\n", 291 | "3 1746 Chicago United States 0.00 NaN NaN NaN\n", 292 | "4 1747 Chicago United States 0.00 NaN 3.794 NaN\n", 293 | "5 1748 Chicago United States 0.00 NaN 2.706 NaN\n", 294 | "6 1749 Chicago United States 0.00 2.710000 0.360 NaN\n", 295 | "7 1750 Chicago United States 10.49 3.431429 2.098 NaN\n", 296 | "8 1751 Chicago United States 11.19 3.354286 4.336 NaN\n", 297 | "9 1752 Chicago United States 4.50 3.740000 5.236 4.515\n", 298 | "10 1753 Chicago United States 10.04 5.174286 7.244 4.975" 299 | ] 300 | }, 301 | "execution_count": 7, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "df.head(11)" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 8, 313 | "metadata": { 314 | "collapsed": true 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "df['FiveYearMA'] = df['FiveYearMA'].fillna(mean_FiveYearMA)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 9, 324 | "metadata": { 325 | "scrolled": true 326 | }, 327 | "outputs": [ 328 | { 329 | "data": { 330 | "text/html": [ 331 | "

\n", 332 | "\n", 345 | "\n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | "

	year	city	country	avg_temp	sevenDayMA	FiveYearMA	TenYearMA
0	1743	Chicago	United States	5.44	NaN	9.893401	NaN
1	1744	Chicago	United States	11.73	NaN	9.893401	NaN
2	1745	Chicago	United States	1.80	NaN	9.893401	NaN
3	1746	Chicago	United States	0.00	NaN	9.893401	NaN
4	1747	Chicago	United States	0.00	NaN	3.794000	NaN
5	1748	Chicago	United States	0.00	NaN	2.706000	NaN
6	1749	Chicago	United States	0.00	2.710000	0.360000	NaN
7	1750	Chicago	United States	10.49	3.431429	2.098000	NaN
8	1751	Chicago	United States	11.19	3.354286	4.336000	NaN
9	1752	Chicago	United States	4.50	3.740000	5.236000	4.515

\n", 461 | "

" 462 | ], 463 | "text/plain": [ 464 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 465 | "0 1743 Chicago United States 5.44 NaN 9.893401 NaN\n", 466 | "1 1744 Chicago United States 11.73 NaN 9.893401 NaN\n", 467 | "2 1745 Chicago United States 1.80 NaN 9.893401 NaN\n", 468 | "3 1746 Chicago United States 0.00 NaN 9.893401 NaN\n", 469 | "4 1747 Chicago United States 0.00 NaN 3.794000 NaN\n", 470 | "5 1748 Chicago United States 0.00 NaN 2.706000 NaN\n", 471 | "6 1749 Chicago United States 0.00 2.710000 0.360000 NaN\n", 472 | "7 1750 Chicago United States 10.49 3.431429 2.098000 NaN\n", 473 | "8 1751 Chicago United States 11.19 3.354286 4.336000 NaN\n", 474 | "9 1752 Chicago United States 4.50 3.740000 5.236000 4.515" 475 | ] 476 | }, 477 | "execution_count": 9, 478 | "metadata": {}, 479 | "output_type": "execute_result" 480 | } 481 | ], 482 | "source": [ 483 | "df.head(10)" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 11, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "mean_TenYearMA = df['TenYearMA'].mean()" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": 12, 498 | "metadata": { 499 | "collapsed": true 500 | }, 501 | "outputs": [], 502 | "source": [ 503 | "df['TenYearMA'].fillna(mean_TenYearMA, inplace = True)" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 13, 509 | "metadata": {}, 510 | "outputs": [ 511 | { 512 | "data": { 513 | "text/html": [ 514 | "

\n", 515 | "\n", 528 | "\n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | "

	year	city	country	avg_temp	sevenDayMA	FiveYearMA	TenYearMA
0	1743	Chicago	United States	5.44	NaN	9.893401	9.949863
1	1744	Chicago	United States	11.73	NaN	9.893401	9.949863
2	1745	Chicago	United States	1.80	NaN	9.893401	9.949863
3	1746	Chicago	United States	0.00	NaN	9.893401	9.949863
4	1747	Chicago	United States	0.00	NaN	3.794000	9.949863
5	1748	Chicago	United States	0.00	NaN	2.706000	9.949863
6	1749	Chicago	United States	0.00	2.710000	0.360000	9.949863
7	1750	Chicago	United States	10.49	3.431429	2.098000	9.949863
8	1751	Chicago	United States	11.19	3.354286	4.336000	9.949863
9	1752	Chicago	United States	4.50	3.740000	5.236000	4.515000

\n", 644 | "

" 645 | ], 646 | "text/plain": [ 647 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 648 | "0 1743 Chicago United States 5.44 NaN 9.893401 9.949863\n", 649 | "1 1744 Chicago United States 11.73 NaN 9.893401 9.949863\n", 650 | "2 1745 Chicago United States 1.80 NaN 9.893401 9.949863\n", 651 | "3 1746 Chicago United States 0.00 NaN 9.893401 9.949863\n", 652 | "4 1747 Chicago United States 0.00 NaN 3.794000 9.949863\n", 653 | "5 1748 Chicago United States 0.00 NaN 2.706000 9.949863\n", 654 | "6 1749 Chicago United States 0.00 2.710000 0.360000 9.949863\n", 655 | "7 1750 Chicago United States 10.49 3.431429 2.098000 9.949863\n", 656 | "8 1751 Chicago United States 11.19 3.354286 4.336000 9.949863\n", 657 | "9 1752 Chicago United States 4.50 3.740000 5.236000 4.515000" 658 | ] 659 | }, 660 | "execution_count": 13, 661 | "metadata": {}, 662 | "output_type": "execute_result" 663 | } 664 | ], 665 | "source": [ 666 | "df.head(10)" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": 14, 672 | "metadata": {}, 673 | "outputs": [ 674 | { 675 | "data": { 676 | "text/plain": [ 677 | "0 False\n", 678 | "1 False\n", 679 | "2 False\n", 680 | "3 False\n", 681 | "4 False\n", 682 | "5 False\n", 683 | "6 False\n", 684 | "7 False\n", 685 | "8 False\n", 686 | "9 False\n", 687 | "10 False\n", 688 | "11 False\n", 689 | "12 False\n", 690 | "13 False\n", 691 | "14 False\n", 692 | "15 False\n", 693 | "16 False\n", 694 | "17 False\n", 695 | "18 False\n", 696 | "19 False\n", 697 | "20 False\n", 698 | "21 False\n", 699 | "22 False\n", 700 | "23 False\n", 701 | "24 False\n", 702 | "25 False\n", 703 | "26 False\n", 704 | "27 False\n", 705 | "28 False\n", 706 | "29 False\n", 707 | " ... 
\n", 708 | "241 False\n", 709 | "242 False\n", 710 | "243 False\n", 711 | "244 False\n", 712 | "245 False\n", 713 | "246 False\n", 714 | "247 False\n", 715 | "248 False\n", 716 | "249 False\n", 717 | "250 False\n", 718 | "251 False\n", 719 | "252 False\n", 720 | "253 False\n", 721 | "254 False\n", 722 | "255 False\n", 723 | "256 False\n", 724 | "257 False\n", 725 | "258 False\n", 726 | "259 False\n", 727 | "260 False\n", 728 | "261 False\n", 729 | "262 False\n", 730 | "263 False\n", 731 | "264 False\n", 732 | "265 False\n", 733 | "266 False\n", 734 | "267 False\n", 735 | "268 False\n", 736 | "269 False\n", 737 | "270 False\n", 738 | "Length: 271, dtype: bool" 739 | ] 740 | }, 741 | "execution_count": 14, 742 | "metadata": {}, 743 | "output_type": "execute_result" 744 | } 745 | ], 746 | "source": [ 747 | "df.duplicated()" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": 15, 753 | "metadata": {}, 754 | "outputs": [ 755 | { 756 | "data": { 757 | "text/plain": [ 758 | "0" 759 | ] 760 | }, 761 | "execution_count": 15, 762 | "metadata": {}, 763 | "output_type": "execute_result" 764 | } 765 | ], 766 | "source": [ 767 | "sum(df.duplicated())" 768 | ] 769 | }, 770 | { 771 | "cell_type": "code", 772 | "execution_count": 16, 773 | "metadata": { 774 | "collapsed": true 775 | }, 776 | "outputs": [], 777 | "source": [ 778 | "df.drop_duplicates(inplace = True)" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 18, 784 | "metadata": {}, 785 | "outputs": [ 786 | { 787 | "name": "stdout", 788 | "output_type": "stream", 789 | "text": [ 790 | "\n", 791 | "Int64Index: 271 entries, 0 to 270\n", 792 | "Data columns (total 7 columns):\n", 793 | "year 271 non-null int64\n", 794 | "city 271 non-null object\n", 795 | "country 271 non-null object\n", 796 | "avg_temp 271 non-null float64\n", 797 | "sevenDayMA 265 non-null float64\n", 798 | "FiveYearMA 271 non-null float64\n", 799 | "TenYearMA 271 non-null float64\n", 800 | "dtypes: 
float64(4), int64(1), object(2)\n", 801 | "memory usage: 16.9+ KB\n" 802 | ] 803 | } 804 | ], 805 | "source": [ 806 | "df.info(0)" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": 19, 812 | "metadata": { 813 | "collapsed": true 814 | }, 815 | "outputs": [], 816 | "source": [ 817 | "#df['timestamp'] = pd.to_datetime(df['timestamp'])" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": null, 823 | "metadata": { 824 | "collapsed": true 825 | }, 826 | "outputs": [], 827 | "source": [] 828 | } 829 | ], 830 | "metadata": { 831 | "kernelspec": { 832 | "display_name": "Python 3", 833 | "language": "python", 834 | "name": "python3" 835 | }, 836 | "language_info": { 837 | "codemirror_mode": { 838 | "name": "ipython", 839 | "version": 3 840 | }, 841 | "file_extension": ".py", 842 | "mimetype": "text/x-python", 843 | "name": "python", 844 | "nbconvert_exporter": "python", 845 | "pygments_lexer": "ipython3", 846 | "version": "3.6.2" 847 | } 848 | }, 849 | "nbformat": 4, 850 | "nbformat_minor": 2 851 | } 852 | -------------------------------------------------------------------------------- /Drawing-Conclusions-pynb.txt: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | -------------------------------------------------------------------------------- /Pandas_Functions_Formulae.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nirupamaprv/Data-Analysis/05892f4058faf9ccc6aa90995e90d4ae7bd0bf7c/Pandas_Functions_Formulae.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | Practice Exercise from Data Analysis course of Udacity DAND 3 | 4 | - .ipynb file contains code with Markdown cells from Jupyter Notebook. 
5 | - Exercises were solved independently to answer the assignment questions. 6 | - .html file is the .ipynb notebook converted to a web version for easy viewing 7 | - .csv files contain data on which data analysis was conducted 8 | 9 | ## Datasets and Project Summary 10 | ### Chicago temperature set 11 | - Cleaning.ipynb - Data cleaning practice 12 | - Histogram-Practice.ipynb - Practicing creating plots 13 | - plots-pandas.ipynb - computing values for plots 14 | 15 | ### Wine Dataset 16 | - appending.ipynb - Appending data from different datasets 17 | - appending_rename.ipynb - Appending, renaming and saving data from different datasets 18 | - assessing_quiz.ipynb - answering Quiz using pandas 19 | - conclusions_groupby.ipynb - using groupby function to analyze quality, ratings and other Questions. 20 | - conclusions_query.ipynb - drawing conclusions to Qs on ratings 21 | - eda_visuals.ipynb - Addressing Qs on wine dataset using different plots 22 | - eda_visuals_practise_functions.ipynb - Addressing additional Qs using different plots; here, varying colors are used to differentiate groups 23 | - plotting_type_quality.ipynb - Creating plots with matplotlib for ratings 24 | - wine_visualizations.ipynb - Use Matplotlib to create bar charts that visualize the conclusions made with groupby and queries 25 | 26 | ### Cancer Dataset 27 | - assessing.ipynb - inspecting datasets, data types, selecting different ranges 28 | - cleaning_practice.ipynb - practicing data wrangling 29 | 30 | 31 | ### Auto Dataset for 2008 and 2018 models 32 | - assessing_case2.ipynb - answering Quiz using pandas 33 | - cleaning_column_labels.ipynb - data wrangling 34 | - drawing_conclusions_Fuel.ipynb - Making inferences and comparisons on fuel efficiency, improvements, classes, etc. 
and visualizing using histograms and pie charts 35 | - exploring_visuals.ipynb - Making inferences and comparisons using visualizations 36 | - fix_datatypes_air_pollution.ipynb - Data Wrangling 37 | - fix_datatypes_cyl.ipynb - Datatypes transformation 38 | - query_filter.ipynb - Data Wrangling 39 | 40 | ### Other Datasets 41 | - matplotlib_example.ipynb - Practicing bar charts 42 | - conclusions_quiz.ipynb - **Store Sales Dataset** - Analyzing sales figures and periods to determine performance and revenue 43 | - reading_csv.ipynb - **Student Scores Data** - Reading, writing and inspecting values 44 | - visuals_quiz.ipynb - **Powerplant Data** - creating plots using matplotlib and answering Quiz questions 45 | 46 | 47 | -------------------------------------------------------------------------------- /assessing_case2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Assessing\n", 10 | "Use the space below to explore `all_alpha_08.csv` and `all_alpha_18.csv` to answer the quiz questions below." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 28, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "\n", 23 | "RangeIndex: 1611 entries, 0 to 1610\n", 24 | "Data columns (total 18 columns):\n", 25 | "Model 1611 non-null object\n", 26 | "Displ 1609 non-null float64\n", 27 | "Cyl 1609 non-null float64\n", 28 | "Trans 1611 non-null object\n", 29 | "Drive 1611 non-null object\n", 30 | "Fuel 1611 non-null object\n", 31 | "Cert Region 1611 non-null object\n", 32 | "Stnd 1611 non-null object\n", 33 | "Stnd Description 1611 non-null object\n", 34 | "Underhood ID 1611 non-null object\n", 35 | "Veh Class 1611 non-null object\n", 36 | "Air Pollution Score 1611 non-null int64\n", 37 | "City MPG 1611 non-null object\n", 38 | "Hwy MPG 1611 non-null object\n", 39 | "Cmb MPG 1611 non-null object\n", 40 | "Greenhouse Gas Score 1611 non-null int64\n", 41 | "SmartWay 1611 non-null object\n", 42 | "Comb CO2 1611 non-null object\n", 43 | "dtypes: float64(2), int64(2), object(14)\n", 44 | "memory usage: 226.6+ KB\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "import pandas as pd\n", 50 | "\n", 51 | "df_18 = pd.read_csv('all_alpha_18.csv')\n", 52 | "df_18.info()\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 29, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "\n", 65 | "RangeIndex: 2404 entries, 0 to 2403\n", 66 | "Data columns (total 18 columns):\n", 67 | "Model 2404 non-null object\n", 68 | "Displ 2404 non-null float64\n", 69 | "Cyl 2205 non-null object\n", 70 | "Trans 2205 non-null object\n", 71 | "Drive 2311 non-null object\n", 72 | "Fuel 2404 non-null object\n", 73 | "Sales Area 2404 non-null object\n", 74 | "Stnd 2404 non-null object\n", 75 | "Underhood ID 2404 non-null object\n", 76 | "Veh Class 2404 non-null object\n", 77 | "Air Pollution Score 2404 non-null object\n", 78 | "FE Calc 
Appr 2205 non-null object\n", 79 | "City MPG 2205 non-null object\n", 80 | "Hwy MPG 2205 non-null object\n", 81 | "Cmb MPG 2205 non-null object\n", 82 | "Unadj Cmb MPG 2205 non-null float64\n", 83 | "Greenhouse Gas Score 2205 non-null object\n", 84 | "SmartWay 2404 non-null object\n", 85 | "dtypes: float64(2), object(16)\n", 86 | "memory usage: 338.1+ KB\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "df_08 = pd.read_csv('all_alpha_08.csv')\n", 92 | "df_08.info()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 30, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/plain": [ 103 | "0" 104 | ] 105 | }, 106 | "execution_count": 30, 107 | "metadata": {}, 108 | "output_type": "execute_result" 109 | } 110 | ], 111 | "source": [ 112 | "sum(df_18.duplicated())" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 31, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "25" 124 | ] 125 | }, 126 | "execution_count": 31, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "sum(df_08.duplicated())" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 32, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "Gasoline 1492\n", 144 | "Ethanol/Gas 55\n", 145 | "Diesel 38\n", 146 | "Gasoline/Electricity 24\n", 147 | "Electricity 2\n", 148 | "Name: Fuel, dtype: int64" 149 | ] 150 | }, 151 | "execution_count": 32, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "df_18.Fuel.value_counts()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 33, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "Gasoline 2318\n", 169 | "ethanol/gas 72\n", 170 | "diesel 11\n", 171 | "CNG 2\n", 172 | "ethanol 1\n", 173 | "Name: Fuel, dtype: int64" 174 | ] 175 | 
}, 176 | "execution_count": 33, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "df_08.Fuel.value_counts()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 34, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "1611" 194 | ] 195 | }, 196 | "execution_count": 34, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "sum(df_18.Model.value_counts())" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 35, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/html": [ 213 | "

\n", 214 | "\n", 227 | "\n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | "

	Displ	Cyl	Air Pollution Score	Greenhouse Gas Score
count	1609.000000	1609.000000	1611.000000	1611.000000
mean	3.055687	5.479180	3.958411	4.711359
std	1.344574	1.749121	1.824303	1.657429
min	1.200000	3.000000	1.000000	1.000000
25%	2.000000	4.000000	3.000000	4.000000
50%	3.000000	6.000000	3.000000	5.000000
75%	3.600000	6.000000	5.000000	6.000000
max	8.000000	16.000000	10.000000	10.000000

\n", 296 | "

" 297 | ], 298 | "text/plain": [ 299 | " Displ Cyl Air Pollution Score Greenhouse Gas Score\n", 300 | "count 1609.000000 1609.000000 1611.000000 1611.000000\n", 301 | "mean 3.055687 5.479180 3.958411 4.711359\n", 302 | "std 1.344574 1.749121 1.824303 1.657429\n", 303 | "min 1.200000 3.000000 1.000000 1.000000\n", 304 | "25% 2.000000 4.000000 3.000000 4.000000\n", 305 | "50% 3.000000 6.000000 3.000000 5.000000\n", 306 | "75% 3.600000 6.000000 5.000000 6.000000\n", 307 | "max 8.000000 16.000000 10.000000 10.000000" 308 | ] 309 | }, 310 | "execution_count": 35, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "df_18.describe()" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 36, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/html": [ 327 | "

\n", 328 | "\n", 341 | "\n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | "

	Model	Displ	Cyl	Trans	Drive	Fuel	Cert Region	Stnd	Stnd Description	Underhood ID	Veh Class	Air Pollution Score	City MPG	Hwy MPG	Cmb MPG	Greenhouse Gas Score	SmartWay	Comb CO2
0	ACURA RDX	3.5	6.0	SemiAuto-6	2WD	Gasoline	FA	T3B125	Federal Tier 3 Bin 125	JHNXT03.5GV3	small SUV	3	20	28	23	5	No	386
1	ACURA RDX	3.5	6.0	SemiAuto-6	2WD	Gasoline	CA	U2	California LEV-II ULEV	JHNXT03.5GV3	small SUV	3	20	28	23	5	No	386
2	ACURA RDX	3.5	6.0	SemiAuto-6	4WD	Gasoline	FA	T3B125	Federal Tier 3 Bin 125	JHNXT03.5GV3	small SUV	3	19	27	22	4	No	402
3	ACURA RDX	3.5	6.0	SemiAuto-6	4WD	Gasoline	CA	U2	California LEV-II ULEV	JHNXT03.5GV3	small SUV	3	19	27	22	4	No	402
4	ACURA TLX	2.4	4.0	AMS-8	2WD	Gasoline	CA	L3ULEV125	California LEV-III ULEV125	JHNXV02.4WH3	small car	3	23	33	27	6	No	330

\n", 473 | "

" 474 | ], 475 | "text/plain": [ 476 | " Model Displ Cyl Trans Drive Fuel Cert Region Stnd \\\n", 477 | "0 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline FA T3B125 \n", 478 | "1 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline CA U2 \n", 479 | "2 ACURA RDX 3.5 6.0 SemiAuto-6 4WD Gasoline FA T3B125 \n", 480 | "3 ACURA RDX 3.5 6.0 SemiAuto-6 4WD Gasoline CA U2 \n", 481 | "4 ACURA TLX 2.4 4.0 AMS-8 2WD Gasoline CA L3ULEV125 \n", 482 | "\n", 483 | " Stnd Description Underhood ID Veh Class Air Pollution Score \\\n", 484 | "0 Federal Tier 3 Bin 125 JHNXT03.5GV3 small SUV 3 \n", 485 | "1 California LEV-II ULEV JHNXT03.5GV3 small SUV 3 \n", 486 | "2 Federal Tier 3 Bin 125 JHNXT03.5GV3 small SUV 3 \n", 487 | "3 California LEV-II ULEV JHNXT03.5GV3 small SUV 3 \n", 488 | "4 California LEV-III ULEV125 JHNXV02.4WH3 small car 3 \n", 489 | "\n", 490 | " City MPG Hwy MPG Cmb MPG Greenhouse Gas Score SmartWay Comb CO2 \n", 491 | "0 20 28 23 5 No 386 \n", 492 | "1 20 28 23 5 No 386 \n", 493 | "2 19 27 22 4 No 402 \n", 494 | "3 19 27 22 4 No 402 \n", 495 | "4 23 33 27 6 No 330 " 496 | ] 497 | }, 498 | "execution_count": 36, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "df_18.head()" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 37, 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "data": { 514 | "text/html": [ 515 | "

\n", 516 | "\n", 529 | "\n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | "

	Displ	Unadj Cmb MPG
count	2404.000000	2205.000000
mean	3.748918	23.916104
std	1.335785	6.366170
min	1.300000	10.018400
25%	2.500000	19.113900
50%	3.500000	23.921300
75%	4.800000	27.869300
max	8.400000	65.777800

\n", 580 | "

" 581 | ], 582 | "text/plain": [ 583 | " Displ Unadj Cmb MPG\n", 584 | "count 2404.000000 2205.000000\n", 585 | "mean 3.748918 23.916104\n", 586 | "std 1.335785 6.366170\n", 587 | "min 1.300000 10.018400\n", 588 | "25% 2.500000 19.113900\n", 589 | "50% 3.500000 23.921300\n", 590 | "75% 4.800000 27.869300\n", 591 | "max 8.400000 65.777800" 592 | ] 593 | }, 594 | "execution_count": 37, 595 | "metadata": {}, 596 | "output_type": "execute_result" 597 | } 598 | ], 599 | "source": [ 600 | "df_08.describe()\n" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": 38, 606 | "metadata": {}, 607 | "outputs": [ 608 | { 609 | "data": { 610 | "text/plain": [ 611 | "2205" 612 | ] 613 | }, 614 | "execution_count": 38, 615 | "metadata": {}, 616 | "output_type": "execute_result" 617 | } 618 | ], 619 | "source": [ 620 | "sum(df_08.Cyl.value_counts())" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 39, 626 | "metadata": {}, 627 | "outputs": [ 628 | { 629 | "data": { 630 | "text/plain": [ 631 | "Model 436\n", 632 | "Displ 47\n", 633 | "Cyl 8\n", 634 | "Trans 14\n", 635 | "Drive 2\n", 636 | "Fuel 5\n", 637 | "Sales Area 3\n", 638 | "Stnd 12\n", 639 | "Underhood ID 343\n", 640 | "Veh Class 9\n", 641 | "Air Pollution Score 13\n", 642 | "FE Calc Appr 2\n", 643 | "City MPG 39\n", 644 | "Hwy MPG 43\n", 645 | "Cmb MPG 38\n", 646 | "Unadj Cmb MPG 721\n", 647 | "Greenhouse Gas Score 20\n", 648 | "SmartWay 2\n", 649 | "dtype: int64" 650 | ] 651 | }, 652 | "execution_count": 39, 653 | "metadata": {}, 654 | "output_type": "execute_result" 655 | } 656 | ], 657 | "source": [ 658 | "df_08.nunique()" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": 40, 664 | "metadata": {}, 665 | "outputs": [ 666 | { 667 | "data": { 668 | "text/plain": [ 669 | "Model 367\n", 670 | "Displ 36\n", 671 | "Cyl 7\n", 672 | "Trans 26\n", 673 | "Drive 2\n", 674 | "Fuel 5\n", 675 | "Cert Region 2\n", 676 | "Stnd 19\n", 677 | "Stnd Description 
19\n", 678 | "Underhood ID 230\n", 679 | "Veh Class 9\n", 680 | "Air Pollution Score 6\n", 681 | "City MPG 58\n", 682 | "Hwy MPG 62\n", 683 | "Cmb MPG 57\n", 684 | "Greenhouse Gas Score 10\n", 685 | "SmartWay 3\n", 686 | "Comb CO2 299\n", 687 | "dtype: int64" 688 | ] 689 | }, 690 | "execution_count": 40, 691 | "metadata": {}, 692 | "output_type": "execute_result" 693 | } 694 | ], 695 | "source": [ 696 | "df_18.nunique()" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": null, 702 | "metadata": { 703 | "collapsed": true 704 | }, 705 | "outputs": [], 706 | "source": [] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": null, 711 | "metadata": { 712 | "collapsed": true 713 | }, 714 | "outputs": [], 715 | "source": [] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": { 721 | "collapsed": true 722 | }, 723 | "outputs": [], 724 | "source": [] 725 | } 726 | ], 727 | "metadata": { 728 | "kernelspec": { 729 | "display_name": "Python 3", 730 | "language": "python", 731 | "name": "python3" 732 | }, 733 | "language_info": { 734 | "codemirror_mode": { 735 | "name": "ipython", 736 | "version": 3 737 | }, 738 | "file_extension": ".py", 739 | "mimetype": "text/x-python", 740 | "name": "python", 741 | "nbconvert_exporter": "python", 742 | "pygments_lexer": "ipython3", 743 | "version": "3.6.1" 744 | } 745 | }, 746 | "nbformat": 4, 747 | "nbformat_minor": 2 748 | } 749 | -------------------------------------------------------------------------------- /assessing_quiz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Assessing\n", 8 | "Use the space below to explore `winequality-red.csv` and `winequality-white.csv` to answer the quiz questions below.\n", 9 | "\n", 10 | "Assessing Data\n", 11 | "Using Pandas, explore winequality-red.csv and winequality-white.csv in 
the Jupyter notebook below to answer quiz questions below the notebook about these characteristics of the datasets:\n", 12 | "\n", 13 | "number of samples in each dataset\n", 14 | "number of columns in each dataset\n", 15 | "features with missing values\n", 16 | "duplicate rows in the white wine dataset\n", 17 | "number of unique values for quality in each dataset\n", 18 | "mean density of the red wine dataset\n", 19 | "This data was originally taken from here:\n", 20 | "https://archive.ics.uci.edu/ml/datasets/Wine+Quality" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import pandas as pd\n", 32 | "# matplotlib inline\n", 33 | "\n", 34 | "df_red = pd.read_csv('winequality-red.csv', sep = ';')\n", 35 | "df_white = pd.read_csv('winequality-white.csv',sep = ';')" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "\n", 48 | "RangeIndex: 1599 entries, 0 to 1598\n", 49 | "Data columns (total 12 columns):\n", 50 | "fixed acidity 1599 non-null float64\n", 51 | "volatile acidity 1599 non-null float64\n", 52 | "citric acid 1599 non-null float64\n", 53 | "residual sugar 1599 non-null float64\n", 54 | "chlorides 1599 non-null float64\n", 55 | "free sulfur dioxide 1599 non-null float64\n", 56 | "total sulfur dioxide 1599 non-null float64\n", 57 | "density 1599 non-null float64\n", 58 | "pH 1599 non-null float64\n", 59 | "sulphates 1599 non-null float64\n", 60 | "alcohol 1599 non-null float64\n", 61 | "quality 1599 non-null int64\n", 62 | "dtypes: float64(11), int64(1)\n", 63 | "memory usage: 150.0 KB\n", 64 | "\n", 65 | "RangeIndex: 4898 entries, 0 to 4897\n", 66 | "Data columns (total 12 columns):\n", 67 | "fixed acidity 4898 non-null float64\n", 68 | "volatile acidity 4898 non-null float64\n", 69 | "citric acid 
4898 non-null float64\n", 70 | "residual sugar 4898 non-null float64\n", 71 | "chlorides 4898 non-null float64\n", 72 | "free sulfur dioxide 4898 non-null float64\n", 73 | "total sulfur dioxide 4898 non-null float64\n", 74 | "density 4898 non-null float64\n", 75 | "pH 4898 non-null float64\n", 76 | "sulphates 4898 non-null float64\n", 77 | "alcohol 4898 non-null float64\n", 78 | "quality 4898 non-null int64\n", 79 | "dtypes: float64(11), int64(1)\n", 80 | "memory usage: 459.3 KB\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "df_red.info()\n", 86 | "df_white.info()" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 3, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/html": [ 97 | "

\n", 98 | "\n", 111 | "\n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | "

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol	quality
0	7.4	0.70	0.00	1.9	0.076	11.0	34.0	0.9978	3.51	0.56	9.4	5
1	7.8	0.88	0.00	2.6	0.098	25.0	67.0	0.9968	3.20	0.68	9.8	5
2	7.8	0.76	0.04	2.3	0.092	15.0	54.0	0.9970	3.26	0.65	9.8	5
3	11.2	0.28	0.56	1.9	0.075	17.0	60.0	0.9980	3.16	0.58	9.8	6
4	7.4	0.70	0.00	1.9	0.076	11.0	34.0	0.9978	3.51	0.56	9.4	5

\n", 207 | "

" 208 | ], 209 | "text/plain": [ 210 | " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", 211 | "0 7.4 0.70 0.00 1.9 0.076 \n", 212 | "1 7.8 0.88 0.00 2.6 0.098 \n", 213 | "2 7.8 0.76 0.04 2.3 0.092 \n", 214 | "3 11.2 0.28 0.56 1.9 0.075 \n", 215 | "4 7.4 0.70 0.00 1.9 0.076 \n", 216 | "\n", 217 | " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", 218 | "0 11.0 34.0 0.9978 3.51 0.56 \n", 219 | "1 25.0 67.0 0.9968 3.20 0.68 \n", 220 | "2 15.0 54.0 0.9970 3.26 0.65 \n", 221 | "3 17.0 60.0 0.9980 3.16 0.58 \n", 222 | "4 11.0 34.0 0.9978 3.51 0.56 \n", 223 | "\n", 224 | " alcohol quality \n", 225 | "0 9.4 5 \n", 226 | "1 9.8 5 \n", 227 | "2 9.8 5 \n", 228 | "3 9.8 6 \n", 229 | "4 9.4 5 " 230 | ] 231 | }, 232 | "execution_count": 3, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "df_red.head()" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 4, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/plain": [ 249 | "240" 250 | ] 251 | }, 252 | "execution_count": 4, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "#number of duplicate rows\n", 259 | "sum(df_red.duplicated())" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 5, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "937" 271 | ] 272 | }, 273 | "execution_count": 5, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "#number of duplicate rows\n", 280 | "sum(df_white.duplicated())" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 7, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "name": "stdout", 290 | "output_type": "stream", 291 | "text": [ 292 | "Unique values of quality for red wine are 6\n", 293 | "unique values of quality for white wine are 7\n" 294 | ] 
295 | } 296 | ], 297 | "source": [ 298 | "#number of unique values of quality\n", 299 | "print(\"Unique values of quality for red wine are {}\".format(len(df_red.quality.unique())))\n", 300 | "print(\"unique values of quality for white wine are {}\".format(len(df_white.quality.unique())))" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 8, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "0.9967466791744833" 312 | ] 313 | }, 314 | "execution_count": 8, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "df_red['density'].mean()" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "# Questions and Answers to Quiz\n", 328 | "\n", 329 | "Answers are derived from the code calculations above.\n", 330 | "\n", 331 | "- How many samples of red wine are there? 1599\n", 332 | "- How many samples of white wine are there? 4898\n", 333 | "- How many columns are in each dataset? 12\n", 334 | "- Which features have missing values? None of these features have missing values\n", 335 | "- How many duplicate rows are in the white wine dataset? 937\n", 336 | "- Are duplicate rows in these datasets significant, i.e. do they need to be dropped? Not necessarily\n", 337 | "- How many unique values of quality are in the red wine dataset? 6\n", 338 | "- How many unique values of quality are in the white wine dataset? 7\n", 339 | "- What is the mean density in the red wine dataset? 
0.996747\n", 340 | "\n", 341 | "\n" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "collapsed": true 349 | }, 350 | "outputs": [], 351 | "source": [] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "Python 3", 357 | "language": "python", 358 | "name": "python3" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.6.2" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 2 375 | } 376 | -------------------------------------------------------------------------------- /cleaning_column_labels.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Cleaning Column Labels\n", 8 | "Use `all_alpha_08.csv` and `all_alpha_18.csv`" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "import pandas as pd" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "# load datasets\n", 31 | "\n", 32 | "df_08 = pd.read_csv('all_alpha_08.csv')\n", 33 | "df_18 = pd.read_csv('all_alpha_18.csv')" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/html": [ 44 | "

\n", 45 | "\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | "

	Model	Displ	Cyl	Trans	Drive	Fuel	Sales Area	Stnd	Underhood ID	Veh Class	Air Pollution Score	FE Calc Appr	City MPG	Hwy MPG	Cmb MPG	Unadj Cmb MPG	Greenhouse Gas Score	SmartWay
0	ACURA MDX	3.7	(6 cyl)	Auto-S5	4WD	Gasoline	CA	U2	8HNXT03.7PKR	SUV	7	Drv	15	20	17	22.0527	4	no

\n", 106 | "

" 107 | ], 108 | "text/plain": [ 109 | " Model Displ Cyl Trans Drive Fuel Sales Area Stnd \\\n", 110 | "0 ACURA MDX 3.7 (6 cyl) Auto-S5 4WD Gasoline CA U2 \n", 111 | "\n", 112 | " Underhood ID Veh Class Air Pollution Score FE Calc Appr City MPG Hwy MPG \\\n", 113 | "0 8HNXT03.7PKR SUV 7 Drv 15 20 \n", 114 | "\n", 115 | " Cmb MPG Unadj Cmb MPG Greenhouse Gas Score SmartWay \n", 116 | "0 17 22.0527 4 no " 117 | ] 118 | }, 119 | "execution_count": 3, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "# view 2008 dataset\n", 126 | "df_08.head(1)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 4, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/html": [ 137 | "

\n", 138 | "\n", 151 | "\n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | "

	Model	Displ	Cyl	Trans	Drive	Fuel	Cert Region	Stnd	Stnd Description	Underhood ID	Veh Class	Air Pollution Score	City MPG	Hwy MPG	Cmb MPG	Greenhouse Gas Score	SmartWay	Comb CO2
0	ACURA RDX	3.5	6.0	SemiAuto-6	2WD	Gasoline	FA	T3B125	Federal Tier 3 Bin 125	JHNXT03.5GV3	small SUV	3	20	28	23	5	No	386

\n", 199 | "

" 200 | ], 201 | "text/plain": [ 202 | " Model Displ Cyl Trans Drive Fuel Cert Region Stnd \\\n", 203 | "0 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline FA T3B125 \n", 204 | "\n", 205 | " Stnd Description Underhood ID Veh Class Air Pollution Score \\\n", 206 | "0 Federal Tier 3 Bin 125 JHNXT03.5GV3 small SUV 3 \n", 207 | "\n", 208 | " City MPG Hwy MPG Cmb MPG Greenhouse Gas Score SmartWay Comb CO2 \n", 209 | "0 20 28 23 5 No 386 " 210 | ] 211 | }, 212 | "execution_count": 4, 213 | "metadata": {}, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "# view 2018 dataset\n", 219 | "df_18.head(1)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "### Drop Extraneous Columns" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 5, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/html": [ 237 | "

\n", 238 | "\n", 251 | "\n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | "

	Model	Displ	Cyl	Trans	Drive	Fuel	Sales Area	Veh Class	Air Pollution Score	City MPG	Hwy MPG	Cmb MPG	Greenhouse Gas Score	SmartWay
0	ACURA MDX	3.7	(6 cyl)	Auto-S5	4WD	Gasoline	CA	SUV	7	15	20	17	4	no

\n", 291 | "

" 292 | ], 293 | "text/plain": [ 294 | " Model Displ Cyl Trans Drive Fuel Sales Area Veh Class \\\n", 295 | "0 ACURA MDX 3.7 (6 cyl) Auto-S5 4WD Gasoline CA SUV \n", 296 | "\n", 297 | " Air Pollution Score City MPG Hwy MPG Cmb MPG Greenhouse Gas Score SmartWay \n", 298 | "0 7 15 20 17 4 no " 299 | ] 300 | }, 301 | "execution_count": 5, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "# drop columns from 2008 dataset\n", 308 | "df_08.drop(['Stnd', 'Underhood ID', 'FE Calc Appr', 'Unadj Cmb MPG'], axis=1, inplace=True)\n", 309 | "\n", 310 | "# confirm changes\n", 311 | "df_08.head(1)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 6, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "data": { 321 | "text/html": [ 322 | "

\n", 323 | "\n", 336 | "\n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | "

	Model	Displ	Cyl	Trans	Drive	Fuel	Cert Region	Veh Class	Air Pollution Score	City MPG	Hwy MPG	Cmb MPG	Greenhouse Gas Score	SmartWay
0	ACURA RDX	3.5	6.0	SemiAuto-6	2WD	Gasoline	FA	small SUV	3	20	28	23	5	No
1	ACURA RDX	3.5	6.0	SemiAuto-6	2WD	Gasoline	CA	small SUV	3	20	28	23	5	No
2	ACURA RDX	3.5	6.0	SemiAuto-6	4WD	Gasoline	FA	small SUV	3	19	27	22	4	No
3	ACURA RDX	3.5	6.0	SemiAuto-6	4WD	Gasoline	CA	small SUV	3	19	27	22	4	No
4	ACURA TLX	2.4	4.0	AMS-8	2WD	Gasoline	CA	small car	3	23	33	27	6	No

\n", 444 | "

" 445 | ], 446 | "text/plain": [ 447 | " Model Displ Cyl Trans Drive Fuel Cert Region Veh Class \\\n", 448 | "0 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline FA small SUV \n", 449 | "1 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline CA small SUV \n", 450 | "2 ACURA RDX 3.5 6.0 SemiAuto-6 4WD Gasoline FA small SUV \n", 451 | "3 ACURA RDX 3.5 6.0 SemiAuto-6 4WD Gasoline CA small SUV \n", 452 | "4 ACURA TLX 2.4 4.0 AMS-8 2WD Gasoline CA small car \n", 453 | "\n", 454 | " Air Pollution Score City MPG Hwy MPG Cmb MPG Greenhouse Gas Score SmartWay \n", 455 | "0 3 20 28 23 5 No \n", 456 | "1 3 20 28 23 5 No \n", 457 | "2 3 19 27 22 4 No \n", 458 | "3 3 19 27 22 4 No \n", 459 | "4 3 23 33 27 6 No " 460 | ] 461 | }, 462 | "execution_count": 6, 463 | "metadata": {}, 464 | "output_type": "execute_result" 465 | } 466 | ], 467 | "source": [ 468 | "# drop columns from 2018 dataset\n", 469 | "df_18.drop(['Stnd', 'Stnd Description', 'Underhood ID', 'Comb CO2'], axis=1, inplace=True)\n", 470 | "\n", 471 | "# confirm changes\n", 472 | "df_18.head()" 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": {}, 478 | "source": [ 479 | "### Rename Columns" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 17, 485 | "metadata": { 486 | "scrolled": false 487 | }, 488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/html": [ 492 | "

\n", 493 | "\n", 506 | "\n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | "

	model	displ	cyl	trans	drive	fuel	Cert Region	veh_class	air_pollution_score	city_mpg	hwy_mpg	cmb_mpg	greenhouse_gas_score	smartway
0	ACURA MDX	3.7	(6 cyl)	Auto-S5	4WD	Gasoline	CA	SUV	7	15	20	17	4	no

\n", 546 | "

" 547 | ], 548 | "text/plain": [ 549 | " model displ cyl trans drive fuel Cert Region veh_class \\\n", 550 | "0 ACURA MDX 3.7 (6 cyl) Auto-S5 4WD Gasoline CA SUV \n", 551 | "\n", 552 | " air_pollution_score city_mpg hwy_mpg cmb_mpg greenhouse_gas_score smartway \n", 553 | "0 7 15 20 17 4 no " 554 | ] 555 | }, 556 | "execution_count": 17, 557 | "metadata": {}, 558 | "output_type": "execute_result" 559 | } 560 | ], 561 | "source": [ 562 | "# rename Sales Area to Cert Region\n", 563 | "names_cols = list(df_08.columns.values)\n", 564 | "names_cols[6] = 'Cert Region'\n", 565 | "df_08.columns = names_cols\n", 566 | "\n", 567 | "# confirm changes\n", 568 | "df_08.head(1)" 569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": 18, 574 | "metadata": {}, 575 | "outputs": [ 576 | { 577 | "data": { 578 | "text/html": [ 579 | "

\n", 580 | "\n", 593 | "\n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | "

	model	displ	cyl	trans	drive	fuel	cert_region	veh_class	air_pollution_score	city_mpg	hwy_mpg	cmb_mpg	greenhouse_gas_score	smartway
0	ACURA MDX	3.7	(6 cyl)	Auto-S5	4WD	Gasoline	CA	SUV	7	15	20	17	4	no

\n", 633 | "

" 634 | ], 635 | "text/plain": [ 636 | " model displ cyl trans drive fuel cert_region veh_class \\\n", 637 | "0 ACURA MDX 3.7 (6 cyl) Auto-S5 4WD Gasoline CA SUV \n", 638 | "\n", 639 | " air_pollution_score city_mpg hwy_mpg cmb_mpg greenhouse_gas_score smartway \n", 640 | "0 7 15 20 17 4 no " 641 | ] 642 | }, 643 | "execution_count": 18, 644 | "metadata": {}, 645 | "output_type": "execute_result" 646 | } 647 | ], 648 | "source": [ 649 | "# replace spaces with underscores and lowercase labels for 2008 dataset\n", 650 | "df_08.rename(columns=lambda x: x.strip().lower().replace(\" \", \"_\"), inplace=True)\n", 651 | "\n", 652 | "# confirm changes\n", 653 | "df_08.head(1)" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 19, 659 | "metadata": {}, 660 | "outputs": [ 661 | { 662 | "data": { 663 | "text/html": [ 664 | "

\n", 665 | "\n", 678 | "\n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | "

	model	displ	cyl	trans	drive	fuel	cert_region	veh_class	air_pollution_score	city_mpg	hwy_mpg	cmb_mpg	greenhouse_gas_score	smartway
0	ACURA RDX	3.5	6.0	SemiAuto-6	2WD	Gasoline	FA	small SUV	3	20	28	23	5	No

\n", 718 | "

" 719 | ], 720 | "text/plain": [ 721 | " model displ cyl trans drive fuel cert_region veh_class \\\n", 722 | "0 ACURA RDX 3.5 6.0 SemiAuto-6 2WD Gasoline FA small SUV \n", 723 | "\n", 724 | " air_pollution_score city_mpg hwy_mpg cmb_mpg greenhouse_gas_score smartway \n", 725 | "0 3 20 28 23 5 No " 726 | ] 727 | }, 728 | "execution_count": 19, 729 | "metadata": {}, 730 | "output_type": "execute_result" 731 | } 732 | ], 733 | "source": [ 734 | "# replace spaces with underscores and lowercase labels for 2018 dataset\n", 735 | "df_18.rename(columns=lambda x: x.strip().lower().replace(\" \", \"_\"), inplace=True)\n", 736 | "\n", 737 | "# confirm changes\n", 738 | "df_18.head(1)" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": 20, 744 | "metadata": {}, 745 | "outputs": [ 746 | { 747 | "data": { 748 | "text/plain": [ 749 | "array([ True, True, True, True, True, True, True, True, True,\n", 750 | " True, True, True, True, True], dtype=bool)" 751 | ] 752 | }, 753 | "execution_count": 20, 754 | "metadata": {}, 755 | "output_type": "execute_result" 756 | } 757 | ], 758 | "source": [ 759 | "# confirm column labels for 2008 and 2018 datasets are identical\n", 760 | "df_08.columns == df_18.columns" 761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": 21, 766 | "metadata": {}, 767 | "outputs": [ 768 | { 769 | "data": { 770 | "text/plain": [ 771 | "True" 772 | ] 773 | }, 774 | "execution_count": 21, 775 | "metadata": {}, 776 | "output_type": "execute_result" 777 | } 778 | ], 779 | "source": [ 780 | "# make sure they're all identical like this\n", 781 | "(df_08.columns == df_18.columns).all()" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": 22, 787 | "metadata": { 788 | "collapsed": true 789 | }, 790 | "outputs": [], 791 | "source": [ 792 | "# save new datasets for next section\n", 793 | "df_08.to_csv('data_08.csv', index=False)\n", 794 | "df_18.to_csv('data_18.csv', index=False)" 795 | ] 796 | }, 797 
| { 798 | "cell_type": "code", 799 | "execution_count": null, 800 | "metadata": { 801 | "collapsed": true 802 | }, 803 | "outputs": [], 804 | "source": [] 805 | } 806 | ], 807 | "metadata": { 808 | "kernelspec": { 809 | "display_name": "Python 3", 810 | "language": "python", 811 | "name": "python3" 812 | }, 813 | "language_info": { 814 | "codemirror_mode": { 815 | "name": "ipython", 816 | "version": 3 817 | }, 818 | "file_extension": ".py", 819 | "mimetype": "text/x-python", 820 | "name": "python", 821 | "nbconvert_exporter": "python", 822 | "pygments_lexer": "ipython3", 823 | "version": "3.6.1" 824 | } 825 | }, 826 | "nbformat": 4, 827 | "nbformat_minor": 2 828 | } 829 | -------------------------------------------------------------------------------- /cleaning_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Cleaning Practice\n", 8 | "Let's first practice handling missing values and duplicate data with `cancer_data_means.csv`." 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 2, 14 | "metadata": {}, 15 | "outputs": [ 16 | { 17 | "name": "stdout", 18 | "output_type": "stream", 19 | "text": [ 20 | "\n", 21 | "RangeIndex: 569 entries, 0 to 568\n", 22 | "Data columns (total 11 columns):\n", 23 | "id 569 non-null int64\n", 24 | "diagnosis 569 non-null object\n", 25 | "radius_mean 569 non-null float64\n", 26 | "texture_mean 548 non-null float64\n", 27 | "perimeter_mean 569 non-null float64\n", 28 | "area_mean 569 non-null float64\n", 29 | "smoothness_mean 521 non-null float64\n", 30 | "compactness_mean 569 non-null float64\n", 31 | "concavity_mean 569 non-null float64\n", 32 | "concave_points_mean 569 non-null float64\n", 33 | "symmetry_mean 504 non-null float64\n", 34 | "dtypes: float64(9), int64(1), object(1)\n", 35 | "memory usage: 49.0+ KB\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "# import pandas and load cancer data\n", 41 | "import pandas as pd\n", 42 | "\n", 43 | "df_cancer = pd.read_csv('cancer_data_means.csv')\n", 44 | "\n", 45 | "# check which columns have missing values with info()\n", 46 | "df_cancer.info()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "\n", 59 | "RangeIndex: 569 entries, 0 to 568\n", 60 | "Data columns (total 11 columns):\n", 61 | "id 569 non-null int64\n", 62 | "diagnosis 569 non-null object\n", 63 | "radius_mean 569 non-null float64\n", 64 | "texture_mean 569 non-null float64\n", 65 | "perimeter_mean 569 non-null float64\n", 66 | "area_mean 569 non-null float64\n", 67 | "smoothness_mean 521 non-null float64\n", 68 | "compactness_mean 569 non-null float64\n", 69 | "concavity_mean 569 non-null float64\n", 70 | "concave_points_mean 569 non-null float64\n", 71 | "symmetry_mean 504 non-null float64\n", 72 | "dtypes: float64(9), int64(1), object(1)\n", 73 | "memory usage: 49.0+ KB\n" 74 | ] 75 | 
} 76 | ], 77 | "source": [ 78 | "# use means to fill in missing values\n", 79 | "mean_texture = df_cancer['texture_mean'].mean()\n", 80 | "df_cancer['texture_mean'].fillna(mean_texture, inplace = True)\n", 81 | "\n", 82 | "# confirm your correction with info()\n", 83 | "df_cancer.info(0)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 4, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "\n", 96 | "RangeIndex: 569 entries, 0 to 568\n", 97 | "Data columns (total 11 columns):\n", 98 | "id 569 non-null int64\n", 99 | "diagnosis 569 non-null object\n", 100 | "radius_mean 569 non-null float64\n", 101 | "texture_mean 569 non-null float64\n", 102 | "perimeter_mean 569 non-null float64\n", 103 | "area_mean 569 non-null float64\n", 104 | "smoothness_mean 569 non-null float64\n", 105 | "compactness_mean 569 non-null float64\n", 106 | "concavity_mean 569 non-null float64\n", 107 | "concave_points_mean 569 non-null float64\n", 108 | "symmetry_mean 504 non-null float64\n", 109 | "dtypes: float64(9), int64(1), object(1)\n", 110 | "memory usage: 49.0+ KB\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "# use means to fill in missing values\n", 116 | "#convert for smoothness_mean\n", 117 | "mean_smoothness = df_cancer['smoothness_mean'].mean()\n", 118 | "df_cancer['smoothness_mean'].fillna(mean_smoothness, inplace = True)\n", 119 | "\n", 120 | "# confirm your correction with info()\n", 121 | "df_cancer.info(0)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 5, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "\n", 134 | "RangeIndex: 569 entries, 0 to 568\n", 135 | "Data columns (total 11 columns):\n", 136 | "id 569 non-null int64\n", 137 | "diagnosis 569 non-null object\n", 138 | "radius_mean 569 non-null float64\n", 139 | "texture_mean 569 non-null float64\n", 140 | 
"perimeter_mean 569 non-null float64\n", 141 | "area_mean 569 non-null float64\n", 142 | "smoothness_mean 569 non-null float64\n", 143 | "compactness_mean 569 non-null float64\n", 144 | "concavity_mean 569 non-null float64\n", 145 | "concave_points_mean 569 non-null float64\n", 146 | "symmetry_mean 569 non-null float64\n", 147 | "dtypes: float64(9), int64(1), object(1)\n", 148 | "memory usage: 49.0+ KB\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "# use means to fill in missing values\n", 154 | "#convert for symmetry_mean\n", 155 | "mean_symmetry = df_cancer['symmetry_mean'].mean()\n", 156 | "df_cancer['symmetry_mean'].fillna(mean_symmetry, inplace = True)\n", 157 | "\n", 158 | "# confirm your correction with info()\n", 159 | "df_cancer.info(0)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 8, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "5" 171 | ] 172 | }, 173 | "execution_count": 8, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "# check for duplicates in the data\n", 180 | "sum(df_cancer.duplicated())" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 10, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "# drop duplicates\n", 190 | "df_cancer.drop_duplicates(inplace = True)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 11, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "data": { 200 | "text/plain": [ 201 | "0" 202 | ] 203 | }, 204 | "execution_count": 11, 205 | "metadata": {}, 206 | "output_type": "execute_result" 207 | } 208 | ], 209 | "source": [ 210 | "# confirm correction by rechecking for duplicates in the data\n", 211 | "sum(df_cancer.duplicated())" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## Renaming Columns\n", 219 | "Since we also previously changed our dataset to only 
include means of tumor features, the \"_mean\" at the end of each feature seems unnecessary. It just takes extra time to type in our analysis later. Let's come up with a list of new labels to assign to our columns." 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 13, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "['id',\n", 231 | " 'diagnosis',\n", 232 | " 'radius',\n", 233 | " 'texture',\n", 234 | " 'perimeter',\n", 235 | " 'area',\n", 236 | " 'smoothness',\n", 237 | " 'compactness',\n", 238 | " 'concavity',\n", 239 | " 'concave_points',\n", 240 | " 'symmetry']" 241 | ] 242 | }, 243 | "execution_count": 13, 244 | "metadata": {}, 245 | "output_type": "execute_result" 246 | } 247 | ], 248 | "source": [ 249 | "# remove \"_mean\" from column names\n", 250 | "new_labels = []\n", 251 | "for col in df_cancer.columns:\n", 252 | " if '_mean' in col:\n", 253 | " new_labels.append(col[:-5]) # exclude last 5 characters\n", 254 | " else:\n", 255 | " new_labels.append(col)\n", 256 | "\n", 257 | "# new labels for our columns\n", 258 | "new_labels" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 14, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/html": [ 269 | "

\n", 270 | "\n", 283 | "\n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | "

	id	diagnosis	radius	texture	perimeter	area	smoothness	compactness	concavity	concave_points	symmetry
0	842302	M	17.99	19.293431	122.80	1001.0	0.118400	0.27760	0.3001	0.14710	0.2419
1	842517	M	20.57	17.770000	132.90	1326.0	0.084740	0.07864	0.0869	0.07017	0.1812
2	84300903	M	19.69	21.250000	130.00	1203.0	0.109600	0.15990	0.1974	0.12790	0.2069
3	84348301	M	11.42	20.380000	77.58	386.1	0.096087	0.28390	0.2414	0.10520	0.2597
4	84358402	M	20.29	14.340000	135.10	1297.0	0.100300	0.13280	0.1980	0.10430	0.1809

\n", 373 | "

" 374 | ], 375 | "text/plain": [ 376 | " id diagnosis radius texture perimeter area smoothness \\\n", 377 | "0 842302 M 17.99 19.293431 122.80 1001.0 0.118400 \n", 378 | "1 842517 M 20.57 17.770000 132.90 1326.0 0.084740 \n", 379 | "2 84300903 M 19.69 21.250000 130.00 1203.0 0.109600 \n", 380 | "3 84348301 M 11.42 20.380000 77.58 386.1 0.096087 \n", 381 | "4 84358402 M 20.29 14.340000 135.10 1297.0 0.100300 \n", 382 | "\n", 383 | " compactness concavity concave_points symmetry \n", 384 | "0 0.27760 0.3001 0.14710 0.2419 \n", 385 | "1 0.07864 0.0869 0.07017 0.1812 \n", 386 | "2 0.15990 0.1974 0.12790 0.2069 \n", 387 | "3 0.28390 0.2414 0.10520 0.2597 \n", 388 | "4 0.13280 0.1980 0.10430 0.1809 " 389 | ] 390 | }, 391 | "execution_count": 14, 392 | "metadata": {}, 393 | "output_type": "execute_result" 394 | } 395 | ], 396 | "source": [ 397 | "# assign new labels to columns in dataframe\n", 398 | "df_cancer.columns = new_labels\n", 399 | "\n", 400 | "# display first few rows of dataframe to confirm changes\n", 401 | "df_cancer.head()" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 16, 407 | "metadata": { 408 | "collapsed": true 409 | }, 410 | "outputs": [], 411 | "source": [ 412 | "# save this for later\n", 413 | "df_cancer.to_csv('cancer_data_edited.csv', index=False)" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "metadata": { 420 | "collapsed": true 421 | }, 422 | "outputs": [], 423 | "source": [] 424 | } 425 | ], 426 | "metadata": { 427 | "kernelspec": { 428 | "display_name": "Python 3", 429 | "language": "python", 430 | "name": "python3" 431 | }, 432 | "language_info": { 433 | "codemirror_mode": { 434 | "name": "ipython", 435 | "version": 3 436 | }, 437 | "file_extension": ".py", 438 | "mimetype": "text/x-python", 439 | "name": "python", 440 | "nbconvert_exporter": "python", 441 | "pygments_lexer": "ipython3", 442 | "version": "3.6.1" 443 | } 444 | }, 445 | "nbformat": 4, 446 | 
"nbformat_minor": 2 447 | } 448 | -------------------------------------------------------------------------------- /conclusions_groupby.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Drawing Conclusions Using Groupby" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "#Import packages\n", 19 | "import pandas as pd\n", 20 | "import numpy as np\n", 21 | "from matplotlib import pyplot as plt\n", 22 | "import matplotlib\n", 23 | "% matplotlib inline" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | "\n", 36 | "RangeIndex: 6497 entries, 0 to 6496\n", 37 | "Data columns (total 13 columns):\n", 38 | "fixed_acidity 6497 non-null float64\n", 39 | "volatile_acidity 6497 non-null float64\n", 40 | "citric_acid 6497 non-null float64\n", 41 | "residual_sugar 6497 non-null float64\n", 42 | "chlorides 6497 non-null float64\n", 43 | "free_sulfur_dioxide 6497 non-null float64\n", 44 | "total_sulfur_dioxide 6497 non-null float64\n", 45 | "density 6497 non-null float64\n", 46 | "pH 6497 non-null float64\n", 47 | "sulphates 6497 non-null float64\n", 48 | "alcohol 6497 non-null float64\n", 49 | "quality 6497 non-null int64\n", 50 | "color 6497 non-null object\n", 51 | "dtypes: float64(11), int64(1), object(1)\n", 52 | "memory usage: 659.9+ KB\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "# Load `winequality_edited.csv`\n", 58 | "wine_df = pd.read_csv('winequality_edited.csv')\n", 59 | "wine_df.info()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "collapsed": true 66 | }, 67 | "source": [ 68 | "### Is a certain type of wine associated with higher quality?" 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "color\n", 80 | "red 5.636023\n", 81 | "white 5.877909\n", 82 | "Name: quality, dtype: float64" 83 | ] 84 | }, 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "# Find the mean quality of each wine type (red and white) with groupby\n", 92 | "wine_df.groupby('color')['quality'].mean()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### What level of acidity receives the highest average rating?" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 7, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/html": [ 110 | "

\n", 111 | "\n", 124 | "\n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | "

	fixed_acidity	volatile_acidity	citric_acid	residual_sugar	chlorides	free_sulfur_dioxide	total_sulfur_dioxide	density	pH	sulphates	alcohol	quality
count	6497.000000	6497.000000	6497.000000	6497.000000	6497.000000	6497.000000	6497.000000	6497.000000	6497.000000	6497.000000	6497.000000	6497.000000
mean	7.215307	0.339666	0.318633	5.443235	0.056034	30.525319	115.744574	0.994697	3.218501	0.531268	10.491801	5.818378
std	1.296434	0.164636	0.145318	4.757804	0.035034	17.749400	56.521855	0.002999	0.160787	0.148806	1.192712	0.873255
min	3.800000	0.080000	0.000000	0.600000	0.009000	1.000000	6.000000	0.987110	2.720000	0.220000	8.000000	3.000000
25%	6.400000	0.230000	0.250000	1.800000	0.038000	17.000000	77.000000	0.992340	3.110000	0.430000	9.500000	5.000000
50%	7.000000	0.290000	0.310000	3.000000	0.047000	29.000000	118.000000	0.994890	3.210000	0.510000	10.300000	6.000000
75%	7.700000	0.400000	0.390000	8.100000	0.065000	41.000000	156.000000	0.996990	3.320000	0.600000	11.300000	6.000000
max	15.900000	1.580000	1.660000	65.800000	0.611000	289.000000	440.000000	1.038980	4.010000	2.000000	14.900000	9.000000

\n", 265 | "

" 266 | ], 267 | "text/plain": [ 268 | " fixed_acidity volatile_acidity citric_acid residual_sugar \\\n", 269 | "count 6497.000000 6497.000000 6497.000000 6497.000000 \n", 270 | "mean 7.215307 0.339666 0.318633 5.443235 \n", 271 | "std 1.296434 0.164636 0.145318 4.757804 \n", 272 | "min 3.800000 0.080000 0.000000 0.600000 \n", 273 | "25% 6.400000 0.230000 0.250000 1.800000 \n", 274 | "50% 7.000000 0.290000 0.310000 3.000000 \n", 275 | "75% 7.700000 0.400000 0.390000 8.100000 \n", 276 | "max 15.900000 1.580000 1.660000 65.800000 \n", 277 | "\n", 278 | " chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n", 279 | "count 6497.000000 6497.000000 6497.000000 6497.000000 \n", 280 | "mean 0.056034 30.525319 115.744574 0.994697 \n", 281 | "std 0.035034 17.749400 56.521855 0.002999 \n", 282 | "min 0.009000 1.000000 6.000000 0.987110 \n", 283 | "25% 0.038000 17.000000 77.000000 0.992340 \n", 284 | "50% 0.047000 29.000000 118.000000 0.994890 \n", 285 | "75% 0.065000 41.000000 156.000000 0.996990 \n", 286 | "max 0.611000 289.000000 440.000000 1.038980 \n", 287 | "\n", 288 | " pH sulphates alcohol quality \n", 289 | "count 6497.000000 6497.000000 6497.000000 6497.000000 \n", 290 | "mean 3.218501 0.531268 10.491801 5.818378 \n", 291 | "std 0.160787 0.148806 1.192712 0.873255 \n", 292 | "min 2.720000 0.220000 8.000000 3.000000 \n", 293 | "25% 3.110000 0.430000 9.500000 5.000000 \n", 294 | "50% 3.210000 0.510000 10.300000 6.000000 \n", 295 | "75% 3.320000 0.600000 11.300000 6.000000 \n", 296 | "max 4.010000 2.000000 14.900000 9.000000 " 297 | ] 298 | }, 299 | "execution_count": 7, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "# View the min, 25%, 50%, 75%, max pH values with Pandas describe\n", 306 | "# High: Lowest 25% of pH values\n", 307 | "# Moderately High: 25% - 50% of pH values\n", 308 | "# Medium: 50% - 75% of pH values\n", 309 | "# Low: 75% - max pH value\n", 310 | "wine_df.describe()" 311 | ] 312 | }, 
313 | { 314 | "cell_type": "code", 315 | "execution_count": 8, 316 | "metadata": { 317 | "collapsed": true 318 | }, 319 | "outputs": [], 320 | "source": [ 321 | "# Bin edges that will be used to \"cut\" the data into groups\n", 322 | "bin_edges = [ 2.72, 3.11,3.21 , 3.32,4.01 ] # Fill in this list with five values you just found" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 9, 328 | "metadata": { 329 | "collapsed": true 330 | }, 331 | "outputs": [], 332 | "source": [ 333 | "# Labels for the four acidity level groups\n", 334 | "bin_names = [ 'High','Moderately High' ,'Meduim' ,'Low' ] # Name each acidity level category" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 10, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/html": [ 345 | "

\n", 346 | "\n", 359 | "\n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | "

	fixed_acidity	volatile_acidity	citric_acid	residual_sugar	chlorides	free_sulfur_dioxide	total_sulfur_dioxide	density	pH	sulphates	alcohol	quality	color	acidity_levels
0	7.4	0.70	0.00	1.9	0.076	11.0	34.0	0.9978	3.51	0.56	9.4	5	red	Low
1	7.8	0.88	0.00	2.6	0.098	25.0	67.0	0.9968	3.20	0.68	9.8	5	red	Moderately High
2	7.8	0.76	0.04	2.3	0.092	15.0	54.0	0.9970	3.26	0.65	9.8	5	red	Meduim
3	11.2	0.28	0.56	1.9	0.075	17.0	60.0	0.9980	3.16	0.58	9.8	6	red	Moderately High
4	7.4	0.70	0.00	1.9	0.076	11.0	34.0	0.9978	3.51	0.56	9.4	5	red	Low

\n", 467 | "

" 468 | ], 469 | "text/plain": [ 470 | " fixed_acidity volatile_acidity citric_acid residual_sugar chlorides \\\n", 471 | "0 7.4 0.70 0.00 1.9 0.076 \n", 472 | "1 7.8 0.88 0.00 2.6 0.098 \n", 473 | "2 7.8 0.76 0.04 2.3 0.092 \n", 474 | "3 11.2 0.28 0.56 1.9 0.075 \n", 475 | "4 7.4 0.70 0.00 1.9 0.076 \n", 476 | "\n", 477 | " free_sulfur_dioxide total_sulfur_dioxide density pH sulphates \\\n", 478 | "0 11.0 34.0 0.9978 3.51 0.56 \n", 479 | "1 25.0 67.0 0.9968 3.20 0.68 \n", 480 | "2 15.0 54.0 0.9970 3.26 0.65 \n", 481 | "3 17.0 60.0 0.9980 3.16 0.58 \n", 482 | "4 11.0 34.0 0.9978 3.51 0.56 \n", 483 | "\n", 484 | " alcohol quality color acidity_levels \n", 485 | "0 9.4 5 red Low \n", 486 | "1 9.8 5 red Moderately High \n", 487 | "2 9.8 5 red Meduim \n", 488 | "3 9.8 6 red Moderately High \n", 489 | "4 9.4 5 red Low " 490 | ] 491 | }, 492 | "execution_count": 10, 493 | "metadata": {}, 494 | "output_type": "execute_result" 495 | } 496 | ], 497 | "source": [ 498 | "# Creates acidity_levels column\n", 499 | "wine_df['acidity_levels'] = pd.cut(wine_df['pH'], bin_edges, labels=bin_names)\n", 500 | "\n", 501 | "# Checks for successful creation of this column\n", 502 | "wine_df.head()" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": 11, 508 | "metadata": {}, 509 | "outputs": [ 510 | { 511 | "data": { 512 | "text/html": [ 513 | "

\n", 514 | "\n", 527 | "\n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | "

	fixed_acidity	volatile_acidity	citric_acid	residual_sugar	chlorides	free_sulfur_dioxide	total_sulfur_dioxide	density	pH	sulphates	alcohol	quality
acidity_levels
High	7.543914	0.294683	0.370792	7.088876	0.055131	33.179965	129.897496	0.994708	3.029062	0.503937	10.330208	5.783343
Moderately High	7.365064	0.318551	0.340548	5.931984	0.054666	33.229154	126.815886	0.994697	3.164833	0.509300	10.391073	5.784540
Meduim	7.143566	0.346751	0.313585	4.721159	0.055715	28.983995	111.182138	0.994476	3.267010	0.541287	10.610369	5.850832
Low	6.769949	0.403815	0.243901	3.848983	0.058777	26.327510	93.244917	0.994899	3.433348	0.574136	10.656057	5.859593

\n", 623 | "

" 624 | ], 625 | "text/plain": [ 626 | " fixed_acidity volatile_acidity citric_acid residual_sugar \\\n", 627 | "acidity_levels \n", 628 | "High 7.543914 0.294683 0.370792 7.088876 \n", 629 | "Moderately High 7.365064 0.318551 0.340548 5.931984 \n", 630 | "Meduim 7.143566 0.346751 0.313585 4.721159 \n", 631 | "Low 6.769949 0.403815 0.243901 3.848983 \n", 632 | "\n", 633 | " chlorides free_sulfur_dioxide total_sulfur_dioxide \\\n", 634 | "acidity_levels \n", 635 | "High 0.055131 33.179965 129.897496 \n", 636 | "Moderately High 0.054666 33.229154 126.815886 \n", 637 | "Meduim 0.055715 28.983995 111.182138 \n", 638 | "Low 0.058777 26.327510 93.244917 \n", 639 | "\n", 640 | " density pH sulphates alcohol quality \n", 641 | "acidity_levels \n", 642 | "High 0.994708 3.029062 0.503937 10.330208 5.783343 \n", 643 | "Moderately High 0.994697 3.164833 0.509300 10.391073 5.784540 \n", 644 | "Meduim 0.994476 3.267010 0.541287 10.610369 5.850832 \n", 645 | "Low 0.994899 3.433348 0.574136 10.656057 5.859593 " 646 | ] 647 | }, 648 | "execution_count": 11, 649 | "metadata": {}, 650 | "output_type": "execute_result" 651 | } 652 | ], 653 | "source": [ 654 | "# Find the mean quality of each acidity level with groupby\n", 655 | "wine_df.groupby('acidity_levels').mean()" 656 | ] 657 | }, 658 | { 659 | "cell_type": "code", 660 | "execution_count": 13, 661 | "metadata": {}, 662 | "outputs": [], 663 | "source": [ 664 | "# Save changes for the next section\n", 665 | "wine_df.to_csv('winequality_edited.csv', index=False)" 666 | ] 667 | }, 668 | { 669 | "cell_type": "markdown", 670 | "metadata": {}, 671 | "source": [ 672 | "## QUIZ Q&A" 673 | ] 674 | }, 675 | { 676 | "cell_type": "markdown", 677 | "metadata": {}, 678 | "source": [ 679 | "### Is the mean quality of red wine greater than, less than, or equal to that of white wine?\n", 680 | "Less" 681 | ] 682 | }, 683 | { 684 | "cell_type": "markdown", 685 | "metadata": {}, 686 | "source": [ 687 | "### What level of acidity receives the 
highest average rating?\n", 688 | "Low acidity " 689 | ] 690 | }, 691 | { 692 | "cell_type": "code", 693 | "execution_count": null, 694 | "metadata": { 695 | "collapsed": true 696 | }, 697 | "outputs": [], 698 | "source": [] 699 | } 700 | ], 701 | "metadata": { 702 | "kernelspec": { 703 | "display_name": "Python 3", 704 | "language": "python", 705 | "name": "python3" 706 | }, 707 | "language_info": { 708 | "codemirror_mode": { 709 | "name": "ipython", 710 | "version": 3 711 | }, 712 | "file_extension": ".py", 713 | "mimetype": "text/x-python", 714 | "name": "python", 715 | "nbconvert_exporter": "python", 716 | "pygments_lexer": "ipython3", 717 | "version": "3.6.1" 718 | } 719 | }, 720 | "nbformat": 4, 721 | "nbformat_minor": 2 722 | } 723 | -------------------------------------------------------------------------------- /conclusions_query.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Drawing Conclusions Using Query" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "# Load `winequality_edited.csv`\n", 30 | "\n", 31 | "df = pd.read_csv('winequality_edited.csv')" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 11, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "name": "stdout", 41 | "output_type": "stream", 42 | "text": [ 43 | "\n", 44 | "RangeIndex: 6497 entries, 0 to 6496\n", 45 | "Data columns (total 14 columns):\n", 46 | "fixed_acidity 6497 non-null float64\n", 47 | "volatile_acidity 6497 non-null float64\n", 48 | "citric_acid 6497 non-null float64\n", 49 | "residual_sugar 6497 non-null float64\n", 
50 | "chlorides 6497 non-null float64\n", 51 | "free_sulfur_dioxide 6497 non-null float64\n", 52 | "total_sulfur_dioxide 6497 non-null float64\n", 53 | "density 6497 non-null float64\n", 54 | "pH 6497 non-null float64\n", 55 | "sulphates 6497 non-null float64\n", 56 | "alcohol 6497 non-null float64\n", 57 | "quality 6497 non-null int64\n", 58 | "color 6497 non-null object\n", 59 | "acidity_levels 6496 non-null object\n", 60 | "dtypes: float64(11), int64(1), object(2)\n", 61 | "memory usage: 710.7+ KB\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "df.info()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "source": [ 75 | "### Do wines with higher alcoholic content receive better ratings?" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 3, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "10.3" 87 | ] 88 | }, 89 | "execution_count": 3, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "# get the median amount of alcohol content\n", 96 | "df['alcohol'].median()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "True" 108 | ] 109 | }, 110 | "execution_count": 5, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "# select samples with alcohol content less than the median\n", 117 | "low_alcohol = df.query('alcohol < 10.3')\n", 118 | "\n", 119 | "# select samples with alcohol content greater than or equal to the median\n", 120 | "high_alcohol = df.query('alcohol >= 10.3')\n", 121 | "\n", 122 | "# ensure these queries included each sample exactly once\n", 123 | "num_samples = df.shape[0]\n", 124 | "num_samples == low_alcohol['quality'].count() + high_alcohol['quality'].count() # should be True" 125 | ] 126 | }, 127 | { 128 | "cell_type": 
"code", 129 | "execution_count": 9, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "5.475920679886686" 136 | ] 137 | }, 138 | "execution_count": 9, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "# get mean quality rating for the low alcohol and high alcohol groups\n", 145 | "low_alcohol['quality'].mean()\n" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 10, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "6.1460843373493974" 157 | ] 158 | }, 159 | "execution_count": 10, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "high_alcohol['quality'].mean()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "#### QUESTION 1 OF 2\n", 173 | "\n", 174 | "*Do wines with higher alcoholic content generally receive better ratings?*\n", 175 | "Yes" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "### Do sweeter wines receive better ratings?" 
183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 13, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "3.0" 194 | ] 195 | }, 196 | "execution_count": 13, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "# get the median amount of residual sugar\n", 203 | "df['residual_sugar'].median()" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 14, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/plain": [ 214 | "True" 215 | ] 216 | }, 217 | "execution_count": 14, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "# select samples with residual sugar less than the median\n", 224 | "low_sugar = df.query('residual_sugar < 3.0')\n", 225 | "\n", 226 | "# select samples with residual sugar greater than or equal to the median\n", 227 | "high_sugar = df.query('residual_sugar >= 3.0')\n", 228 | "\n", 229 | "# ensure these queries included each sample exactly once\n", 230 | "num_samples == low_sugar['quality'].count() + high_sugar['quality'].count() # should be True" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 15, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "5.8088007437248219" 242 | ] 243 | }, 244 | "execution_count": 15, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "# get mean quality rating for the low sugar group\n", 251 | "low_sugar['quality'].mean()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 16, 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/plain": [ 262 | "5.8278287461773699" 263 | ] 264 | }, 265 | "execution_count": 16, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "# get mean quality rating 
for the high sugar groups\n", 272 | "high_sugar['quality'].mean()" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "#### QUESTION 2 OF 2\n", 280 | "\n", 281 | "*Do sweeter wines generally receive higher ratings?*\n", 282 | "Yes, but only marginally (mean quality of about 5.83 for the high-sugar group vs. 5.81 for the low-sugar group)" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": { 289 | "collapsed": true 290 | }, 291 | "outputs": [], 292 | "source": [] 293 | } 294 | ], 295 | "metadata": { 296 | "kernelspec": { 297 | "display_name": "Python 3", 298 | "language": "python", 299 | "name": "python3" 300 | }, 301 | "language_info": { 302 | "codemirror_mode": { 303 | "name": "ipython", 304 | "version": 3 305 | }, 306 | "file_extension": ".py", 307 | "mimetype": "text/x-python", 308 | "name": "python", 309 | "nbconvert_exporter": "python", 310 | "pygments_lexer": "ipython3", 311 | "version": "3.6.1" 312 | } 313 | }, 314 | "nbformat": 4, 315 | "nbformat_minor": 2 316 | } 317 | -------------------------------------------------------------------------------- /conclusions_quiz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Drawing Conclusions Quiz\n", 8 | "Use the space below to explore `store_data.csv` and answer the quiz questions." 
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 8, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "# imports and load data\n", 20 | "import pandas as pd\n", 21 | "import numpy as np\n", 22 | "from datetime import datetime\n", 23 | "% matplotlib inline\n", 24 | "\n", 25 | "df_store = pd.read_csv('store_data.csv')\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "\n", 38 | "RangeIndex: 200 entries, 0 to 199\n", 39 | "Data columns (total 6 columns):\n", 40 | "week 200 non-null object\n", 41 | "storeA 200 non-null int64\n", 42 | "storeB 200 non-null int64\n", 43 | "storeC 200 non-null int64\n", 44 | "storeD 200 non-null int64\n", 45 | "storeE 200 non-null int64\n", 46 | "dtypes: int64(5), object(1)\n", 47 | "memory usage: 9.5+ KB\n" 48 | ] 49 | }, 50 | { 51 | "data": { 52 | "text/html": [ 53 | "

\n", 54 | "\n", 67 | "\n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | "

	week	storeA	storeB	storeC	storeD	storeE
0	2014-05-04	2643	8257	3893	6231	1294
1	2014-05-11	6444	5736	5634	7092	2907
2	2014-05-18	9646	2552	4253	5447	4736
3	2014-05-25	5960	10740	8264	6063	949
4	2014-06-01	7412	7374	3208	3985	3023

\n", 127 | "

" 128 | ], 129 | "text/plain": [ 130 | " week storeA storeB storeC storeD storeE\n", 131 | "0 2014-05-04 2643 8257 3893 6231 1294\n", 132 | "1 2014-05-11 6444 5736 5634 7092 2907\n", 133 | "2 2014-05-18 9646 2552 4253 5447 4736\n", 134 | "3 2014-05-25 5960 10740 8264 6063 949\n", 135 | "4 2014-06-01 7412 7374 3208 3985 3023" 136 | ] 137 | }, 138 | "execution_count": 2, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "# explore data\n", 145 | "df_store.info()\n", 146 | "\n", 147 | "df_store.head()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 3, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "Total sales is 5115145\n", 160 | "Max total sales is 1351342\n", 161 | "store with max total sales is storeB\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "# total sales across all weeks, overall and for the top-selling store\n", 167 | "#first make new column with total values for all stores on that date\n", 168 | "df_store['total'] = df_store['storeA'] + df_store['storeB'] + df_store['storeC'] + df_store['storeD'] +df_store['storeE'] \n", 169 | "#get total sales\n", 170 | "total_sales = sum(df_store['total'])\n", 171 | "print('Total sales is {}'.format(total_sales))\n", 172 | "list_store_names = ['storeA', 'storeB', 'storeC', 'storeD', 'storeE']\n", 173 | "\n", 174 | "list_total = []\n", 175 | "sum_A = sum(df_store['storeA'])\n", 176 | "sum_B = sum(df_store['storeB'])\n", 177 | "sum_C = sum(df_store['storeC'])\n", 178 | "sum_D = sum(df_store['storeD'])\n", 179 | "sum_E = sum(df_store['storeE'])\n", 180 | "list_total = [sum_A, sum_B, sum_C, sum_D, sum_E]\n", 181 | "max_tot = max(list_total)\n", 182 | "#compute max value and return position\n", 183 | "pos = list_total.index(max_tot)\n", 184 | "print(\"Max total sales is {}\".format(max_tot))\n", 185 | "print(\"store with max total sales is {}\".format(list_store_names[pos]))\n" 186 | ] 187 | }, 188 
| { 189 | "cell_type": "code", 190 | "execution_count": 44, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "Store with max sales in last month is storeA\n" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "#calculate sales for final month (NOTE: the strict > and < bounds below exclude the boundary weeks 2018-02-04 and 2018-02-25)\n", 203 | "\n", 204 | "#find time range\n", 205 | "#df_store[180:] #inspect last months to determine span\n", 206 | "\n", 207 | "df_span_last = df_store[(df_store['week'] > '2018-02-04') & (df_store['week'] < '2018-02-25')]\n", 208 | "#print(df_span_last) #checkpoint\n", 209 | "a3 = sum(df_span_last['storeA'])\n", 210 | "b3 = sum(df_span_last['storeB'])\n", 211 | "c3 = sum(df_span_last['storeC'])\n", 212 | "d3 = sum(df_span_last['storeD'])\n", 213 | "e3 = sum(df_span_last['storeE'])\n", 214 | "last = [a3,b3,c3,d3,e3]\n", 215 | "max_last = max(last)\n", 216 | "#print(last, max_last) #checkpoint\n", 217 | "store_max_last = list_store_names[last.index(max_last)]\n", 218 | "print('Store with max sales in last month is {}'.format(store_max_last))\n", 219 | "\n", 220 | "\n" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 4, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "store with highest average sales [using lists] is storeB\n", 233 | "store with highest average sales [using dicts] is storeB\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "# average sales\n", 239 | "avg_A = df_store['storeA'].mean()\n", 240 | "avg_B = df_store['storeB'].mean()\n", 241 | "avg_C = df_store['storeC'].mean()\n", 242 | "avg_D = df_store['storeD'].mean()\n", 243 | "avg_E = df_store['storeE'].mean()\n", 244 | "\n", 245 | "#compute store with highest average sales using lists\n", 246 | "list_avg = [avg_A, avg_B, avg_C, avg_D, avg_E]\n", 247 | "list_store_names = ['storeA', 'storeB', 'storeC', 'storeD', 'storeE']\n", 248 | "#print(list_avg) #check-point to verify contents 
as five averages\n", 249 | "\n", 250 | "max_avg = max(list_avg)\n", 251 | "#compute max value and return position\n", 252 | "pos = list_avg.index(max_avg)\n", 253 | "#use the position value to select store\n", 254 | "print(\"store with highest average sales [using lists] is {}\".format(list_store_names[pos]))\n", 255 | "\n", 256 | "#compute store with highest average sales using dicts\n", 257 | "dict_avg = {'storeA' : avg_A, 'storeB' : avg_B, 'storeC' : avg_C, \n", 258 | " 'storeD' : avg_D, 'storeE' : avg_E}\n", 259 | "dict_max = max(dict_avg, key=dict_avg.get)\n", 260 | "print(\"store with highest average sales [using dicts] is {}\".format(dict_max))\n" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 5, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | " week storeA storeB storeC storeD storeE total\n", 273 | "97 2016-03-13 2054 1390 5112 5513 2536 16605\n", 274 | " storeA storeB storeC storeD storeE\n", 275 | "97 2054 1390 5112 5513 2536\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "# sales on march 13, 2016\n", 281 | "\n", 282 | "# calculate sales on week of March 13th, 2016\n", 283 | "sale = df_store.loc[df_store['week'] == '2016-03-13']\n", 284 | "print(sale) #checkpoint to verify record returned\n", 285 | "\n", 286 | "del sale['week']\n", 287 | "del sale['total']\n", 288 | "print(sale)\n" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 6, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "name": "stdout", 298 | "output_type": "stream", 299 | "text": [ 300 | " week storeA storeB storeC storeD storeE total\n", 301 | "9 2014-07-06 8567 3228 927 3277 168 16167\n" 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "# worst week for store C\n", 307 | "sale_storeC = []\n", 308 | "sale_storeC = df_store['storeC']\n", 309 | "date_storeC = df_store['week']\n", 310 | "\n", 311 | "minC = min(sale_storeC)\n", 312 | "\n", 313 | 
"\n", 314 | "print(df_store.loc[df_store['storeC'] == minC])" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 39, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "2014-05-04\n", 327 | "2018-02-25\n", 328 | "[74852, 68640, 56848, 63367, 24636] 74852\n", 329 | "Store with max sales in last 3 month period is storeA\n" 330 | ] 331 | } 332 | ], 333 | "source": [ 334 | "# total sales during most recent 3 month period\n", 335 | "\n", 336 | "#find time range\n", 337 | "dates_ranges = df_store['week']\n", 338 | "print(min(dates_ranges))\n", 339 | "print(max(dates_ranges))\n", 340 | "#df_store[180:] #inspect last months to determine span\n", 341 | "\n", 342 | "#df_span = df_store[(df_store['week'] > '2017-09-03') & (df_store['week'] < '2017-12-17')]\n", 343 | "#print(df_span)\n", 344 | "\n", 345 | "\n", 346 | "df_span_last3 = df_store[(df_store['week'] > '2017-12-03') & (df_store['week'] < '2018-02-25')]\n", 347 | "#print(df_span_last3) #checkpoint\n", 348 | "a3 = sum(df_span_last3['storeA'])\n", 349 | "b3 = sum(df_span_last3['storeB'])\n", 350 | "c3 = sum(df_span_last3['storeC'])\n", 351 | "d3 = sum(df_span_last3['storeD'])\n", 352 | "e3 = sum(df_span_last3['storeE'])\n", 353 | "last3 = [a3,b3,c3,d3,e3]\n", 354 | "max_last3 = max(last3)\n", 355 | "#print(last3, max_last3) #checkpoint\n", 356 | "store_max_last3 = list_store_names[last3.index(max_last3)]\n", 357 | "print('Store with max sales in last 3 month period is {}'.format(store_max_last3))\n", 358 | "\n", 359 | "\n", 360 | "#df_new=df_store[df_store['week'] == '2018-02-25']\n", 361 | "#print(df_new)\n" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": { 368 | "collapsed": true 369 | }, 370 | "outputs": [], 371 | "source": [] 372 | } 373 | ], 374 | "metadata": { 375 | "kernelspec": { 376 | "display_name": "Python 3", 377 | "language": "python", 378 | "name": 
"python3" 379 | }, 380 | "language_info": { 381 | "codemirror_mode": { 382 | "name": "ipython", 383 | "version": 3 384 | }, 385 | "file_extension": ".py", 386 | "mimetype": "text/x-python", 387 | "name": "python", 388 | "nbconvert_exporter": "python", 389 | "pygments_lexer": "ipython3", 390 | "version": "3.6.1" 391 | } 392 | }, 393 | "nbformat": 4, 394 | "nbformat_minor": 2 395 | } 396 | -------------------------------------------------------------------------------- /fix_datatypes_cyl.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Fixing `cyl` Data Type\n", 8 | "- 2008: extract int from string\n", 9 | "- 2018: convert float to int" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 21, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 22, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "# load datasets\n", 32 | "\n", 33 | "df_08 = pd.read_csv('data_08.csv')\n", 34 | "df_18 = pd.read_csv('data_18.csv')" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 23, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/plain": [ 45 | "6 409\n", 46 | "4 283\n", 47 | "8 199\n", 48 | "5 48\n", 49 | "12 30\n", 50 | "10 14\n", 51 | "2 2\n", 52 | "16 1\n", 53 | "Name: cyl, dtype: int64" 54 | ] 55 | }, 56 | "execution_count": 23, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "# check value counts for the 2008 cyl column\n", 63 | "df_08['cyl'].value_counts()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Read [this](https://stackoverflow.com/questions/35376387/extract-int-from-string-in-pandas) to help you 
extract ints from strings in Pandas for the next step." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 24, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "ename": "AttributeError", 80 | "evalue": "Can only use .str accessor with string values, which use np.object_ dtype in pandas", 81 | "output_type": "error", 82 | "traceback": [ 83 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 84 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 85 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Extract int from strings in the 2008 cyl column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf_08\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cyl'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_08\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'cyl'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextract\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'(\\d+)'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 86 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 3075\u001b[0m if (name in self._internal_names_set or name in self._metadata or\n\u001b[1;32m 3076\u001b[0m name in self._accessors):\n\u001b[0;32m-> 3077\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3078\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3079\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 87 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36m__get__\u001b[0;34m(self, instance, owner)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;31m# this ensures that Series.str. is well defined\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maccessor_cls\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 243\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconstruct_accessor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minstance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 244\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__set__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minstance\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 88 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/strings.py\u001b[0m in \u001b[0;36m_make_str_accessor\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1907\u001b[0m \u001b[0;31m# (instead of test for object dtype), but that isn't practical for\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1908\u001b[0m \u001b[0;31m# performance reasons until we have a str dtype (GH 9343)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1909\u001b[0;31m raise AttributeError(\"Can only use .str accessor with string \"\n\u001b[0m\u001b[1;32m 1910\u001b[0m 
\u001b[0;34m\"values, which use np.object_ dtype in \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1911\u001b[0m \"pandas\")\n", 89 | "\u001b[0;31mAttributeError\u001b[0m: Can only use .str accessor with string values, which use np.object_ dtype in pandas" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "# Extract int from strings in the 2008 cyl column\n", 95 | "df_08['cyl'] = df_08['cyl'].str.extract('(\\d+)').astype(int)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 25, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "6 409\n", 107 | "4 283\n", 108 | "8 199\n", 109 | "5 48\n", 110 | "12 30\n", 111 | "10 14\n", 112 | "2 2\n", 113 | "16 1\n", 114 | "Name: cyl, dtype: int64" 115 | ] 116 | }, 117 | "execution_count": 25, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "# Check value counts for 2008 cyl column again to confirm the change\n", 124 | "df_08['cyl'].value_counts()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 26, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "# convert 2018 cyl column to int\n", 136 | "#df_18['cyl'] = int(df_18['cyl'])\n", 137 | "df_18.cyl = df_18.cyl.astype(int)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 27, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "4 365\n", 149 | "6 246\n", 150 | "8 153\n", 151 | "3 18\n", 152 | "12 9\n", 153 | "5 2\n", 154 | "16 1\n", 155 | "Name: cyl, dtype: int64" 156 | ] 157 | }, 158 | "execution_count": 27, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "# Check value counts for 2018 cyl column again to confirm the change\n", 165 | "df_18['cyl'].value_counts()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 28, 171 | "metadata": { 172 
| "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "df_08.to_csv('data_08.csv', index=False)\n", 177 | "df_18.to_csv('data_18.csv', index=False)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "collapsed": true 185 | }, 186 | "outputs": [], 187 | "source": [] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3", 193 | "language": "python", 194 | "name": "python3" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.6.1" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 2 211 | } 212 | -------------------------------------------------------------------------------- /matplotlib_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Creating a Bar Chart Using Matplotlib" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import matplotlib.pyplot as plt\n", 21 | "% matplotlib inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "There are two required arguments in pyplot's `bar` function: the x-coordinates of the bars, and the heights of the bars." 
29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAECRJREFUeJzt3W+snnV9x/H3ZxTU+K/8ObCmLVZj\ns8mWAV1DakiMs2bhz2JJBglmkUq6NNnYpnHJ1vlgxmUP8IlsbAumE7dinEJQR4foxgrE7AHoQRHB\n6jgSRk/K6FGg6pga3HcPzq/z2J72XOdf757f3q/kzn1dv+t73/f3l6v93Ne5zn3dJ1WFJKlfPzfq\nBiRJy8ugl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHVu1agbADjnnHNqw4YNo25D\nklaUhx9++DtVNTZX3SkR9Bs2bGB8fHzUbUjSipLkP4bUeepGkjpn0EtS5wx6SeqcQS9JnTPoJalz\nBr0kdc6gl6TOGfSS1DmDXpI6d0pcGav/Xzbs+tyoW+jWUzdeOeoWdAryiF6SOmfQS1LnBgV9ktVJ\n7kzyzST7k7w5yVlJ7k3yRLs/s9Umyc1JJpI8mmTT8k5BknQiQ4/o/xL4QlX9InAhsB/YBeyrqo3A\nvrYOcDmwsd12ArcsaceSpHmZM+iTvAZ4C3ArQFX9uKpeALYBe1rZHuCqtrwNuK2mPQisTrJmyTuX\nJA0y5Ij+DcAU8HdJvprko0leCZxXVc8AtPtzW/1a4MCMx0+2sZ+RZGeS8STjU1NTi5qEJOn4hgT9\nKmATcEtVXQz8Fz89TTObzDJWxwxU7a6qzVW1eWxszj+QIklaoCFBPwlMVtVDbf1OpoP/2SOnZNr9\noRn162c8fh1wcGnalSTN15xBX1X/CRxI8gttaCvwDWAvsL2NbQfuast7gevap2+2AIePnOKRJJ18\nQ6+M/X3gE0nOAJ4Ermf6TeKOJDuAp4FrWu09wBXABPBiq5UkjcigoK+qR4DNs2zaOkttATcssi9J\n0hLxylhJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0k\ndc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1Ln\nBgV9kqeSfD3JI0nG29hZSe5N8kS7P7ONJ8nNSSaSPJpk03JOQJJ0YvM5ov+1qrqoqja39V3Avqra\nCOxr6wCXAxvbbSdwy1I1K0mav8WcutkG7GnLe4CrZozfVtMeBFYnWbOI15EkLcLQoC/gX5I8nGRn\nGzuvqp4BaPfntvG1wIEZj51sY5KkEVg1sO7SqjqY5Fzg3iTfPEFtZhmrY4qm3zB2Apx//vkD25Ak\nzdegI/qqOtjuDwGfBS4Bnj1ySqbdH2rlk8D6GQ9fBxyc5Tl3V9Xmqto8Nja28BlIkk5ozqBP8sok\nrz6yDPw68BiwF9jeyrYDd7XlvcB17dM3W4DDR07xSJJOviGnbs4DPpvkSP0/VNUXknwZuCPJDuBp\n4JpWfw9wBTABvAhcv+RdS5IGmzPoq+pJ4MJZxr8LbJ1lvIAblqQ7SdKieWWsJHXOoJekzhn0ktQ5\ng16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPo\nJalzBr0kdc6gl6TOGfSS1DmDXpI6N+cfB5ekDbs+N+oWuvXUjVc
u+2t4RC9JnTPoJalzBr0kdc6g\nl6TODQ76JKcl+WqSu9v665M8lOSJJLcnOaONv6ytT7TtG5andUnSEPM5on8PsH/G+oeAm6pqI/A8\nsKON7wCer6o3Aje1OknSiAwK+iTrgCuBj7b1AG8D7mwle4Cr2vK2tk7bvrXVS5JGYOgR/V8AfwT8\nT1s/G3ihql5q65PA2ra8FjgA0LYfbvU/I8nOJONJxqemphbYviRpLnMGfZLfAA5V1cMzh2cprQHb\nfjpQtbuqNlfV5rGxsUHNSpLmb8iVsZcC70hyBfBy4DVMH+GvTrKqHbWvAw62+klgPTCZZBXwWuC5\nJe9ckjTInEf0VfUnVbWuqjYA1wL3VdVvAfcDV7ey7cBdbXlvW6dtv6+qjjmilySdHIv5HP0fA+9L\nMsH0Ofhb2/itwNlt/H3ArsW1KElajHl9qVlVPQA80JafBC6ZpeaHwDVL0JskaQl4Zawkdc6gl6TO\nGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxB\nL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzcwZ9kpcn+VKSryV5\nPMkH2/jrkzyU5Ikktyc5o42/rK1PtO0blncKkqQTGXJE/yPgbVV1IXARcFmSLcCHgJuqaiPwPLCj\n1e8Anq+qNwI3tTpJ0ojMGfQ17Qdt9fR2K+BtwJ1tfA9wVVve1tZp27cmyZJ1LEmal0Hn6JOcluQR\n4BBwL/Bt4IWqeqmVTAJr2/Ja4ABA234YOHspm5YkDTco6KvqJ1V1EbAOuAR402xl7X62o/c6eiDJ\nziTjScanpqaG9itJmqd5feqmql4AHgC2AKuTrGqb1gEH2/IksB6gbX8t8Nwsz7W7qjZX1eaxsbGF\ndS9JmtOQT92MJVndll8BvB3YD9wPXN3KtgN3teW9bZ22/b6qOuaIXpJ0cqyau4Q1wJ4kpzH9xnBH\nVd2d5BvAp5L8OfBV4NZWfyvw8SQTTB/JX7sMfUuSBpoz6KvqUeDiWcafZPp8/dHjPwSuWZLuJEmL\n5pWxktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6Seqc\nQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknq3Jx/HPxUt2HX50bdQreeuvHKUbcg\naQl4RC9JnTPoJalzBr0kdW7OoE+yPsn9SfYneTzJe9r4WUnuTfJEuz+zjSfJzUkmkjyaZNNyT0KS\ndHxDjuhfAv6wqt4EbAFuSHIBsAvYV1UbgX1tHeByYGO77QRuWfKuJUmDzRn0VfVMVX2lLX8f2A+s\nBbYBe1rZHuCqtrwNuK2mPQisTrJmyTuXJA0yr3P0STYAFwMPAedV1TMw/WYAnNvK1gIHZjxsso1J\nkkZgcNAneRXwaeC9VfW9E5XOMlazPN/OJONJxqempoa2IUmap0FBn+R0pkP+E1X1mTb87JFTMu3+\nUBufBNbPePg64ODRz1lVu6tqc1VtHhsbW2j/kqQ5DPnUTYBbgf1V9eEZm/YC29vyduCuGePXtU/f\nbAEOHznFI0k6+YZ8BcKlwLuAryd5pI29H7gRuCPJDuBp4Jq27R7gCmACeBG4fkk7liTNy5xBX1X/\nxuzn3QG2zlJfwA2L7EuStES8MlaSOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNe\nkjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWp\ncwa9JHXOoJekzhn0ktQ5g16SOjdn0Cf5WJJDSR6bMXZWknuTPNHuz2zjSXJzkokkjybZtJzNS5Lm\nNuSI/u+By44a2wXsq6qNwL6
2DnA5sLHddgK3LE2bkqSFmjPoq+qLwHNHDW8D9rTlPcBVM8Zvq2kP\nAquTrFmqZiVJ87fQc/TnVdUzAO3+3Da+Fjgwo26yjR0jyc4k40nGp6amFtiGJGkuS/3L2MwyVrMV\nVtXuqtpcVZvHxsaWuA1J0hELDfpnj5ySafeH2vgksH5G3Trg4MLbkyQt1kKDfi+wvS1vB+6aMX5d\n+/TNFuDwkVM8kqTRWDVXQZJPAm8FzkkyCXwAuBG4I8kO4GngmlZ+D3AFMAG8CFy/DD1LkuZhzqCv\nqnceZ9PWWWoLuGGxTUmSlo5XxkpS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BL\nUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1\nzqCXpM4Z9JLUOYNekjq3LEGf5LIk30oykWTXcryGJGmYJQ/6JKcBfwNcDlwAvDPJBUv9OpKkYZbj\niP4SYKKqnqyqHwOfArYtw+tIkgZYjqBfCxyYsT7ZxiRJI7BqGZ4zs4zVMUXJTmBnW/1Bkm/N2HwO\n8J1l6O1UsGLmlg/Nq3zFzGueVtS83GfACpvXIvfZ64Y8aDmCfhJYP2N9HXDw6KKq2g3snu0JkoxX\n1eZl6G3kep2b81p5ep1br/OChc9tOU7dfBnYmOT1Sc4ArgX2LsPrSJIGWPIj+qp6KcnvAf8MnAZ8\nrKoeX+rXkSQNsxynbqiqe4B7FvEUs57S6USvc3NeK0+vc+t1XrDAuaXqmN+TSpI64lcgSFLnRhr0\nc31VQpJ3J5lK8ki7/fYo+pyvJB9LcijJY8fZniQ3t3k/mmTTye5xIQbM661JDs/YX396sntciCTr\nk9yfZH+Sx5O8Z5aalbrPhsxtxe23JC9P8qUkX2vz+uAsNS9LcnvbZw8l2XDyO52fgfOafy5W1Uhu\nTP+i9tvAG4AzgK8BFxxV827gr0fV4yLm9hZgE/DYcbZfAXye6WsOtgAPjbrnJZrXW4G7R93nAua1\nBtjUll8N/Pss/xZX6j4bMrcVt9/afnhVWz4deAjYclTN7wIfacvXArePuu8lmte8c3GUR/TdflVC\nVX0ReO4EJduA22rag8DqJGtOTncLN2BeK1JVPVNVX2nL3wf2c+zV3Ct1nw2Z24rT9sMP2urp7Xb0\nLxy3AXva8p3A1iSzXdB5yhg4r3kbZdAP/aqE32w/Kt+ZZP0s21einr8m4s3tx87PJ/mlUTczX+3H\n+4uZPpKaacXvsxPMDVbgfktyWpJHgEPAvVV13H1WVS8Bh4GzT26X8zdgXjDPXBxl0A/5qoR/AjZU\n1a8A/8pP351XukFfE7ECfQV4XVVdCPwV8I8j7mdekrwK+DTw3qr63tGbZ3nIitlnc8xtRe63qvpJ\nVV3E9NX3lyT55aNKVuQ+GzCveefiKIN+zq9KqKrvVtWP2urfAr96knpbboO+JmKlqarvHfmxs6av\npTg9yTkjbmuQJKczHYSfqKrPzFKyYvfZXHNbyfsNoKpeAB4ALjtq0//tsySrgNeygk49Hm9eC8nF\nUQb9nF+VcNQ50HcwfX6xB3uB69onObYAh6vqmVE3tVhJfv7IOdAklzD97+u7o+1qbq3nW4H9VfXh\n45StyH02ZG4rcb8lGUuyui2/Ang78M2jyvYC29vy1cB91X6beaoaMq+F5OKyXBk7RB3nqxKS/Bkw\nXlV7gT9I8g7gJabfid89qn7nI8knmf4kwzlJJoEPMP1LFarqI0xfNXwFMAG8CFw/mk7nZ8C8rgZ+\nJ8lLwH8D157q/7GaS4F3AV9v50YB3g+cDyt7nzFsbitxv60B9mT6Dx39HHBHVd19VH7cCnw8yQTT\n+XHt6NodbMi85p2LXhkrSZ3zylhJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5
/4X\nZ8Vx7M5o+qYAAAAASUVORK5CYII=\n", 39 | "text/plain": [ 40 | "" 41 | ] 42 | }, 43 | "metadata": {}, 44 | "output_type": "display_data" 45 | } 46 | ], 47 | "source": [ 48 | "plt.bar([1, 2, 3], [224, 620, 425]);" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "You can specify the x tick labels using pyplot's `xticks` function, or by specifying another parameter in the `bar` function. The two cells below accomplish the same thing." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADgNJREFUeJzt3W+MpWdZx/Hvzy4FRGH7Z9o0u1un\nho2AL4Bm0iwpUewSQ1vD9gU1EGM3zSbzpioGE119Y4yalMRQbWKabCi6RRSaCnYDDdosVANJK1Mp\nC2UhHWvtTrZ2p/SPYoOkevli7g3D7umeZ3bO7JnefD/JyfM813Ofc66Tk/3Nvfc5z0yqCklSv35s\n2g1IkjaWQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknq3JZpNwBw8cUX1+zs7LTb\nkKRXlIcffviZqpoZN25TBP3s7CwLCwvTbkOSXlGS/PuQcS7dSFLnDHpJ6pxBL0mdM+glqXMGvSR1\nzqCXpM4Z9JLUOYNekjpn0EtS5zbFlbH60TK7/3PTbqFbT9x6/bRb0CbkjF6SOmfQS1LnBgV9kq1J\n7knyrSRHk7wjyYVJ7k/yWNte0MYmye1JFpMcSXLlxr4ESdKZDJ3R/xnw+ap6E/BW4CiwHzhcVTuB\nw+0Y4FpgZ7vNA3dMtGNJ0pqMDfokrwd+DrgToKq+X1XPA3uAg23YQeCGtr8HuKtWPAhsTXLZxDuX\nJA0yZEb/08Ay8BdJvprko0leB1xaVU8BtO0lbfw24Niq+y+12g9JMp9kIcnC8vLyul6EJOnlDQn6\nLcCVwB1V9Xbgv/nBMs0oGVGr0wpVB6pqrqrmZmbG/oEUSdJZGhL0S8BSVT3Uju9hJfifPrkk07Yn\nVo3fser+24Hjk2lXkrRWY4O+qv4DOJbkZ1ppN/BN4BCwt9X2Ave2/UPATe3bN7uAF04u8UiSzr2h\nV8b+OvCJJOcDjwM3s/JD4u4k+4AngRvb2PuA64BF4MU2VpI0JYOCvqoeAeZGnNo9YmwBt6yzL0nS\nhHhlrCR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6\nZ9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXOD\ngj7JE0m+nuSRJAutdmGS+5M81rYXtHqS3J5kMcmRJFdu5AuQJJ3ZWmb0v1BVb6uquXa8HzhcVTuB\nw+0Y4FpgZ7vNA3dMqllJ0tqtZ+lmD3Cw7R8
EblhVv6tWPAhsTXLZOp5HkrQOQ4O+gH9I8nCS+Va7\ntKqeAmjbS1p9G3Bs1X2XWk2SNAVbBo67uqqOJ7kEuD/Jt84wNiNqddqglR8Y8wCXX375wDYkSWs1\naEZfVcfb9gTwGeAq4OmTSzJte6INXwJ2rLr7duD4iMc8UFVzVTU3MzNz9q9AknRGY4M+yeuS/OTJ\nfeAXgW8Ah4C9bdhe4N62fwi4qX37ZhfwwsklHknSuTdk6eZS4DNJTo7/66r6fJKvAHcn2Qc8CdzY\nxt8HXAcsAi8CN0+8a0nSYGODvqoeB946ov4dYPeIegG3TKQ7SdK6eWWsJHXOoJekzhn0ktQ5g16S\nOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalz\nBr0kdc6gl6TOGfSS1DmDXpI6N/aPg0vS7P7PTbuFbj1x6/Ub/hzO6CWpcwa9JHXOoJekzhn0ktS5\nwUGf5LwkX03y2XZ8RZKHkjyW5FNJzm/1V7fjxXZ+dmNalyQNsZYZ/QeBo6uOPwzcVlU7geeAfa2+\nD3iuqt4I3NbGSZKmZFDQJ9kOXA98tB0HuAa4pw05CNzQ9ve0Y9r53W28JGkKhs7o/xT4beD/2vFF\nwPNV9VI7XgK2tf1twDGAdv6FNv6HJJlPspBkYXl5+SzblySNMzbok/wScKKqHl5dHjG0Bpz7QaHq\nQFXNVdXczMzMoGYlSWs35MrYq4H3JrkOeA3welZm+FuTbGmz9u3A8TZ+CdgBLCXZArwBeHbinUuS\nBhk7o6+q362q7VU1C7wf+EJV/QrwReB9bdhe4N62f6gd085/oapOm9FLks6N9XyP/neADyVZZGUN\n/s5WvxO4qNU/BOxfX4uSpPVY0y81q6oHgAfa/uPAVSPGfA+4cQK9SZImwCtjJalzBr0kdc6gl6TO\nGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxB\nL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnRsb9Elek+Sfk3wtyaNJ/qDV\nr0jyUJLHknwqyfmt/up2vNjOz27sS5AkncmQGf3/ANdU1VuBtwHvSbIL+DBwW1XtBJ4D9rXx+4Dn\nquqNwG1tnCRpSsYGfa34bjt8VbsVcA1wT6sfBG5o+3vaMe387iSZWMeSpDUZtEaf5LwkjwAngPuB\nfwWer6qX2pAlYFvb3wYcA2jnXwAummTTkqThBgV9Vf1vVb0N2A5cBbx51LC2HTV7r1MLSeaTLCRZ\nWF5eHtqvJGmN1vStm6p6HngA2AVsTbKlndoOHG/7S8AOgHb+DcCzIx7rQFXNVdXczMzM2XUvSRpr\nyLduZpJsbfuvBd4NHAW+CLyvDdsL3Nv2D7Vj2vkvVNVpM3pJ0rmxZfwQLgMOJjmPlR8Md1fVZ5N8\nE/hkkj8Cvgrc2cbfCXw8ySIrM/n3b0DfkqSBxgZ9VR0B3j6i/jgr6/Wn1r8H3DiR7iRJ6+aVsZLU\nOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z\n6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6tzYPw6+2c3u/9y0W+jWE7deP+0WJE2AM3pJ\n6pxBL0mdM+glqXNjgz7JjiRfTHI0yaNJPtjqFya5P8ljbXtBqyfJ7UkWkxxJcuVGvwhJ0ssbMqN/\nCfitqnozsAu4JclbgP3A4araCRxuxwDXAjvbbR64Y+JdS5IGGxv0VfVUVf1L2/8v4CiwDdgDHGzD\nDgI3tP09wF214kFga5LLJt65JGmQNa3RJ5kF3g48BFxaVU/Byg8D4JI2bBtwbNXdllpNkjQFg4M+\nyU8Afwv
8ZlX955mGjqjViMebT7KQZGF5eXloG5KkNRoU9ElexUrIf6KqPt3KT59ckmnbE62+BOxY\ndfftwPFTH7OqDlTVXFXNzczMnG3/kqQxhnzrJsCdwNGq+siqU4eAvW1/L3DvqvpN7ds3u4AXTi7x\nSJLOvSG/AuFq4FeBryd5pNV+D7gVuDvJPuBJ4MZ27j7gOmAReBG4eaIdS5LWZGzQV9WXGL3uDrB7\nxPgCbllnX5KkCfHKWEnqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0md\nM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmD\nXpI6Z9BLUucMeknq3NigT/KxJCeSfGNV7cIk9yd5rG0vaPUkuT3JYpIjSa7cyOYlSeMNmdH/JfCe\nU2r7gcNVtRM43I4BrgV2tts8cMdk2pQkna2xQV9V/wQ8e0p5D3Cw7R8EblhVv6tWPAhsTXLZpJqV\nJK3d2a7RX1pVTwG07SWtvg04tmrcUqudJsl8koUkC8vLy2fZhiRpnEl/GJsRtRo1sKoOVNVcVc3N\nzMxMuA1J0klnG/RPn1ySadsTrb4E7Fg1bjtw/OzbkySt19kG/SFgb9vfC9y7qn5T+/bNLuCFk0s8\nkqTp2DJuQJK/Ad4FXJxkCfh94Fbg7iT7gCeBG9vw+4DrgEXgReDmDehZkrQGY4O+qj7wMqd2jxhb\nwC3rbUqSNDleGStJnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9\nJHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS\n5wx6SerchgR9kvck+XaSxST7N+I5JEnDTDzok5wH/DlwLfAW4ANJ3jLp55EkDbMRM/qrgMWqeryq\nvg98EtizAc8jSRpgI4J+G3Bs1fFSq0mSpmDLBjxmRtTqtEHJPDDfDr+b5Nsb0MtmdDHwzLSbGCIf\nnnYHm8Ir5v0C37PmR+k9+6khgzYi6JeAHauOtwPHTx1UVQeAAxvw/JtakoWqmpt2HxrG9+uVx/fs\ndBuxdPMVYGeSK5KcD7wfOLQBzyNJGmDiM/qqeinJrwF/D5wHfKyqHp3080iShtmIpRuq6j7gvo14\n7A78yC1XvcL5fr3y+J6dIlWnfU4qSeqIvwJBkjpn0EsjJJlN8o1p9yFNgkEvSZ0z6M+RJH+X5OEk\nj7aLxbT5bUlyMMmRJPck+fFpN6QzS3JTe7++luTj0+5ns/DD2HMkyYVV9WyS17JyrcHPV9V3pt2X\nRksyC/wb8M6q+nKSjwHfrKo/mWpjellJfhb4NHB1VT1z8t/ctPvaDJzRnzu/keRrwIOsXDm8c8r9\naLxjVfXltv9XwDun2YzGuga4p6qeATDkf2BDvkevH5bkXcC7gXdU1YtJHgBeM9WmNMSp/931v7+b\nW/A9GskZ/bnxBuC5FvJvAnZNuyENcnmSd7T9DwBfmmYzGusw8MtJLoKV5dIp97NpGPTnxudZ+WDv\nCPCHrCzfaPM7Cuxt79uFwB1T7kdn0H7Vyh8D/9iWST8y5ZY2DT+MlaTOOaOXpM4Z9JLUOYNekjpn\n0EtS5wx6SeqcQS9JnTPoJalzBr0kde7/AWlGQDBbdAN6AAAAAElFTkSuQmCC\n", 66 | "text/plain": [ 67 | "" 68 | ] 69 | }, 70 | "metadata": {}, 71 | "output_type": "display_data" 72 | } 73 | ], 74 | "source": [ 75 | "# plot bars\n", 76 | "plt.bar([1, 2, 3], [224, 620, 425])\n", 77 | "\n", 78 | "# 
specify x coordinates of tick labels and their labels\n", 79 | "plt.xticks([1, 2, 3], ['a', 'b', 'c']);" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAADgNJREFUeJzt3W+MpWdZx/Hvzy4FRGH7Z9o0u1un\nho2AL4Bm0iwpUewSQ1vD9gU1EGM3zSbzpioGE119Y4yalMRQbWKabCi6RRSaCnYDDdosVANJK1Mp\nC2UhHWvtTrZ2p/SPYoOkevli7g3D7umeZ3bO7JnefD/JyfM813Ofc66Tk/3Nvfc5z0yqCklSv35s\n2g1IkjaWQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknq3JZpNwBw8cUX1+zs7LTb\nkKRXlIcffviZqpoZN25TBP3s7CwLCwvTbkOSXlGS/PuQcS7dSFLnDHpJ6pxBL0mdM+glqXMGvSR1\nzqCXpM4Z9JLUOYNekjpn0EtS5zbFlbH60TK7/3PTbqFbT9x6/bRb0CbkjF6SOmfQS1LnBgV9kq1J\n7knyrSRHk7wjyYVJ7k/yWNte0MYmye1JFpMcSXLlxr4ESdKZDJ3R/xnw+ap6E/BW4CiwHzhcVTuB\nw+0Y4FpgZ7vNA3dMtGNJ0pqMDfokrwd+DrgToKq+X1XPA3uAg23YQeCGtr8HuKtWPAhsTXLZxDuX\nJA0yZEb/08Ay8BdJvprko0leB1xaVU8BtO0lbfw24Niq+y+12g9JMp9kIcnC8vLyul6EJOnlDQn6\nLcCVwB1V9Xbgv/nBMs0oGVGr0wpVB6pqrqrmZmbG/oEUSdJZGhL0S8BSVT3Uju9hJfifPrkk07Yn\nVo3fser+24Hjk2lXkrRWY4O+qv4DOJbkZ1ppN/BN4BCwt9X2Ave2/UPATe3bN7uAF04u8UiSzr2h\nV8b+OvCJJOcDjwM3s/JD4u4k+4AngRvb2PuA64BF4MU2VpI0JYOCvqoeAeZGnNo9YmwBt6yzL0nS\nhHhlrCR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6\nZ9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXOD\ngj7JE0m+nuSRJAutdmGS+5M81rYXtHqS3J5kMcmRJFdu5AuQJJ3ZWmb0v1BVb6uquXa8HzhcVTuB\nw+0Y4FpgZ7vNA3dMqllJ0tqtZ+lmD3Cw7R8EblhVv6tWPAhsTXLZOp5HkrQOQ4O+gH9I8nCS+Va7\ntKqeAmjbS1p9G3Bs1X2XWk2SNAVbBo67uqqOJ7kEuD/Jt84wNiNqddqglR8Y8wCXX375wDYkSWs1\naEZfVcfb9gTwGeAq4OmTSzJte6INXwJ2rLr7duD4iMc8UFVzVTU3MzNz9q9AknRGY4M+yeuS/OTJ\nfeAXgW8Ah4C9bdhe4N62fwi4qX37ZhfwwsklHknSuTdk6eZS4DNJTo7/66r6fJKvAHcn2Qc8CdzY\nxt8HXAcsAi8CN0+8a0nSYGODvqoeB946ov4dYPeIegG3TKQ7SdK6eWWsJHXOoJekzhn0ktQ5g16S\nOmfQS1LnDHpJ6pxBL0mdM+gl
qXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalz\nBr0kdc6gl6TOGfSS1DmDXpI6N/aPg0vS7P7PTbuFbj1x6/Ub/hzO6CWpcwa9JHXOoJekzhn0ktS5\nwUGf5LwkX03y2XZ8RZKHkjyW5FNJzm/1V7fjxXZ+dmNalyQNsZYZ/QeBo6uOPwzcVlU7geeAfa2+\nD3iuqt4I3NbGSZKmZFDQJ9kOXA98tB0HuAa4pw05CNzQ9ve0Y9r53W28JGkKhs7o/xT4beD/2vFF\nwPNV9VI7XgK2tf1twDGAdv6FNv6HJJlPspBkYXl5+SzblySNMzbok/wScKKqHl5dHjG0Bpz7QaHq\nQFXNVdXczMzMoGYlSWs35MrYq4H3JrkOeA3welZm+FuTbGmz9u3A8TZ+CdgBLCXZArwBeHbinUuS\nBhk7o6+q362q7VU1C7wf+EJV/QrwReB9bdhe4N62f6gd085/oapOm9FLks6N9XyP/neADyVZZGUN\n/s5WvxO4qNU/BOxfX4uSpPVY0y81q6oHgAfa/uPAVSPGfA+4cQK9SZImwCtjJalzBr0kdc6gl6TO\nGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxB\nL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnRsb9Elek+Sfk3wtyaNJ/qDV\nr0jyUJLHknwqyfmt/up2vNjOz27sS5AkncmQGf3/ANdU1VuBtwHvSbIL+DBwW1XtBJ4D9rXx+4Dn\nquqNwG1tnCRpSsYGfa34bjt8VbsVcA1wT6sfBG5o+3vaMe387iSZWMeSpDUZtEaf5LwkjwAngPuB\nfwWer6qX2pAlYFvb3wYcA2jnXwAummTTkqThBgV9Vf1vVb0N2A5cBbx51LC2HTV7r1MLSeaTLCRZ\nWF5eHtqvJGmN1vStm6p6HngA2AVsTbKlndoOHG/7S8AOgHb+DcCzIx7rQFXNVdXczMzM2XUvSRpr\nyLduZpJsbfuvBd4NHAW+CLyvDdsL3Nv2D7Vj2vkvVNVpM3pJ0rmxZfwQLgMOJjmPlR8Md1fVZ5N8\nE/hkkj8Cvgrc2cbfCXw8ySIrM/n3b0DfkqSBxgZ9VR0B3j6i/jgr6/Wn1r8H3DiR7iRJ6+aVsZLU\nOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z\n6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6tzYPw6+2c3u/9y0W+jWE7deP+0WJE2AM3pJ\n6pxBL0mdM+glqXNjgz7JjiRfTHI0yaNJPtjqFya5P8ljbXtBqyfJ7UkWkxxJcuVGvwhJ0ssbMqN/\nCfitqnozsAu4JclbgP3A4araCRxuxwDXAjvbbR64Y+JdS5IGGxv0VfVUVf1L2/8v4CiwDdgDHGzD\nDgI3tP09wF214kFga5LLJt65JGmQNa3RJ5kF3g48BFxaVU/Byg8D4JI2bBtwbNXdllpNkjQFg4M+\nyU8Afwv8ZlX955mGjqjViMebT7KQZGF5eXloG5KkNRoU9ElexUrIf6KqPt3KT59ckmnbE62+BOxY\ndfftwPFTH7OqDlTVXFXNzczMnG3/kqQxhnzrJsCdwNGq+siqU4eAvW1/L3DvqvpN7ds3u4AXTi7x\nSJLOvSG/AuFq4FeBryd5pNV+D7gVuDvJPuBJ4MZ27j7gOmAReBG4eaIdS5LWZGzQV9WXGL3uDrB7\nxPgCbllnX5KkCfHKWEnqnEEvSZ0z6CWpcwa9JHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0md\nM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS5wx6SeqcQS9JnTPoJalzBr0kdc6gl6TOGfSS1D
mD\nXpI6Z9BLUucMeknq3NigT/KxJCeSfGNV7cIk9yd5rG0vaPUkuT3JYpIjSa7cyOYlSeMNmdH/JfCe\nU2r7gcNVtRM43I4BrgV2tts8cMdk2pQkna2xQV9V/wQ8e0p5D3Cw7R8EblhVv6tWPAhsTXLZpJqV\nJK3d2a7RX1pVTwG07SWtvg04tmrcUqudJsl8koUkC8vLy2fZhiRpnEl/GJsRtRo1sKoOVNVcVc3N\nzMxMuA1J0klnG/RPn1ySadsTrb4E7Fg1bjtw/OzbkySt19kG/SFgb9vfC9y7qn5T+/bNLuCFk0s8\nkqTp2DJuQJK/Ad4FXJxkCfh94Fbg7iT7gCeBG9vw+4DrgEXgReDmDehZkrQGY4O+qj7wMqd2jxhb\nwC3rbUqSNDleGStJnTPoJalzBr0kdc6gl6TOGfSS1DmDXpI6Z9BLUucMeknqnEEvSZ0z6CWpcwa9\nJHXOoJekzhn0ktQ5g16SOmfQS1LnDHpJ6pxBL0mdM+glqXMGvSR1zqCXpM4Z9JLUOYNekjpn0EtS\n5wx6SerchgR9kvck+XaSxST7N+I5JEnDTDzok5wH/DlwLfAW4ANJ3jLp55EkDbMRM/qrgMWqeryq\nvg98EtizAc8jSRpgI4J+G3Bs1fFSq0mSpmDLBjxmRtTqtEHJPDDfDr+b5Nsb0MtmdDHwzLSbGCIf\nnnYHm8Ir5v0C37PmR+k9+6khgzYi6JeAHauOtwPHTx1UVQeAAxvw/JtakoWqmpt2HxrG9+uVx/fs\ndBuxdPMVYGeSK5KcD7wfOLQBzyNJGmDiM/qqeinJrwF/D5wHfKyqHp3080iShtmIpRuq6j7gvo14\n7A78yC1XvcL5fr3y+J6dIlWnfU4qSeqIvwJBkjpn0EsjJJlN8o1p9yFNgkEvSZ0z6M+RJH+X5OEk\nj7aLxbT5bUlyMMmRJPck+fFpN6QzS3JTe7++luTj0+5ns/DD2HMkyYVV9WyS17JyrcHPV9V3pt2X\nRksyC/wb8M6q+nKSjwHfrKo/mWpjellJfhb4NHB1VT1z8t/ctPvaDJzRnzu/keRrwIOsXDm8c8r9\naLxjVfXltv9XwDun2YzGuga4p6qeATDkf2BDvkevH5bkXcC7gXdU1YtJHgBeM9WmNMSp/931v7+b\nW/A9GskZ/bnxBuC5FvJvAnZNuyENcnmSd7T9DwBfmmYzGusw8MtJLoKV5dIp97NpGPTnxudZ+WDv\nCPCHrCzfaPM7Cuxt79uFwB1T7kdn0H7Vyh8D/9iWST8y5ZY2DT+MlaTOOaOXpM4Z9JLUOYNekjpn\n0EtS5wx6SeqcQS9JnTPoJalzBr0kde7/AWlGQDBbdAN6AAAAAElFTkSuQmCC\n", 90 | "text/plain": [ 91 | "" 92 | ] 93 | }, 94 | "metadata": {}, 95 | "output_type": "display_data" 96 | } 97 | ], 98 | "source": [ 99 | "# plot bars with x tick labels\n", 100 | "plt.bar([1, 2, 3], [224, 620, 425], tick_label=['a', 'b', 'c']);" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Set the title and label axes like this." 
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "plt.bar([1, 2, 3], [224, 620, 425], tick_label=['a', 'b', 'c'])\n", 119 | "plt.title('Some Title')\n", 120 | "plt.xlabel('Some X Label')\n", 121 | "plt.ylabel('Some Y Label');" 122 | ] 123 | } 124 | ], 125 | "metadata": { 126 | "kernelspec": { 127 | "display_name": "Python 3", 128 | "language": "python", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.6.1" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 2 146 | } 147 | -------------------------------------------------------------------------------- /plots-pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "% matplotlib inline" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/html": [ 23 | "

\n", 24 | "\n", 37 | "\n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | "

	year	city	country	avg_temp	sevenDayMA	FiveYearMA	TenYearMA
0	1743	Chicago	United States	5.44	NaN	NaN	NaN
1	1744	Chicago	United States	11.73	NaN	NaN	NaN
2	1745	Chicago	United States	1.80	NaN	NaN	NaN
3	1746	Chicago	United States	0.00	NaN	NaN	NaN
4	1747	Chicago	United States	0.00	NaN	3.794	NaN

\n", 103 | "

" 104 | ], 105 | "text/plain": [ 106 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 107 | "0 1743 Chicago United States 5.44 NaN NaN NaN\n", 108 | "1 1744 Chicago United States 11.73 NaN NaN NaN\n", 109 | "2 1745 Chicago United States 1.80 NaN NaN NaN\n", 110 | "3 1746 Chicago United States 0.00 NaN NaN NaN\n", 111 | "4 1747 Chicago United States 0.00 NaN 3.794 NaN" 112 | ] 113 | }, 114 | "execution_count": 2, 115 | "metadata": {}, 116 | "output_type": "execute_result" 117 | } 118 | ], 119 | "source": [ 120 | "df_temp = pd.read_csv('ChicagoResults.csv')\n", 121 | "df_temp.head()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "df_19 = df_temp[df_temp['year'] > 1800]" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 8, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/html": [ 141 | "

\n", 142 | "\n", 155 | "\n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | "

	year	city	country	avg_temp	sevenDayMA	FiveYearMA	TenYearMA
58	1801	Chicago	United States	10.47	10.101429	10.156	10.083
59	1802	Chicago	United States	10.66	10.194286	10.356	10.187
60	1803	Chicago	United States	10.48	10.274286	10.366	10.215
61	1804	Chicago	United States	10.41	10.381429	10.426	10.226
62	1805	Chicago	United States	10.63	10.410000	10.530	10.288

\n", 221 | "

" 222 | ], 223 | "text/plain": [ 224 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 225 | "58 1801 Chicago United States 10.47 10.101429 10.156 10.083\n", 226 | "59 1802 Chicago United States 10.66 10.194286 10.356 10.187\n", 227 | "60 1803 Chicago United States 10.48 10.274286 10.366 10.215\n", 228 | "61 1804 Chicago United States 10.41 10.381429 10.426 10.226\n", 229 | "62 1805 Chicago United States 10.63 10.410000 10.530 10.288" 230 | ] 231 | }, 232 | "execution_count": 8, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "df_19.head()" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 10, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/html": [ 249 | "

\n", 250 | "\n", 263 | "\n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | "

	year	city	country	avg_temp	sevenDayMA	FiveYearMA	TenYearMA
158	1901	Chicago	United States	9.87	10.088571	10.182	9.953
159	1902	Chicago	United States	10.04	10.191429	10.180	10.036
160	1903	Chicago	United States	9.57	10.074286	10.046	10.102
161	1904	Chicago	United States	8.56	9.861429	9.734	9.879
162	1905	Chicago	United States	9.42	9.744286	9.492	9.889

\n", 329 | "

" 330 | ], 331 | "text/plain": [ 332 | " year city country avg_temp sevenDayMA FiveYearMA TenYearMA\n", 333 | "158 1901 Chicago United States 9.87 10.088571 10.182 9.953\n", 334 | "159 1902 Chicago United States 10.04 10.191429 10.180 10.036\n", 335 | "160 1903 Chicago United States 9.57 10.074286 10.046 10.102\n", 336 | "161 1904 Chicago United States 8.56 9.861429 9.734 9.879\n", 337 | "162 1905 Chicago United States 9.42 9.744286 9.492 9.889" 338 | ] 339 | }, 340 | "execution_count": 10, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "mask = df_temp['year'] > 1900\n", 347 | "df_20 = df_temp[mask]\n", 348 | "df_20.head()" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 11, 354 | "metadata": {}, 355 | "outputs": [ 356 | { 357 | "data": { 358 | "text/plain": [ 359 | "count 213.000000\n", 360 | "mean 10.140751\n", 361 | "std 0.892496\n", 362 | "min 7.800000\n", 363 | "25% 9.560000\n", 364 | "50% 10.140000\n", 365 | "75% 10.660000\n", 366 | "max 12.820000\n", 367 | "Name: avg_temp, dtype: float64" 368 | ] 369 | }, 370 | "execution_count": 11, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "df_19['avg_temp'].describe()" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "collapsed": true 384 | }, 385 | "outputs": [], 386 | "source": [] 387 | } 388 | ], 389 | "metadata": { 390 | "kernelspec": { 391 | "display_name": "Python 3", 392 | "language": "python", 393 | "name": "python3" 394 | }, 395 | "language_info": { 396 | "codemirror_mode": { 397 | "name": "ipython", 398 | "version": 3 399 | }, 400 | "file_extension": ".py", 401 | "mimetype": "text/x-python", 402 | "name": "python", 403 | "nbconvert_exporter": "python", 404 | "pygments_lexer": "ipython3", 405 | "version": "3.6.2" 406 | } 407 | }, 408 | "nbformat": 4, 409 | "nbformat_minor": 2 410 | } 411 | 
-------------------------------------------------------------------------------- /plotting_type_quality.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Plotting Wine Type and Quality with Matplotlib" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 14, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "import pandas as pd\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "% matplotlib inline\n", 24 | "import seaborn as sns\n", 25 | "sns.set_style('darkgrid')\n", 26 | "\n", 27 | "wine_df = pd.read_csv('winequality_edited.csv')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Create arrays for red bar heights and white bar heights\n", 35 | "Remember, there's a bar for each combination of color and quality rating. Each bar's height is based on the proportion of samples of that color with that quality rating.\n", 36 | "1. Red bar proportions = counts for each quality rating / total # of red samples\n", 37 | "2. 
White bar proportions = counts for each quality rating / total # of white samples" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 15, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/plain": [ 48 | "color quality\n", 49 | "red 3 10\n", 50 | " 4 53\n", 51 | " 5 681\n", 52 | " 6 638\n", 53 | " 7 199\n", 54 | " 8 18\n", 55 | "white 3 20\n", 56 | " 4 163\n", 57 | " 5 1457\n", 58 | " 6 2198\n", 59 | " 7 880\n", 60 | " 8 175\n", 61 | " 9 5\n", 62 | "Name: pH, dtype: int64" 63 | ] 64 | }, 65 | "execution_count": 15, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "# get counts for each rating and color\n", 72 | "color_counts = wine_df.groupby(['color', 'quality']).count()['pH']\n", 73 | "color_counts" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 16, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "color\n", 85 | "red 1599\n", 86 | "white 4898\n", 87 | "Name: pH, dtype: int64" 88 | ] 89 | }, 90 | "execution_count": 16, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "# get total counts for each color\n", 97 | "color_totals = wine_df.groupby('color').count()['pH']\n", 98 | "color_totals" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 17, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "quality\n", 110 | "3 0.006254\n", 111 | "4 0.033146\n", 112 | "5 0.425891\n", 113 | "6 0.398999\n", 114 | "7 0.124453\n", 115 | "8 0.011257\n", 116 | "Name: pH, dtype: float64" 117 | ] 118 | }, 119 | "execution_count": 17, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "# get proportions by dividing red rating counts by total # of red samples\n", 126 | "red_proportions = color_counts['red'] / color_totals['red']\n", 127 | "red_proportions" 128 | ] 129 | }, 130 | 
{ 131 | "cell_type": "code", 132 | "execution_count": 18, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "quality\n", 139 | "3 0.004083\n", 140 | "4 0.033279\n", 141 | "5 0.297468\n", 142 | "6 0.448755\n", 143 | "7 0.179665\n", 144 | "8 0.035729\n", 145 | "9 0.001021\n", 146 | "Name: pH, dtype: float64" 147 | ] 148 | }, 149 | "execution_count": 18, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "# get proportions by dividing white rating counts by total # of white samples\n", 156 | "white_proportions = color_counts['white'] / color_totals['white']\n", 157 | "white_proportions" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "### Plot proportions on a bar chart\n", 165 | "Set the x coordinate location for each rating group and the width of each bar." 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 19, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "ind = np.arange(len(red_proportions)) # the x locations for the groups\n", 177 | "width = 0.35 # the width of the bars" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "Now let’s create the plot." 
185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 21, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "ename": "ValueError", 194 | "evalue": "shape mismatch: objects cannot be broadcast to a single shape", 195 | "output_type": "error", 196 | "traceback": [ 197 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 198 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 199 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# plot bars\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mred_bars\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mind\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mred_proportions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwidth\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'r'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m.7\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'Red Wine'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mwhite_bars\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mind\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mwidth\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwhite_proportions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwidth\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'w'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m.7\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'White Wine'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# title and labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 200 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/matplotlib/pyplot.py\u001b[0m in \u001b[0;36mbar\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 2625\u001b[0m mplDeprecation)\n\u001b[1;32m 2626\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2627\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2628\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2629\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_hold\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwashold\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 201 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/matplotlib/__init__.py\u001b[0m in \u001b[0;36minner\u001b[0;34m(ax, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1708\u001b[0m warnings.warn(msg % (label_namer, func.__name__),\n\u001b[1;32m 1709\u001b[0m RuntimeWarning, stacklevel=2)\n\u001b[0;32m-> 1710\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1711\u001b[0m \u001b[0mpre_doc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1712\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpre_doc\u001b[0m \u001b[0;32mis\u001b[0m 
\u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 202 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/matplotlib/axes/_axes.py\u001b[0m in \u001b[0;36mbar\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2079\u001b[0m x, height, width, y, linewidth = np.broadcast_arrays(\n\u001b[1;32m 2080\u001b[0m \u001b[0;31m# Make args iterable too.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2081\u001b[0;31m np.atleast_1d(x), height, width, y, linewidth)\n\u001b[0m\u001b[1;32m 2082\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2083\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0morientation\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'vertical'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 203 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/numpy/lib/stride_tricks.py\u001b[0m in \u001b[0;36mbroadcast_arrays\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_m\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msubok\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msubok\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_m\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 249\u001b[0;31m \u001b[0mshape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_broadcast_shape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 250\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mshape\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0marray\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 204 | "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/numpy/lib/stride_tricks.py\u001b[0m in \u001b[0;36m_broadcast_shape\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[0;31m# use the old-iterator because np.nditer does not handle size 0 arrays\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0;31m# consistently\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 184\u001b[0;31m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbroadcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 185\u001b[0m \u001b[0;31m# unfortunately, it cannot handle 32 or more arguments directly\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mpos\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m31\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 205 | "\u001b[0;31mValueError\u001b[0m: shape mismatch: objects cannot be broadcast to a single shape" 206 | ] 207 | }, 208 | { 209 | "data": { 210 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXYAAAD1CAYAAABEDd6nAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAGixJREFUeJzt3X9sE+f9B/D35Ywp1CRuEbaj4VhC\nMRudHfhjY6gwEJc5HnhRCjFICDSGlqKtdFCxUe2X3BGpjWir0VCkQEaxqoWh8WP5RnD6glonzGSw\nwcYkL2NSl6rWzERuWYFAShfD4e8fqO78TagvxMb2k/frr5z9Od/zOR9vjgfHj5RKpVIgIiJhlBV6\nAERElFsMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwZgKPQAAGBy8lfdjWCxTMTw8kvfjFBJ7FAN7\nFMOj6HHWrBljPj5p7thNJrnQQ8g79igG9iiGQvY4aYKdiGiyYLATEQmGwU5EJBgGOxGRYBjsRESC\nYbATEQmGwU5EJBgGOxGRYIriN0+JyjesNVQnm2SU39Wz1t3sODLRIRGVLN6xExEJhsFORCQYBjsR\nkWAY7EREgmGwExEJhsFORCQYBjsRkWAY7EREgjEU7NFoFH6/Hz6fD+3t7Q+sO3XqFD7/+c/jL3/5\nS/qx/fv3w+fzwe/34+zZsxMfMRERfaasv3mq6zqam5sRDodht9sRDAahKAqqq6sz6oaHh/HLX/4S\n8+fPTz/W398PVVWhqio0TcOmTZtw+vRpyLL4y2IRERVK1jv2WCwGl8sFp9MJs9mMQCCASCQyqq61\ntRVNTU2YOnVq+rFIJIJAIACz2Qyn0wmXy4VYLJbbDoiIKEPWYNc0DQ6HI71tt9uhaVpGzeXLlzEw\nMIDly5ePe18iIsqtrFMxqVRq1GOSJKV/vnfvHlpaWtDS0jLufT9hsUzN+4reslwGq3V6Xo9RaKXc\no2zw/ZckCVMM1BbjeZCfaTBUJ0kSZo7xZ+f/0/+na6JDKphSvlaNKmSPWYPd4XBgYGAgva1pGmw2\nW3r7o48+wnvvvYdvfvObAIDBwUF897vfRVtbW9Z9PzE8PDKhJoywWqfjxo3beT9OIZVyj0a+sREA\npphk3DHy7Y5FeB4mQ49GlfK1atSj6HHWrBljPp51Ksbr9SIejyORSCCZTEJVVSiKkn5+xowZ+MMf\n/oDu7m50d3djwYIFaGtrg9frhaIoUFUVyWQSiUQC8XgcNTU1ueuKiIhGyXrHbjKZEAqF0NTUBF3X\n0djYCLfbjdbWVng8HtTW1j5wX7fbjRUrVmDlypWQZRmhUIifiCEiyjMpNdZE+CM2OHgr78fgP/2K\nm9GFNgxPUxThQhuToUejSvlaNaqop2KIiKi0MNiJiATDYCciEgyDnYhIMAx2IiLBMNiJiATDYCci\nEgyDnYhIMAx2IiLBMNiJiATDYCciEgyDnYhIMAx2IiLBMNiJiATDYCciEoyhYI9Go/D7/fD5fGhv\nbx/1/OHDh1FfX4+GhgasW7cO/f39AIArV66gpqYGDQ0NaGhoQCgUyu3oiYholKwrKOm6jubmZoTD\nYdjtdgSDQSiKgurq6nRNfX091q1bBwCIRCJoaWnBW2+9BQCoqqpCV1fpLrpLRFRqst6xx2IxuFwu\nOJ1OmM1mBAIBRCKRjBqLxZL++eOPP4YkSbkfKRERGZL1jl3TNDgcjvS23W5HLBYbVXfo0CGEw2Hc\nuXMHb7/9dvrxK1eu4JlnnoHFYsELL7yAL33pSzkaOhERjSVrsI+1JOpYd+Tr16/H+vXrceLECbS1\ntWHXrl2w2Wzo6enBE088gb6+PmzZsgWqqmbc4QOAxTIVJlN+F7mW5TJYrdPzeoxCK+UeZYPvvyRJ\nmGKgthjPw2To0ahSvlaNKmSPWYPd4XBgYGAgva1pGmw22wPrA
4EAfvaznwEAzGYzzGYzAMDj8aCq\nqgoffPABvF5vxj7DwyMPM/Zx4eK5xa3cwOLNwDgWei7C8zAZejSqlK9Vo4p6MWuv14t4PI5EIoFk\nMglVVaEoSkZNPB5P/3zmzBm4XC4AwLVr16Dr9y/QRCKBeDwOp9P5sD0QEZEBWe/YTSYTQqEQmpqa\noOs6Ghsb4Xa70draCo/Hg9raWnR0dOD8+fMwmUwoLy/Hrl27AAAXL17Enj17IMsyZFnGzp07YbVa\n894UEdFkJqXGmkR/xAYHb+X9GPynX3Er37DWUJ3haYqOIxMdUs5Nhh6NKuVr1aiinoohIqLSwmAn\nIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwTDY\niYgEw2AnIhIMg52ISDAMdiIiwRgK9mg0Cr/fD5/Ph/b29lHPHz58GPX19WhoaMC6devQ39+ffm7/\n/v3w+Xzw+/04e/Zs7kZORERjyro0nq7raG5uRjgcht1uRzAYhKIoqK6uTtfU19dj3bp1AIBIJIKW\nlha89dZb6O/vh6qqUFUVmqZh06ZNOH36NGTZ2GrtREQ0flnv2GOxGFwuF5xOJ8xmMwKBACKRSEaN\nxWJJ//zxxx9DkiQA90M+EAjAbDbD6XTC5XIhFovluAUiIvpvWe/YNU2Dw+FIb9vt9jHD+dChQwiH\nw7hz5w7efvvt9L7z58/P2FfTtFH7WixTYTLl9y5elstgtU7P6zEKrZR7lA2+/5IkYYqB2mI8D5Oh\nR6NK+Vo1qpA9Zg32sda6/uSO/L+tX78e69evx4kTJ9DW1oZdu3YZ3nd4eMToeB8aF88tbuUGFm8G\nxrHQcxGeh8nQo1GlfK0aVdSLWTscDgwMDKS3NU2DzWZ7YH0gEMC77777UPsSEdHEZQ12r9eLeDyO\nRCKBZDIJVVWhKEpGTTweT/985swZuFwuAICiKFBVFclkEolEAvF4HDU1NbntgIiIMmSdijGZTAiF\nQmhqaoKu62hsbITb7UZrays8Hg9qa2vR0dGB8+fPw2Qyoby8HLt27QIAuN1urFixAitXroQsywiF\nQvxEDBFRnkmpsSbCH7HBwVt5Pwbn9Ipb+Ya1huoMzz93HJnokHJuMvRoVClfq0YV9Rw7ERGVFgY7\nEZFgGOxERIJhsBMRCYbBTkQkGAY7EZFgGOxERIJhsBMRCYbBTkQkGAY7EZFgGOxERIJhsBMRCYbB\nTkQkGAY7EZFgGOxERILJutAGAESjUbz88su4d+8e1qxZg82bN2c8Hw6HcfToUciyjCeffBKvvPIK\nPve5zwEA5s2bh7lz5wIAKisrsW/fvhy3QERE/y1rsOu6jubmZoTDYdjtdgSDQSiKgurq6nTNvHnz\ncPz4cUybNg2/+tWv8Nprr+GNN94AADz22GPo6urKXwdERJQh61RMLBaDy+WC0+mE2WxGIBBAJBLJ\nqFm0aBGmTZsGAFiwYEHGAtZERPRoZQ12TdPgcDjS23a7HZqmPbD+2LFjWLp0aXp7ZGQEq1evxtq1\na/Huu+9OcLhERJRN1qmYsZZElSRpzNquri709fWho6Mj/VhPTw/sdjsSiQQ2btyIuXPnoqqqKmM/\ni2UqTKb8LnIty2WwWqfn9RiFVso9ygbff0mSMMVAbTGeh8nQo1GlfK0aVcgeswa7w+HImFrRNA02\nm21U3blz57Bv3z50dHTAbDanH7fb7QAAp9OJhQsX4vLly6OCfXh45KEbMIqL5xa3cgOLNwPjWOi5\nCM/DZOjRqFK+Vo0q6sWsvV4v4vE4EokEkskkVFWFoigZNZcvX0YoFEJbWxtmzpyZfnxoaAjJZBIA\ncO3aNVy6dCnjP12JiCj3st6xm0wmhEIhNDU1Qdd1NDY2wu12o7W1FR6PB7W1tXj11Vdx+/ZtbNu2\nDcCnH2t8//338dJLL0GSJ
KRSKTz77LMMdiKiPJNSY02iP2KDg7fyfgz+06+4lW9Ya6jO8DRFx5GJ\nDinnJkOPRpXytWpUUU/FEBFRaWGwExEJhsFORCQYBjsRkWAY7EREgmGwExEJhsFORCQYBjsRkWAY\n7EREgmGwExEJhsFORCQYBjsRkWAY7EREgmGwExEJhsFORCQYQ8EejUbh9/vh8/nQ3t4+6vlwOIyV\nK1eivr4eGzduxD//+c/0c52dnairq0NdXR06OztzN3IiIhpT1mDXdR3Nzc04cOAAVFXFyZMn0d/f\nn1Ezb948HD9+HCdOnIDf78drr70GALhx4wb27t2LI0eO4OjRo9i7dy+Ghoby0wkREQEwEOyxWAwu\nlwtOpxNmsxmBQACRSCSjZtGiRZg2bRoAYMGCBenFr3t7e7F48WJYrVZUVFRg8eLFOHv2bB7aICKi\nT2QNdk3T4HA40tt2ux2apj2w/tixY1i6dOlD7UtERBOXdTHrsZZElSRpzNquri709fWho6NjXPta\nLFNhMslZBzsRslwGq3V6Xo9RaKXco2zw/ZckCVMM1BbjeZgMPRpVyteqUYXsMWuwOxyO9NQKcP8u\n3Gazjao7d+4c9u3bh46ODpjN5vS+Fy5cyNh34cKFo/YdHh55qMGPBxfPLW7lBhZvBsax0HMRnofJ\n0KNRpXytGlXUi1l7vV7E43EkEgkkk0moqgpFUTJqLl++jFAohLa2NsycOTP9+JIlS9Db24uhoSEM\nDQ2ht7cXS5YsmWArRET0WbLesZtMJoRCITQ1NUHXdTQ2NsLtdqO1tRUejwe1tbV49dVXcfv2bWzb\ntg0AUFlZiX379sFqteK5555DMBgEAGzZsgVWqzW/HRERTXJSaqyJ8EdscPBW3o/Bf/oVt/INaw3V\nGZ6m6Dgy0SHl3GTo0ahSvlaNKuqpGCIiKi0MdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwTDYiYgE\nw2AnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhKMoWCPRqPw+/3w\n+Xxob28f9fzFixexatUqPPXUUzh16lTGc/PmzUNDQwMaGhrwne98JzejJiKiB8q6NJ6u62hubkY4\nHIbdbkcwGISiKKiurk7XVFZWoqWlBQcPHhy1/2OPPYaurq7cjpqIiB4oa7DHYjG4XC44nU4AQCAQ\nQCQSyQj22bNnAwDKyjizQ0RUaFmDXdM0OByO9LbdbkcsFjN8gJGREaxevRomkwmbN2/G1772tVE1\nFstUmEyy4dd8GLJcBqt1el6PUWil3KNs8P2XJAlTDNQW43mYDD0aVcrXqlGF7DFrsI+11rUkSYYP\n0NPTA7vdjkQigY0bN2Lu3LmoqqrKqBkeHjH8eg+Li+cWt3IDizcD41jouQjPw2To0ahSvlaNKurF\nrB0OBwYGBtLbmqbBZrMZPrDdbgcAOJ1OLFy4EJcvXza8LxERjV/WYPd6vYjH40gkEkgmk1BVFYqi\nGHrxoaEhJJNJAMC1a9dw6dKljLl5IiLKvaxTMSaTCaFQCE1NTdB1HY2NjXC73WhtbYXH40FtbS1i\nsRief/553Lx5Ez09PXjzzTehqiref/99vPTSS5AkCalUCs8++yyDnYgoz6TUWJPoj9jg4K28H4Nz\nesWtfMNaQ3WG5587jkx0SDk3GXo0qpSvVaOKeo6diIhKC4OdiEgwDHYiIsEw2ImIBMNgJyISDIOd\niEgwDHYiIsFk/QUlIiKjjH5WXzbJhr47p5Q/q19IvGMnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIM\ng52ISDAMdiIiwRgK9mg0Cr/fD5/Ph/b29lHPX7x4EatWrcJTTz2FU6dOZTzX2dmJuro61NXVobOz\nMzejJiKiB8r6C0q6rqO5uRnhcBh2ux3BYBCKomSshFRZWYmWlhYcPHgwY98bN25g7969OH7
8OCRJ\nwurVq6EoCioqKnLfCRERATBwxx6LxeByueB0OmE2mxEIBBCJRDJqZs+ejS984QsoK8t8ud7eXixe\nvBhWqxUVFRVYvHgxzp49m9sOiIgoQ9Zg1zQNDocjvW2326FpmqEXn8i+RET0cLJOxYy1JKokSYZe\n3Oi+FstUmEyyodd8WLJcBqt1el6PUWil3KNs8P2XJAlTDNQW43lgj58q5R6NKuSfx6zB7nA4MDAw\nkN7WNA02m83QizscDly4cCFj34ULF46qGx4eMfR6E8HFc4ubkS+EAsax0HMRngf2+KlS7tGool7M\n2uv1Ih6PI5FIIJlMQlVVKIpi6KBLlixBb28vhoaGMDQ0hN7eXixZsmR8IycionHJesduMpkQCoXQ\n1NQEXdfR2NgIt9uN1tZWeDwe1NbWIhaL4fnnn8fNmzfR09ODN998E6qqwmq14rnnnkMwGAQAbNmy\nBVarNe9NERFNZoa+j33ZsmVYtmxZxmPbtm1L/1xTU4NoNDrmvsFgMB3sRESUf/zNUyIiwTDYiYgE\nw2AnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIiwTDYiYgEw2AnIhIMg52ISDAMdiIi\nwTDYiYgEw2AnIhKMoWCPRqPw+/3w+Xxob28f9XwymcQLL7wAn8+HNWvW4MqVKwCAK1euoKamBg0N\nDWhoaEAoFMrt6ImIaJSsC23ouo7m5maEw2HY7XYEg0EoioLq6up0zdGjR1FeXo533nkHqqri9ddf\nxxtvvAEAqKqqQldXV/46ICKiDFnv2GOxGFwuF5xOJ8xmMwKBACKRSEZNd3c3Vq1aBQDw+/04f/48\nUqlUfkZMRESfKWuwa5oGh8OR3rbb7dA0bVRNZWUlgPtrpM6YMQPXr18HcH865plnnsGGDRvwxz/+\nMZdjJyKiMWSdihnrzluSJEM1NpsNPT09eOKJJ9DX14ctW7ZAVVVYLJaMWotlKkwmebxjHxdZLoPV\nOj2vxyi0Uu5RNvj+S5KEKQZqi/E8sMdPlXKPRhXyz2PWYHc4HBgYGEhva5oGm802qubq1atwOBy4\ne/cubt26BavVCkmSYDabAQAejwdVVVX44IMP4PV6M/YfHh7JRS+fyWqdjhs3buf9OIVUyj2W39UN\n1U0xybhjoPZmEZ4H9vipUu7RqEfx53HWrBljPp51Ksbr9SIejyORSCCZTEJVVSiKklGjKAo6OzsB\nAKdPn8aiRYsgSRKuXbsGXb//5iUSCcTjcTidzon2QkREnyHrHbvJZEIoFEJTUxN0XUdjYyPcbjda\nW1vh8XhQW1uLYDCIHTt2wOfzoaKiArt37wYAXLx4EXv27IEsy5BlGTt37oTVas17U0REk1nWYAeA\nZcuWYdmyZRmPbdu2Lf3z1KlTsWfPnlH7+f1++P3+CQ6RiIjGg795SkQkGAY7EZFgGOxERIIxNMdO\nhVW+Ya2hOtkkG/q42c2OIxMdEhEVMd6xExEJhsFORCQYBjsRkWAY7EREgmGwExEJhsFORCQYBjsR\nkWAY7EREgmGwExEJhsFORCQYfqUAEdE4lMJXfPCOnYhIMIbu2KPRKF5++WXcu3cPa9aswebNmzOe\nTyaTePHFF/HXv/4VVqsVu3fvxuzZswEA+/fvx7Fjx1BWVoaf/vSn+OpXv5rTBkrhb08iokcp6x27\nrutobm7GgQMHoKoqTp48if7+/oyao0ePory8HO+88w6+9a1v4fXXXwcA9Pf3Q1VVqKqKAwcOYOfO\nnek1UImIKD+yBnssFoPL5YLT6YTZbEYgEEAkEsmo6e7uxqpVqwDcXw7v/PnzSKVSiEQiCAQCMJvN\ncDqdcLlciMVi+emEiIgAGJiK0TQNDocjvW2320eFs6ZpqKysvP+CJhNmzJiB69evQ9M0zJ8/P2Nf\nTdNGHWPWrBkP3QBO/6/h0qkGamY9/Ejyhz1mYI/3scc
CKYEes96xp1KpUY9JkmSoxsi+RESUW1mD\n3eFwYGBgIL2taRpsNtuomqtXrwIA7t69i1u3bsFqtRral4iIcitrsHu9XsTjcSQSCSSTSaiqCkVR\nMmoURUFnZycA4PTp01i0aBEkSYKiKFBVFclkEolEAvF4HDU1NfnphIiIABiYYzeZTAiFQmhqaoKu\n62hsbITb7UZrays8Hg9qa2sRDAaxY8cO+Hw+VFRUYPfu3QAAt9uNFStWYOXKlZBlGaFQCLIs572p\n/y/bxzVL3Y9+9COcOXMGM2fOxMmTJws9nJy7evUqXnzxRfz73/9GWVkZ1q5di40bNxZ6WDk1MjKC\n9evXI5lMQtd1+P1+bN26tdDDyotPcsRut2P//v2FHk7OKYqCxx9/HGVlZZBlGb/5zW8e/SBSgrt7\n926qtrY29Y9//CM1MjKSqq+vT/39738v9LBy6sKFC6m+vr5UIBAo9FDyQtO0VF9fXyqVSqVu3bqV\nqqurE+49vHfvXmp4eDiVSqVSyWQyFQwGU3/+858LPKr8OHjwYGr79u2pzZs3F3ooebF8+fLUhx9+\nWNAxCP+bp0Y+rlnqvvzlL6OioqLQw8gbm82GL37xiwAAi8WCOXPmjPnpqlImSRIef/xxAPf/n+ru\n3btCftBgYGAAZ86cQTAYLPRQhCZ8sI/1cU3RQmEyuXLlCv72t79lfIxWFLquo6GhAU8//TSefvpp\nIXt85ZVXsGPHDpSViR093/72t7F69Wr8+te/LsjxxT67MPZxTSoNH330EbZu3Yof//jHsFgshR5O\nzsmyjK6uLvz2t79FLBbDe++9V+gh5VRPTw+efPJJeDyeQg8lrw4fPozOzk784he/wKFDh3Dx4sVH\nPgbhg50fuRTDnTt3sHXrVtTX16Ourq7Qw8mr8vJyfOUrX8HZs2cLPZScunTpErq7u6EoCrZv347f\n//73+MEPflDoYeWc3W4HAMycORM+n68gv20vfLAb+bgmFbdUKoWf/OQnmDNnDjZt2lTo4eTFtWvX\ncPPmTQDAf/7zH5w7dw5z5swp8Khy6/vf/z6i0Si6u7vx85//HIsWLUp/r5Qobt++jeHh4fTPv/vd\n7+B2ux/5OIT/PvYHfVxTJNu3b8eFCxdw/fp1LF26FN/73vewZs2aQg8rZ/70pz+hq6sLc+fORUND\nA4D7PS9btqzAI8udf/3rX/jhD38IXdeRSqXw9a9/HcuXLy/0sGicPvzwQ2zZsgXA/f8z+cY3voGl\nS5c+8nFIqbEmoYmIqGQJPxVDRDTZMNiJiATDYCciEgyDnYhIMAx2IiLBMNiJiATDYCciEgyDnYhI\nMP8HDmMzSpqwHYkAAAAASUVORK5CYII=\n", 211 | "text/plain": [ 212 | "" 213 | ] 214 | }, 215 | "metadata": {}, 216 | "output_type": "display_data" 217 | } 218 | ], 219 | "source": [ 220 | "# plot bars\n", 221 | "red_bars = plt.bar(ind, red_proportions, width, color='r', alpha=.7, label='Red Wine')\n", 222 | "white_bars = plt.bar(ind + width, white_proportions, width, color='w', alpha=.7, label='White Wine')\n", 223 | "\n", 224 | "# title and labels\n", 225 | "plt.ylabel('Proportion')\n", 226 | "plt.xlabel('Quality')\n", 227 | "plt.title('Proportion by Wine Color and Quality')\n", 228 | "locations = ind + width / 2 # xtick 
locations\n", 229 | "labels = ['3', '4', '5', '6', '7', '8', '9'] # xtick labels\n", 230 | "plt.xticks(locations, labels)\n", 231 | "\n", 232 | "# legend\n", 233 | "plt.legend()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "Oh, that didn't work because we're missing a red wine value for the 9 rating. Even though this number is a 0, we need it for our plot. Run the last two cells after running the cell below." 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 22, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "quality\n", 252 | "3 0.006254\n", 253 | "4 0.033146\n", 254 | "5 0.425891\n", 255 | "6 0.398999\n", 256 | "7 0.124453\n", 257 | "8 0.011257\n", 258 | "9 0.000000\n", 259 | "Name: pH, dtype: float64" 260 | ] 261 | }, 262 | "execution_count": 22, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "red_proportions['9'] = 0\n", 269 | "red_proportions" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "collapsed": true 277 | }, 278 | "outputs": [], 279 | "source": [] 280 | } 281 | ], 282 | "metadata": { 283 | "kernelspec": { 284 | "display_name": "Python 3", 285 | "language": "python", 286 | "name": "python3" 287 | }, 288 | "language_info": { 289 | "codemirror_mode": { 290 | "name": "ipython", 291 | "version": 3 292 | }, 293 | "file_extension": ".py", 294 | "mimetype": "text/x-python", 295 | "name": "python", 296 | "nbconvert_exporter": "python", 297 | "pygments_lexer": "ipython3", 298 | "version": "3.6.1" 299 | } 300 | }, 301 | "nbformat": 4, 302 | "nbformat_minor": 2 303 | } 304 | --------------------------------------------------------------------------------