├── .gitignore ├── All.ipynb ├── Exercises-1.ipynb ├── Exercises-2.ipynb ├── Exercises-3.ipynb ├── Exercises-4.ipynb ├── Exercises-5.ipynb ├── Exercises-6.ipynb ├── FUNDING.yml ├── LICENSE.txt ├── README.md ├── Solutions-1.ipynb ├── Solutions-2.ipynb ├── Solutions-3.ipynb ├── Solutions-4.ipynb ├── Solutions-5.ipynb ├── Solutions-6.ipynb ├── build ├── BUILD.py ├── BUILD.sh └── split.py ├── cheat-sheet.txt ├── email.txt ├── images ├── Diagrams.ipynb ├── Makefile ├── loop.sh ├── sample.csv └── tmp.dot ├── requirements.sh ├── sales1.csv ├── sales2.csv ├── script.txt ├── style-notebook.css ├── style-table.css └── youtube.png /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | build/ApplyDiffs* 3 | build/README 4 | build/filesizes* 5 | build/*.list.gz 6 | build/*.csv 7 | data/*.csv 8 | *.html 9 | *.png 10 | -------------------------------------------------------------------------------- /Exercises-1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/html": [ 21 | "" 57 | ], 58 | "text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "execution_count": 2, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "from IPython.core.display import HTML\n", 69 | "css = open('style-table.css').read() + open('style-notebook.css').read()\n", 70 | "HTML(''.format(css))" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 95 | "\n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | "
titleyear
0The Rising Son1990
1The Thousand Plane Raid1969
2The Spider and the Fly1949
3Country2000
4The Golden Note2017
\n", 131 | "
" 132 | ], 133 | "text/plain": [ 134 | " title year\n", 135 | "0 The Rising Son 1990\n", 136 | "1 The Thousand Plane Raid 1969\n", 137 | "2 The Spider and the Fly 1949\n", 138 | "3 Country 2000\n", 139 | "4 The Golden Note 2017" 140 | ] 141 | }, 142 | "execution_count": 3, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "titles = pd.read_csv('data/titles.csv')\n", 149 | "titles.head()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 4, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/html": [ 160 | "
\n", 161 | "\n", 174 | "\n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | "
titleyearnametypecharactern
0Closet Monster2015Buffy #1actorBuffy 431.0
1Suuri illusioni1985Homo $actorGuests22.0
2Battle of the Sexes2017$hutteractorBobby Riggs Fan10.0
3Secret in Their Eyes2015$hutteractor2002 Dodger FanNaN
4Steve Jobs2015$hutteractor1988 Opera House PatronNaN
\n", 234 | "
" 235 | ], 236 | "text/plain": [ 237 | " title year name type character n\n", 238 | "0 Closet Monster 2015 Buffy #1 actor Buffy 4 31.0\n", 239 | "1 Suuri illusioni 1985 Homo $ actor Guests 22.0\n", 240 | "2 Battle of the Sexes 2017 $hutter actor Bobby Riggs Fan 10.0\n", 241 | "3 Secret in Their Eyes 2015 $hutter actor 2002 Dodger Fan NaN\n", 242 | "4 Steve Jobs 2015 $hutter actor 1988 Opera House Patron NaN" 243 | ] 244 | }, 245 | "execution_count": 4, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "cast = pd.read_csv('data/cast.csv')\n", 252 | "cast.head()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": { 258 | "collapsed": true 259 | }, 260 | "source": [ 261 | "### How many movies are listed in the titles dataframe?" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "collapsed": true 269 | }, 270 | "outputs": [], 271 | "source": [] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": { 277 | "collapsed": true 278 | }, 279 | "outputs": [], 280 | "source": [] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": { 285 | "collapsed": true 286 | }, 287 | "source": [ 288 | "### What are the earliest two films listed in the titles dataframe?" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": { 295 | "collapsed": true 296 | }, 297 | "outputs": [], 298 | "source": [] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": { 304 | "collapsed": true 305 | }, 306 | "outputs": [], 307 | "source": [] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": { 312 | "collapsed": true 313 | }, 314 | "source": [ 315 | "### How many movies have the single-word title \"Hamlet\"?" 
316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": { 322 | "collapsed": true 323 | }, 324 | "outputs": [], 325 | "source": [] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": { 331 | "collapsed": true 332 | }, 333 | "outputs": [], 334 | "source": [] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": { 339 | "collapsed": true 340 | }, 341 | "source": [ 342 | "### How many movies are titled \"North by Northwest\"?" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": { 349 | "collapsed": true 350 | }, 351 | "outputs": [], 352 | "source": [] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": { 358 | "collapsed": true 359 | }, 360 | "outputs": [], 361 | "source": [] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": { 366 | "collapsed": true 367 | }, 368 | "source": [ 369 | "### When was the first movie titled \"Hamlet\" made?" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": { 376 | "collapsed": true 377 | }, 378 | "outputs": [], 379 | "source": [] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": { 385 | "collapsed": true 386 | }, 387 | "outputs": [], 388 | "source": [] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": { 393 | "collapsed": true 394 | }, 395 | "source": [ 396 | "### List all of the \"Treasure Island\" movies from earliest to most recent." 
397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": { 403 | "collapsed": true 404 | }, 405 | "outputs": [], 406 | "source": [] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": { 412 | "collapsed": true 413 | }, 414 | "outputs": [], 415 | "source": [] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": { 420 | "collapsed": true 421 | }, 422 | "source": [ 423 | "### How many movies were made in the year 1950?" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": true 431 | }, 432 | "outputs": [], 433 | "source": [] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": { 439 | "collapsed": true 440 | }, 441 | "outputs": [], 442 | "source": [] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": { 447 | "collapsed": true 448 | }, 449 | "source": [ 450 | "### How many movies were made in the year 1960?" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": { 457 | "collapsed": true 458 | }, 459 | "outputs": [], 460 | "source": [] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": { 466 | "collapsed": true 467 | }, 468 | "outputs": [], 469 | "source": [] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": { 474 | "collapsed": true 475 | }, 476 | "source": [ 477 | "### How many movies were made from 1950 through 1959?" 
478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": { 484 | "collapsed": true 485 | }, 486 | "outputs": [], 487 | "source": [] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": { 493 | "collapsed": true 494 | }, 495 | "outputs": [], 496 | "source": [] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": { 501 | "collapsed": true 502 | }, 503 | "source": [ 504 | "### In what years has a movie titled \"Batman\" been released?" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": null, 510 | "metadata": { 511 | "collapsed": true 512 | }, 513 | "outputs": [], 514 | "source": [] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "metadata": { 520 | "collapsed": true 521 | }, 522 | "outputs": [], 523 | "source": [] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": { 528 | "collapsed": true 529 | }, 530 | "source": [ 531 | "### How many roles were there in the movie \"Inception\"?" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "metadata": { 538 | "collapsed": true 539 | }, 540 | "outputs": [], 541 | "source": [] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": null, 546 | "metadata": { 547 | "collapsed": true 548 | }, 549 | "outputs": [], 550 | "source": [] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "metadata": { 555 | "collapsed": true 556 | }, 557 | "source": [ 558 | "### How many roles in the movie \"Inception\" are NOT ranked by an \"n\" value?" 
559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": { 565 | "collapsed": true 566 | }, 567 | "outputs": [], 568 | "source": [] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": null, 573 | "metadata": { 574 | "collapsed": true 575 | }, 576 | "outputs": [], 577 | "source": [] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "metadata": { 582 | "collapsed": true 583 | }, 584 | "source": [ 585 | "### But how many roles in the movie \"Inception\" did receive an \"n\" value?" 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": null, 591 | "metadata": { 592 | "collapsed": true 593 | }, 594 | "outputs": [], 595 | "source": [] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": null, 600 | "metadata": { 601 | "collapsed": true 602 | }, 603 | "outputs": [], 604 | "source": [] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": { 609 | "collapsed": true 610 | }, 611 | "source": [ 612 | "### Display the cast of \"North by Northwest\" in their correct \"n\"-value order, ignoring roles that did not earn a numeric \"n\" value." 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": { 619 | "collapsed": true 620 | }, 621 | "outputs": [], 622 | "source": [] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": null, 627 | "metadata": { 628 | "collapsed": true 629 | }, 630 | "outputs": [], 631 | "source": [] 632 | }, 633 | { 634 | "cell_type": "markdown", 635 | "metadata": { 636 | "collapsed": true 637 | }, 638 | "source": [ 639 | "### Display the entire cast, in \"n\"-order, of the 1972 film \"Sleuth\"." 
640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": null, 645 | "metadata": { 646 | "collapsed": true 647 | }, 648 | "outputs": [], 649 | "source": [] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "metadata": { 655 | "collapsed": true 656 | }, 657 | "outputs": [], 658 | "source": [] 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "metadata": { 663 | "collapsed": true 664 | }, 665 | "source": [ 666 | "### Now display the entire cast, in \"n\"-order, of the 2007 version of \"Sleuth\"." 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": { 673 | "collapsed": true 674 | }, 675 | "outputs": [], 676 | "source": [] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": { 682 | "collapsed": true 683 | }, 684 | "outputs": [], 685 | "source": [] 686 | }, 687 | { 688 | "cell_type": "markdown", 689 | "metadata": { 690 | "collapsed": true 691 | }, 692 | "source": [ 693 | "### How many roles were credited in the silent 1921 version of Hamlet?" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": null, 699 | "metadata": { 700 | "collapsed": true 701 | }, 702 | "outputs": [], 703 | "source": [] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "execution_count": null, 708 | "metadata": { 709 | "collapsed": true 710 | }, 711 | "outputs": [], 712 | "source": [] 713 | }, 714 | { 715 | "cell_type": "markdown", 716 | "metadata": { 717 | "collapsed": true 718 | }, 719 | "source": [ 720 | "### How many roles were credited in Branagh’s 1996 Hamlet?" 
721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": null, 726 | "metadata": { 727 | "collapsed": true 728 | }, 729 | "outputs": [], 730 | "source": [] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": null, 735 | "metadata": { 736 | "collapsed": true 737 | }, 738 | "outputs": [], 739 | "source": [] 740 | }, 741 | { 742 | "cell_type": "markdown", 743 | "metadata": { 744 | "collapsed": true 745 | }, 746 | "source": [ 747 | "### How many \"Hamlet\" roles have been listed in all film credits through history?" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": null, 753 | "metadata": { 754 | "collapsed": true 755 | }, 756 | "outputs": [], 757 | "source": [] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": null, 762 | "metadata": { 763 | "collapsed": true 764 | }, 765 | "outputs": [], 766 | "source": [] 767 | }, 768 | { 769 | "cell_type": "markdown", 770 | "metadata": { 771 | "collapsed": true 772 | }, 773 | "source": [ 774 | "### How many people have played an \"Ophelia\"?" 775 | ] 776 | }, 777 | { 778 | "cell_type": "code", 779 | "execution_count": null, 780 | "metadata": { 781 | "collapsed": true 782 | }, 783 | "outputs": [], 784 | "source": [] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": null, 789 | "metadata": { 790 | "collapsed": true 791 | }, 792 | "outputs": [], 793 | "source": [] 794 | }, 795 | { 796 | "cell_type": "markdown", 797 | "metadata": { 798 | "collapsed": true 799 | }, 800 | "source": [ 801 | "### How many people have played a role called \"The Dude\"?" 
802 | ] 803 | }, 804 | { 805 | "cell_type": "code", 806 | "execution_count": null, 807 | "metadata": { 808 | "collapsed": true 809 | }, 810 | "outputs": [], 811 | "source": [] 812 | }, 813 | { 814 | "cell_type": "code", 815 | "execution_count": null, 816 | "metadata": { 817 | "collapsed": true 818 | }, 819 | "outputs": [], 820 | "source": [] 821 | }, 822 | { 823 | "cell_type": "markdown", 824 | "metadata": { 825 | "collapsed": true 826 | }, 827 | "source": [ 828 | "### How many people have played a role called \"The Stranger\"?" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": null, 834 | "metadata": { 835 | "collapsed": true 836 | }, 837 | "outputs": [], 838 | "source": [] 839 | }, 840 | { 841 | "cell_type": "code", 842 | "execution_count": null, 843 | "metadata": { 844 | "collapsed": true 845 | }, 846 | "outputs": [], 847 | "source": [] 848 | }, 849 | { 850 | "cell_type": "markdown", 851 | "metadata": { 852 | "collapsed": true 853 | }, 854 | "source": [ 855 | "### How many roles has Sidney Poitier played throughout his career?" 856 | ] 857 | }, 858 | { 859 | "cell_type": "code", 860 | "execution_count": null, 861 | "metadata": { 862 | "collapsed": true 863 | }, 864 | "outputs": [], 865 | "source": [] 866 | }, 867 | { 868 | "cell_type": "code", 869 | "execution_count": null, 870 | "metadata": { 871 | "collapsed": true 872 | }, 873 | "outputs": [], 874 | "source": [] 875 | }, 876 | { 877 | "cell_type": "markdown", 878 | "metadata": { 879 | "collapsed": true 880 | }, 881 | "source": [ 882 | "### How many roles has Judi Dench played?" 
883 | ] 884 | }, 885 | { 886 | "cell_type": "code", 887 | "execution_count": null, 888 | "metadata": { 889 | "collapsed": true 890 | }, 891 | "outputs": [], 892 | "source": [] 893 | }, 894 | { 895 | "cell_type": "code", 896 | "execution_count": null, 897 | "metadata": { 898 | "collapsed": true 899 | }, 900 | "outputs": [], 901 | "source": [] 902 | }, 903 | { 904 | "cell_type": "markdown", 905 | "metadata": { 906 | "collapsed": true 907 | }, 908 | "source": [ 909 | "### List the supporting roles (having n=2) played by Cary Grant in the 1940s, in order by year." 910 | ] 911 | }, 912 | { 913 | "cell_type": "code", 914 | "execution_count": null, 915 | "metadata": { 916 | "collapsed": true 917 | }, 918 | "outputs": [], 919 | "source": [] 920 | }, 921 | { 922 | "cell_type": "code", 923 | "execution_count": null, 924 | "metadata": { 925 | "collapsed": true 926 | }, 927 | "outputs": [], 928 | "source": [] 929 | }, 930 | { 931 | "cell_type": "markdown", 932 | "metadata": { 933 | "collapsed": true 934 | }, 935 | "source": [ 936 | "### List the leading roles that Cary Grant played in the 1940s in order by year." 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": null, 942 | "metadata": { 943 | "collapsed": true 944 | }, 945 | "outputs": [], 946 | "source": [] 947 | }, 948 | { 949 | "cell_type": "code", 950 | "execution_count": null, 951 | "metadata": { 952 | "collapsed": true 953 | }, 954 | "outputs": [], 955 | "source": [] 956 | }, 957 | { 958 | "cell_type": "markdown", 959 | "metadata": { 960 | "collapsed": true 961 | }, 962 | "source": [ 963 | "### How many roles were available for actors in the 1950s?" 
964 | ] 965 | }, 966 | { 967 | "cell_type": "code", 968 | "execution_count": null, 969 | "metadata": { 970 | "collapsed": true 971 | }, 972 | "outputs": [], 973 | "source": [] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": null, 978 | "metadata": { 979 | "collapsed": true 980 | }, 981 | "outputs": [], 982 | "source": [] 983 | }, 984 | { 985 | "cell_type": "markdown", 986 | "metadata": { 987 | "collapsed": true 988 | }, 989 | "source": [ 990 | "### How many roles were available for actresses in the 1950s?" 991 | ] 992 | }, 993 | { 994 | "cell_type": "code", 995 | "execution_count": null, 996 | "metadata": { 997 | "collapsed": true 998 | }, 999 | "outputs": [], 1000 | "source": [] 1001 | }, 1002 | { 1003 | "cell_type": "code", 1004 | "execution_count": null, 1005 | "metadata": { 1006 | "collapsed": true 1007 | }, 1008 | "outputs": [], 1009 | "source": [] 1010 | }, 1011 | { 1012 | "cell_type": "markdown", 1013 | "metadata": { 1014 | "collapsed": true 1015 | }, 1016 | "source": [ 1017 | "### How many leading roles (n=1) were available from the beginning of film history through 1980?" 1018 | ] 1019 | }, 1020 | { 1021 | "cell_type": "code", 1022 | "execution_count": null, 1023 | "metadata": { 1024 | "collapsed": true 1025 | }, 1026 | "outputs": [], 1027 | "source": [] 1028 | }, 1029 | { 1030 | "cell_type": "code", 1031 | "execution_count": null, 1032 | "metadata": { 1033 | "collapsed": true 1034 | }, 1035 | "outputs": [], 1036 | "source": [] 1037 | }, 1038 | { 1039 | "cell_type": "markdown", 1040 | "metadata": { 1041 | "collapsed": true 1042 | }, 1043 | "source": [ 1044 | "### How many non-leading roles were available from the beginning of film history through 1980?" 
1045 | ] 1046 | }, 1047 | { 1048 | "cell_type": "code", 1049 | "execution_count": null, 1050 | "metadata": { 1051 | "collapsed": true 1052 | }, 1053 | "outputs": [], 1054 | "source": [] 1055 | }, 1056 | { 1057 | "cell_type": "code", 1058 | "execution_count": null, 1059 | "metadata": { 1060 | "collapsed": true 1061 | }, 1062 | "outputs": [], 1063 | "source": [] 1064 | }, 1065 | { 1066 | "cell_type": "markdown", 1067 | "metadata": { 1068 | "collapsed": true 1069 | }, 1070 | "source": [ 1071 | "### How many roles through 1980 were minor enough that they did not warrant a numeric \"n\" rank?" 1072 | ] 1073 | }, 1074 | { 1075 | "cell_type": "code", 1076 | "execution_count": null, 1077 | "metadata": { 1078 | "collapsed": true 1079 | }, 1080 | "outputs": [], 1081 | "source": [] 1082 | }, 1083 | { 1084 | "cell_type": "code", 1085 | "execution_count": null, 1086 | "metadata": { 1087 | "collapsed": true 1088 | }, 1089 | "outputs": [], 1090 | "source": [] 1091 | } 1092 | ], 1093 | "metadata": { 1094 | "kernelspec": { 1095 | "display_name": "Python 3", 1096 | "language": "python", 1097 | "name": "python3" 1098 | }, 1099 | "language_info": { 1100 | "codemirror_mode": { 1101 | "name": "ipython", 1102 | "version": 3 1103 | }, 1104 | "file_extension": ".py", 1105 | "mimetype": "text/x-python", 1106 | "name": "python", 1107 | "nbconvert_exporter": "python", 1108 | "pygments_lexer": "ipython3", 1109 | "version": "3.6.8" 1110 | } 1111 | }, 1112 | "nbformat": 4, 1113 | "nbformat_minor": 1 1114 | } 1115 | -------------------------------------------------------------------------------- /Exercises-2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | 
{ 19 | "data": { 20 | "text/html": [ 21 | "" 57 | ], 58 | "text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "execution_count": 2, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "from IPython.core.display import HTML\n", 69 | "css = open('style-table.css').read() + open('style-notebook.css').read()\n", 70 | "HTML(''.format(css))" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 95 | "\n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | "
titleyear
0The Rising Son1990
1The Thousand Plane Raid1969
2The Spider and the Fly1949
3Country2000
4The Golden Note2017
\n", 131 | "
" 132 | ], 133 | "text/plain": [ 134 | " title year\n", 135 | "0 The Rising Son 1990\n", 136 | "1 The Thousand Plane Raid 1969\n", 137 | "2 The Spider and the Fly 1949\n", 138 | "3 Country 2000\n", 139 | "4 The Golden Note 2017" 140 | ] 141 | }, 142 | "execution_count": 3, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "titles = pd.read_csv('data/titles.csv')\n", 149 | "titles.head()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 4, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/html": [ 160 | "
\n", 161 | "\n", 174 | "\n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | "
titleyearnametypecharactern
0Closet Monster2015Buffy #1actorBuffy 431.0
1Suuri illusioni1985Homo $actorGuests22.0
2Battle of the Sexes2017$hutteractorBobby Riggs Fan10.0
3Secret in Their Eyes2015$hutteractor2002 Dodger FanNaN
4Steve Jobs2015$hutteractor1988 Opera House PatronNaN
\n", 234 | "
" 235 | ], 236 | "text/plain": [ 237 | " title year name type character n\n", 238 | "0 Closet Monster 2015 Buffy #1 actor Buffy 4 31.0\n", 239 | "1 Suuri illusioni 1985 Homo $ actor Guests 22.0\n", 240 | "2 Battle of the Sexes 2017 $hutter actor Bobby Riggs Fan 10.0\n", 241 | "3 Secret in Their Eyes 2015 $hutter actor 2002 Dodger Fan NaN\n", 242 | "4 Steve Jobs 2015 $hutter actor 1988 Opera House Patron NaN" 243 | ] 244 | }, 245 | "execution_count": 4, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "cast = pd.read_csv('data/cast.csv')\n", 252 | "cast.head()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": { 258 | "collapsed": true 259 | }, 260 | "source": [ 261 | "### What are the ten most common movie names of all time?" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "collapsed": true 269 | }, 270 | "outputs": [], 271 | "source": [] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": { 277 | "collapsed": true 278 | }, 279 | "outputs": [], 280 | "source": [] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": { 285 | "collapsed": true 286 | }, 287 | "source": [ 288 | "### Which three years of the 1930s saw the most films released?" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": { 295 | "collapsed": true 296 | }, 297 | "outputs": [], 298 | "source": [] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": { 304 | "collapsed": true 305 | }, 306 | "outputs": [], 307 | "source": [] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": { 312 | "collapsed": true 313 | }, 314 | "source": [ 315 | "### Plot the number of films that have been released each decade over the history of cinema." 
316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": { 322 | "collapsed": true 323 | }, 324 | "outputs": [], 325 | "source": [] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": { 331 | "collapsed": true 332 | }, 333 | "outputs": [], 334 | "source": [] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": { 339 | "collapsed": true 340 | }, 341 | "source": [ 342 | "### Plot the number of \"Hamlet\" films made each decade." 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": { 349 | "collapsed": true 350 | }, 351 | "outputs": [], 352 | "source": [] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": { 358 | "collapsed": true 359 | }, 360 | "outputs": [], 361 | "source": [] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": { 366 | "collapsed": true 367 | }, 368 | "source": [ 369 | "### Plot the number of \"Rustler\" characters in each decade of the history of film." 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": { 376 | "collapsed": true 377 | }, 378 | "outputs": [], 379 | "source": [] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": { 385 | "collapsed": true 386 | }, 387 | "outputs": [], 388 | "source": [] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": { 393 | "collapsed": true 394 | }, 395 | "source": [ 396 | "### Plot the number of \"Hamlet\" characters each decade." 
397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": { 403 | "collapsed": true 404 | }, 405 | "outputs": [], 406 | "source": [] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": { 412 | "collapsed": true 413 | }, 414 | "outputs": [], 415 | "source": [] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": { 420 | "collapsed": true 421 | }, 422 | "source": [ 423 | "### What are the 11 most common character names in movie history?" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": true 431 | }, 432 | "outputs": [], 433 | "source": [] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": { 439 | "collapsed": true 440 | }, 441 | "outputs": [], 442 | "source": [] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": { 447 | "collapsed": true 448 | }, 449 | "source": [ 450 | "### Who are the 10 people most often credited as \"Herself\" in film history?" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": { 457 | "collapsed": true 458 | }, 459 | "outputs": [], 460 | "source": [] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": { 466 | "collapsed": true 467 | }, 468 | "outputs": [], 469 | "source": [] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": { 474 | "collapsed": true 475 | }, 476 | "source": [ 477 | "### Who are the 10 people most often credited as \"Himself\" in film history?" 
478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": { 484 | "collapsed": true 485 | }, 486 | "outputs": [], 487 | "source": [] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": null, 492 | "metadata": { 493 | "collapsed": true 494 | }, 495 | "outputs": [], 496 | "source": [] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": { 501 | "collapsed": true 502 | }, 503 | "source": [ 504 | "### Which actors or actresses appeared in the most movies in the year 1945?" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": null, 510 | "metadata": { 511 | "collapsed": true 512 | }, 513 | "outputs": [], 514 | "source": [] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "metadata": { 520 | "collapsed": true 521 | }, 522 | "outputs": [], 523 | "source": [] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": { 528 | "collapsed": true 529 | }, 530 | "source": [ 531 | "### Which actors or actresses appeared in the most movies in the year 1985?" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "metadata": { 538 | "collapsed": true 539 | }, 540 | "outputs": [], 541 | "source": [] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": null, 546 | "metadata": { 547 | "collapsed": true 548 | }, 549 | "outputs": [], 550 | "source": [] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "metadata": { 555 | "collapsed": true 556 | }, 557 | "source": [ 558 | "### Plot how many roles Mammootty has played in each year of his career." 
559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": { 565 | "collapsed": true 566 | }, 567 | "outputs": [], 568 | "source": [] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": null, 573 | "metadata": { 574 | "collapsed": true 575 | }, 576 | "outputs": [], 577 | "source": [] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "metadata": { 582 | "collapsed": true 583 | }, 584 | "source": [ 585 | "### What are the 10 most frequent roles that start with the phrase \"Patron in\"?" 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": null, 591 | "metadata": { 592 | "collapsed": true 593 | }, 594 | "outputs": [], 595 | "source": [] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": null, 600 | "metadata": { 601 | "collapsed": true 602 | }, 603 | "outputs": [], 604 | "source": [] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": { 609 | "collapsed": true 610 | }, 611 | "source": [ 612 | "### What are the 10 most frequent roles that start with the word \"Science\"?" 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": { 619 | "collapsed": true 620 | }, 621 | "outputs": [], 622 | "source": [] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": null, 627 | "metadata": { 628 | "collapsed": true 629 | }, 630 | "outputs": [], 631 | "source": [] 632 | }, 633 | { 634 | "cell_type": "markdown", 635 | "metadata": { 636 | "collapsed": true 637 | }, 638 | "source": [ 639 | "### Plot the n-values of the roles that Judi Dench has played over her career." 
640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": null, 645 | "metadata": { 646 | "collapsed": true 647 | }, 648 | "outputs": [], 649 | "source": [] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "metadata": { 655 | "collapsed": true 656 | }, 657 | "outputs": [], 658 | "source": [] 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "metadata": { 663 | "collapsed": true 664 | }, 665 | "source": [ 666 | "### Plot the n-values of Cary Grant's roles through his career." 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": { 673 | "collapsed": true 674 | }, 675 | "outputs": [], 676 | "source": [] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": { 682 | "collapsed": true 683 | }, 684 | "outputs": [], 685 | "source": [] 686 | }, 687 | { 688 | "cell_type": "markdown", 689 | "metadata": { 690 | "collapsed": true 691 | }, 692 | "source": [ 693 | "### Plot the n-values of the roles that Sidney Poitier has played over the years." 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": null, 699 | "metadata": { 700 | "collapsed": true 701 | }, 702 | "outputs": [], 703 | "source": [] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "execution_count": null, 708 | "metadata": { 709 | "collapsed": true 710 | }, 711 | "outputs": [], 712 | "source": [] 713 | }, 714 | { 715 | "cell_type": "markdown", 716 | "metadata": { 717 | "collapsed": true 718 | }, 719 | "source": [ 720 | "### How many leading (n=1) roles were available to actors, and how many to actresses, in the 1950s?" 
721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": null, 726 | "metadata": { 727 | "collapsed": true 728 | }, 729 | "outputs": [], 730 | "source": [] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": null, 735 | "metadata": { 736 | "collapsed": true 737 | }, 738 | "outputs": [], 739 | "source": [] 740 | }, 741 | { 742 | "cell_type": "markdown", 743 | "metadata": { 744 | "collapsed": true 745 | }, 746 | "source": [ 747 | "### How many supporting (n=2) roles were available to actors, and how many to actresses, in the 1950s?" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": null, 753 | "metadata": { 754 | "collapsed": true 755 | }, 756 | "outputs": [], 757 | "source": [] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": null, 762 | "metadata": { 763 | "collapsed": true 764 | }, 765 | "outputs": [], 766 | "source": [] 767 | } 768 | ], 769 | "metadata": { 770 | "kernelspec": { 771 | "display_name": "Python 3", 772 | "language": "python", 773 | "name": "python3" 774 | }, 775 | "language_info": { 776 | "codemirror_mode": { 777 | "name": "ipython", 778 | "version": 3 779 | }, 780 | "file_extension": ".py", 781 | "mimetype": "text/x-python", 782 | "name": "python", 783 | "nbconvert_exporter": "python", 784 | "pygments_lexer": "ipython3", 785 | "version": "3.6.8" 786 | } 787 | }, 788 | "nbformat": 4, 789 | "nbformat_minor": 1 790 | } 791 | -------------------------------------------------------------------------------- /Exercises-3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/html": [ 21 | "" 57 | ], 58 | 
"text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "execution_count": 2, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "from IPython.core.display import HTML\n", 69 | "css = open('style-table.css').read() + open('style-notebook.css').read()\n", 70 | "HTML(''.format(css))" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 95 | "\n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | "
titleyear
0The Rising Son1990
1The Thousand Plane Raid1969
2The Spider and the Fly1949
3Country2000
4The Golden Note2017
\n", 131 | "
" 132 | ], 133 | "text/plain": [ 134 | " title year\n", 135 | "0 The Rising Son 1990\n", 136 | "1 The Thousand Plane Raid 1969\n", 137 | "2 The Spider and the Fly 1949\n", 138 | "3 Country 2000\n", 139 | "4 The Golden Note 2017" 140 | ] 141 | }, 142 | "execution_count": 3, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "titles = pd.read_csv('data/titles.csv')\n", 149 | "titles.head()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 4, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/html": [ 160 | "
\n", 161 | "\n", 174 | "\n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | "
titleyearnametypecharactern
0Closet Monster2015Buffy #1actorBuffy 431.0
1Suuri illusioni1985Homo $actorGuests22.0
2Battle of the Sexes2017$hutteractorBobby Riggs Fan10.0
3Secret in Their Eyes2015$hutteractor2002 Dodger FanNaN
4Steve Jobs2015$hutteractor1988 Opera House PatronNaN
\n", 234 | "
" 235 | ], 236 | "text/plain": [ 237 | " title year name type character n\n", 238 | "0 Closet Monster 2015 Buffy #1 actor Buffy 4 31.0\n", 239 | "1 Suuri illusioni 1985 Homo $ actor Guests 22.0\n", 240 | "2 Battle of the Sexes 2017 $hutter actor Bobby Riggs Fan 10.0\n", 241 | "3 Secret in Their Eyes 2015 $hutter actor 2002 Dodger Fan NaN\n", 242 | "4 Steve Jobs 2015 $hutter actor 1988 Opera House Patron NaN" 243 | ] 244 | }, 245 | "execution_count": 4, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "cast = pd.read_csv('data/cast.csv')\n", 252 | "cast.head()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": { 265 | "collapsed": true 266 | }, 267 | "source": [ 268 | "### Using groupby(), plot the number of films that have been released each decade in the history of cinema." 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": true 276 | }, 277 | "outputs": [], 278 | "source": [] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": { 284 | "collapsed": true 285 | }, 286 | "outputs": [], 287 | "source": [] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": { 292 | "collapsed": true 293 | }, 294 | "source": [ 295 | "### Use groupby() to plot the number of \"Hamlet\" films made each decade." 
296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "collapsed": true 303 | }, 304 | "outputs": [], 305 | "source": [] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": { 311 | "collapsed": true 312 | }, 313 | "outputs": [], 314 | "source": [] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": { 319 | "collapsed": true 320 | }, 321 | "source": [ 322 | "### How many leading (n=1) roles were available to actors, and how many to actresses, in each year of the 1950s?" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": { 329 | "collapsed": true 330 | }, 331 | "outputs": [], 332 | "source": [] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "collapsed": true 339 | }, 340 | "outputs": [], 341 | "source": [] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "source": [ 349 | "### In the 1950s decade taken as a whole, how many total roles were available to actors, and how many to actresses, for each \"n\" number 1 through 5?" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": { 356 | "collapsed": true 357 | }, 358 | "outputs": [], 359 | "source": [] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": { 365 | "collapsed": true 366 | }, 367 | "outputs": [], 368 | "source": [] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": { 373 | "collapsed": true 374 | }, 375 | "source": [ 376 | "### Use groupby() to determine how many roles are listed for each movie named _The Pink Panther_." 
377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "collapsed": true 384 | }, 385 | "outputs": [], 386 | "source": [] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": { 392 | "collapsed": true 393 | }, 394 | "outputs": [], 395 | "source": [] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": { 400 | "collapsed": true 401 | }, 402 | "source": [ 403 | "### List, in order by year, each of the films in which Frank Oz has played more than 1 role." 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": { 410 | "collapsed": true 411 | }, 412 | "outputs": [], 413 | "source": [] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": { 419 | "collapsed": true 420 | }, 421 | "outputs": [], 422 | "source": [] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": { 427 | "collapsed": true 428 | }, 429 | "source": [ 430 | "### List each of the characters that Frank Oz has portrayed at least twice." 
431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": { 437 | "collapsed": true 438 | }, 439 | "outputs": [], 440 | "source": [] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": { 446 | "collapsed": true 447 | }, 448 | "outputs": [], 449 | "source": [] 450 | } 451 | ], 452 | "metadata": { 453 | "kernelspec": { 454 | "display_name": "Python 3", 455 | "language": "python", 456 | "name": "python3" 457 | }, 458 | "language_info": { 459 | "codemirror_mode": { 460 | "name": "ipython", 461 | "version": 3 462 | }, 463 | "file_extension": ".py", 464 | "mimetype": "text/x-python", 465 | "name": "python", 466 | "nbconvert_exporter": "python", 467 | "pygments_lexer": "ipython3", 468 | "version": "3.6.8" 469 | } 470 | }, 471 | "nbformat": 4, 472 | "nbformat_minor": 1 473 | } 474 | -------------------------------------------------------------------------------- /Exercises-4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/html": [ 21 | "" 57 | ], 58 | "text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "execution_count": 2, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "from IPython.core.display import HTML\n", 69 | "css = open('style-table.css').read() + open('style-notebook.css').read()\n", 70 | "HTML(''.format(css))" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 95 | "\n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | "
titleyear
0The Rising Son1990
1The Thousand Plane Raid1969
2The Spider and the Fly1949
3Country2000
4The Golden Note2017
\n", 131 | "
" 132 | ], 133 | "text/plain": [ 134 | " title year\n", 135 | "0 The Rising Son 1990\n", 136 | "1 The Thousand Plane Raid 1969\n", 137 | "2 The Spider and the Fly 1949\n", 138 | "3 Country 2000\n", 139 | "4 The Golden Note 2017" 140 | ] 141 | }, 142 | "execution_count": 3, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "titles = pd.read_csv('data/titles.csv')\n", 149 | "titles.head()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 4, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/html": [ 160 | "
\n", 161 | "\n", 174 | "\n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | "
titleyearnametypecharactern
0Closet Monster2015Buffy #1actorBuffy 431.0
1Suuri illusioni1985Homo $actorGuests22.0
2Battle of the Sexes2017$hutteractorBobby Riggs Fan10.0
3Secret in Their Eyes2015$hutteractor2002 Dodger FanNaN
4Steve Jobs2015$hutteractor1988 Opera House PatronNaN
\n", 234 | "
" 235 | ], 236 | "text/plain": [ 237 | " title year name type character n\n", 238 | "0 Closet Monster 2015 Buffy #1 actor Buffy 4 31.0\n", 239 | "1 Suuri illusioni 1985 Homo $ actor Guests 22.0\n", 240 | "2 Battle of the Sexes 2017 $hutter actor Bobby Riggs Fan 10.0\n", 241 | "3 Secret in Their Eyes 2015 $hutter actor 2002 Dodger Fan NaN\n", 242 | "4 Steve Jobs 2015 $hutter actor 1988 Opera House Patron NaN" 243 | ] 244 | }, 245 | "execution_count": 4, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "cast = pd.read_csv('data/cast.csv')\n", 252 | "cast.head()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": { 265 | "collapsed": true 266 | }, 267 | "source": [ 268 | "### Define a year as a \"Superman year\" whose films feature more Superman characters than Batman. How many years in film history have been Superman years?" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": true 276 | }, 277 | "outputs": [], 278 | "source": [] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": { 284 | "collapsed": true 285 | }, 286 | "outputs": [], 287 | "source": [] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": { 292 | "collapsed": true 293 | }, 294 | "source": [ 295 | "### How many years have been \"Batman years\", with more Batman characters than Superman characters?" 
296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "collapsed": true 303 | }, 304 | "outputs": [], 305 | "source": [] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": { 311 | "collapsed": true 312 | }, 313 | "outputs": [], 314 | "source": [] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": { 319 | "collapsed": true 320 | }, 321 | "source": [ 322 | "### Plot the number of actor roles each year and the number of actress roles each year over the history of film." 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": { 329 | "collapsed": true 330 | }, 331 | "outputs": [], 332 | "source": [] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "collapsed": true 339 | }, 340 | "outputs": [], 341 | "source": [] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "source": [ 349 | "### Plot the number of actor roles each year and the number of actress roles each year, but this time as a kind='area' plot." 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": { 356 | "collapsed": true 357 | }, 358 | "outputs": [], 359 | "source": [] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": { 365 | "collapsed": true 366 | }, 367 | "outputs": [], 368 | "source": [] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": { 373 | "collapsed": true 374 | }, 375 | "source": [ 376 | "### Plot the difference between the number of actor roles each year and the number of actress roles each year over the history of film." 
377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "collapsed": true 384 | }, 385 | "outputs": [], 386 | "source": [] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": { 392 | "collapsed": true 393 | }, 394 | "outputs": [], 395 | "source": [] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": { 400 | "collapsed": true 401 | }, 402 | "source": [ 403 | "### Plot the fraction of roles that have been 'actor' roles each year in the history of film." 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": { 410 | "collapsed": true 411 | }, 412 | "outputs": [], 413 | "source": [] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": { 419 | "collapsed": true 420 | }, 421 | "outputs": [], 422 | "source": [] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": { 427 | "collapsed": true 428 | }, 429 | "source": [ 430 | "### Plot the fraction of supporting (n=2) roles that have been 'actor' roles each year in the history of film." 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": { 437 | "collapsed": true 438 | }, 439 | "outputs": [], 440 | "source": [] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": { 446 | "collapsed": true 447 | }, 448 | "outputs": [], 449 | "source": [] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": { 454 | "collapsed": true 455 | }, 456 | "source": [ 457 | "### Build a plot with a line for each rank n=1 through n=3, where the line shows what fraction of that rank's roles were 'actor' roles for each year in the history of film." 
458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "metadata": { 464 | "collapsed": true 465 | }, 466 | "outputs": [], 467 | "source": [] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": { 473 | "collapsed": true 474 | }, 475 | "outputs": [], 476 | "source": [] 477 | } 478 | ], 479 | "metadata": { 480 | "kernelspec": { 481 | "display_name": "Python 3", 482 | "language": "python", 483 | "name": "python3" 484 | }, 485 | "language_info": { 486 | "codemirror_mode": { 487 | "name": "ipython", 488 | "version": 3 489 | }, 490 | "file_extension": ".py", 491 | "mimetype": "text/x-python", 492 | "name": "python", 493 | "nbconvert_exporter": "python", 494 | "pygments_lexer": "ipython3", 495 | "version": "3.6.8" 496 | } 497 | }, 498 | "nbformat": 4, 499 | "nbformat_minor": 1 500 | } 501 | -------------------------------------------------------------------------------- /Exercises-5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/html": [ 21 | "" 57 | ], 58 | "text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "execution_count": 2, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "from IPython.core.display import HTML\n", 69 | "css = open('style-table.css').read() + open('style-notebook.css').read()\n", 70 | "HTML(''.format(css))" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 95 | "\n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | "
titleyearnametypecharactern
0Closet Monster2015Buffy #1actorBuffy 431.0
1Suuri illusioni1985Homo $actorGuests22.0
2Battle of the Sexes2017$hutteractorBobby Riggs Fan10.0
3Secret in Their Eyes2015$hutteractor2002 Dodger FanNaN
4Steve Jobs2015$hutteractor1988 Opera House PatronNaN
\n", 155 | "
" 156 | ], 157 | "text/plain": [ 158 | " title year name type character n\n", 159 | "0 Closet Monster 2015 Buffy #1 actor Buffy 4 31.0\n", 160 | "1 Suuri illusioni 1985 Homo $ actor Guests 22.0\n", 161 | "2 Battle of the Sexes 2017 $hutter actor Bobby Riggs Fan 10.0\n", 162 | "3 Secret in Their Eyes 2015 $hutter actor 2002 Dodger Fan NaN\n", 163 | "4 Steve Jobs 2015 $hutter actor 1988 Opera House Patron NaN" 164 | ] 165 | }, 166 | "execution_count": 3, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "cast = pd.read_csv('data/cast.csv')\n", 173 | "cast.head()" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 4, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/html": [ 184 | "
\n", 185 | "\n", 198 | "\n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | "
titleyearcountrydate
0#73, Shaanthi Nivaasa2007India2007-06-15
1#BKKY2016Cambodia2017-10-12
2#Beings2015Romania2015-01-29
3#Captured2017USA2017-09-05
4#Ewankosau saranghaeyo2015Philippines2015-01-21
\n", 246 | "
" 247 | ], 248 | "text/plain": [ 249 | " title year country date\n", 250 | "0 #73, Shaanthi Nivaasa 2007 India 2007-06-15\n", 251 | "1 #BKKY 2016 Cambodia 2017-10-12\n", 252 | "2 #Beings 2015 Romania 2015-01-29\n", 253 | "3 #Captured 2017 USA 2017-09-05\n", 254 | "4 #Ewankosau saranghaeyo 2015 Philippines 2015-01-21" 255 | ] 256 | }, 257 | "execution_count": 4, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "release_dates = pd.read_csv(\n", 264 | " 'data/release_dates.csv',\n", 265 | " parse_dates=['date'],\n", 266 | " infer_datetime_format=True,\n", 267 | ")\n", 268 | "release_dates.head()" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": true 276 | }, 277 | "outputs": [], 278 | "source": [] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": { 283 | "collapsed": true 284 | }, 285 | "source": [ 286 | "### Make a bar plot of the months in which movies with \"Christmas\" in their title tend to be released in the USA." 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "collapsed": true 294 | }, 295 | "outputs": [], 296 | "source": [] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "collapsed": true 303 | }, 304 | "outputs": [], 305 | "source": [] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": { 310 | "collapsed": true 311 | }, 312 | "source": [ 313 | "### Make a bar plot of the months in which movies whose titles start with \"The Hobbit\" are released in the USA." 
314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": { 320 | "collapsed": true 321 | }, 322 | "outputs": [], 323 | "source": [] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": { 329 | "collapsed": true 330 | }, 331 | "outputs": [], 332 | "source": [] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": { 337 | "collapsed": true 338 | }, 339 | "source": [ 340 | "### Make a bar plot of the day of the week on which movies with \"Romance\" in their title tend to be released in the USA." 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "collapsed": true 348 | }, 349 | "outputs": [], 350 | "source": [] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": { 356 | "collapsed": true 357 | }, 358 | "outputs": [], 359 | "source": [] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "source": [ 367 | "### Make a bar plot of the day of the week on which movies with \"Action\" in their title tend to be released in the USA." 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": { 374 | "collapsed": true 375 | }, 376 | "outputs": [], 377 | "source": [] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "collapsed": true 384 | }, 385 | "outputs": [], 386 | "source": [] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": { 391 | "collapsed": true 392 | }, 393 | "source": [ 394 | "### On which date was each Judi Dench movie from the 1990s released in the USA?" 
395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": { 401 | "collapsed": true 402 | }, 403 | "outputs": [], 404 | "source": [] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": { 410 | "collapsed": true 411 | }, 412 | "outputs": [], 413 | "source": [] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": { 418 | "collapsed": true 419 | }, 420 | "source": [ 421 | "### In which months do films with Judi Dench tend to be released in the USA?" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": { 428 | "collapsed": true 429 | }, 430 | "outputs": [], 431 | "source": [] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": { 437 | "collapsed": true 438 | }, 439 | "outputs": [], 440 | "source": [] 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "metadata": { 445 | "collapsed": true 446 | }, 447 | "source": [ 448 | "### In which months do films with Tom Cruise tend to be released in the USA?" 
449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": { 455 | "collapsed": true 456 | }, 457 | "outputs": [], 458 | "source": [] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "metadata": { 464 | "collapsed": true 465 | }, 466 | "outputs": [], 467 | "source": [] 468 | } 469 | ], 470 | "metadata": { 471 | "kernelspec": { 472 | "display_name": "Python 3", 473 | "language": "python", 474 | "name": "python3" 475 | }, 476 | "language_info": { 477 | "codemirror_mode": { 478 | "name": "ipython", 479 | "version": 3 480 | }, 481 | "file_extension": ".py", 482 | "mimetype": "text/x-python", 483 | "name": "python", 484 | "nbconvert_exporter": "python", 485 | "pygments_lexer": "ipython3", 486 | "version": "3.6.8" 487 | } 488 | }, 489 | "nbformat": 4, 490 | "nbformat_minor": 1 491 | } 492 | -------------------------------------------------------------------------------- /Exercises-6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%matplotlib inline\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/html": [ 21 | "" 57 | ], 58 | "text/plain": [ 59 | "" 60 | ] 61 | }, 62 | "execution_count": 2, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "from IPython.core.display import HTML\n", 69 | "css = open('style-table.css').read() + open('style-notebook.css').read()\n", 70 | "HTML(''.format(css))" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 95 | "\n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | "
Book titleNumber soldSales priceRoyalty paid
0The Bricklayer’s Bible82.990.55
1Swimrand21.990.35
2Pining For The Fisheries of Yore282.990.55
3The Duck Goes Here342.990.55
4The Tower Commission Report411.504.25
\n", 143 | "
" 144 | ], 145 | "text/plain": [ 146 | " Book title Number sold Sales price Royalty paid\n", 147 | "0 The Bricklayer’s Bible 8 2.99 0.55\n", 148 | "1 Swimrand 2 1.99 0.35\n", 149 | "2 Pining For The Fisheries of Yore 28 2.99 0.55\n", 150 | "3 The Duck Goes Here 34 2.99 0.55\n", 151 | "4 The Tower Commission Report 4 11.50 4.25" 152 | ] 153 | }, 154 | "execution_count": 3, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "sales1 = pd.read_csv('sales1.csv')\n", 161 | "sales1" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 4, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/html": [ 172 | "
\n", 173 | "\n", 186 | "\n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " 
\n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | "
TitleUnits soldList priceRoyalty
0
1Sales report for Q4
2E-Book Reader US Store
3Pining for the Fisheries of Yore803.514.98
4Swimrand12.990.14
5The Bricklayer's Bible173.55.15
6The Duck Goes Here342.995.78
7The Tower Commission Report49.56.2
8US royalties (USD)32.25
9
10
11Sales report for Q4
12E-Book Reader UK Store
13Pining for the Fisheries of Yore472.9911.98
14The Bricklayer's Bible172.993.5
15The Tower Commission Report46.54.8
16UK royalties (GBP)20.28
17
18
19Sales report for Q4
20E-Book Reader France Store
21Swimrand81.990.88
22The Duck Goes Here121.991.5
23France royalties (EUR)2.38
\n", 367 | "
" 368 | ], 369 | "text/plain": [ 370 | " Title Units sold List price Royalty\n", 371 | "0 \n", 372 | "1 Sales report for Q4 \n", 373 | "2 E-Book Reader US Store \n", 374 | "3 Pining for the Fisheries of Yore 80 3.5 14.98\n", 375 | "4 Swimrand 1 2.99 0.14\n", 376 | "5 The Bricklayer's Bible 17 3.5 5.15\n", 377 | "6 The Duck Goes Here 34 2.99 5.78\n", 378 | "7 The Tower Commission Report 4 9.5 6.2\n", 379 | "8 US royalties (USD) 32.25\n", 380 | "9 \n", 381 | "10 \n", 382 | "11 Sales report for Q4 \n", 383 | "12 E-Book Reader UK Store \n", 384 | "13 Pining for the Fisheries of Yore 47 2.99 11.98\n", 385 | "14 The Bricklayer's Bible 17 2.99 3.5\n", 386 | "15 The Tower Commission Report 4 6.5 4.8\n", 387 | "16 UK royalties (GBP) 20.28\n", 388 | "17 \n", 389 | "18 \n", 390 | "19 Sales report for Q4 \n", 391 | "20 E-Book Reader France Store \n", 392 | "21 Swimrand 8 1.99 0.88\n", 393 | "22 The Duck Goes Here 12 1.99 1.5\n", 394 | "23 France royalties (EUR) 2.38" 395 | ] 396 | }, 397 | "execution_count": 4, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "sales2 = pd.read_csv('sales2.csv')\n", 404 | "sales2.fillna('')" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": { 410 | "collapsed": true 411 | }, 412 | "source": [ 413 | "### Challenge: first combine these sales together into a single dataframe, then compute how much money consumers spent on each book in each currency." 
414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "metadata": { 420 | "collapsed": true 421 | }, 422 | "outputs": [], 423 | "source": [] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": { 429 | "collapsed": true 430 | }, 431 | "outputs": [], 432 | "source": [] 433 | } 434 | ], 435 | "metadata": { 436 | "kernelspec": { 437 | "display_name": "Python 3", 438 | "language": "python", 439 | "name": "python3" 440 | }, 441 | "language_info": { 442 | "codemirror_mode": { 443 | "name": "ipython", 444 | "version": 3 445 | }, 446 | "file_extension": ".py", 447 | "mimetype": "text/x-python", 448 | "name": "python", 449 | "nbconvert_exporter": "python", 450 | "pygments_lexer": "ipython3", 451 | "version": "3.6.8" 452 | } 453 | }, 454 | "nbformat": 4, 455 | "nbformat_minor": 1 456 | } 457 | -------------------------------------------------------------------------------- /FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: brandon-rhodes 2 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright © 2015 Brandon Rhodes and available under the MIT license: 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Welcome to Brandon’s Pandas Tutorial 3 | 4 | The first instance of this tutorial was delivered at PyCon 2015 in 5 | Montréal, but I hope that many other people will be able to benefit from 6 | it over the next few years — both on occasions on which I myself get to 7 | deliver it, and also when other instructors are able to do so. 8 | 9 | If you want to follow along with the tutorial at home, here is the 10 | YouTube recording of the 3-hour tutorial at PyCon itself: 11 | 12 | [![Watch the video tutorial on YouTube](youtube.png)](http://www.youtube.com/watch?v=5JnMutdy6Fw "Pandas From The Ground Up - PyCon 2015") 13 | 14 | https://www.youtube.com/watch?v=5JnMutdy6Fw 15 | 16 | To make it useful to as many people as possible, I hereby release it 17 | under the MIT license (see the accompanying `LICENSE.txt` file) and I 18 | have tried to make sure that this repository contains all of the scripts 19 | needed to download and set up the data set that we used. 
20 | 21 | ## Quick Start 22 | 23 | If you have both `conda` and `git` on your system, run the following 24 | commands (otherwise, read the next section for more detailed instructions): 25 | 26 | $ conda install --yes jupyter matplotlib pandas 27 | $ git clone https://github.com/brandon-rhodes/pycon-pandas-tutorial.git 28 | $ cd pycon-pandas-tutorial 29 | $ build/BUILD.sh 30 | $ jupyter notebook 31 | 32 | ## Detailed Instructions 33 | 34 | You will need Pandas, the Jupyter Notebook, and Matplotlib installed 35 | before you can successfully run the tutorial notebooks. The [Anaconda 36 | Distribution](https://www.anaconda.com/download/) is a great way to get up 37 | and running quickly without having to install them each separately — 38 | running the `conda` command shown above will install all three. 39 | 40 | Note that having `git` is not necessary for getting the materials. 41 | Simply click the “Download ZIP” button over on the right-hand side of 42 | this repository’s front page at the following link, and its files will 43 | be delivered to you as a ZIP archive: 44 | 45 | https://github.com/brandon-rhodes/pycon-pandas-tutorial 46 | 47 | Once you have unpacked the ZIP file, download the following four 48 | [IMDB](https://www.imdb.com/) data files and place them in the 49 | tutorial’s `build` directory: 50 | 51 | * ftp://ftp.fu-berlin.de/misc/movies/database/frozendata/actors.list.gz 52 | * ftp://ftp.fu-berlin.de/misc/movies/database/frozendata/actresses.list.gz 53 | * ftp://ftp.fu-berlin.de/misc/movies/database/frozendata/genres.list.gz 54 | * ftp://ftp.fu-berlin.de/misc/movies/database/frozendata/release-dates.list.gz 55 | 56 | If the above links don’t work for you, try these alternate sources of the same files: 57 | 58 | * ftp://ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/frozendata/actors.list.gz 59 | * ftp://ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/frozendata/actresses.list.gz 60 | * ftp://ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/frozendata/genres.list.gz 61 | * 
ftp://ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/frozendata/release-dates.list.gz 62 | 63 | To convert these into the CSV files that the tutorial needs, run the 64 | `BUILD.py` script with either Python 2 or Python 3. It will create the 65 | three CSV files in the `data` directory that you need to run all of the 66 | tutorial examples. It should take about 5 minutes to run on a fast 67 | modern machine: 68 | 69 | $ python build/BUILD.py 70 | 71 | You can then start up the Jupyter Notebook and start looking at the 72 | notebooks: 73 | 74 | $ jupyter notebook 75 | 76 | I hope that the recording and the exercises in this repository prove 77 | useful if you are interested in learning more about Python and its data 78 | analysis capabilities! 79 | 80 | — [Brandon Rhodes](http://rhodesmill.org/brandon/) 81 | -------------------------------------------------------------------------------- /Solutions-3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import pandas as pd" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/html": [ 25 | "" 61 | ], 62 | "text/plain": [ 63 | "" 64 | ] 65 | }, 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "output_type": "execute_result" 69 | } 70 | ], 71 | "source": [ 72 | "from IPython.core.display import HTML\n", 73 | "css = open('style-table.css').read() + open('style-notebook.css').read()\n", 74 | "HTML(''.format(css))" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/html": [ 87 | "
\n", 88 | "\n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | "
titleyear
0Tomorrow Ends at Dawn2002
1Brothers of the West1937
2Nemo1984
3Pereezd2014
4Bad for Business2007
\n", 124 | "
" 125 | ], 126 | "text/plain": [ 127 | " title year\n", 128 | "0 Tomorrow Ends at Dawn 2002\n", 129 | "1 Brothers of the West 1937\n", 130 | "2 Nemo 1984\n", 131 | "3 Pereezd 2014\n", 132 | "4 Bad for Business 2007" 133 | ] 134 | }, 135 | "execution_count": 3, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "titles = pd.read_csv('data/titles.csv')\n", 142 | "titles.head()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 4, 148 | "metadata": { 149 | "collapsed": false 150 | }, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/html": [ 155 | "
\n", 156 | "\n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | "
titleyearnametypecharactern
0Suuri illusioni1985Homo $actorGuests22
1Gangsta Rap: The Glockumentary2007Too $hortactorHimselfNaN
2Menace II Society1993Too $hortactorLew-Loc27
3Porndogs: The Adventures of Sadie2009Too $hortactorBosco3
4Stop Pepper Palmer2014Too $hortactorHimselfNaN
\n", 216 | "
" 217 | ], 218 | "text/plain": [ 219 | " title year name type character n\n", 220 | "0 Suuri illusioni 1985 Homo $ actor Guests 22\n", 221 | "1 Gangsta Rap: The Glockumentary 2007 Too $hort actor Himself NaN\n", 222 | "2 Menace II Society 1993 Too $hort actor Lew-Loc 27\n", 223 | "3 Porndogs: The Adventures of Sadie 2009 Too $hort actor Bosco 3\n", 224 | "4 Stop Pepper Palmer 2014 Too $hort actor Himself NaN" 225 | ] 226 | }, 227 | "execution_count": 4, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "cast = pd.read_csv('data/cast.csv')\n", 234 | "cast.head()" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": { 241 | "collapsed": false 242 | }, 243 | "outputs": [], 244 | "source": [] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 5, 249 | "metadata": { 250 | "collapsed": false 251 | }, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "" 257 | ] 258 | }, 259 | "execution_count": 5, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | }, 263 | { 264 | "data": { 265 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX8AAAEdCAYAAADkeGc2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH0BJREFUeJzt3Xu0XGWd5vHvI5GLyIBp6XARgRnDQGwdBCX2TY6idP7o\nAVztQOzRAc04thkHbEeH4JppcVY3Ak43gz1D1vQ0dAAVyZJG7REDATl9sYUIEkFimmAbJAECRi7e\nJfDMH/s9SeV0VXJOXVK1dz2ftWrV3u/e9ey3zqnz1q7f3rWPbBMREePlBcPuQERE7HkZ/CMixlAG\n/4iIMZTBPyJiDGXwj4gYQxn8IyLG0IwGf0kHSfqcpG9LWidpoaS5klZLekDSLZIOaln/AkkbJK2X\ndGpL+4mS7ivLLm9p30fS9aX9DklH9vdpRkREq5nu+V8O3GT7OODVwHpgGbDa9jHAbWUeSQuAs4AF\nwCLgCkkqOcuBJbbnA/MlLSrtS4Ctpf0y4JKen1lERHS028Ff0oHAb9q+CsD2NttPA6cBV5fVrgbO\nKNOnA9fZftb2RuBBYKGkQ4EDbK8p613T8pjWrBuAU3p6VhERsUsz2fM/GnhC0l9I+oak/ytpf2Ce\n7S1lnS3AvDJ9GLCp5fGbgMPbtG8u7ZT7h6F6cwGeljS3mycUERG7N5PBfw5wAnCF7ROAH1NKPFNc\nXSMi14mIiKiJOTNYZxOwyfbXy/zngAuAxyQdYvuxUtJ5vCzfDBzR8viXlYzNZXp6+9RjXg48ImkO\ncKDtH7R2QlLeXCIiumBb7Rp3ewP+BjimTF8IXFpu55e2ZcDFZXoBsBbYm6pk9B1AZdmdwEJAwE3A\notK+FFhephcDn23TB8+kry3rXzib9Wd7S/7w8uvc9+Qnf0/ndxo7Z7LnD/CfgE9L2rsM5u8C9gJW\nSloCbATOLFtZJ2klsA7YBix16UEZ5FcA+1GdPbSqtF8JXCtpA7C1vAH06qg+ZCR/NPMHmZ385I9F\n/owGf9vfBF7XZtGbO6x/EXBRm/a7gVe1af855c0jIiIGr8nf8F2R/MbmDzI7+ckfi3ztqMiMNkl2\nu4MWERHRUaexs7F7/pImkt/M/Dr3PfnJH5X8xg7+ERHRWco+ERENNnZln4iI6Kyxg39d6m7JH63s\n5Cd/XPIbO/hHRERnqflHRDRYav4REbFdYwf/utTdkj9a2clP/rjkN3bwj4iIzlLzj4hosNT8IyJi\nu8YO/nWpuyV/tLKTn/xxyW/s4B8REZ2l5h8R0WCp+UdExHaNHfzrUndL/mhlJz/545Lf2ME/IiI6\nS80/IqLBUvOPiIjtGjv416XulvzRyk5+8sclv7GDf0REdJaaf0REFyTNevAcxhjWaeycs6c7EhHR\nHLMZ/0dr37WxZZ+61N2SP1rZyU9+7yYHmp6af0REdG1GNX9JG4FngOeAZ22fJGkucD1wJLARONP2\nU2X9C4B3l/XPtX1LaT8RWAHsC9xk+7zSvg9wDXACsBU4y/ZD0/qQmn9EjIyq5j+7ss8o1fxnuudv\nYML2a2yfVNqWAattHwPcVuaRtAA4C1gALAKukDS14eXAEtvzgfmSFpX2JcDW0n4ZcMmsn2FERMzY\nbMo+0985TgOuLtNXA2eU6dOB62w/a3sj8CCwUNKhwAG215T1rml5TGvWDcAps+hX+87WvG6Y/OFk\nJz/5vZscaPqervkbuFXSXZLeU9rm2d5SprcA88r0YcCmlsduAg5v0765tFPuHwawvQ14upSVIiJi\nAGZ6quev235U0sHAaknrWxfadjfnvA6S7cnkNzO/zn1PfvPzYWKg6f3q/4wGf9uPlvsnJN0InARs\nkXSI7cdKSefxsvpm4IiWh7+Mao9/c5me3j71mJcDj0iaAxxo+wfT+yFpBdXBZYCngLVTP4ipj0KZ\nz3zmM7+n5neYLPcTu5mvDLJ/ZfqcsqmNdGJ7lzfgRVS1eoD9g
a8CpwKXAueX9mXAxWV6AbAW2Bs4\nGvgOO84quhNYSHX84CZgUWlfCiwv04uBz7bph3fX12nrT8xm/dnekj+8/Dr3PfnNyQcMbnO7vUM7\nHkb/O213Jnv+84Abywk7c4BP275F0l3ASklLyrvLmWUr6yStBNYB24ClLj0og/wKYD+qUz1XlfYr\ngWslbaA61XPxDPoVERFdyrV9IiK6MC7n+UdERIM0dvCv+7nCyR9OdvKT37vJgabv6fP8IyKiQVLz\nj4joQmr+ERFRO40d/OteN0z+cLKTn/zeTQ40PTX/iIjoWmr+ERFdSM0/IiJqp7GDf93rhskfTnby\nk9+7yYGmp+YfERFdS80/IqILqflHRETtNHbwr3vdMPnDyU5+8ns3OdD01PwjIqJrqflHRHQhNf+I\niKidxg7+da8bJn842clPfu8mB5qemn9ERHQtNf+IiC6k5h8REbXT2MG/7nXD5A8nO/nJ793kQNNT\n84+IiK6l5h8R0YXU/CMionYaO/jXvW6Y/OFkJz/5vZscaHpq/hER0bXU/CMiupCaf0RE1M6MBn9J\ne0m6R9Jflfm5klZLekDSLZIOaln3AkkbJK2XdGpL+4mS7ivLLm9p30fS9aX9DklH9uOJ1b1umPzh\nZCc/+b2bHGj6nq75nwesY8dnnGXAatvHALeVeSQtAM4CFgCLgCskTX3cWA4ssT0fmC9pUWlfAmwt\n7ZcBl/T2lCIiYnd2W/OX9DJgBfBHwAdt/2tJ64GTbW+RdAgwaftYSRcAz9u+pDx2FXAh8BDwFdvH\nlfbFwITt3yvrfNT2nZLmAI/aPrhNP1Lzj4iRMQ41/8uADwPPt7TNs72lTG8B5pXpw4BNLettAg5v\n0765tFPuHwawvQ14WtLcGfQrIiK6NGdXCyX9NvC47Xs61Zlsu3oHHDxJK4CNZfYpYK3tybJsovRn\nav4Du1ne63zyh5Tf+lpMfvKHmb/DZLmfYOea/8S05ZVB9r9Mn1NW2UgntjvegIuo9sq/CzwK/Bi4\nFlgPHFLWORRYX6aXActaHr8KWAgcAny7pf3twPKWdV5fpucAT3Toi3fV1zbrT8xm/dnekj+8/Dr3\nPfnNyQcMbnO7vUM7Hkb/O213xuf5SzoZ+JCrmv+lVAdpL5G0DDjI9rJywPczwElU5ZxbgVfYtqQ7\ngXOBNcCXgE/aXiVpKfAq2+8rxwLOsL24zfbt1PwjYkTUvea/y7JPG1PP9GJgpaQlVB8rzgSwvU7S\nSqozg7YBS73j3WUp1YHj/YCbbK8q7VcC10raAGwF/snAHxER/dXYb/hKmnCphw2oP8kfUn6d+578\n5uR33vOfZEetf6dH9GXPf7b97+Vsn4iIaJjG7vlHRAxS3Wv+2fOPiBhDjR38O30vIfn1z69z35Pf\n/Py6XNtntmf7RETUwq6+fCq1r76MU2k5Nf+IaKRB1+RT84+IiNpp7OBf97ph8oeTnfzm5w+6Jl+X\nmn9jB/+IiOgsNf+IaKTU/MtWU/OPiIgpjR38616XTP5wspPf/PzU/CuNHfwjIqKz1PwjopFS8y9b\nTc0/IiKmNHbwr3tdMvnDyU5+8/NT8680dvCPiIjOUvOPiEZKzb9sNTX/iIiY0tjBv+51yeQPJzv5\nzc9Pzb/S2ME/IiI6S80/IhopNf+y1dT8IyJiSmMH/7rXJZM/nOzkNz8/Nf9KYwf/iIjoLDX/iGik\n1PzLVlPzj4iIKY0d/Otel0z+cLKT3/z81Pwruxz8Je0r6U5JayWtk/Tx0j5X0mpJD0i6RdJBLY+5\nQNIGSeslndrSfqKk+8qyy1va95F0fWm/Q9KR/XhiERHR2W5r/pJeZPsnkuYAfwd8CDgN+L7tSyWd\nD7zE9jJJC4DPAK8DDgduBebbtqQ1wPttr5F0E/BJ26skLQV+xfZSSWcBb7W9uE0/UvOPiBlLzb9s\ntduav+2flMm9gb2AJ6kG/
6tL+9XAGWX6dOA628/a3gg8CCyUdChwgO01Zb1rWh7TmnUDcMosnldE\nRHRht4O/pBdIWgtsAW63fT8wz/aWssoWYF6ZPgzY1PLwTVSfAKa3by7tlPuHAWxvA56WNLe7p7NT\nvyd6zUj+aObXue/JH35+av6VObtbwfbzwPGSDgRulvTGactdffwZPEkrgI1l9ilgre3Jsmyi9Gey\nLD9eEp2W9zqf/OHmZz7zM5nfYbLcT+xmvjIq+d3Ml+lzyqY20sGszvOX9N+AnwL/Hpiw/Vgp6dxu\n+1hJy0oHLi7rrwI+CjxU1jmutL8deIPt95V1LrR9Rzmu8Kjtg9tsOzX/iJix1PzLVrup+Ut6qcqZ\nPJL2A94C3AN8ETi7rHY28Pky/UVgsaS9JR0NzAfW2H4MeEbSQkkC3gl8oeUxU1lvA27r8jlGRMQM\n7a7mfyjwlVLzvxP4K9u3ARcDb5H0APCmMo/tdcBKYB3wZWCpd3y0WAr8ObABeND2qtJ+JfBLkjYA\nHwCW9eOJ1b0umfzhZCe/+fmp+Vd2WfO3fR9wQpv2HwBv7vCYi4CL2rTfDbyqTfvPgTNn2N+IiOiD\nXNsnIhopNf+y1VzbJyIipjR28K97XTL5w8lOfvPzU/OvNHbwj4iIzlLzj4hGSs2/bDU1/4iImNLY\nwb/udcnkDyc7+c3PT82/0tjBPyIiOkvNPyIaKTX/stXU/CMiYkpjB/+61yWTP5zs5Dc/PzX/SmMH\n/4iI6Cw1/4hopNT8y1ZT84+IiCmNHfzrXpdM/nCyk9/8/NT8K40d/CMiorPU/COikVLzL1tNzT8i\nIqY0dvCve10y+cPJTn7z81PzrzR28I+IiM5S84+IRkrNv2w1Nf+IiJjS2MG/7nXJ5A8nO/nNz0/N\nv9LYwT8iIjpLzT8iGik1/7LV1PwjImJKYwf/utclkz+c7OQ3Pz81/0pjB/+IiOhstzV/SUcA1wC/\nTFXg+jPbn5Q0F7geOBLYCJxp+6nymAuAdwPPAefavqW0nwisAPYFbrJ9Xmnfp2zjBGArcJbth6b1\nIzX/iAapauazM0o1+XGo+T8L/L7tVwKvB/6jpOOAZcBq28cAt5V5JC0AzgIWAIuAKyRNbXg5sMT2\nfGC+pEWlfQmwtbRfBlzS5fOMiFrxLG7RT7sd/G0/Znttmf4R8G3gcOA04Oqy2tXAGWX6dOA628/a\n3gg8CCyUdChwgO01Zb1rWh7TmnUDcEovTwrqX5dM/nCykz/8/LrX5BtZ85d0FPAa4E5gnu0tZdEW\nYF6ZPgzY1PKwTVRvFtPbN5d2yv3DALa3AU+XslJERAzAnJmuKOnFVHvl59n+4Y5KDth2N/W72ZK0\ngur4AsBTwFrbk2XZROnLZMv6E52W9zqf/OHl257sd3+TP5z8HSbL/US5tc63Lq+MS343P/8yfU7Z\n1EY6mNGXvCS9EPh/wJdt/8/Sth6YsP1YKencbvtYSctKJy4u660CPgo8VNY5rrS/HXiD7feVdS60\nfYekOcCjtg+e1occ8I1okLofkG38Ad9ysPZKYN3UwF98ETi7TJ8NfL6lfbGkvSUdDcwH1th+DHhG\n0sKS+U7gC22y3kZ1ALknda97Jn842ckffn7da/J1qfnPpOzz68A7gHsl3VPaLgAuBlZKWkI51RPA\n9jpJK4F1wDZgqXd8vFhKdarnflSneq4q7VcC10raQHWq5+Ien1dEROxCru0TEW11cxxvlMomdc/v\nl05j54wP+EbEOJrd4Bb10djLO9S97pn84WQnfyYmBxuf/F3q1++3sYN/RER0lpp/RLRV95p53fP7\nJTX/iIYZ9AHZaLbGln3qXldN/nCy65ff7gJot3do75fJPmYlf7ZS84+IiK6l5h9RU3WvaSd/z+j6\n8g4REdE8jR3861W3Tf6oZPczX5Jne+vHdute007+rqXmH1ELwzggG7F7qflHDEjda87JH25
+v6Tm\nHxER2zV28K9LXTj5o5W9J/LrXnNO/nDzU/OPiIiupeYfMSB1rzknf7j5/ZKaf0REbNfYwb/udeHk\nDyd7T+TXveac/OHmp+YfERFdS80/YkDqXnNO/nDz+yU1/4iI2K6xg3/d68LJH072nsive805+cPN\nT80/IiK6lpp/xIDUveac/OHm90tq/hERsV1jB/+614WTP/jsXG8/+XXMT80/oi9yvf0YT6n5x9iq\ne004+c3O75eua/6SrpK0RdJ9LW1zJa2W9ICkWyQd1LLsAkkbJK2XdGpL+4mS7ivLLm9p30fS9aX9\nDklH9vZUIyJid2ZS9vkLYNG0tmXAatvHALeVeSQtAM4CFpTHXCFp6h1nObDE9nxgvqSpzCXA1tJ+\nGXBJD89nuzrXtJM/vOzK5GDjk5/8Huyxmr/tvwWenNZ8GnB1mb4aOKNMnw5cZ/tZ2xuBB4GFkg4F\nDrC9pqx3TctjWrNuAE7p4nlERMQsdHvAd57tLWV6CzCvTB8GbGpZbxNweJv2zaWdcv8wgO1twNOS\n5nbZr+1sT/aakfzRzB9032FisPHJT34P+vX67/lsH1dHjOtx1DgiIgCY0+Xjtkg6xPZjpaTzeGnf\nDBzRst7LqPb4N5fp6e1Tj3k58IikOcCBtn/QbqOSVgAby+xTwNqpd8GpOljL/Ad2s7zX+eRXbmcW\nps466KX/rTXPPvS/mCz3E+xcs52YtryS/OTvifxuXv9l+pyyykY6sb3bG3AUcF/L/KXA+WV6GXBx\nmV4ArAX2Bo4GvsOO00nvBBYCAm4CFpX2pcDyMr0Y+GyHPngmfW1Zf2I268/2lvwdvxdwm9vtbdpm\n9zscrb7Pvv/JT/4ovP47bXe35/lLug44GXgpVX3/D4AvACup9tg3Amfafqqs/xHg3cA24DzbN5f2\nE4EVwH7ATbbPLe37ANcCrwG2AotdHSye3g875/mPnNmd6zyc85w7qft54Mlvdn6/dBo78yWv6EkG\n/+QnfzD5/dL1l7zqqs7nsTchf5DnOte578lPfq/69frv9oBv1IR2cTGyHd+/29ko7Z1HxGCk7NNw\no/XRt5vs2Rmlj+3JT34v+f3SaezMnn+MuNn9cUXEzKTmP6b59a57DjI7+ckf7fw9dm2fiIhontT8\nG2606p6jVVNNfvJHOb9fxu5Uz4iI6Kyxg3/da/Kp+Q8rO/nJH+38nOffEDkPPyKGITX/Iat7XTI1\n/+QnfzD5/ZKaf0REbNfYwT81+SbnDzI7+ckf7fyc5x8REV1LzX/I6l6XTM0/+ckfTH6/pOYfERHb\nNXbwT82/yfmDzE5+8kc7PzX/iIjoWmr+Q1b3umRq/slP/mDy+yU1/4iI2K6xg39q/k3OH2R28pM/\n2vmp+UdERNdS8x+yutclU/NPfvIHk98vqflHRMR2jR38U/Nvcv4gs5Of/NHOT80/IiK6lpr/kNW9\nLpmaf/KTP5j8fknNPyIithuZwV/SIknrJW2QdH4f8ib60K2h5de9Lpmaf/KTPxiNqvlL2gv4X8Ai\nYAHwdknH9Rh7fM8dG2r+2sHG1zq/zn1PfvJ71pexZyQGf+Ak4EHbG20/C3wWOL3HzIN679Yw858a\nbHyt8+vc9+Qnv2d9GXtGZfA/HHi4ZX5TaYuIiAEYlcF/EKccHTWAzD2Yv3Gw8bXOH2R28pM/6vn9\nGXtG4lRPSa8HLrS9qMxfADxv+5KWdYbf0YiIGmp3queoDP5zgH8ATgEeAdYAb7f97aF2LCKioeYM\nuwMAtrdJej9wM7AXcGUG/oiIwRmJPf+IiNizRuWAb0RE7EEjUfaJiBgESYuAM9hx6vhm4PO2V417\nfiPKPnX+BSR/eNnJb3a+pMuB+cA1JRfgZcA7qb5Ueu5Y59d98K/9LyD5Q8lO/ljkb7A9v027gA22\nXzHO+diu9a38ENq1i+oFlPya5te578kfifz7gJPatC8
E7hv3/CbU/H8m6STba6a1nwT8NPm1zq9z\n35M//PxzgOWSDqC6ZAxUnyyeKcvGOr8JZZ8TgeVAux/QUtt3J7+e+XXue/KHn9+ynUOBw8rsZtuP\n9SN3Wv72Yxa2H61Dfu0H/yktPyADjwzoF5AX0MzyN/Xz5zPI7Db5g/zZmPq/dgb58+/7326pjy+k\npf/AGg944JN0rO31fcp6oaurHbe2vdT293vKbdDg/zqqvYbngAf69YMv2XkB7Tp7L9vPlekDgVdQ\n1XOf6TW7zbaW2r6i37kl+wCqA5D/aLvn6/JK2hvYZvv5Mv8m4ATgfttf7kP+q23f22vObrbxcuAZ\n209JOho4EVhv+1t93MZA/nYlnQpcATzIzp8s5lN9sri5H9vpsO2HbR/RY8YbgWuB/YC7gffa/m5Z\ndo/t1/SSX/uav6STgT+muoj2icDfAwdJehZ4p+2Hd/X4GeR3fAGVgWhgLyBgNdC3F5CknV5AJb+n\nF5Cks4D/Lelp4IPA5cB3qH4+/8E9nLIn6T+3af6IpH0BbP9Jt9kl/wrbS8v0bwCfYUff32v7S73k\nA3cBJwNPSvow8FbgJuCDkk62vazH/HskfRe4DrjO9roe83YiaRnwXuAXkj4BfAj4KvAxSVfZ/uMe\n8wf6twt8Eniz7Y3Ttns08GXg2F7CJf3pLhb345r7nwB+C1gH/A6wWtI7bX+tD9mNONtnLXBwmT6a\n6hxhgLcAt/Qhfz1wVJv2o6n2gHrN/9Nd3H7Yh/y7gFdSnUHxNqo3sV8ty+7pQ/69wCHl5/ET4NjS\nfiTVp6Nesn8EXA98tNwuBJ6cmu9D3+9pmZ4ETijT/xy4uw/532qZvhvYr0zPoT9ng9wD/ApwUfm9\n3gssa/d67TJ/HdVe50vL72Lq72x/qk8vveYP+m93A/DCNu1705+ziX5I9eZ4DnB2y+0cYGsf8u+d\nNv9KqgtgntGPv93a7/kDL7D9RJn+HtWgg+3V5TziXu3FjnOQW22mP5+czqHao/o5O/9fAwG/24f8\nvW3fX6Y/J+nbwF+qD/8nuXjOpQYs6bsuH9ltPyTphT1mLwD+hGqwudD2TySdbftjPea2c6DtbwDY\n/kdJ/bj0yQ8lvcr2fcATVAPpT4EXUv1+e+aq/PIRqk9EC4HFwN9J+p7tX+sxfpvtn0r6BdUb+w/K\nNn8s6fkes2Hwf7tXAV+XdB07PrUfQfUzuqoP+XdRvcF/dfoCSRf2If8Xkg6Z+vuyfb+kU4AvAf+i\n1/AmDP53S7oSuB04rdwjaX/6c+2ivIB2Q9ILXNW139XSNodqkOua7e8Bb5N0BnCrpMt66+k/cayk\n+8r00ZJeYvtJVf9Tutc3Lqj2Cj8l6V7gceAuSX8DvAr4eB/yd2L7TuDOUi57Qx8i7y+v+/2BW4CV\nkm4E3gR8sw/5A/3btf1xSV+g+pewry/Nm4HfdX9KZL8D/KzDto/qQ/4FVJ+qtx9gt72plMve32t4\n7Q/4loNq7wGOo3pBXmX7OUn7AfM8rd7X5TYWUL2Atp/tA3yxHy8gSXOBn9n+Sa9ZHfLfAjxhe+20\n9oOA99v+wx7zT6IqYfx0WvtRwG/Y/lQv+S15L6Yq+5xkux8D21QfWz1i+xeSXgq8wfZf9mEbc4BT\ngWOodrYeBm52fw4o/1vbn+41Zxf5+1Lt5Dxq+2ZJ7wB+jaoU+n9s/7zH/IH/7UZntR/8IyLaKTs4\ny6hq5POoyqqPA58HLu71Dbju+bW/pLOk10m6XdKnJB0habWkpyV9XVJPZ7KU/AMk/XdJ90t6RtL3\nJd0p6Zw+dD/5s8++ow59b3B+bX7+wEqqEwQmgLm25wJvpDq7aOW459d+z1/S14E/oDq16hPA7wOf\no6pL/qHtX+0x/4vAjcCtwL8BXgx8FvivVF94+UjyB5Nf574nfyTyH7B9zGyXjUt+z6eDDfvGzqfr\nfW/asrV9yJ9+utV
d5f4FwD8kf3D5de578kcifzXwX6iOH0y1HQKcD9w67vm1L/sAz0r6LUlnUn0Z\n962w/QskPR2QKn4s6TdL5unAVgCXb20mf6D5de578oeffxbVdxT+WtKTkp6k+j7HLwFnjn1+r+8e\nw75RXQFwkupbjkdSfYR8BvgG8No+5P8r4OtUdbavAv+ytB8MnJv8weXXue/JH35+yToOeDNwwLT2\nReOe33PnRvkGvGvA+e9O/nDy69z35O+ZfOBcqm/Efh54CDijZVk/vt1e7/xB/gKHfQMeTn4z8+vc\n9+TvmXzgW8CLy/RRVJfY+ECZ78fgXOv82n/DVzu+odnOvOTXN7/OfU/+8POpzmb8EYDtjeU44A2S\njqQ/l9eodX7tB3/gl4FFVOfDTvf3ya91fp37nvzh5z8u6XiXb7fb/pGk3wauBF497vlNGPy/RPXR\n6J7pCyT9dfJrnV/nvid/+Pn/Dtjpf1jYflbS2cCfjXt+7b/kFRERs9eE8/wjImKWMvhHRIyhDP4R\nEWMog39ExBjK4B+xh6g//xoyoi/yYoxoQ9LHJJ3XMv9Hks6V9GFJayR9Uy3/ZlPSjZLukvQtSe9p\naf+RpP8haS07/pVgxNBl8I9o7yqq86yn9tjPovpfqq+wfRLwGuDEqatSUl2L5rXA64BzJb2ktL8I\nuMP28bb78cWliL5owpe8IvrO9kOStko6nuoa6vdQDeynSpr6UtL+wCuAvwXOU/WP5gGOAOYDa4Dn\ngBv2aOcjZiCDf0Rnfw68i+o6M1cBpwAft73TtyslTZRlr7f9M0m3A/uWxT9zvkkZIyhln4jObqS6\n9sxrgVXAzcC7Je0PIOlwSQcD/wx4sgz8x5LaftRA9vwjOijXUfkK1cBuYLWk44CvSQL4IfAOqjeG\n35O0jur6619rjdnD3Y6YkVzbJ6KDcqD3buBttr8z7P5E9FPKPhFtSFoAbKD6R9kZ+KNxsucfETGG\nsucfETGGMvhHRIyhDP4REWMog39ExBjK4B8RMYYy+EdEjKH/D99AFgdHHsqVAAAAAElFTkSuQmCC\n", 266 | "text/plain": [ 267 | "" 268 | ] 269 | }, 270 | "metadata": {}, 271 | "output_type": "display_data" 272 | } 273 | ], 274 | "source": [ 275 | "# Using groupby(), plot the number of films\n", 276 | "# that have been released each decade in the history of cinema.\n", 277 | "\n", 278 | "t = titles\n", 279 | "t.groupby(t.year // 10 * 10).size().plot(kind='bar')" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 6, 285 | "metadata": { 286 | "collapsed": false 287 | }, 288 | "outputs": [ 289 | { 290 | "data": { 291 | "text/plain": [ 292 | "" 293 | ] 294 | }, 295 | "execution_count": 6, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | }, 299 | { 300 | "data": { 301 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAEdCAYAAAAVczy7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH5pJREFUeJzt3XuQZGd53/HvT1pUXIStEJwFJMWrBHFRBbxCWGzERSPH\nELFFCVIhARLAC1X2plRboopAwJRSESm7sOMLAkeAUkZZcCVSCDYqyayswqCXIC4LknaWFSthyTD2\nShiBLS0gFsxFT/4472h7Wz3TPb19+jxn5vep2to+fc6e/qq79c7MMz09igjMzKx/Tug6wMzMpuMF\n3Mysp7yAm5n1lBdwM7Oe8gJuZtZTXsDNzHpqogVc0omS9km6foX975V0l6T9ks6ebaKZmY0y6Wfg\nbwIOAo940bik7cBTI+JM4NeA988uz8zMVjJ2AZd0GrAd+ENAIw65CPgQQETsBU6RtHmWkWZm9kiT\nfAb+buCtwEMr7D8VODSwfQ9w2nF2mZnZGKsu4JJeBnwrIvYx+rPvhw8d2vbP55uZtWzTmP3nARfV\nOfejgZ+R9OGIeP3AMfcCpw9sn1avO4YkL+pmZlOIiNGfQEfERH+A84HrR1y/HdhTL28DvrDCv49J\nb2uVhsuO9xzroSFLR4aGLB0ZGrJ0ZGiYVwcQEKv8+S9j9hOT3MZK+8Z9Bv6I9R5A0s561isjYo+k\n7ZLuBr4PvGGN51yLLS2ee1Jbug6otnQdQI4GyNGxpeuAakvXAeRogBQdS62efeIFPCI+DXy6Xr5y\naN+uGXeZmdkYfftJzN1dB5CjAXJ07O46oNrddQA5GiBHx+6uA6rdXQfAjlbPrjpjaZ2kiJUG8WZm\nPdS8OON41lAxbl1cbe3s1Wfgkhbc0MjQkaEBcnRkaIAcHRkaIEtHafXsvVrAzczsKI9QzMym5BGK\nmZlNpVcLeIaZVoYGyNGRoQFydGRogBwdGRogS0dp9ey9WsDNzOwoz8DNzKbkGbiZmU2lVwt4hplW\nhgbI0ZGhAXJ0ZGiAHB0ZGiBLR2n17L1awM3M7CjPwM3MpuQZuJmZTaVXC3iGmVaGBsjRkaEBcnRk\naIAcHRkaIEtHafXsvVrAzczsKM/Azcym5Bm4mZlNpVcLeIaZVoYGyNGRoQFydGRogBwdGRogS0dp\n9exjF3BJj5a0V9KipIOS3jXimAVJ35G0r/65tJ1cMzNbNtEMXNJjI+KIpE3AzcBbIuLmgf0LwJsj\n4qJVzuEZuJmtK72YgUfEkXrxJOBE4P6RJWZmNjcTLeCSTpC0CNwH3BQRB4cOCeA8Sfsl7ZF01qxD\na8dCG+ftWwPk6MjQADk6MjRAjo4MDZClo7R69kk/A38oIrYCpwEvGnHH3AacHhG/APwBcO1MK83M\n7BE2reXgiPiOpI8Dz2XgQ0tEfG/g8g2S3ifpCRFxzKhF0m5gqW4eBhYjotR9C/Xfr7o9cK6Jjl+v\n28vXdd0z2NLV/RERxY9Hnu0Mj8e8np9Hlfr3whq3H9lXL++ou5ZYxdhvYkp6IvCTiDgs6THAjcA7\nI+KTA8dsBr4VESHpXOAjEbFl6Dz+JqaZrSt9+Cbmk4FP1Rn4XuD6iPikpJ2SdtZjXgkcqMdcDrx6\n8v+AyWWYaWVogBwdGRogR0eGBsjRkaEBsnSUVs8+doQSEQeA54y4/sqBy1cAV8w2zczMVuP3QjEz\nm1IfRihmZpZQrxbwDDOtDA2QoyNDA+ToyNAAOToyNECWjtLq2Xu1gJuZ2VGegZuZTckzcDMzm0qv\nFvAMM60MDZCjI0MD5OjI0AA5OjI0QJaO0urZe7WAm5nZUZ6Bm5lNyTNwMzObSq8W8AwzrQwNkKMj\nQwPk6MjQADk6MjRAlo7S6tl7tYCbmdlRnoGbmU3JM3AzM5tKrxbwDDOtDA2QoyNDA+ToyNAAOToy\nNECWjtLq2Xu1gJuZ2VGegZuZTckzcDMzm0qvFvAMM60MDZCjI
0MD5OjI0AA5OjI0QJaO0urZV13A\nJT1a0l5Ji5IOSnrXCse9V9JdkvZLOrudVDMzGzR2Bi7psRFxRNIm4GbgLRFx88D+7cCuiNgu6XnA\neyJi24jzeAZuZutK+hl4RBypF08CTgTuHzrkIuBD9di9wCmSNo87r5mZHZ+xC7ikEyQtAvcBN0XE\nwaFDTgUODWzfA5w2u8RjWhbaOG/fGiBHR4YGyNGRoQFydGRogCwdpdWzbxp3QEQ8BGyV9LPAjZIW\nImK4avjT+5FfU0jaDSzVzcPA4vK5mi9FxpNWn8Isf6mx/OANnH8m2wMdrZx/0m2ax6Sz26/bW6nP\n0K7vj663yfF4PKzr+yPJduvPz6NK/XthaHuy/YPnr5d31F1LrGJNrwOX9J+BH0TE7w5c9wGgRMQ1\ndftO4PyIuG/o3646Az/+WRJMMk8yM5uV1DNwSU+UdEq9/BjgxcC+ocOuA15fj9kGHB5evM3MbPbG\nzcCfDHyqzsD3AtdHxCcl7ZS0EyAi9gBfk3Q3cCVwcXu5pb1TTyjHXC1HR4YGyNGRoQFydGRogCwd\npdWzrzoDj4gDwHNGXH/l0PauGXeZmdkYad4LxTNwM+ub1DNwMzPLq2cLeOk6IMlcLUdHhgbI0ZGh\nAXJ0ZGiALB2l1bP3bAE3M7NlnoGbmU3JM3AzM5tKzxbw0nVAkrlajo4MDZCjI0MD5OjI0ABZOkqr\nZ+/ZAm5mZss8Azczm5Jn4GZmNpWeLeCl64Akc7UcHRkaIEdHhgbI0ZGhAbJ0lFbP3rMF3MzMlnkG\nbmY2Jc/AzcxsKj1bwEvXAUnmajk6MjRAjo4MDZCjI0MDZOkorZ69Zwu4mZkt8wzczGxKnoGbmdlU\neraAl64DkszVcnRkaIAcHRkaIEdHhgbI0lFaPfvYBVzS6ZJukvQVSbdLumTEMQuSviNpX/1zaTu5\nZma2bOwMXNKTgCdFxKKkk4FbgVdExB0DxywAb46Ii1Y5j2fgZraupJ+BR8Q3I2KxXn4QuAN4ysgS\nMzObmzXNwCVtAc4G9g7tCuA8Sfsl7ZF01mzyhpV2TrsGOeZqOToyNECOjgwNkKMjQwNk6Sitnn3T\npAfW8clHgTfVz8QH3QacHhFHJL0UuBZ42ohz7AaW6uZhYDEiytEjCrAwcJmh7cUx+4+5rQWA5fPP\narvt80+6DWyV1Nnt1+2t1Du+6/uj621yPB4P6/r+SLLd+vPzqFL/Xhjanmz/4Pnr5R111xKrmOh1\n4JIeBfwpcENEXD7B8V8HzomI+weu8wzczNaV9DNwSQI+CBxcafGWtLkeh6RzaT4w3D/qWDMzm41J\nZuDPB14LXDDwMsGXStopaWc95pXAAUmLwOXAq9vJLe2cdg1yzNVydGRogBwdGRogR0eGBsjSUVo9\n+9gZeETczJiFPiKuAK6YVZSZmY3n90IxM5tS+hm4mZnl1LMFvHQdkGSulqMjQwPk6MjQADk6MjRA\nlo7S6tl7toCbmdkyz8DNzKbkGbiZmU2lZwt46TogyVwtR0eGBsjRkaEBcnRkaIAsHaXVs/dsATcz\ns2WegZuZTckzcDMzm0rPFvDSdUCSuVqOjgwNkKMjQwPk6MjQAFk6Sqtn79kCbmZmyzwDNzObkmfg\nZmY2lZ4t4KXrgCRztRwdGRogR0eGBsjRkaEBsnSUVs/eswXczMyWeQZuZjYlz8DNzGwqPVvAS9cB\nSeZqOToyNECOjgwNkKMjQwNk6Sitnn2S30p/uqSbJH1F0u2SLlnhuPdKukvSfklnzz7VzMwGjZ2B\nS3oS8KSIWJR0MnAr8IqIuGPgmO3ArojYLul5wHsiYtvQeTwDN7N1Jf0MPCK+GRGL9fKDwB3AU4YO\nuwj4UD1mL3CKpM0T1JuZ2ZTWNAOXtAU4G9g7tOtU4NDA9j3AaccTNlqZ/SnXKMdcLUdHhgbI0ZGh\nAXJ0ZGiALB2l1bNvmvTAO
j75KPCm+pn4Iw4Z2n7E1xWSdgNLdfMwsBgR5egRBVgYuMzQ9uKY/cfc\n1gLA8vlntd32+SfdBrZK6uz26/ZW6h3f4v19ExOQxk7OLjjOnrFfJ0/Q8PCXy31+fs7ovmj98Zik\n43gfj6NK/XthaHuy/UP37wKwo+5aWq1/oteBS3oU8KfADRFx+Yj9HwBKRFxTt+8Ezo+I+waO8Qzc\n1izL8yJLRwbzmPu235Clo+UZuJoPYR8EDo5avKvrgNfX47cBhwcXbzMzm71JZuDPB14LXCBpX/3z\nUkk7Je0EiIg9wNck3Q1cCVzcTm5p57RrkGOulqMjQ0OjdB1AjoYsj0npOqAqXQfQ+Qw8Im5msler\n7JpJkZmZTcTvhWKpZXleZOnIwDPwWXb4vVDMzDakni3gpeuAJDPGHB0ZGhql6wByNGR5TErXAVXp\nOoDO3wvFzMxy8gzcUsvyvMjSkYFn4LPs8AzczGxD6tkCXroOSDJjzNGRoaFRug4gR0OWx6R0HVCV\nrgPwDNzMzEbyDNxSy/K8yNKRgWfgs+zwDNzMbEPq2QJeug5IMmPM0ZGhoVG6DiBHQ5bHpHQdUJWu\nA/AM3MzMRvIM3FLL8rzI0pGBZ+Cz7PAM3MxsQ+rZAl66DkgyY8zRkaGhUboOIEdDlsekdB1Qla4D\n8AzczMxG8gzcUsvyvMjSkYFn4LPs8AzczGxD6tkCXroOSDJjzNGRoaFRug4gR0OWx6R0HVCVrgPo\nfAYu6SpJ90k6sML+BUnfGfiFx5fOPtPMzIaNnYFLeiHwIPDhiHjWiP0LwJsj4qIx5/EM3NYsy/Mi\nS0cGnoHPsqPlGXhEfAZ4YGyFmZnN1Sxm4AGcJ2m/pD2SzprBOVdQ2jv1hHLMGHN0ZGholK4DyNGQ\n5TEpXQdUpesA2m7YNINz3AacHhFHJL0UuBZ42qgDJe0GlurmYWAxIsrRIwqwMHCZoe3FMfuPua0F\ngOXzz2q77fNPug1sldTZ7dftrdQ7vu37e+XHe7LtGdzfrP78XJywh5n0dP38zP94FOaxXhzbsvL5\n13L79fKOumuJVUz0OnBJW4DrR83ARxz7deCciLh/6HrPwG3NsjwvsnRk4Bn4LDs6fh24pM2SVC+f\nS/NB4f4x/8zMzI7TJC8jvBr4HPB0SYckvVHSTkk76yGvBA5IWgQuB17dXm5p79QTyjFjzNGRoaFR\nug4gR0OWx6R0HVCVrgPofAYeEa8Zs/8K4IqZFZmZ2UT8XiiWWpbnRZaODDwDn2WH3wvFzGxD6tkC\nXroOSDJjzNGRoaFRug4gR0OWx6R0HVCVrgPo/L1QzMwsJ8/ALbUsz4ssHRl4Bj7LDs/Azcw2pJ4t\n4KXrgCQzxhwdGRoapesAcjRkeUxK1wFV6ToAz8DNzGwkz8AttSzPiywdGXgGPssOz8DNzDakni3g\npeuAJDPGHB0ZGhql6wByNGR5TErXAVXpOgDPwM3MbCTPwC21LM+LLB0ZeAY+yw7PwM3MNqSeLeCl\n64AkM8YcHRkaGqXrAHI0ZHlMStcBVek6AM/AzcxsJM/ALbUsz4ssHRl4Bj7LDs/Azcw2pJ4t4KXr\ngCQzxhwdGRoapesAcjRkeUxK1wFV6TqAzmfgkq6SdJ+kA6sc815Jd0naL+ns2SaamdkoY2fgkl4I\nPAh8OCKeNWL/dmBXRGyX9DzgPRGxbcRxnoHbmmV5XmTpyMAz8Fl2tDwDj4jPAA+scshFwIfqsXuB\nUyRtHndeMzM7PrOYgZ8KHBrYvgc4bQbnHaG0c9o1yDFjzNGRoaFRug4gR0OWx6R0HVCVrgNou2HT\njM4z/On9yK8pJO0GlurmYWAxIsrRIwqwMHCZoe3FMfuPua0FgOXzT7oN3DSqfejc4w4BuGCa24+I\n0nxZNt4EHRdMc/sD2zO5LyJCx/F4VKX+vTDV9rS3P/nzc3HCHo6nZ+zjUY9ddf/xPB5H74/
l/54F\nptlu//EozGm9mOj8a7n9enlH3bXEKiZ6HbikLcD1K8zAPwCUiLimbt8JnB8R9w0d14sZeIaODA1Z\nOjI0ZOnI0DCbjgwNWTq6fx34dcDr6w1tAw4PL95mZjZ7k7yM8Grgc8DTJR2S9EZJOyXtBIiIPcDX\nJN0NXAlc3F5uae/UEytdB1Sl6wByNECOjtJ1QFW6DiBHA+ToKK2efewMPCJeM8Exu2aTY2Zmk/J7\noSTsyNCQpSNDQ5aODA2z6cjQkKWj+xm4mZl1oGcLeOk6gBwNkKOjdB1Qla4DyNEAOTpK1wFV6TqA\nzt8LxczMcvIMPGFHhoYsHRkasnRkaJhNR4aGLB2egZuZbUg9W8BL1wHkaIAcHaXrgKp0HUCOBsjR\nUboOqErXAXgGbmZmI3kGnrAjQ0OWjgwNWToyNMymI0NDlg7PwM3MNqSeLeCl6wByNECOjtJ1QFW6\nDiBHA+ToKF0HVKXrADwDNzOzkTwDT9iRoSFLR4aGLB0ZGmbTkaEhS4dn4GZmG1LPFvDSdQA5GiBH\nR+k6oCpdB5CjAXJ0lK4DqtJ1AJ6Bm5nZSJ6BJ+zI0JClI0NDlo4MDbPpyNCQpcMzcDOzDalnC3jp\nOoAcDZCjo3QdUJWuA8jRADk6StcBVek6gBQzcEkXSrpT0l2S3jZi/4Kk70jaV/9cOvtUMzMbNHYG\nLulE4KvALwP3Al8CXhMRdwwcswC8OSIuWuU8noH3qCFLR4aGLB0ZGmbTkaEhS0f7M/BzgbsjYiki\nfgxcA7x8ZImZmc3NJAv4qcChge176nWDAjhP0n5JeySdNavAY5V2TrsmpeuAqnQdQI4GyNFRug6o\nStcB5GiAHB2l1bNvmuCYSb4+uA04PSKOSHopcC3wtOGDJO0GlurmYWAxIsrRIwqwMHCZoe3FMfuP\nua0FgOXzT7p9bMvK5195/8Jx3f7R+2P18zf3xWr7m3NOe/uT3R8ZHo/Jto//8Vg+50q3N+7xGDjN\ncfQc27Ly+du6/cmfn6tvt/94FHI8PyfbP3j+enlH3bXEKiaZgW8DLouIC+v2rwMPRcRvr/Jvvg6c\nExH3D1znGXiPGrJ0ZGjI0pGhYTYdGRqydLQ/A78FOFPSFkknAa8Crhu6gc2SVC+fS/OB4f5HnsrM\nzGZl7AIeET8BdgE3AgeB/xMRd0jaKWlnPeyVwAFJi8DlwKvbyS3tnHZNStcBVek6gBwNkKOjdB1Q\nla4DyNEAOTpKq2efZAZORNwA3DB03ZUDl68ArphtmpmZrcbvhZKwI0NDlo4MDVk6MjTMpiNDQ5YO\nvxeKmdmG1LMFvHQdQI4GyNFRug6oStcB5GiAHB2l64CqdB1AivdCMTOzfDwDT9iRoSFLR4aGLB0Z\nGmbTkaEhS4dn4GZmG1LPFvDSdQA5GiBHR+k6oCpdB5CjAXJ0lK4DqtJ1AJ6Bm5nZSJ6BJ+zI0JCl\nI0NDlo4MDbPpyNCQpcMzcDOzDalnC3jpOoAcDZCjo3QdUJWuA8jRADk6StcBVek6AM/AzcxsJM/A\nE3ZkaMjSkaEhS0eGhtl0ZGjI0uEZuJnZhtSzBbx0HUCOBsjRUboOqErXAeRogBwdpeuAqnQdgGfg\nZmY2kmfgCTsyNGTpyNCQpSNDw2w6MjRk6fAM3MxsQxq7gEu6UNKdku6S9LYVjnlv3b9f0tmzz1xW\n2jv1xErXAVXpOoAcDZCjo3QdUJWuA8jRADk6SqtnX3UBl3Qi8N+BC4GzgNdIeubQMduBp0bEmcCv\nAe9vqRVYbO/UE8vQADk6MjRAjo4MDZCjI0MD5Ohot2HcZ+DnAndHxFJE/Bi4Bnj50DEXAR8CiIi9\nwCmSNs+8FIDD7Zx2TTI0QI6ODA2QoyNDA+ToyNAAOTrabRi3gJ8KHBrYvqdeN+6Y044/zczMVjNu\nAZ/026vD3yFt6aUtS+2cdk2Wug6olroOIEcD5OhY6jq
gWuo6gBwNkKNjqdWzr/oyQknbgMsi4sK6\n/evAQxHx2wPHfAAoEXFN3b4TOD8i7hs613xer2hmts6s9DLCTWP+3S3AmZK2AN8AXgW8ZuiY64Bd\nwDV1wT88vHivFmBmZtNZdQGPiJ9I2gXcCJwIfDAi7pC0s+6/MiL2SNou6W7g+8AbWq82M7P5/SSm\nmZnNln8S08ysp8bNwM3M0pN0IfAKjr7M+V7g2oj4s/XckHaEslEfkKwdGRqydGRoyNKRpOE9wJnA\nh+vtQ/OzKK+j+UHES9ZrQ8oFfCM/IBk7MjRk6cjQkKUjQ0PtuKu+lcfw9QLuioinrtuGiEj3p/4H\nj7peNE+MDdGQpSNDQ5aODA1ZOjI01Ns7AJw74vrnAQfWc0PWGfgPJZ0bEV8cuv5c4AcbqCFLR4aG\nLB0ZGrJ0ZGgA2AG8X9Ljad7KA5qvBL5b963bhqwjlHNo3tVw1J1xcUTcuhEasnRkaMjSkaEhS0eG\nhqGeJwNPqZv3RsQ353n7Aw0Pfz8gIv6m1dvLuIAvm/edkbVhRMc9G+HJOUFHsEH+R52wo+vnRQDf\n6Oj/U9GMKx6+L4AvRoIFTtIzIuLOVs6d4L9vTdq8M1a4vUdF81a6g9c9MSL+dl4NI5oujoj3dXj7\nj6f55tXXImJu79kp6STgJxHxUN3+JeA5wFci4oY5NTw7Ir48j9saR9I/Br4bEYclnQGcA9wZEbfP\nueMXaT7z/inwF/P8/7Pe/kuA9wF3c+xXAmfSfCVw4zx7hkk6FBGnt3LuHi7grd0ZQ7dzAfBHwGOA\nW4GdEfH1um9fRLT4m4eO6fiPI65+B/CbABHx+3NoeF9EXFwvvwD438Bf0vwPsjMiPt52Q73tL9O8\nUdoDkt4K/CtgD3A+cGtEvH0ODT8Fvg5cDVwdEQfbvs0VOt4O7AR+BPwO8Bbgs8A24KqI+L05NJwP\n/B7Nm16fA3wOOAX4MfC6iDi0yj+fZcedwIURsTR0/RnADRHxjDk0/MEqu3dExOPbuN2U38Qcc2ec\nMqeM3wH+JXAQ+NfAJyS9LiI+P6fbX/ZO4OO1A5rv8J9AM3ecl38+cPk3gFdExG2S/gnwf2vfPJwQ\nEQ/Uy68GXhARP5D0W8A+oPUFHPgyzcvk/h1wnaQjNB/QrhleQFr2eprfkvU4mvcsPSMivi3pccAX\naRbWtr0HeHG93TOAd0fE8yW9GPgg8JI5NEDzPk33jrj+Xua3xu2g+SD69xz7dtqiea60IuUCTkd3\nxpCTIuIr9fJHJd0B/MlKvxe0RWcBv0/zP+plEXFE0q9ExDvn3LHsZyPiNoCI+Jqkeb4dw/ckPSsi\nDgDfpvnq6AfAo3jke9K3po4o3gG8Q9LzaD6Y3CzpryPivDll/KR+8PoRcAS4v7Z9X9JDc2o4ISK+\nXS//NfDzteET9TXi83IV8CVJV3N0hHI6zeNy1ZwabgFuj4jPDu+QdFlbN5pyhCLpJuDSFe6MpYjY\nMoeGW4CXDX5TSNJpNJ9t/tOIOLnthqGeVwD/CXg38N8i4ow53vYPaOaLAGcAp9cxxonA/oj4Z3Pq\neDbNWOvLNB/YXwD8P+BZwO9HxP+aQ8PI8Vn9QPaiiChtN9Tbu7pefBzNqz4eA3wM+CWaTz5eO4eG\n/wk8BNxE86sV74mIN9evAm6dx+hioOUsml/3+PCrUIDr5jXikvQE4IcRcWQet/fw7SZdwDu5M4Ya\nXgx8OyIWh64/BdgVEb/RQdPJwGU0PzDwojne7pahq74RET+S9ESaRetP5tiyieZL86fRfAV5CLhx\nXt9MlfTv5/GBYoKOR9N8hvk3EXGjpNcC5wF3AldGxN/PoeEk4FeBZwL7aWbvP5X0GGDznEdKG1LK\nBdzMbFL1k6q307wny2aar86+BVwL/NY8Prh31ZDy7WQlPV7Sf5X0FUnflfS3kvZK2rGRGlbp+ILv\nCz8v/Lx42EeAB4A
F4AkR8QTgAppXx3xkPTek/Axc0nU087w/B/4NcDJwDXApzZztHRuhIUtHhoYs\nHRkasnRkaKgdfxERT1vrvnXREHN6w5m1/AG+PLR9S/37BOCrG6UhS0eGhiwdGRqydGRoqLf3CZpv\n8G8euO5JwNuAP1/PDSlHKMD3Jb0QQNLLgb8DiPoTeBuoIUtHhoYsHRkasnRkaIDml60/Efi0pAck\nPQAU4B8C/3ZdN8zro+QaP5r9AvAlmvnRZ4Gn1+t/DrhkozRk6cjQkKUjQ0OWjgwNAy3PBH4ZePzQ\n9Reu54a53cEzvJPe6IY8HRkasnRkaMjSMc8G4BLgqzSv+Pgrmp8UXt63bz03pPwm5mo0p/dCyd6Q\npSNDQ5aODA1ZOubZIOl2YFtEPFh/ZuGPgT+KiMtX+sGr9dKQ8kfpJR1YZffmjdKQpSNDQ5aODA1Z\nOjI0VIqIBwEiYknNm2z9saSfZ35vsdBJQ8oFHPhHwIU0r6sc9rkN1JClI0NDlo4MDVk6MjQAfEvS\n1qg/NV0/C34ZzRtqPXs9N2RdwD8OnBwR+4Z3SPr0BmrI0pGhIUtHhoYsHRkaoHlnxmPesz8ifizp\nV4D/sZ4bejcDNzOzRtbXgZuZ2RhewM3MesoLuJlZT3kBNzPrKS/gZhPSfH99nNlYfkLauiTpnZLe\nNLD9m5IukfRWSV+UtF8Dv6tQ0sck3SLpdkm/OnD9g5J+V9IizW98N0vDC7itV1fRvDZ3+TPnVwHf\nBJ4aEecCZwPnLL+bHs17dzwX+EXgEkn/oF7/WOALEbE1Iub5wylmY2X9QR6z4xIRfyXp7yRtpXlf\n5n00i/NLJC3/4MnjgKcCnwHepOYXR0PzG83PBL4I/JTmfS3M0vECbuvZHwJvoHlfjquAfwG8KyKO\n+ck4SQt137aI+KGkm4BH190/DP+0myXlEYqtZx+jea+O5wJ/BtwIvFHS4wAknSrp54CfAR6oi/cz\n8KzbesKfgdu6Vd+L4lM0i3MAn5D0TODzkgC+B7yWZnH/D5IO0ryn8+cHTzPnbLOJ+b1QbN2q37y8\nFXhlRPxl1z1ms+YRiq1Lks4C7qL5hbJevG1d8mfgZmY95c/Azcx6ygu4mVlPeQE3M+spL+BmZj3l\nBdzMrKe8gJuZ9dT/BxT1noPbB61OAAAAAElFTkSuQmCC\n", 302 | "text/plain": [ 303 | "" 304 | ] 305 | }, 306 | "metadata": {}, 307 | "output_type": "display_data" 308 | } 309 | ], 310 | "source": [ 311 | "# Use groupby() to plot the number of \"Hamlet\" films made each decade.\n", 312 | "\n", 313 | "t = titles\n", 314 | "t = t[t.title == 'Hamlet']\n", 315 | "t.groupby(t.year // 10 * 10).size().plot(kind='bar')" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 7, 321 | "metadata": { 322 | "collapsed": false 323 | }, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/plain": [ 328 | "year type \n", 329 | "1950 actor 603\n", 330 | " actress 267\n", 331 | "1951 actor 626\n", 332 | " actress 272\n", 333 | "1952 actor 590\n", 334 | " actress 282\n", 335 | "1953 actor 629\n", 336 | " actress 283\n", 337 | "1954 
actor 620\n", 338 | " actress 298\n", 339 | "1955 actor 602\n", 340 | " actress 263\n", 341 | "1956 actor 609\n", 342 | " actress 284\n", 343 | "1957 actor 703\n", 344 | " actress 281\n", 345 | "1958 actor 695\n", 346 | " actress 273\n", 347 | "1959 actor 672\n", 348 | " actress 286\n", 349 | "dtype: int64" 350 | ] 351 | }, 352 | "execution_count": 7, 353 | "metadata": {}, 354 | "output_type": "execute_result" 355 | } 356 | ], 357 | "source": [ 358 | "# How many leading (n=1) roles were available to actors,\n", 359 | "# and how many to actresses, in each year of the 1950s?\n", 360 | "\n", 361 | "c = cast\n", 362 | "c = c[c.year // 10 == 195]\n", 363 | "c = c[c.n == 1]\n", 364 | "c.groupby(['year', 'type']).size()" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 8, 370 | "metadata": { 371 | "collapsed": false 372 | }, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "n type \n", 378 | "1 actor 6349\n", 379 | " actress 2789\n", 380 | "2 actor 4354\n", 381 | " actress 4375\n", 382 | "3 actor 5370\n", 383 | " actress 3005\n", 384 | "4 actor 5370\n", 385 | " actress 2735\n", 386 | "5 actor 5403\n", 387 | " actress 2426\n", 388 | "dtype: int64" 389 | ] 390 | }, 391 | "execution_count": 8, 392 | "metadata": {}, 393 | "output_type": "execute_result" 394 | } 395 | ], 396 | "source": [ 397 | "# In the 1950s decade taken as a whole,\n", 398 | "# how many total roles were available to actors,\n", 399 | "# and how many to actresses, for each \"n\" number 1 through 5?\n", 400 | "\n", 401 | "c = cast\n", 402 | "c = c[c.year // 10 * 10 == 1950]\n", 403 | "c = c[c.n < 6]\n", 404 | "c.groupby(['n', 'type']).size()" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 9, 410 | "metadata": { 411 | "collapsed": false 412 | }, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/html": [ 417 | "
\n", 418 | "\n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | "
n
year
196315
200650
\n", 440 | "
" 441 | ], 442 | "text/plain": [ 443 | " n\n", 444 | "year \n", 445 | "1963 15\n", 446 | "2006 50" 447 | ] 448 | }, 449 | "execution_count": 9, 450 | "metadata": {}, 451 | "output_type": "execute_result" 452 | } 453 | ], 454 | "source": [ 455 | "# Use groupby() to determine how many roles are listed\n", 456 | "# for each movie named _The Pink Panther_.\n", 457 | "\n", 458 | "c = cast\n", 459 | "c = c[c.title == 'The Pink Panther']\n", 460 | "c = c.sort_values('n').groupby(['year'])[['n']].max()\n", 461 | "c" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 10, 467 | "metadata": { 468 | "collapsed": false 469 | }, 470 | "outputs": [ 471 | { 472 | "data": { 473 | "text/plain": [ 474 | "year title \n", 475 | "1979 The Muppet Movie 8\n", 476 | "1981 An American Werewolf in London 2\n", 477 | " The Great Muppet Caper 6\n", 478 | "1982 The Dark Crystal 2\n", 479 | "1984 The Muppets Take Manhattan 7\n", 480 | "1985 Sesame Street Presents: Follow that Bird 3\n", 481 | "1992 The Muppet Christmas Carol 7\n", 482 | "1996 Muppet Treasure Island 4\n", 483 | "1999 Muppets from Space 4\n", 484 | " The Adventures of Elmo in Grouchland 3\n", 485 | "dtype: int64" 486 | ] 487 | }, 488 | "execution_count": 10, 489 | "metadata": {}, 490 | "output_type": "execute_result" 491 | } 492 | ], 493 | "source": [ 494 | "# List, in order by year, each of the films\n", 495 | "# in which Frank Oz has played more than 1 role.\n", 496 | "\n", 497 | "c = cast\n", 498 | "c = c[c.name == 'Frank Oz']\n", 499 | "g = c.groupby(['year', 'title']).size()\n", 500 | "g[g > 1]" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 11, 506 | "metadata": { 507 | "collapsed": false 508 | }, 509 | "outputs": [ 510 | { 511 | "data": { 512 | "text/plain": [ 513 | "character\n", 514 | "Grover 2\n", 515 | "Bert 3\n", 516 | "Cookie Monster 3\n", 517 | "Fozzie Bear 4\n", 518 | "Sam the Eagle 5\n", 519 | "Yoda 5\n", 520 | "Animal 6\n", 521 | "Miss Piggy 6\n", 522 | 
"dtype: int64" 523 | ] 524 | }, 525 | "execution_count": 11, 526 | "metadata": {}, 527 | "output_type": "execute_result" 528 | } 529 | ], 530 | "source": [ 531 | "# List each of the characters that Frank Oz\n", 532 | "# has portrayed at least twice.\n", 533 | "\n", 534 | "c = cast\n", 535 | "c = c[c.name == 'Frank Oz']\n", 536 | "g = c.groupby(['character']).size()\n", 537 | "g[g > 1].sort_values()" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": null, 543 | "metadata": { 544 | "collapsed": true 545 | }, 546 | "outputs": [], 547 | "source": [] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": { 553 | "collapsed": true 554 | }, 555 | "outputs": [], 556 | "source": [] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": null, 561 | "metadata": { 562 | "collapsed": true 563 | }, 564 | "outputs": [], 565 | "source": [] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": null, 570 | "metadata": { 571 | "collapsed": true 572 | }, 573 | "outputs": [], 574 | "source": [] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": null, 579 | "metadata": { 580 | "collapsed": true 581 | }, 582 | "outputs": [], 583 | "source": [] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "metadata": { 589 | "collapsed": true 590 | }, 591 | "outputs": [], 592 | "source": [] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": { 598 | "collapsed": true 599 | }, 600 | "outputs": [], 601 | "source": [] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": null, 606 | "metadata": { 607 | "collapsed": true 608 | }, 609 | "outputs": [], 610 | "source": [] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": null, 615 | "metadata": { 616 | "collapsed": true 617 | }, 618 | "outputs": [], 619 | "source": [] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": null, 
624 | "metadata": { 625 | "collapsed": true 626 | }, 627 | "outputs": [], 628 | "source": [] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": null, 633 | "metadata": { 634 | "collapsed": true 635 | }, 636 | "outputs": [], 637 | "source": [] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": null, 642 | "metadata": { 643 | "collapsed": true 644 | }, 645 | "outputs": [], 646 | "source": [] 647 | }, 648 | { 649 | "cell_type": "code", 650 | "execution_count": null, 651 | "metadata": { 652 | "collapsed": true 653 | }, 654 | "outputs": [], 655 | "source": [] 656 | }, 657 | { 658 | "cell_type": "code", 659 | "execution_count": null, 660 | "metadata": { 661 | "collapsed": true 662 | }, 663 | "outputs": [], 664 | "source": [] 665 | } 666 | ], 667 | "metadata": { 668 | "kernelspec": { 669 | "display_name": "Python 3", 670 | "language": "python", 671 | "name": "python3" 672 | }, 673 | "language_info": { 674 | "codemirror_mode": { 675 | "name": "ipython", 676 | "version": 3 677 | }, 678 | "file_extension": ".py", 679 | "mimetype": "text/x-python", 680 | "name": "python", 681 | "nbconvert_exporter": "python", 682 | "pygments_lexer": "ipython3", 683 | "version": "3.4.3" 684 | } 685 | }, 686 | "nbformat": 4, 687 | "nbformat_minor": 0 688 | } 689 | -------------------------------------------------------------------------------- /Solutions-6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import pandas as pd" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/html": [ 23 | "" 59 | ], 60 | "text/plain": [ 61 | "" 62 | ] 63 | }, 64 | "execution_count": 2, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 
68 | ], 69 | "source": [ 70 | "from IPython.core.display import HTML\n", 71 | "css = open('style-table.css').read() + open('style-notebook.css').read()\n", 72 | "HTML(''.format(css))" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 3, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/html": [ 83 | "
\n", 84 | "\n", 97 | "\n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | "
Book titleNumber soldSales priceRoyalty paid
0The Bricklayer’s Bible82.990.55
1Swimrand21.990.35
2Pining For The Fisheries of Yore282.990.55
3The Duck Goes Here342.990.55
4The Tower Commission Report411.504.25
\n", 145 | "
" 146 | ], 147 | "text/plain": [ 148 | " Book title Number sold Sales price Royalty paid\n", 149 | "0 The Bricklayer’s Bible 8 2.99 0.55\n", 150 | "1 Swimrand 2 1.99 0.35\n", 151 | "2 Pining For The Fisheries of Yore 28 2.99 0.55\n", 152 | "3 The Duck Goes Here 34 2.99 0.55\n", 153 | "4 The Tower Commission Report 4 11.50 4.25" 154 | ] 155 | }, 156 | "execution_count": 3, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "sales1 = pd.read_csv('sales1.csv')\n", 163 | "sales1" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 4, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/html": [ 174 | "
\n", 175 | "\n", 188 | "\n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " 
\n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | "
TitleUnits soldList priceRoyalty
0
1Sales report for Q4
2E-Book Reader US Store
3Pining for the Fisheries of Yore803.514.98
4Swimrand12.990.14
5The Bricklayer's Bible173.55.15
6The Duck Goes Here342.995.78
7The Tower Commission Report49.56.2
8US royalties (USD)32.25
9
10
11Sales report for Q4
12E-Book Reader UK Store
13Pining for the Fisheries of Yore472.9911.98
14The Bricklayer's Bible172.993.5
15The Tower Commission Report46.54.8
16UK royalties (GBP)20.28
17
18
19Sales report for Q4
20E-Book Reader France Store
21Swimrand81.990.88
22The Duck Goes Here121.991.5
23France royalties (EUR)2.38
\n", 369 | "
" 370 | ], 371 | "text/plain": [ 372 | " Title Units sold List price Royalty\n", 373 | "0 \n", 374 | "1 Sales report for Q4 \n", 375 | "2 E-Book Reader US Store \n", 376 | "3 Pining for the Fisheries of Yore 80 3.5 14.98\n", 377 | "4 Swimrand 1 2.99 0.14\n", 378 | "5 The Bricklayer's Bible 17 3.5 5.15\n", 379 | "6 The Duck Goes Here 34 2.99 5.78\n", 380 | "7 The Tower Commission Report 4 9.5 6.2\n", 381 | "8 US royalties (USD) 32.25\n", 382 | "9 \n", 383 | "10 \n", 384 | "11 Sales report for Q4 \n", 385 | "12 E-Book Reader UK Store \n", 386 | "13 Pining for the Fisheries of Yore 47 2.99 11.98\n", 387 | "14 The Bricklayer's Bible 17 2.99 3.5\n", 388 | "15 The Tower Commission Report 4 6.5 4.8\n", 389 | "16 UK royalties (GBP) 20.28\n", 390 | "17 \n", 391 | "18 \n", 392 | "19 Sales report for Q4 \n", 393 | "20 E-Book Reader France Store \n", 394 | "21 Swimrand 8 1.99 0.88\n", 395 | "22 The Duck Goes Here 12 1.99 1.5\n", 396 | "23 France royalties (EUR) 2.38" 397 | ] 398 | }, 399 | "execution_count": 4, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "sales2 = pd.read_csv('sales2.csv')\n", 406 | "sales2.fillna('')" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 5, 412 | "metadata": { 413 | "collapsed": true 414 | }, 415 | "outputs": [], 416 | "source": [ 417 | "# Challenge: first combine these sales together into a single dataframe,\n", 418 | "# then compute how much money consumers spent on each book in each currency." 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": 6, 424 | "metadata": {}, 425 | "outputs": [ 426 | { 427 | "data": { 428 | "text/html": [ 429 | "
\n", 430 | "\n", 443 | "\n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | "
titlenumbertheir_pricewe_gotcurrency
0The Bricklayer’s Bible82.990.55USD
1Swimrand21.990.35USD
2Pining For The Fisheries of Yore282.990.55USD
3The Duck Goes Here342.990.55USD
4The Tower Commission Report411.504.25USD
\n", 497 | "
" 498 | ], 499 | "text/plain": [ 500 | " title number their_price we_got currency\n", 501 | "0 The Bricklayer’s Bible 8 2.99 0.55 USD\n", 502 | "1 Swimrand 2 1.99 0.35 USD\n", 503 | "2 Pining For The Fisheries of Yore 28 2.99 0.55 USD\n", 504 | "3 The Duck Goes Here 34 2.99 0.55 USD\n", 505 | "4 The Tower Commission Report 4 11.50 4.25 USD" 506 | ] 507 | }, 508 | "execution_count": 6, 509 | "metadata": {}, 510 | "output_type": "execute_result" 511 | } 512 | ], 513 | "source": [ 514 | "df1 = sales1.rename(columns={\n", 515 | " 'Book title': 'title',\n", 516 | " 'Number sold': 'number',\n", 517 | " 'Sales price': 'their_price',\n", 518 | " 'Royalty paid': 'we_got',\n", 519 | " })\n", 520 | "df1['currency'] = 'USD'\n", 521 | "df1" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 7, 527 | "metadata": {}, 528 | "outputs": [ 529 | { 530 | "data": { 531 | "text/html": [ 532 | "
\n", 533 | "\n", 546 | "\n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | "
TitleUnits soldList priceRoyaltycurrency
3Pining for the Fisheries of Yore80.03.5014.98USD
4Swimrand1.02.990.14USD
5The Bricklayer's Bible17.03.505.15USD
6The Duck Goes Here34.02.995.78USD
7The Tower Commission Report4.09.506.20USD
\n", 600 | "
" 601 | ], 602 | "text/plain": [ 603 | " Title Units sold List price Royalty currency\n", 604 | "3 Pining for the Fisheries of Yore 80.0 3.50 14.98 USD\n", 605 | "4 Swimrand 1.0 2.99 0.14 USD\n", 606 | "5 The Bricklayer's Bible 17.0 3.50 5.15 USD\n", 607 | "6 The Duck Goes Here 34.0 2.99 5.78 USD\n", 608 | "7 The Tower Commission Report 4.0 9.50 6.20 USD" 609 | ] 610 | }, 611 | "execution_count": 7, 612 | "metadata": {}, 613 | "output_type": "execute_result" 614 | } 615 | ], 616 | "source": [ 617 | "s = sales2.copy()\n", 618 | "t = sales2['Title']\n", 619 | "t = t.where(t.str.endswith(')')).str.split().str[-1].str.strip('()')\n", 620 | "s['currency'] = t.fillna(method='bfill')\n", 621 | "s = s[s['List price'].notnull()]\n", 622 | "df2 = s.rename(columns={\n", 623 | " 'Units sold': 'number',\n", 624 | " 'Title': 'title',\n", 625 | " 'List price': 'their_price',\n", 626 | " 'Royalty': 'we_got',\n", 627 | " })\n", 628 | "s.head()" 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": 8, 634 | "metadata": {}, 635 | "outputs": [ 636 | { 637 | "data": { 638 | "text/html": [ 639 | "
\n", 640 | "\n", 653 | "\n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | "
titlenumbertheir_pricewe_gotcurrency
0The Bricklayer’s Bible8.02.990.55USD
1Swimrand2.01.990.35USD
2Pining For The Fisheries of Yore28.02.990.55USD
3The Duck Goes Here34.02.990.55USD
4The Tower Commission Report4.011.504.25USD
3Pining for the Fisheries of Yore80.03.5014.98USD
4Swimrand1.02.990.14USD
5The Bricklayer's Bible17.03.505.15USD
6The Duck Goes Here34.02.995.78USD
7The Tower Commission Report4.09.506.20USD
13Pining for the Fisheries of Yore47.02.9911.98GBP
14The Bricklayer's Bible17.02.993.50GBP
15The Tower Commission Report4.06.504.80GBP
21Swimrand8.01.990.88EUR
22The Duck Goes Here12.01.991.50EUR
\n", 787 | "
" 788 | ], 789 | "text/plain": [ 790 | " title number their_price we_got currency\n", 791 | "0 The Bricklayer’s Bible 8.0 2.99 0.55 USD\n", 792 | "1 Swimrand 2.0 1.99 0.35 USD\n", 793 | "2 Pining For The Fisheries of Yore 28.0 2.99 0.55 USD\n", 794 | "3 The Duck Goes Here 34.0 2.99 0.55 USD\n", 795 | "4 The Tower Commission Report 4.0 11.50 4.25 USD\n", 796 | "3 Pining for the Fisheries of Yore 80.0 3.50 14.98 USD\n", 797 | "4 Swimrand 1.0 2.99 0.14 USD\n", 798 | "5 The Bricklayer's Bible 17.0 3.50 5.15 USD\n", 799 | "6 The Duck Goes Here 34.0 2.99 5.78 USD\n", 800 | "7 The Tower Commission Report 4.0 9.50 6.20 USD\n", 801 | "13 Pining for the Fisheries of Yore 47.0 2.99 11.98 GBP\n", 802 | "14 The Bricklayer's Bible 17.0 2.99 3.50 GBP\n", 803 | "15 The Tower Commission Report 4.0 6.50 4.80 GBP\n", 804 | "21 Swimrand 8.0 1.99 0.88 EUR\n", 805 | "22 The Duck Goes Here 12.0 1.99 1.50 EUR" 806 | ] 807 | }, 808 | "execution_count": 8, 809 | "metadata": {}, 810 | "output_type": "execute_result" 811 | } 812 | ], 813 | "source": [ 814 | "df = pd.concat([df1, df2])\n", 815 | "df" 816 | ] 817 | }, 818 | { 819 | "cell_type": "code", 820 | "execution_count": 9, 821 | "metadata": {}, 822 | "outputs": [ 823 | { 824 | "data": { 825 | "text/html": [ 826 | "
\n", 827 | "\n", 840 | "\n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | "
total_paid
titlecurrency
Pining For The Fisheries of YoreUSD83.72
Pining for the Fisheries of YoreGBP140.53
USD280.00
SwimrandEUR15.92
USD6.97
The Bricklayer's BibleGBP50.83
USD59.50
The Bricklayer’s BibleUSD23.92
The Duck Goes HereEUR23.88
USD203.32
The Tower Commission ReportGBP26.00
USD84.00
\n", 911 | "
" 912 | ], 913 | "text/plain": [ 914 | " total_paid\n", 915 | "title currency \n", 916 | "Pining For The Fisheries of Yore USD 83.72\n", 917 | "Pining for the Fisheries of Yore GBP 140.53\n", 918 | " USD 280.00\n", 919 | "Swimrand EUR 15.92\n", 920 | " USD 6.97\n", 921 | "The Bricklayer's Bible GBP 50.83\n", 922 | " USD 59.50\n", 923 | "The Bricklayer’s Bible USD 23.92\n", 924 | "The Duck Goes Here EUR 23.88\n", 925 | " USD 203.32\n", 926 | "The Tower Commission Report GBP 26.00\n", 927 | " USD 84.00" 928 | ] 929 | }, 930 | "execution_count": 9, 931 | "metadata": {}, 932 | "output_type": "execute_result" 933 | } 934 | ], 935 | "source": [ 936 | "# First try: differences in case and punctuation\n", 937 | "# make some books look like two different entries.\n", 938 | "\n", 939 | "t = df\n", 940 | "t = t.assign(total_paid=t.their_price * t.number)\n", 941 | "t.groupby(['title', 'currency'])[['total_paid']].sum()" 942 | ] 943 | }, 944 | { 945 | "cell_type": "code", 946 | "execution_count": 10, 947 | "metadata": {}, 948 | "outputs": [ 949 | { 950 | "data": { 951 | "text/html": [ 952 | "
\n", 953 | "\n", 966 | "\n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | "
total_paid
title_fixedcurrency
pining for the fisheries of yoreGBP140.53
USD363.72
swimrandEUR15.92
USD6.97
the bricklayer's bibleGBP50.83
USD83.42
the duck goes hereEUR23.88
USD203.32
the tower commission reportGBP26.00
USD84.00
\n", 1027 | "
" 1028 | ], 1029 | "text/plain": [ 1030 | " total_paid\n", 1031 | "title_fixed currency \n", 1032 | "pining for the fisheries of yore GBP 140.53\n", 1033 | " USD 363.72\n", 1034 | "swimrand EUR 15.92\n", 1035 | " USD 6.97\n", 1036 | "the bricklayer's bible GBP 50.83\n", 1037 | " USD 83.42\n", 1038 | "the duck goes here EUR 23.88\n", 1039 | " USD 203.32\n", 1040 | "the tower commission report GBP 26.00\n", 1041 | " USD 84.00" 1042 | ] 1043 | }, 1044 | "execution_count": 10, 1045 | "metadata": {}, 1046 | "output_type": "execute_result" 1047 | } 1048 | ], 1049 | "source": [ 1050 | "# Second try: make book titles match by fixing differences\n", 1051 | "# in casing and punctuation.\n", 1052 | "\n", 1053 | "t = df\n", 1054 | "t = t.assign(total_paid=t.their_price * t.number)\n", 1055 | "t = t.assign(title_fixed=t['title'].str.lower().str.replace(\"’\", \"'\"))\n", 1056 | "t.groupby(['title_fixed', 'currency'])[['total_paid']].sum()" 1057 | ] 1058 | }, 1059 | { 1060 | "cell_type": "code", 1061 | "execution_count": null, 1062 | "metadata": { 1063 | "collapsed": true 1064 | }, 1065 | "outputs": [], 1066 | "source": [] 1067 | }, 1068 | { 1069 | "cell_type": "code", 1070 | "execution_count": null, 1071 | "metadata": { 1072 | "collapsed": true 1073 | }, 1074 | "outputs": [], 1075 | "source": [] 1076 | }, 1077 | { 1078 | "cell_type": "code", 1079 | "execution_count": null, 1080 | "metadata": { 1081 | "collapsed": true 1082 | }, 1083 | "outputs": [], 1084 | "source": [] 1085 | }, 1086 | { 1087 | "cell_type": "code", 1088 | "execution_count": null, 1089 | "metadata": { 1090 | "collapsed": true 1091 | }, 1092 | "outputs": [], 1093 | "source": [] 1094 | }, 1095 | { 1096 | "cell_type": "code", 1097 | "execution_count": null, 1098 | "metadata": { 1099 | "collapsed": true 1100 | }, 1101 | "outputs": [], 1102 | "source": [] 1103 | }, 1104 | { 1105 | "cell_type": "code", 1106 | "execution_count": null, 1107 | "metadata": { 1108 | "collapsed": true 1109 | }, 1110 | "outputs": [], 1111 | 
"source": [] 1112 | }, 1113 | { 1114 | "cell_type": "code", 1115 | "execution_count": null, 1116 | "metadata": { 1117 | "collapsed": true 1118 | }, 1119 | "outputs": [], 1120 | "source": [] 1121 | }, 1122 | { 1123 | "cell_type": "code", 1124 | "execution_count": null, 1125 | "metadata": { 1126 | "collapsed": true 1127 | }, 1128 | "outputs": [], 1129 | "source": [] 1130 | }, 1131 | { 1132 | "cell_type": "code", 1133 | "execution_count": null, 1134 | "metadata": { 1135 | "collapsed": true 1136 | }, 1137 | "outputs": [], 1138 | "source": [] 1139 | }, 1140 | { 1141 | "cell_type": "code", 1142 | "execution_count": null, 1143 | "metadata": { 1144 | "collapsed": true 1145 | }, 1146 | "outputs": [], 1147 | "source": [] 1148 | }, 1149 | { 1150 | "cell_type": "code", 1151 | "execution_count": null, 1152 | "metadata": { 1153 | "collapsed": true 1154 | }, 1155 | "outputs": [], 1156 | "source": [] 1157 | }, 1158 | { 1159 | "cell_type": "code", 1160 | "execution_count": null, 1161 | "metadata": { 1162 | "collapsed": true 1163 | }, 1164 | "outputs": [], 1165 | "source": [] 1166 | } 1167 | ], 1168 | "metadata": { 1169 | "kernelspec": { 1170 | "display_name": "Python 3", 1171 | "language": "python", 1172 | "name": "python3" 1173 | }, 1174 | "language_info": { 1175 | "codemirror_mode": { 1176 | "name": "ipython", 1177 | "version": 3 1178 | }, 1179 | "file_extension": ".py", 1180 | "mimetype": "text/x-python", 1181 | "name": "python", 1182 | "nbconvert_exporter": "python", 1183 | "pygments_lexer": "ipython3", 1184 | "version": "3.6.2" 1185 | } 1186 | }, 1187 | "nbformat": 4, 1188 | "nbformat_minor": 1 1189 | } 1190 | -------------------------------------------------------------------------------- /build/BUILD.py: -------------------------------------------------------------------------------- 1 | """Build the tutorial data files from the IMDB *.list.gz files.""" 2 | 3 | import csv 4 | import gzip 5 | import os 6 | import re 7 | import sys 8 | from datetime import datetime 9 | 10 | 
# The IMDB list files separate columns with one or more tab characters.
split_on_tabs = re.compile(b'\t+').split

# A title tagged with any of these genres is dropped from the tutorial data,
# even if it also carries other genres.
BAD_GENRES = {b'Adult', b'Documentary', b'Short', b'Horror', b'Reality-TV',
              b'Talk-Show', b'Game-Show', b'Reality-tv'}


def main():
    """Build titles.csv, release_dates.csv and cast.csv under ``../data``.

    Changes the working directory to the directory holding this script, so
    the ``*.list.gz`` inputs are expected to sit next to it.
    """
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    if not os.path.isdir('../data'):
        os.makedirs('../data')

    interesting_titles = _load_interesting_titles()
    print('Found {0} titles'.format(len(interesting_titles)))

    _write_titles(interesting_titles)
    _write_release_dates(interesting_titles)
    _write_cast(interesting_titles)


def _load_interesting_titles():
    """Return the set of raw (bytes) titles in genres.list.gz worth keeping.

    A title is kept when it is a real movie, is pure ASCII, and none of its
    genre rows names a genre in BAD_GENRES.
    """
    titles = set()
    uninteresting_titles = set()

    with gzip.open('genres.list.gz') as gz:
        lines = iter(gz)

        # Skip the preamble up to the header of the genre table.
        line = next(lines)
        while line != b'8: THE GENRES LIST\n':
            line = next(lines)
        assert next(lines) == b'==================\n'
        assert next(lines) == b'\n'

        print('Reading "genres.list.gz" to find interesting movies')

        for line in lines:
            if not_a_real_movie(line):
                continue

            fields = split_on_tabs(line.strip(b'\n'))
            raw_title = fields[0]
            genre = fields[1]

            try:
                raw_title.decode('ascii')
            except UnicodeDecodeError:
                continue

            if genre in BAD_GENRES:
                uninteresting_titles.add(raw_title)
            else:
                titles.add(raw_title)

    # One bad genre row disqualifies the title entirely.
    return titles - uninteresting_titles


def _write_titles(interesting_titles):
    """Write one (title, year) row per interesting title to titles.csv."""
    print('Writing "titles.csv"')

    with open('../data/titles.csv', 'w') as f:
        output = csv.writer(f)
        output.writerow(('title', 'year'))
        for raw_title in interesting_titles:
            title, year = parse_title(raw_title)
            if title is None:
                continue  # unparsable title; do not emit a blank row
            output.writerow((title, year))

    print('Finished writing "titles.csv"')


def _write_release_dates(interesting_titles):
    """Write (title, year, country, date) rows to release_dates.csv."""
    print('Reading release dates from "release-dates.list.gz"')

    with gzip.open('release-dates.list.gz') as gz, \
            open('../data/release_dates.csv', 'w') as f:
        lines = iter(gz)
        line = next(lines)
        while line != b'RELEASE DATES LIST\n':
            line = next(lines)
        assert next(lines) == b'==================\n'

        output = csv.writer(f)
        output.writerow(('title', 'year', 'country', 'date'))

        for line in lines:
            if not_a_real_movie(line):
                continue

            if line.startswith(b'----'):
                continue

            fields = split_on_tabs(line.strip(b'\n'))
            if len(fields) > 2:     # ignore "DVD premier" lines and so forth
                continue

            raw_title = fields[0]
            if raw_title not in interesting_titles:
                continue

            title, year = parse_title(raw_title)
            if title is None:
                continue

            country, datestr = fields[1].decode('ascii').split(':')
            try:
                date = datetime.strptime(datestr, '%d %B %Y').date()
            except ValueError:
                continue  # incomplete dates like "April 2014"
            output.writerow((title, year, country, date))

    print('Finished writing "release_dates.csv"')


def _write_cast(interesting_titles):
    """Write (title, year, name, type, character, n) rows to cast.csv."""
    # Names are decoded as Latin-1 and may be non-ASCII, so on Python 3 the
    # output encoding is pinned; Python 2's open() has no such argument.
    if sys.version_info < (3, 0):
        f = open('../data/cast.csv', 'w')
    else:
        f = open('../data/cast.csv', 'w', encoding='utf-8')

    with f:
        output = csv.writer(f)
        output.writerow(('title', 'year', 'name', 'type', 'character', 'n'))

        for role_type, filename in (
                ('actor', 'actors.list.gz'),
                ('actress', 'actresses.list.gz'),
                ):
            print('Reading {0!r}'.format(filename))
            with gzip.open(filename) as gz:
                _write_cast_file(output, iter(gz), role_type,
                                 interesting_titles)

    print('Finished writing "cast.csv"')


def _write_cast_file(output, lines, role_type, interesting_titles):
    """Copy the roles found in one actors/actresses list into ``output``."""
    # Skip the preamble up to the "Name ... Titles" column header.
    line = next(lines)
    while (b'Name' not in line) or (b'Titles' not in line):
        line = next(lines)

    assert b'----' in next(lines)

    for line in lines:
        if line.startswith(b'----------------------'):
            break

        line = line.rstrip()
        if not line:
            continue

        fields = split_on_tabs(line.strip(b'\n'))
        if fields[0]:
            # A non-empty first column starts a new person; later lines with
            # an empty first column list further roles for the same name.
            name = decode_ascii(fields[0])
            name = swap_names(name)

        if len(fields) < 2:
            raise ValueError('broken line: {!r}'.format(line))

        if not_a_real_movie(fields[1]):
            continue

        # Title, "[character]" and "<n>" are separated by TWO spaces; the
        # title itself contains single spaces (e.g. "Sleuth (1972)"), so it
        # must not be split on one.
        fields = fields[1].split(b'  ')
        raw_title = fields[0]
        if raw_title not in interesting_titles:
            continue

        if len(fields) < 2:
            continue

        if fields[1].startswith(b'('):    # uncredited, archive footage, etc
            del fields[1]
            if len(fields) < 2:
                continue

        if not fields[1].startswith(b'['):
            continue

        character = decode_ascii(fields[1].strip(b'[]'))

        if len(fields) > 2 and fields[2].startswith(b'<'):
            n = int(fields[2].strip(b'<>'))
        else:
            n = ''

        title, year = parse_title(raw_title)
        if title is None:
            continue

        if character == 'N/A':
            clist = ['(N/A)']
        else:
            clist = character.split('/')

        # "A/B" credits become one row per character name.
        for character in clist:
            if not character:
                continue
            output.writerow((title, year, name, role_type, character, n))


def not_a_real_movie(line):
    """Return True if the bytes ``line`` is marked as something other than
    a theatrical movie with a known year."""
    return (
        line.startswith(b'"')      # TV show
        or b'{' in line            # TV episode
        or b' (????' in line       # Unknown year
        or b' (TV)' in line        # TV Movie
        or b' (V)' in line         # Video
        or b' (VG)' in line        # Video game
        or b' (segment ' in line   # Anthology film
        )


# "Title (1996)" or "Title (1996/II)": title, year, optional "/numeral".
match_title = re.compile(r'^(.*) \((\d+)(/[IVXL]+)?\)$').match


def parse_title(raw_title):
    """Split a raw bytes title such as b'Hamlet (1996/II)' into (title, year).

    A Roman numeral other than "I" is kept as a suffix on the title, e.g.
    ('Hamlet (II)', 1996).  Returns (None, None) when the title is not
    ASCII or does not have the "Title (year)" shape.
    """
    try:
        title = raw_title.decode('ascii')
    except UnicodeDecodeError:
        return None, None

    m = match_title(title)
    if m is None:
        # Callers already skip on ``title is None``; do not crash on .group().
        return None, None

    title = m.group(1)
    year = int(m.group(2))
    numeral = m.group(3)

    if numeral is not None:
        numeral = numeral.strip('/')
        if numeral != 'I':
            title = '{0} ({1})'.format(title, numeral)

    return title, year


def swap_names(name):
    """Turn 'Last, First' into 'First Last', dropping a trailing ' (I)'."""
    if name.endswith(' (I)'):
        name = name[:-4]
    if ',' in name:
        last, first = name.split(',', 1)
        name = first.strip() + ' ' + last.strip()
    return name


def decode_ascii(s):
    """Decode bytes to text.

    Despite the name this decodes as Latin-1, which accepts every byte
    value; an earlier ASCII-with-replacement approach was abandoned.
    """
    return s.decode('latin-1')
230 | 231 | 232 | if __name__ == '__main__': 233 | main() 234 | -------------------------------------------------------------------------------- /build/BUILD.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | cd "$( dirname "${BASH_SOURCE[0]}" )" 5 | 6 | curl -O ftp://ftp.fu-berlin.de/misc/movies/database/frozendata/actors.list.gz 7 | curl -O ftp://ftp.fu-berlin.de/misc/movies/database/frozendata/actresses.list.gz 8 | curl -O ftp://ftp.fu-berlin.de/misc/movies/database/frozendata/genres.list.gz 9 | curl -O ftp://ftp.fu-berlin.de/misc/movies/database/frozendata/release-dates.list.gz 10 | 11 | python ./BUILD.py 12 | -------------------------------------------------------------------------------- /build/split.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import glob 4 | import json 5 | import os 6 | import re 7 | 8 | def blank_code_cell(): 9 | return { 10 | "cell_type": "code", 11 | "execution_count": None, 12 | "metadata": { 13 | "collapsed": True 14 | }, 15 | "outputs": [], 16 | "source": [], 17 | } 18 | 19 | def question_cell(text): 20 | return { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "collapsed": True 24 | }, 25 | "source": '### ' + text.strip(), 26 | } 27 | 28 | def main(): 29 | session_cells = {n: [] for n in range(1, 6+1)} 30 | f = open(os.path.dirname(os.path.abspath(__file__)) + '/../All.ipynb') 31 | j = json.load(f) 32 | cells = j['cells'] 33 | for cell in cells: 34 | source = u''.join(cell['source']) 35 | m = re.search(r'# +(\d+)\. 
', source.strip()) 36 | if not m: 37 | continue 38 | n = int(m.group(1)) 39 | session_cells[n].append(cell) 40 | for n, cells in sorted(session_cells.items()): 41 | print 'Session {}: {} cells'.format(n, len(cells)) 42 | 43 | def convert(filename): 44 | f = open(filename) 45 | j = json.load(f) 46 | j['cells'] = list(filter_cells(filename, j['cells'])) 47 | assert 'Solutions' in filename 48 | with open(filename.replace('Solutions', 'Exercises'), 'w') as f: 49 | f.write(json.dumps(j, indent=2)) 50 | 51 | def filter_cells(filename, cells): 52 | n = 0 53 | starting = True 54 | for cell in cells: 55 | if cell['cell_type'] != 'code': 56 | continue 57 | source = u''.join(cell['source']) 58 | 59 | if starting: 60 | if not source.startswith('# '): 61 | yield cell 62 | else: 63 | starting = False 64 | 65 | if not source.startswith('# '): 66 | continue 67 | 68 | question = [] 69 | 70 | for line in cell['source']: 71 | if not line.startswith('# '): 72 | break 73 | question.append(line[2:].strip()) 74 | 75 | question = ' '.join(question) 76 | 77 | yield question_cell(question) 78 | 79 | yield blank_code_cell() 80 | yield blank_code_cell() 81 | 82 | n += 1 83 | print '{:6} {}'.format(n, filename) 84 | 85 | def main2(): 86 | for filename in sorted(glob.glob('Solutions-*.ipynb')): 87 | convert(filename) 88 | 89 | if __name__ == '__main__': 90 | main2() 91 | -------------------------------------------------------------------------------- /cheat-sheet.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Welcome! 5 | 6 | 7 | 1. Install Pandas and the IPython Notebook 8 | 9 | The Anaconda distribution includes Pandas built-in: 10 | https://www.anaconda.com/download/ 11 | 12 | 13 | 2. Download and unzip "Pandas-Tutorial.zip" from 14 | 15 | https://github.com/brandon-rhodes/pycon-pandas-tutorial/releases 16 | 17 | 18 | 3. 
Start the IPython Notebook and visit the "pandas-tutorial" 19 | folder from inside the .zip 20 | 21 | 22 | 23 | len(df) series + value df[df.c == value] 24 | df.head() series + series2 df[(df.c >= value) & (df.d < value)] 25 | df.tail() series.notnull() df[(df.c < value) | (df.d != value)] 26 | df.COLUMN series.isnull() df.sort_values('column') 27 | df['COLUMN'] series.sort_index() df.sort_values(['column1', 'column2']) 28 | 29 | 30 | 31 | 32 | import sys 33 | reload(sys) 34 | sys.setdefaultencoding('utf-8') 35 | 36 | 37 | 38 | 39 | 40 | 41 | https://www.anaconda.com/download/ 42 | 43 | https://github.com/brandon-rhodes/pycon-pandas-tutorial/releases 44 | 45 | 46 | len(df) series + value df[df.c == value] 47 | df.head() series + series2 df[(df.c >= value) & (df.d < value)] 48 | df.tail() series.notnull() df[(df.c < value) | (df.d != value)] 49 | df.COLUMN series.isnull() df.sort_values('column') 50 | df['COLUMN'] series.sort_index() df.sort_values(['column1', 'column2']) 51 | 52 | s.str.len() s.value_counts() 53 | s.str.contains() s.sort_index() df[['column1', 'column2']] 54 | s.str.startswith() s.plot(...) df.plot(x='a', y='b', kind='bar') 55 | 56 | 57 | import sys 58 | reload(sys) 59 | sys.setdefaultencoding('utf-8') 60 | 61 | 62 | 63 | 64 | len(df) series + value df[df.c == value] 65 | df.head() series + series2 df[(df.c >= value) & (df.d < value)] 66 | df.tail() series.notnull() df[(df.c < value) | (df.d != value)] 67 | df.COLUMN series.isnull() df.sort_values('column') 68 | df['COLUMN'] series.sort_index() df.sort_values(['column1', 'column2']) 69 | 70 | s.str.len() s.value_counts() 71 | s.str.contains() s.sort_index() df[['column1', 'column2']] 72 | s.str.startswith() s.plot(...) 
df.plot(x='a', y='b', kind='bar') 73 | 74 | df.set_index('a').sort_index() df.loc['value'] 75 | df.set_index(['a', 'b']).sort_index() df.loc[('v','u')] 76 | df.groupby('column') .size() .mean() .min() .max() 77 | df.groupby(['column1', 'column2']) .agg(['min', 'max']) 78 | 79 | 80 | 81 | "The Pink Panther" 82 | 83 | 84 | brandon@rhodesmill.org 85 | 86 | 87 | 88 | len(df) series + value df[df.c == value] 89 | df.head() series + series2 df[(df.c >= value) & (df.d < value)] 90 | df.tail() series.notnull() df[(df.c < value) | (df.d != value)] 91 | df.COLUMN series.isnull() df.sort_values('column') 92 | df['COLUMN'] series.sort_index() df.sort_values(['column1', 'column2']) 93 | 94 | s.str.len() s.value_counts() 95 | s.str.contains() s.sort_index() df[['column1', 'column2']] 96 | s.str.startswith() s.plot(...) df.plot(x='a', y='b', kind='bar') 97 | 98 | df.set_index('a').sort_index() df.loc['value'] 99 | df.set_index(['a', 'b']).sort_index() df.loc[('v','u')] 100 | df.groupby('column') .size() .mean() .min() .max() 101 | df.groupby(['column1', 'column2']) .agg(['min', 'max']) 102 | 103 | df.unstack() 104 | df.stack() 105 | df.fillna(value) 106 | s.fillna(value) 107 | 108 | 109 | 110 | 111 | len(df) series + value df[df.c == value] 112 | df.head() series + series2 df[(df.c >= value) & (df.d < value)] 113 | df.tail() series.notnull() df[(df.c < value) | (df.d != value)] 114 | df.COLUMN series.isnull() df.sort_values('column') 115 | df['COLUMN'] series.sort_index() df.sort_values(['column1', 'column2']) 116 | 117 | s.str.len() s.value_counts() 118 | s.str.contains() s.sort_index() df[['column1', 'column2']] 119 | s.str.startswith() s.plot(...) 
df.plot(x='a', y='b', kind='bar') 120 | 121 | df.set_index('a').sort_index() df.loc['value'] 122 | df.set_index(['a', 'b']).sort_index() df.loc[('v','u')] 123 | df.groupby('column') .size() .mean() .min() .max() 124 | df.groupby(['column1', 'column2']) .agg(['min', 'max']) 125 | 126 | df.unstack() s.dt.year 127 | df.stack() s.dt.month 128 | df.fillna(value) s.dt.day 129 | s.fillna(value) s.dt.dayofweek 130 | 131 | 132 | 133 | 134 | len(df) series + value df[df.c == value] 135 | df.head() series + series2 df[(df.c >= value) & (df.d < value)] 136 | df.tail() series.notnull() df[(df.c < value) | (df.d != value)] 137 | df.COLUMN series.isnull() df.sort_values('column') 138 | df['COLUMN'] series.sort_index() df.sort_values(['column1', 'column2']) 139 | 140 | s.str.len() s.value_counts() 141 | s.str.contains() s.sort_index() df[['column1', 'column2']] 142 | s.str.startswith() s.plot(...) df.plot(x='a', y='b', kind='bar') 143 | 144 | df.set_index('a').sort_index() df.loc['value'] 145 | df.set_index(['a', 'b']).sort_index() df.loc[('v','u')] 146 | df.groupby('column') .size() .mean() .min() .max() 147 | df.groupby(['column1', 'column2']) .agg(['min', 'max']) 148 | 149 | df.unstack() s.dt.year df.merge(df2, how='outer', ...) 150 | df.stack() s.dt.month df.rename(columns={'a': 'y', 'b': 'z'}) 151 | df.fillna(value) s.dt.day pd.concat([df1, df2]) 152 | s.fillna(value) s.dt.dayofweek 153 | 154 | 155 | 156 | 157 | 158 | 159 | Thanks! 160 | 161 | Any questions? 162 | 163 | Local variables: 164 | mode:text 165 | mode:page 166 | End: 167 | -------------------------------------------------------------------------------- /email.txt: -------------------------------------------------------------------------------- 1 | 2 | [This is the email I sent out ahead of time to the students who were 3 | scheduled to attend the tutorial at PyCon 2015.] 4 | 5 | Wow. PyCon is almost here! How can you help make our Pandas tutorial on Wednesday morning a success? 
6 | 7 | Easy — by installing Pandas right now! :) 8 | 9 | This is the first of three emails you will receive before we all arrive and start the tutorial at 9am on Tuesday. 10 | 11 | (a) This email says how to install Pandas. 12 | 13 | (b) A Monday evening email will suggest a sample command to try running to make sure that your install is working. 14 | 15 | (c) Finally, a Tuesday email will point you at a few data sets to download before you arrive for the tutorial! 16 | 17 | A PyCon tutorial is only about three hours long, and we want every minute that we can spare to be available for learning, conversing, and working together on data analysis problems. The best way to make the time as valuable as possible is, if you can, to get Pandas installed and working on the laptop you will be bringing with you into the tutorial. Although one or two TA's will be present to help with last-minute emergencies, an already-working Pandas install is the best way to be prepared to pay attention to the material and not be distracted by unfortunate obstacles thrown in your way by your operating system. 18 | 19 | I recommend the following combination: 20 | 21 | Python + Pandas + IPython Notebook + matplotlib 22 | 23 | Fortunately, there is a very easy way to get this modern data-science stack installed: the Anaconda distribution from Continuum Analytics! Their version of Python comes bundled with everything you need, and it is what I myself will be running in the tutorial: 24 | 25 | https://www.anaconda.com/download/ 26 | 27 | For this evening — or tomorrow, if that is when you have a few minutes — set the goal of installing the components listed above and then being able to start up an IPython Notebook and see its interface appear in your browser. By clicking on "New -> Python" to create a new notebook, typing "3+3" into the box, and pressing Shift+Enter to get the result "6", you can confirm that the IPython Notebook is working and able to talk to Python itself. 
28 | 29 | Feel free to email me at brandon@rhodesmill.org if you have any questions, and I look forward to seeing you all on Tuesday! 30 | 31 | — Brandon 32 | -------------------------------------------------------------------------------- /images/Makefile: -------------------------------------------------------------------------------- 1 | TABLE_HTML := $(wildcard *.html) 2 | TABLE_IMG := $(addsuffix .png, $(basename $(TABLE_HTML))) 3 | 4 | tmp.png: tmp.dot $(TABLE_IMG) 5 | dot -Tpng -o tmp.png tmp.dot 6 | 7 | $(TABLE_IMG): %.png: %.html ../style-table.css 8 | (echo ''; cat $<) |\ 9 | wkhtmltoimage - tmp.png && convert -trim tmp.png $@ 10 | 11 | -------------------------------------------------------------------------------- /images/loop.sh: -------------------------------------------------------------------------------- 1 | ,make-loop ../style-table.css *.html Makefile tmp.dot 2 | -------------------------------------------------------------------------------- /images/sample.csv: -------------------------------------------------------------------------------- 1 | Title,Year,Director 2 | North by Northwest,1959,Alfred Hitchcock 3 | Notorious,1946,Alfred Hitchcock 4 | The Philadelphia Story,1940,George Cukor 5 | To Catch a Thief,1955,Alfred Hitchcock 6 | His Girl Friday,1940,Howard Hawks 7 | -------------------------------------------------------------------------------- /images/tmp.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | # graph [rankdir=LR] 3 | node [label=""] 4 | edge [fontname="Ubuntu Light"; fontsize=12.0] 5 | 6 | u__tyd [shape=none image="u__tyd.png"]; 7 | u_d_ty [shape=none image="u_d_ty.png"]; 8 | s_d_ty [shape=none image="s_d_ty.png"]; 9 | u_dt_y [shape=none image="u_dt_y.png"]; 10 | s_dt_y [shape=none image="s_dt_y.png"]; 11 | 12 | u__tyd -> u_d_ty [label=" .set_index('director')"]; 13 | u_d_ty -> u_dt_y [label=" .set_index('title', append=True)"]; 14 | u__tyd -> u_dt_y [label=" 
.set_index(['director', 'title'])"]; 15 | 16 | s_d_ty -> s_dt_y [label=" .set_index('title', append=True)"]; 17 | 18 | u_d_ty -> s_d_ty [label=" .sort_index()"]; 19 | u_dt_y -> s_dt_y [label=" .sort_index()"]; 20 | 21 | {rank=same; u_d_ty s_d_ty} 22 | {rank=same; u_dt_y s_dt_y} 23 | 24 | } -------------------------------------------------------------------------------- /requirements.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | conda install --yes ipython-notebook matplotlib pandas scipy "$@" 4 | -------------------------------------------------------------------------------- /sales1.csv: -------------------------------------------------------------------------------- 1 | Book title,Number sold,Sales price,Royalty paid 2 | The Bricklayer’s Bible,8,2.99,0.55 3 | Swimrand,2,1.99,0.35 4 | Pining For The Fisheries of Yore,28,2.99,0.55 5 | The Duck Goes Here,34,2.99,0.55 6 | The Tower Commission Report,4,11.50,4.25 7 | -------------------------------------------------------------------------------- /sales2.csv: -------------------------------------------------------------------------------- 1 | Title,Units sold,List price,Royalty 2 | , 3 | Sales report for Q4 4 | E-Book Reader US Store 5 | Pining for the Fisheries of Yore,80,3.50,14.98 6 | Swimrand,1,2.99,0.14 7 | The Bricklayer's Bible,17,3.50,5.15 8 | The Duck Goes Here,34,2.99,5.78 9 | The Tower Commission Report,4,9.50,6.20 10 | US royalties (USD),,,32.25 11 | , 12 | , 13 | Sales report for Q4 14 | E-Book Reader UK Store 15 | Pining for the Fisheries of Yore,47,2.99,11.98 16 | The Bricklayer's Bible,17,2.99,3.50 17 | The Tower Commission Report,4,6.50,4.80 18 | UK royalties (GBP),,,20.28 19 | , 20 | , 21 | Sales report for Q4 22 | E-Book Reader France Store 23 | Swimrand,8,1.99,0.88 24 | The Duck Goes Here,12,1.99,1.50 25 | France royalties (EUR),,,2.38 26 | -------------------------------------------------------------------------------- /script.txt: 
-------------------------------------------------------------------------------- 1 | 2 | Session 1 3 | ========= 4 | 5 | You should do install if you have not already. 6 | We have Internet! But, I have USB keys too. 7 | Format: lesson, exercises, solutions. 8 | 9 | Open Exercise-1 10 | Point out .from_csv functions 11 | 12 | VIEWING 13 | 14 | len(titles) 15 | titles.head() and .head(20) 16 | titles.tail() and .tail(10) 17 | titles 18 | 19 | FILTERING 20 | 21 | h = titles.head() 22 | h['year'] or h.year 23 | 24 | h.year + 1000 25 | h.year - 2000 26 | 27 | h.year > 1960 28 | h[h.year > 1960] 29 | h[h.year > 1960 & h.year < 1970] 30 | h[(h.year > 1960) & (h.year < 1970)] 31 | t.year // 10 * 10 32 | h[h.title == '...'] 33 | 34 | SORTING 35 | 36 | titles.sort_values(['title']) 37 | titles.sort_values(['year']) 38 | titles.sort_values(['year', 'title']) 39 | 40 | Session 2 41 | ========= 42 | 43 | STRING METHODS 44 | 45 | h.str.len() 46 | h.str.startswith(s) 47 | h.str.extract(RE) 48 | 49 | AGGREGATION 50 | 51 | titles.year.value_counts() 52 | titles.year.value_counts().plot() whoops! 
53 | titles.year.index 54 | titles.year.value_counts().sort_index().plot() 55 | titles.year.value_counts().sort_index().plot(kind='bar') 56 | 57 | c = cast 58 | c = c[c.character == 'Kermit the Frog'] 59 | c.plot(x='year', y='n', kind='scatter') 60 | 61 | COLUMNS 62 | 63 | Can be hard to see data 64 | 65 | c = cast 66 | c = c[c.character == 'Kermit the Frog'] 67 | c = c[['year', 'n']] 68 | c 69 | 70 | Can also: 71 | 72 | c[['year']] 73 | 74 | Session 3 75 | ========= 76 | 77 | INDEXES - SPEED 78 | 79 | %%time cast[cast.title == 'Sleuth'] 80 | c = cast.set_index(['title']) 81 | %%time c.loc['Sleuth'] 82 | c = cast.set_index(['title']).sort_index() 83 | %%time c.loc['Sleuth'] 84 | 85 | c = cast.set_index(['title', 'year']).sort_index() 86 | c.loc['Sleuth'] 87 | c.loc['Sleuth',1996] 88 | c.loc[('Sleuth',1996),'character'] 89 | c.loc[('Sleuth',1996),('character','n')] 90 | 91 | .reset_index('title') 92 | .reset_index('year') 93 | .reset_index(['title', 'year']) 94 | .reset_index() 95 | 96 | INDEXES - GROUP BY 97 | 98 | c = cast 99 | c = c[c.name == 'George Clooney'] 100 | c.groupby(['title', 'year', 'character']).size() 101 | 102 | c = cast 103 | c = c[c.name == 'George Clooney'] 104 | c.groupby(['character', 'title', 'year']).size() 105 | 106 | c = cast 107 | c = c[c.name == 'George Clooney'] 108 | c.groupby(['character']).size() 109 | 110 | # How many times has he had two roles in the same film? 111 | 112 | c = cast 113 | c = c[c.name == 'George Clooney'] 114 | c = c.groupby(['year', 'title']).size() 115 | c[c > 1] 116 | 117 | c = cast 118 | c = c[c.name == 'George Clooney'] 119 | c.groupby([c.year // 10 * 10, 'character']).size() 120 | 121 | c = cast 122 | c = c[c.name == 'George Clooney'] 123 | c.groupby(['character', c.year // 10 * 10]).size() 124 | 125 | TODO: mean min max! 
126 | 127 | Session 4 128 | ========= 129 | 130 | UNSTACK 131 | 132 | c = cast 133 | c = c[(c.character == 'Kermit the Frog') | (c.character == 'Oscar the Grouch')] 134 | g = c.groupby(['character', c.year // 10 * 10]).size() 135 | g 136 | 137 | How can we compare years? Unstack! 138 | 139 | g.unstack('year') 140 | g.unstack('character') 141 | 142 | u = g.unstack('character') 143 | u['difference'] = u['Kermit the Frog'] - u['Oscar the Grouch'] 144 | u 145 | 146 | But, NaN. 147 | 148 | u = g.unstack('character').fillna(0) 149 | u['difference'] = u['Kermit the Frog'] - u['Oscar the Grouch'] 150 | u 151 | 152 | THE DANGERS OF UNSTACK 153 | 154 | Do it again? Oh no, we get a series! 155 | 156 | .stack() again to repair damage, BUT can devolve to series again. 157 | 158 | PLOTTING 159 | 160 | Ratio? 161 | 162 | u = g.unstack('character') 163 | total = u['Oscar the Grouch'] + u['Kermit the Frog'] 164 | u['difference'] = u['Oscar the Grouch'] / total 165 | u.difference.plot(ylim=[0,1]) 166 | 167 | Indexing and grouping have been moving our data LEFT. 168 | "Unstacking" moves it UP, to columns! Stacking, DOWN. 169 | 170 | Session 5 171 | ========= 172 | 173 | r = release_dates 174 | r = r[r.title == 'Inception'] 175 | r.date.dt.year 176 | 177 | year month date dayofweek dayofyear 178 | 179 | MERGE 180 | 181 | What if we were interested in fetching release dates, 182 | NOT by information in that table itself, 183 | but by information over in "cast"? 184 | 185 | c = cast 186 | c = c[c.name == 'Ellen Page'] 187 | c = c.merge(release_dates) 188 | c 189 | 190 | Session 6 191 | ========= 192 | 193 | c = cast 194 | c = c[c.n <= 2] 195 | c = c[c.name == 'Cary Grant'] 196 | 197 | c = c.merge(cast, on=['title', 'year']) 198 | c = c[c.n_y <= 2] 199 | c = c[c.name_y != 'Cary Grant'] 200 | c = c[['title', 'year', 'name_x', 'name_y']] 201 | c 202 | 203 | c.groupby('name_y').size().sort_values(ascending=False) 204 | 205 | reindex? or what? yeah. 
206 | .dropna() 207 | .info() 208 | 209 | Pivot 210 | 211 | r = release_dates 212 | r = r[r.title.str.startswith('Star Wars: Episode')] 213 | r = r[r.country.str.startswith('U')] 214 | r.pivot('title', 'country', 'date') 215 | 216 | which is the same as 217 | 218 | r.set_index(['title', 'country'])[['date']].unstack() 219 | 220 | .rename(columns={...}) 221 | .concat(df) 222 | 223 | Thoughts for later 224 | ================== 225 | 226 | (who had which co-stars how often) 227 | (what pairs of co-stars have appeared the most often together) 228 | Can you use merge to find who was in movies with each other? 229 | 230 | Fix later: second exercise s/hamlet/batman/ 231 | -------------------------------------------------------------------------------- /style-notebook.css: -------------------------------------------------------------------------------- 1 | h3 { 2 | color: white; 3 | background-color: black; 4 | padding: 0.5em; 5 | } 6 | -------------------------------------------------------------------------------- /style-table.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: Helvetica; 4 | } 5 | table.dataframe { 6 | border-collapse: collapse; 7 | border: none; 8 | } 9 | table.dataframe tr { 10 | border: none; 11 | } 12 | table.dataframe td, table.dataframe th { 13 | margin: 0; 14 | border: 1px solid white; 15 | padding-left: 0.25em; 16 | padding-right: 0.25em; 17 | } 18 | table.dataframe th:not(:empty) { 19 | background-color: #fec; 20 | text-align: left; 21 | font-weight: normal; 22 | } 23 | table.dataframe tr:nth-child(2) th:empty { 24 | border-left: none; 25 | border-right: 1px dashed #888; 26 | } 27 | table.dataframe td { 28 | border: 2px solid #ccf; 29 | background-color: #f4f4ff; 30 | } 31 | -------------------------------------------------------------------------------- /youtube.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/brandon-rhodes/pycon-pandas-tutorial/fc150489efcfd6f6630cbae914fb3599ff0e6993/youtube.png --------------------------------------------------------------------------------