├── .gitignore ├── README.md ├── environment.yml ├── images ├── groupby.PNG ├── install.png ├── logo2.png ├── mpl_anatomy.png └── retinal-variables.png ├── installation.md └── notebooks ├── 00_getting_started.ipynb ├── 01_overview.ipynb ├── 02_visual_variables.ipynb ├── 03_aggregations.ipynb ├── 04_images.ipynb ├── 06-interactive.ipynb ├── 07-line-creator.ipynb ├── 09-interactive_annotation.ipynb ├── 10-selector_widget.ipynb ├── data ├── auto-mpg.data ├── auto-mpg.txt ├── dem.npy └── titanicfull.csv └── helpers └── ensure_print.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](images/logo2.png) 2 | # Scipy 2019 Tutorial 3 | Matplotlib is a Python 2D plotting library which produces publication quality figures in a variety of hardcopy formats and interactive environments across platforms. Matplotlib can be used in Python scripts, the Python and IPython shells, the Jupyter notebook, web application servers, and four graphical user interface toolkits. 
4 | 5 | 6 | 7 | ## Installation 8 | Instructions can be found [here](installation.md) 9 | If the installation doesn't work, try [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/story645/mpl_tutorial/master) 10 | 11 | ## Acknowledgements 12 | 13 | This tutorial is inspired by and steals liberally from [Benjamin Root](https://github.com/WeatherGod)'s fantastic [Anatomy of Matplotlib](https://github.com/matplotlib/AnatomyOfMatplotlib) tutorial. It also borrows some things from Rachel Rakov & Hannah Aizenman's [Introduction to Machine Learning with Sklearn](https://github.com/DHRI-Curriculum/machine-learning). 14 | 15 | The authors are also grateful to Michael Grossberg for his liberal feedback and heatmap data set suggestion, Sadie Bartholomew for inspiring a coding challenge with her gorgeous [tweet](https://twitter.com/sadie_lb/status/1145385648532205568), and tutorial participant Dershan Luo for the install check bug fix. :) 16 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: mpltutorial 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - attrs=19.1.0=py_0 8 | - backcall=0.1.0=py_0 9 | - bleach=3.1.0=py_0 10 | - bzip2=1.0.6=h14c3975_1002 11 | - ca-certificates=2019.6.16=hecc5488_0 12 | - certifi=2019.6.16=py37_0 13 | - cycler=0.10.0=py_1 14 | - dbus=1.13.6=he372182_0 15 | - decorator=4.4.0=py_0 16 | - defusedxml=0.5.0=py_1 17 | - entrypoints=0.3=py37_1000 18 | - expat=2.2.5=he1b5a44_1003 19 | - fontconfig=2.13.1=he4413a7_1000 20 | - freetype=2.10.0=he983fc9_0 21 | - gettext=0.19.8.1=hc5be6a0_1002 22 | - glib=2.58.3=h6f030ca_1001 23 | - gst-plugins-base=1.14.5=h0935bb2_0 24 | - gstreamer=1.14.5=h36ae1b5_0 25 | - icu=58.2=hf484d3e_1000 26 | - ipykernel=5.1.1=py37h24bf2e0_0 27 | - ipympl=0.2.1=py37_1002 28 | - ipython=7.6.1=py37h5ca1d4c_0 29 | - 
ipython_genutils=0.2.0=py_1 30 | - ipywidgets=7.5.0=py_0 31 | - jedi=0.14.0=py37_0 32 | - jinja2=2.10.1=py_0 33 | - jpeg=9c=h14c3975_1001 34 | - jsonschema=3.0.1=py37_0 35 | - jupyter=1.0.0=py_2 36 | - jupyter_client=5.3.0=py_0 37 | - jupyter_console=6.0.0=py_0 38 | - jupyter_contrib_core=0.3.3=py_2 39 | - jupyter_contrib_nbextensions=0.5.1=py37_0 40 | - jupyter_core=4.4.0=py_0 41 | - jupyter_highlight_selected_word=0.2.0=py37_1000 42 | - jupyter_latex_envs=1.4.4=py37_1000 43 | - jupyter_nbextensions_configurator=0.4.1=py37_0 44 | - jupyterlab=0.35.5=py37hf63ae98_0 45 | - jupyterlab_server=0.2.0=py37_0 46 | - kiwisolver=1.1.0=py37hc9558a2_0 47 | - libblas=3.8.0=10_openblas 48 | - libcblas=3.8.0=10_openblas 49 | - libffi=3.2.1=he1b5a44_1006 50 | - libgcc-ng=9.1.0=hdf63c60_0 51 | - libgfortran-ng=7.3.0=hdf63c60_0 52 | - libiconv=1.15=h516909a_1005 53 | - liblapack=3.8.0=10_openblas 54 | - libopenblas=0.3.6=h6e990d7_4 55 | - libpng=1.6.37=hed695b0_0 56 | - libsodium=1.0.16=h14c3975_1001 57 | - libstdcxx-ng=9.1.0=hdf63c60_0 58 | - libtiff=4.0.10=h57b8799_1003 59 | - libuuid=2.32.1=h14c3975_1000 60 | - libxcb=1.13=h14c3975_1002 61 | - libxml2=2.9.9=h13577e0_1 62 | - libxslt=1.1.32=hae48121_1003 63 | - lxml=4.3.4=py37h7ec2d77_0 64 | - lz4-c=1.8.3=he1b5a44_1001 65 | - markupsafe=1.1.1=py37h14c3975_0 66 | - matplotlib=3.1.0=py37_1 67 | - matplotlib-base=3.1.0=py37hfd891ef_1 68 | - mistune=0.8.4=py37h14c3975_1000 69 | - nbconvert=5.5.0=py_0 70 | - nbformat=4.4.0=py_1 71 | - ncurses=6.1=hf484d3e_1002 72 | - notebook=5.7.8=py37_1 73 | - numpy=1.16.4=py37h95a1406_0 74 | - olefile=0.46=py_0 75 | - openblas=0.3.6=h6e990d7_4 76 | - openssl=1.1.1b=h14c3975_1 77 | - pandas=0.24.2=py37hb3f55d8_0 78 | - pandoc=2.7.3=0 79 | - pandocfilters=1.4.2=py_1 80 | - parso=0.5.0=py_0 81 | - pcre=8.41=hf484d3e_1003 82 | - pexpect=4.7.0=py37_0 83 | - pickleshare=0.7.5=py37_1000 84 | - pillow=6.1.0=py37he7afcd5_0 85 | - pip=19.1.1=py37_0 86 | - prometheus_client=0.7.1=py_0 87 | - 
prompt_toolkit=2.0.9=py_0 88 | - pthread-stubs=0.4=h14c3975_1001 89 | - ptyprocess=0.6.0=py_1001 90 | - pygments=2.4.2=py_0 91 | - pyparsing=2.4.0=py_0 92 | - pyqt=5.9.2=py37hcca6a23_0 93 | - pyrsistent=0.15.3=py37h516909a_0 94 | - python=3.7.3=h33d41f4_1 95 | - python-dateutil=2.8.0=py_0 96 | - pytz=2019.1=py_0 97 | - pyyaml=5.1.1=py37h516909a_0 98 | - pyzmq=18.0.2=py37hc4ba49a_0 99 | - qt=5.9.7=h52cfd70_2 100 | - qtconsole=4.5.1=py_0 101 | - readline=8.0=hf8c457e_0 102 | - rise=5.5.1=py37_0 103 | - send2trash=1.5.0=py_0 104 | - setuptools=41.0.1=py37_0 105 | - sip=4.19.8=py37hf484d3e_1000 106 | - six=1.12.0=py37_1000 107 | - sqlite=3.28.0=hcee41ef_1 108 | - terminado=0.8.2=py37_0 109 | - testpath=0.4.2=py_1001 110 | - tk=8.6.9=hed695b0_1002 111 | - tornado=6.0.3=py37h516909a_0 112 | - traitlets=4.3.2=py37_1000 113 | - wcwidth=0.1.7=py_1 114 | - webencodings=0.5.1=py_1 115 | - wheel=0.33.4=py37_0 116 | - widgetsnbextension=3.5.0=py37_0 117 | - xorg-libxau=1.0.9=h14c3975_0 118 | - xorg-libxdmcp=1.1.3=h516909a_0 119 | - xz=5.2.4=h14c3975_1001 120 | - yaml=0.1.7=h14c3975_1001 121 | - zeromq=4.3.1=hf484d3e_1000 122 | - zlib=1.2.11=h14c3975_1004 123 | - zstd=1.4.0=h3b9ef0a_0 124 | prefix: /home/hannah/miniconda3/envs/mpltutorial 125 | 126 | -------------------------------------------------------------------------------- /images/groupby.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/story645/mpl_tutorial/bdf9c547eebae76b65eb75cde62546e0b3149701/images/groupby.PNG -------------------------------------------------------------------------------- /images/install.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/story645/mpl_tutorial/bdf9c547eebae76b65eb75cde62546e0b3149701/images/install.png -------------------------------------------------------------------------------- /images/logo2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/story645/mpl_tutorial/bdf9c547eebae76b65eb75cde62546e0b3149701/images/logo2.png -------------------------------------------------------------------------------- /images/mpl_anatomy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/story645/mpl_tutorial/bdf9c547eebae76b65eb75cde62546e0b3149701/images/mpl_anatomy.png -------------------------------------------------------------------------------- /images/retinal-variables.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/story645/mpl_tutorial/bdf9c547eebae76b65eb75cde62546e0b3149701/images/retinal-variables.png -------------------------------------------------------------------------------- /installation.md: -------------------------------------------------------------------------------- 1 | # Recommended 2 | Instructions for installing the [Anaconda Python](https://www.anaconda.com/distribution/) distribution can be found here: 3 | * https://github.com/story645/install/blob/master/sections/python.md 4 | 5 | Any __3.X__ version of Python is acceptable. Once you have a working Anaconda install, please open the conda terminal and type: 6 | 7 | ```bash 8 | conda install -c conda-forge matplotlib jupyter pandas -y 9 | ``` 10 | 11 | More detailed instructions for using conda to install libraries can be found at 12 | * https://github.com/story645/install/blob/master/sections/conda.md 13 | 14 | # Alternative 15 | If for whatever reason Anaconda does not work, you can install Matplotlib using the pip package installer. 
First open a command line or terminal prompt and then type: 16 | ```bash 17 | python -m pip install -U pip 18 | python -m pip install -U matplotlib 19 | python -m pip install -U pandas 20 | ``` 21 | 22 | # Test Install 23 | To test the install, please open a Jupyter notebook and type the following in a cell: 24 | 25 | ```python 26 | %matplotlib inline 27 | import pandas as pd 28 | import matplotlib.pyplot as plt 29 | import numpy as np 30 | df = pd.read_csv("http://bit.ly/tscv17") 31 | fig, ax = plt.subplots() 32 | _ = ax.plot(np.sort(df['Age']), 'o') 33 | ``` 34 | 35 | Then execute the cell and this figure should appear: 36 | 37 | ![scatter plot, image should look like dots curving upward](images/install.png?) 38 | 39 | 40 | 41 | # Resources 42 | If you are unfamiliar with command or terminal prompts, more information can be found at: 43 | 44 | * __Windows:__ https://github.com/story645/install/blob/master/sections/windows_terminal.md 45 | * __OS/X:__ https://github.com/story645/install/blob/master/sections/osx_terminal.md 46 | 47 | This tutorial uses Jupyter notebook as the programming environment. More information about opening and working with a Jupyter notebook can be found at https://github.com/story645/install/blob/master/sections/jupyter.md 48 | -------------------------------------------------------------------------------- /notebooks/00_getting_started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![](../images/logo2.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Introduction to Matplotlib" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "Matplotlib is a Python visualization library producing publication quality figures in a variety of hardcopy formats and interactive environments. 
Matplotlib can be used in Python scripts, the Python and IPython shell, web application servers, and various graphical user interface toolkits." 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# tutorial: http://bit.ly/scipympl19" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# Get to know the library!\n", 36 | "1. Go to http://bit.ly/mpl_gallery\n", 37 | "2. Pick a visualization you'd like to learn how to create\n", 38 | "3. Turn to your neighbor & discuss w/ each other why you want to learn to make it\n", 39 | "4. Post your images to the slack! \n", 40 | "5. optional: tweet your faves & tag @matplotlib and #scipy2019 ;)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Getting Help!\n", 48 | "\n", 49 | "During this tutorial, please flag down Hannah, Tom (whoever isn't teaching at the moment) or Kimberly. \n", 50 | "\n", 51 | "The easiest way to do this is to post in the Scipy2019 #matplotlib slack channel \n", 52 | "\n", 53 | "__stickies__: Put a sticky note on your laptop. If you've used the flags before, we don't have enough of the orange so in this class all stickies mean please help!" 
54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### After the tutorial" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "To follow up on the material discussed in this tutorial:" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "__Documentation__:\n", 75 | "* https://matplotlib.org/" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "__Mailing lists__:\n", 83 | "* [User](https://mail.python.org/mailman/listinfo/matplotlib-users): matplotlib-users@python.org\n", 84 | "* [Announcement](https://mail.python.org/mailman/listinfo/matplotlib-announce): matplotlib-announce@python.org\n", 85 | "* [Development](https://mail.python.org/mailman/listinfo/matplotlib-devel): matplotlib-devel@python.org" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "__Social__:\n", 93 | "* twitter: [@matplotlib](https://twitter.com/matplotlib)\n", 94 | "* gitter chat: https://gitter.im/matplotlib/matplotlib" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "## What do you need for this tutorial?\n", 102 | "Installation instructions can be found [here](../installation.md). We will be using __Python 3__. We will also be using the [Pandas](https://pandas.pydata.org/) data analysis and the [NumPy](https://www.numpy.org/) numerical analysis libraries to load in and process much of the data that we are trying to visualize. " 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "### Import libraries and check versions\n", 110 | "To use a library in Python, we need to first import it. In this code block, we also print the version of the libraries we are importing for reproducibility. 
Sometimes minor changes in the libraries between versions will cause code to behave unexpectedly - for example the images you produce may look slightly different from the ones in this tutorial. We are using Python [format strings](https://docs.python.org/3.4/library/string.html#string-formatting) for the printing.\n" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "import matplotlib\n", 120 | "print(f'Matplotlib: {matplotlib.__version__}')\n", 121 | "import matplotlib.pyplot as plt # load in matplotlib plotting tools\n", 122 | "import pandas as pd # rename as pd by convention\n", 123 | "print(f\"pandas: {pd.__version__}\")\n", 124 | "import numpy as np # rename as np by convention\n", 125 | "print(f\"numpy: {np.__version__}\")" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "### Get familiar with the titanic dataset" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "For this tutorial we are using the [Kaggle Titanic dataset](https://www.kaggle.com/c/titanic/data) because it has a mix of quantitative and categorical variables and is well suited to data exploration. In this tutorial, we will explore the demographics of passengers on the Titanic. 
" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "| Variable | \tDefinition | \tKey |\n", 147 | "|-----------:|-------------:|-------:|\n", 148 | "| survival | \tSurvival \t| 0 = No, 1 = Yes |\n", 149 | "| pclass | Ticket class \t| 1 = 1st, 2 = 2nd, 3 = 3rd |\n", 150 | "| sex \t| sex | | \t\n", 151 | "| age \t| age in years \t | | \n", 152 | "| sibsp |\t# of siblings / spouses on board \t| |\n", 153 | "| parch |\t# of parents / children on board | |\t\n", 154 | "| ticket| \tTicket number ||\n", 155 | "| fare |\tPassenger fare \t||\n", 156 | "| cabin |\tCabin number \t||\n", 157 | "| embarked |Port of Embarkation | \tC = Cherbourg, Q = Queenstown, S = Southampton|" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "__pclass:__ A proxy for socio-economic status (SES)\n", 165 | "* 1st = Upper\n", 166 | "* 2nd = Middle\n", 167 | "* 3rd = Lower\n", 168 | "\n", 169 | "__age:__ Age is fractional if less than 1. If the age is estimated, it is in the form of xx.5\n", 170 | "\n", 171 | "__sibsp:__ The dataset defines family relations in this way...\n", 172 | "* Sibling = brother, sister, stepbrother, stepsister\n", 173 | "* Spouse = husband, wife (mistresses and fiancés were ignored)\n", 174 | "\n", 175 | "__parch:__ The dataset defines family relations in this way...\n", 176 | "* Parent = mother, father\n", 177 | "* Child = daughter, son, stepdaughter, stepson\n", 178 | "* Some children travelled only with a nanny, therefore parch=0 for them." 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "You are welcome to download a local copy from http://bit.ly/tcsv19. 
" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "df = pd.read_csv(\"http://bit.ly/tcsv19\")" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "We use the `.columns` attribute to print the columns in our dataframe so that we have a reference when trying to access this data throughout this tutorial." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "df.columns" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "# Let's test our install\n", 218 | "\n", 219 | "Here we open the Titanic dataset via url and plot the sorted ages of the passengers. We select the ages from our dataframe (spreadsheet) using `df['age']`, and use numpy's sort because it can handle the missing values in our age column. We use `%matplotlib inline` to tell jupyter to show the matplotlib images. We will unpack the figure generating code in the next couple of notebooks, but basically `fig, ax` creates the area to plot on, and `ax.plot` draws the scatter plot. `_` is used for assignment variables we don't care about, and here specifically we also use it to suppress output. " 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "%matplotlib inline\n", 229 | "fig, ax = plt.subplots()\n", 230 | "_ = ax.plot(np.sort(df['age']), marker='o', markersize=1)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## Lightning notebook introduction!\n", 238 | "\n", 239 | " - notebooks support tab completion! 
In the above cell if we typed `ax.pl` we would get a list of possible completions\n", 240 | " - you can use `?` to get a function's documentation string, which is how the function is documented inside the source code." 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "? ax.plot" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [] 258 | } 259 | ], 260 | "metadata": { 261 | "kernelspec": { 262 | "display_name": "Python 3", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.7.3" 277 | }, 278 | "latex_envs": { 279 | "LaTeX_envs_menu_present": true, 280 | "autoclose": false, 281 | "autocomplete": true, 282 | "bibliofile": "biblio.bib", 283 | "cite_by": "apalike", 284 | "current_citInitial": 1, 285 | "eqLabelWithNumbers": true, 286 | "eqNumInitial": 1, 287 | "hotkeys": { 288 | "equation": "Ctrl-E", 289 | "itemize": "Ctrl-I" 290 | }, 291 | "labels_anchors": false, 292 | "latex_user_defs": false, 293 | "report_style_numbering": false, 294 | "user_envs_cfg": false 295 | }, 296 | "varInspector": { 297 | "cols": { 298 | "lenName": 16, 299 | "lenType": 16, 300 | "lenVar": 40 301 | }, 302 | "kernels_config": { 303 | "python": { 304 | "delete_cmd_postfix": "", 305 | "delete_cmd_prefix": "del ", 306 | "library": "var_list.py", 307 | "varRefreshCmd": "print(var_dic_list())" 308 | }, 309 | "r": { 310 | "delete_cmd_postfix": ") ", 311 | "delete_cmd_prefix": "rm(", 312 | "library": "var_list.r", 313 | "varRefreshCmd": "cat(var_dic_list()) " 314 | } 315 | }, 316 | "types_to_exclude": [ 317 | "module", 
318 | "function", 319 | "builtin_function_or_method", 320 | "instance", 321 | "_Feature" 322 | ], 323 | "window_display": false 324 | } 325 | }, 326 | "nbformat": 4, 327 | "nbformat_minor": 4 328 | } 329 | -------------------------------------------------------------------------------- /notebooks/01_overview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# How do you speak Matplotlib?" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "skip" 19 | } 20 | }, 21 | "source": [ 22 | "A *matplotlib* visualization is a `figure` onto which is attached one or more `axes`. Each `axes` has a horizontal (x) `axis` and vertical (y) `axis`, and the data is encoded using color and glyphs such as `markers` (for example circles) or `lines` or polygons (called `patches`). The figure below annotates these parts of a visualization and was created by Nicolas P. Rougier using `matplotlib`. The source code can be found in the [matplotlib documentation](https://matplotlib.org/gallery/showcase/anatomy.html#sphx-glr-gallery-showcase-anatomy-py)." 
23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "slide" 30 | } 31 | }, 32 | "source": [ 33 | "![Diagram of the components of the matplotlib generated visualization.](../images/mpl_anatomy.png)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "# Import matplotlib & tell Jupyter to draw things" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "slideshow": { 52 | "slide_type": "slide" 53 | } 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "%config InlineBackend.figure_format = 'retina' # change this to png if your plots are too small\n", 58 | "%matplotlib inline\n", 59 | "import matplotlib.pyplot as plt\n", 60 | "import numpy as np" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": { 66 | "slideshow": { 67 | "slide_type": "slide" 68 | } 69 | }, 70 | "source": [ 71 | "### What is this backend thing?" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": { 77 | "slideshow": { 78 | "slide_type": "skip" 79 | } 80 | }, 81 | "source": [ 82 | "Backends are the rendering engines matplotlib uses to transform the Python code into pixels on the screen. Matplotlib supports many different static and interactive [backends](https://matplotlib.org/3.1.0/api/index_backend_api.html). 
\n", 83 | "\n", 84 | "The following line of code prints the current backend:" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "slideshow": { 92 | "slide_type": "slide" 93 | } 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "import matplotlib\n", 98 | "matplotlib.get_backend()" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": { 104 | "slideshow": { 105 | "slide_type": "skip" 106 | } 107 | }, 108 | "source": [ 109 | "We told matplotlib to use the ipython inline backend when we typed `%matplotlib inline`\n", 110 | "\n", 111 | "the `inline` backend results in static, non-interactive images. Later in this tutorial we will cover how to use interactive backends in the notebook." 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": { 117 | "slideshow": { 118 | "slide_type": "slide" 119 | } 120 | }, 121 | "source": [ 122 | "# Let's make that figure" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": { 128 | "slideshow": { 129 | "slide_type": "skip" 130 | } 131 | }, 132 | "source": [ 133 | "A Matplotlib [figure](https://matplotlib.org/api/_as_gen/matplotlib.figure.Figure.html#matplotlib.figure.Figure) is like an empty sheet of paper (or a blank canvas) on which we will draw all our plots, images, and diagrams. Here, the ipython backend decided to not render anything because we have not yet added any plots to our figure." 
134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "slideshow": { 141 | "slide_type": "slide" 142 | } 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "plt.figure()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": { 152 | "slideshow": { 153 | "slide_type": "slide" 154 | } 155 | }, 156 | "source": [ 157 | "# Figure+ Axes (plotting surface)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "slideshow": { 165 | "slide_type": "slide" 166 | } 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "plt.subplots()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": { 176 | "slideshow": { 177 | "slide_type": "skip" 178 | } 179 | }, 180 | "source": [ 181 | "The x and y ticks and the box let us know that the axes was created. Let's assign the output of `plt.subplots`-which is a helper function that creates the figure and axes-to variables so that we can manipulate the figure and axes directly. " 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "slideshow": { 189 | "slide_type": "slide" 190 | } 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "fig, ax = plt.subplots()" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": { 200 | "slideshow": { 201 | "slide_type": "notes" 202 | } 203 | }, 204 | "source": [ 205 | "## Adjusting figure size\n", 206 | "We can make the figure bigger or smaller using the `figsize=(width, height)` keyword construction. 
Matplotlib's default is 6.4x4.8 inches" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": { 213 | "slideshow": { 214 | "slide_type": "notes" 215 | } 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "fig, ax = plt.subplots(figsize=(4,2))" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": { 225 | "slideshow": { 226 | "slide_type": "slide" 227 | } 228 | }, 229 | "source": [ 230 | "# Add data to the figure using a plotting method" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": { 236 | "slideshow": { 237 | "slide_type": "skip" 238 | } 239 | }, 240 | "source": [ 241 | "First, let's use `Pandas` to open our titanic dataset so that we have something to plot!" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": { 248 | "slideshow": { 249 | "slide_type": "slide" 250 | } 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "import pandas as pd\n", 255 | "df = pd.read_csv(\"http://bit.ly/tcsv19\")" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": { 261 | "slideshow": { 262 | "slide_type": "skip" 263 | } 264 | }, 265 | "source": [ 266 | "Matplotlib supports many, many, many different plot types. Peruse the [gallery](https://matplotlib.org/gallery.html) to find some you like! \n", 267 | "\n", 268 | "A hopefully full list of supported plot methods is at https://matplotlib.org/3.1.0/api/axes_api.html#plotting \n", 269 | "\n", 270 | "Here we will use the `ax.plot` method from our installation test. This function is usually used to create line plots, but it can be used to create scatter plots when the marker is set to `'o'`. We use `.plot` here instead of `.scatter` so that we do not have to generate x-values to plot against." 
271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": { 277 | "slideshow": { 278 | "slide_type": "slide" 279 | } 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "fig, ax = plt.subplots()\n", 284 | "_ = ax.plot(np.sort(df['age']), marker='o', markersize=1)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": { 290 | "slideshow": { 291 | "slide_type": "skip" 292 | } 293 | }, 294 | "source": [ 295 | "__troubleshooting__:\n", 296 | "If your figure looks \"fuzzy\" it is likely you have a hi-dpi (aka 'retina' display), try running\n", 297 | "\n", 298 | "```ipython\n", 299 | "%config InlineBackend.figure_format = 'retina' # tell IPython to use hi-dpi pngs\n", 300 | "``` \n", 301 | "And then re-rendering your figure by typing `Shift + Enter` in the above notebook cell." 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": { 307 | "slideshow": { 308 | "slide_type": "slide" 309 | } 310 | }, 311 | "source": [ 312 | "# What are we plotting? Let's label" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": { 318 | "slideshow": { 319 | "slide_type": "skip" 320 | } 321 | }, 322 | "source": [ 323 | "You can label almost everything in matplotlib. 
The most commonly used labeling methods are to set the:\n", 324 | "* title: `set_title`\n", 325 | "* x axis label: `set_xlabel`\n", 326 | "* y axis label: `set_ylabel`" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": { 333 | "slideshow": { 334 | "slide_type": "slide" 335 | } 336 | }, 337 | "outputs": [], 338 | "source": [ 339 | "fig, ax = plt.subplots()\n", 340 | "_ = ax.set_title (\"Titanic Data Set\")\n", 341 | "_ = ax.plot(np.sort(df['age']), marker='o', markersize=1)\n", 342 | "_ = ax.set_ylabel(\"Ages\")" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": { 348 | "slideshow": { 349 | "slide_type": "slide" 350 | } 351 | }, 352 | "source": [ 353 | "# Let's change up the y axis" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": { 359 | "slideshow": { 360 | "slide_type": "slide" 361 | } 362 | }, 363 | "source": [ 364 | "Instead of breaking the ages up by 10, let's use the US census demographic groupings:\n", 365 | " * Children: 0–14 years\n", 366 | " * Youth: 15–24 years \n", 367 | " * Adult: 25–54 years \n", 368 | " * Older Adult: 55–64 years\n", 369 | " * Senior: 65 years and over" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": { 375 | "slideshow": { 376 | "slide_type": "skip" 377 | } 378 | }, 379 | "source": [ 380 | "Ticks can primarily be customized in two ways:\n", 381 | "* [Locators](https://matplotlib.org/3.1.1/gallery/ticks_and_spines/tick-locators.html): where the tick is\n", 382 | "* [Formatters](https://matplotlib.org/3.1.1/gallery/ticks_and_spines/tick-formatters.html): what the tick looks like" 383 | ] 384 | }, 385 | { 386 | "cell_type": "markdown", 387 | "metadata": { 388 | "slideshow": { 389 | "slide_type": "skip" 390 | } 391 | }, 392 | "source": [ 393 | "First we use the `.set_yticks()` method to place our ticks according to the demographic group." 
394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": { 400 | "slideshow": { 401 | "slide_type": "slide" 402 | } 403 | }, 404 | "outputs": [], 405 | "source": [ 406 | "fig, ax = plt.subplots()\n", 407 | "_ = ax.set_title (\"Titanic Data Set\")\n", 408 | "_ = ax.plot(np.sort(df['age']), marker='o', markersize=1)\n", 409 | "_ = ax.set_ylabel(\"Ages\")\n", 410 | "_ = ax.set_yticks([15,25, 55, 64])" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": { 416 | "slideshow": { 417 | "slide_type": "slide" 418 | } 419 | }, 420 | "source": [ 421 | " Now we use `.set_yticklabels()` to replace the labels that were automatically generated based on the tick locations with manually defined labels. " 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": { 428 | "slideshow": { 429 | "slide_type": "slide" 430 | } 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "fig, ax = plt.subplots()\n", 435 | "_ = ax.set_title (\"Titanic Data Set\")\n", 436 | "_ = ax.plot(np.sort(df['age']), marker='o', markersize=1)\n", 437 | "_ = ax.set_ylabel(\"Ages\")\n", 438 | "_ = ax.set_yticks([15,25, 55, 64])\n", 439 | "_ = ax.set_yticklabels(['children', 'youth', 'adults', 'seniors'])" 440 | ] 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "metadata": { 445 | "slideshow": { 446 | "slide_type": "slide" 447 | } 448 | }, 449 | "source": [ 450 | "# Putting it all together: Let's look at fares!\n", 451 | "\n", 452 | "* First Class Suite- £870 or $4,350\n", 453 | "\n", 454 | "* First Class Berth- £30 or $150\n", 455 | "\n", 456 | "* Second Class- £12 or $60\n", 457 | "\n", 458 | "* Third Class- £3 to £8 or $40\n", 459 | "\n", 460 | "source: (Wilkinson & Hamilton, 2011) from [Titanic: The Whole Iceberg](https://autumnmccordckp.weebly.com/tickets-and-accomodations.html)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": { 466 | "slideshow": { 467 | "slide_type": 
"slide" 468 | } 469 | }, 470 | "source": [ 471 | "## Tasks\n", 472 | "1. Create a figure and an axis\n", 473 | "2. Plot the \"fare\" column\n", 474 | "3. Label the y-axis \"fare\"\n", 475 | "4. Set the y tick locations using the fare class boundaries: [3, 12, 30, 870]\n", 476 | "5. Set the y tick labels using the fare class names: [3rd, 2nd, 1st B, 1st S]" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": { 483 | "slideshow": { 484 | "slide_type": "slide" 485 | } 486 | }, 487 | "outputs": [], 488 | "source": [ 489 | "#We'll work through the solution together here\n" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "metadata": { 495 | "slideshow": { 496 | "slide_type": "slide" 497 | } 498 | }, 499 | "source": [ 500 | "# Let's put more than one image on the page" 501 | ] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "metadata": { 506 | "slideshow": { 507 | "slide_type": "skip" 508 | } 509 | }, 510 | "source": [ 511 | "Matplotlib offers a lot of flexibility in creating images that have multiple figures, and you can read all about them in the [customizing figures tutorial](https://matplotlib.org/3.1.1/tutorials/intermediate/gridspec.html#sphx-glr-tutorials-intermediate-gridspec-py). We will briefly touch on using subplots, constrained_layout, and a drop of grid_spec. " 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "metadata": { 517 | "slideshow": { 518 | "slide_type": "slide" 519 | } 520 | }, 521 | "source": [ 522 | "## Multiple Axes" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": { 528 | "slideshow": { 529 | "slide_type": "skip" 530 | } 531 | }, 532 | "source": [ 533 | "Let's put our two axes together on one figure, using `subplots` with the ncols kwarg to create a figure with two axes side by side. \n", 534 | "\n", 535 | "We can use the `Figure` method `suptitle` to put a figure-level title on the plot." 
536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "metadata": { 542 | "slideshow": { 543 | "slide_type": "slide" 544 | } 545 | }, 546 | "outputs": [], 547 | "source": [ 548 | "fig, (ax1, ax2) = plt.subplots(ncols=2)\n", 549 | "fig.suptitle('Next To (1x2)')" 550 | ] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "metadata": { 555 | "slideshow": { 556 | "slide_type": "skip" 557 | } 558 | }, 559 | "source": [ 560 | "If you want the axes to be one under the other, you can use `plt.subplots(nrows=2)`. You can also create a grid using `plt.subplots(ncols=2, nrows=2)`. In the grid case, plt.subplots returns a (nrows, ncols) shaped array of axes objects. " 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": null, 566 | "metadata": { 567 | "slideshow": { 568 | "slide_type": "notes" 569 | } 570 | }, 571 | "outputs": [], 572 | "source": [ 573 | "fig, (ax1, ax2) = plt.subplots(nrows=2)\n", 574 | "_ = fig.suptitle('Stacked (2x1)')" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": null, 580 | "metadata": { 581 | "slideshow": { 582 | "slide_type": "notes" 583 | } 584 | }, 585 | "outputs": [], 586 | "source": [ 587 | "fig, axes = plt.subplots(nrows=2, ncols=2)\n", 588 | "_ = fig.suptitle('Quad (2x2)')" 589 | ] 590 | }, 591 | { 592 | "cell_type": "code", 593 | "execution_count": null, 594 | "metadata": { 595 | "slideshow": { 596 | "slide_type": "slide" 597 | } 598 | }, 599 | "outputs": [], 600 | "source": [ 601 | "fig, (ax1, ax2) = plt.subplots(ncols=2)\n", 602 | "_ = fig.suptitle(\"Titanic Data Set\")\n", 603 | "_ = ax1.plot(np.sort(df['age']), marker='o', markersize=1)\n", 604 | "_ = ax1.set_ylabel(\"Ages\")\n", 605 | "_ = ax1.set_yticks([15,25, 55, 64])\n", 606 | "_ = ax2.set_ylabel('Fare')" 607 | ] 608 | }, 609 | { 610 | "cell_type": "markdown", 611 | "metadata": { 612 | "slideshow": { 613 | "slide_type": "slide" 614 | } 615 | }, 616 | "source": [ 617 | "## Yuck, crowded! 
Constrained Layout" 618 | ] 619 | }, 620 | { 621 | "cell_type": "markdown", 622 | "metadata": { 623 | "slideshow": { 624 | "slide_type": "slide" 625 | } 626 | }, 627 | "source": [ 628 | "[Constrained layout](https://matplotlib.org/3.1.1/tutorials/intermediate/constrainedlayout_guide.html) is matplotlib automagic that tries to place all the elements of the image on the page such that they don't overlap but the image still has breathing room and the user's instructions are respected. It is especially useful when plotting multiple images. " 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": null, 634 | "metadata": {}, 635 | "outputs": [], 636 | "source": [ 637 | "fig, (ax1, ax2) = plt.subplots(ncols=2, constrained_layout=True)\n", 638 | "_ = fig.suptitle(\"Titanic Data Set\")\n", 639 | "_ = ax1.plot(np.sort(df['age']), marker='o', markersize=1)\n", 640 | "_ = ax1.set_ylabel(\"Ages\")\n", 641 | "_ = ax1.set_yticks([15,25, 55, 64])\n", 642 | "_ = ax2.set_ylabel('Fare')\n", 643 | "# Exercise: plot the fare data in the right axes" 644 | ] 645 | }, 646 | { 647 | "cell_type": "markdown", 648 | "metadata": { 649 | "slideshow": { 650 | "slide_type": "slide" 651 | } 652 | }, 653 | "source": [ 654 | "# Making images different sizes" 655 | ] 656 | }, 657 | { 658 | "cell_type": "markdown", 659 | "metadata": {}, 660 | "source": [ 661 | "There's a full [Gridspec API](https://matplotlib.org/3.1.1/api/gridspec_api.html#module-matplotlib.gridspec) that you can make use of, but here we will take a shortcut and use the `gridspec_kw` argument to plt.subplots(). `gridspec_kw` is a dictionary of all the keyword arguments that can be passed into Gridspec class objects.\n", 662 | "\n", 663 | "Here, we want one image to be much thinner than the second, so we will pass the `width_ratios` argument to gridspec_kw. Here, we say we want the second plot to be 3 times wider than the first." 
664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": null, 669 | "metadata": { 670 | "slideshow": { 671 | "slide_type": "slide" 672 | } 673 | }, 674 | "outputs": [], 675 | "source": [ 676 | "fig, (ax1, ax2) = plt.subplots(ncols=2, gridspec_kw={'width_ratios':[1,3]}, constrained_layout=True)\n", 677 | "_ = fig.suptitle (\"Titanic Data Set\")\n", 678 | "_ = ax1.plot(np.sort(df['age']), marker='o', markersize=1)\n", 679 | "_ = ax1.set_ylabel(\"Ages\")\n", 680 | "_ = ax1.set_yticks([15,25, 55, 64])\n", 681 | "_ = ax2.set_ylabel('Fare')" 682 | ] 683 | }, 684 | { 685 | "cell_type": "markdown", 686 | "metadata": { 687 | "slideshow": { 688 | "slide_type": "slide" 689 | } 690 | }, 691 | "source": [ 692 | "# Practice:\n", 693 | "1. Flip: make the ages graph wider than the fares graph\n", 694 | "2. Rotate: put the ages graph on top of the fares graph\n", 695 | "3. Top-heavy: in the vertical orientation from 2, make the ages graph larger \n", 696 | " + hint: there's a `height_ratios` gridspec_kw" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": null, 702 | "metadata": {}, 703 | "outputs": [], 704 | "source": [] 705 | } 706 | ], 707 | "metadata": { 708 | "celltoolbar": "Slideshow", 709 | "kernelspec": { 710 | "display_name": "Python 3", 711 | "language": "python", 712 | "name": "python3" 713 | }, 714 | "language_info": { 715 | "codemirror_mode": { 716 | "name": "ipython", 717 | "version": 3 718 | }, 719 | "file_extension": ".py", 720 | "mimetype": "text/x-python", 721 | "name": "python", 722 | "nbconvert_exporter": "python", 723 | "pygments_lexer": "ipython3", 724 | "version": "3.7.3" 725 | }, 726 | "latex_envs": { 727 | "LaTeX_envs_menu_present": true, 728 | "autoclose": false, 729 | "autocomplete": true, 730 | "bibliofile": "biblio.bib", 731 | "cite_by": "apalike", 732 | "current_citInitial": 1, 733 | "eqLabelWithNumbers": true, 734 | "eqNumInitial": 1, 735 | "hotkeys": { 736 | "equation": "Ctrl-E", 737 | "itemize": "Ctrl-I" 
738 | }, 739 | "labels_anchors": false, 740 | "latex_user_defs": false, 741 | "report_style_numbering": false, 742 | "user_envs_cfg": false 743 | }, 744 | "varInspector": { 745 | "cols": { 746 | "lenName": 16, 747 | "lenType": 16, 748 | "lenVar": 40 749 | }, 750 | "kernels_config": { 751 | "python": { 752 | "delete_cmd_postfix": "", 753 | "delete_cmd_prefix": "del ", 754 | "library": "var_list.py", 755 | "varRefreshCmd": "print(var_dic_list())" 756 | }, 757 | "r": { 758 | "delete_cmd_postfix": ") ", 759 | "delete_cmd_prefix": "rm(", 760 | "library": "var_list.r", 761 | "varRefreshCmd": "cat(var_dic_list()) " 762 | } 763 | }, 764 | "types_to_exclude": [ 765 | "module", 766 | "function", 767 | "builtin_function_or_method", 768 | "instance", 769 | "_Feature" 770 | ], 771 | "window_display": false 772 | } 773 | }, 774 | "nbformat": 4, 775 | "nbformat_minor": 4 776 | } 777 | -------------------------------------------------------------------------------- /notebooks/03_aggregations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Let's make matplotlib do math...maybe" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "First let's import the data and remake one of the figures from the [overview](01_overview.ipynb) notebook. 
" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 5, 28 | "metadata": { 29 | "slideshow": { 30 | "slide_type": "slide" 31 | } 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "%matplotlib inline\n", 36 | "import pandas as pd\n", 37 | "import numpy as np\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "df = pd.read_csv(\"http://bit.ly/tcsv19\").dropna()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 6, 45 | "metadata": { 46 | "slideshow": { 47 | "slide_type": "slide" 48 | } 49 | }, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAEICAYAAABmqDIrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3deXxddZ3/8denaZOmSemaLukWGqAFilRIkU3KqhaRZRQFYSwjI4y/mQcigw6IjxEcmdHRUcFBtDBCGWdks2CHRQUpLVuhKdCN7umeNk26ZG+Wez+/P+5JmqY3bdrm5G7v5+PRR27OOffc75erffe7HnN3REREwtIn0QUQEZH0pqAREZFQKWhERCRUChoREQmVgkZEREKloBERkVApaEREJFQKGpFuMrONZtZoZnVmVmFmj5lZfqLLJZLsFDQiR+Zz7p4PnAFMA76b4PKExsz6JroMkh4UNCJHwd23AS8DU8zsb8xspZnVmlmZmd3adp2ZDTezF8xsr5ntNrM3zKxPcO6fzGxb8L7VZnZJcLyPmd1lZuvNbJeZPW1mQ4NzRWbmZjbTzDabWZWZ3dPh83LNbLaZ7QnK9G0z29rhfKGZ/d7MKs1sg5nd1uHcvWb2rJn91sxqgJvM7CwzKzWzmqAV99PQ/+NK2tG/WESOgpmNAy4H5gA7gSuAMuAC4GUzW+Tu7wP/CGwFCoK3ng24mU0C/gGY5u7lZlYEZAXX3AZcDUwHKoEHgYeA6zsU4XxgEnAS8J6ZzXH3lcD3gCJgIpAHvNShzH2A/wP+ENxrLPCqma129z8Fl10FXAt8BcgBXgMecPf/DroJpxz9fzXJVGrRiByZ581sL/AmMB/4V3d/0d3Xe8x84M/AJ4PrW4DRwAR3b3H3Nzy2wWCE2F/kp5hZP3ff6O7rg/fcCtzj7lvdvQm4F/hCp66s+9y90d2XAEuA04PjXwzKtMfdtxILqTbTgAJ3/767N7t7GfAIcF2Ha95x9+fdPerujUH5TzCz4e5e5+4Lj/m/oGQcBY3Ikbna3Qe7+wR3/3/u3mhmM8xsYdA1tpdYS2d4cP2PgXXAn4NutbsA3H0dcDuxENlpZk+aWWHwngnAc0F3215gJbFgGtmhHDs6vG4A2iYlFAJbOpzr+HoCUNh23+De3+l0347XA9xMrNW0yswWmdkV3fmPJNKRgkbkGJhZDvB74CfASHcfTKy7ygDcvdbd/9HdJwKfA+5oG4tx9/919/OJBYADPwpuuwWYEQRa25/+wbjQ4Wwn1iXWZlyH11uADZ3uO9DdL+9wzQHbubv7Wne/HhgRlO9ZM8vrRjlE2iloRI5NNrEusEqg1cxmAJ9qO2lmV5jZCWZmQA2xlknEzCaZ2cVBUO0DGoNzA
L8C7jezCcE9Cszsqm6W52ngbjMbYmZjiI0DtXkPqAkmIeSaWZaZTTGzaV3dzMxuNLMCd48Ce4PDka6uF4lHQSNyDNy9ltjg/dPAHuDLwNwOl5wIvArUAe8Av3T314mF0w+BKmLdYCOIdWMBPBDc489mVgssBD7RzSJ9n9jkgw3B5z4LNAVljRBrVU0NzlcBjwKDDnG/zwArzKwuKNd17r6vm2URAcD04DOR9GVmXycWDtMTXRbJXGrRiKQRMxttZucFa3EmEZte/VyiyyWZTetoRNJLNvBr4HhiYypPAr9MaIkk46nrTEREQqWuMxERCZW6zuIYPny4FxUVJboYIiIpZfHixVXuXtD5uIImjqKiIkpLSxNdDBGRlGJmm+IdV9eZiIiESkEjIiKhUtCIiEioFDQiIhIqBY2IiIRKQSMiIqFS0IiISKgUNCIiwqKNu7nh0XdZU1HT4/dW0IiICI+9tYG31lXx3PvlPX5vBY2IiDBuyAAAvnLuhB6/t4JGRETY29DC8PwcRg/K7fF7K2hERITy6kYKB/cP5d4KGhERYXv1PkYdp6AREZEQuDvb9zZSOLjnu81AQSMikvFq9rVS3xxh9CC1aEREJATbqxsBGK0WjYiIhGH73n0AFKpFIyIiYVi7sxaA3OysUO6voBERyXDz11QFPytDub+CRkQkw40cmMOAfllcN218KPdX0IiIZLjaplbGDR3A0LzsUO6voBERyXBVdU0MHxhOyICCRkQk41XVNTE8Pye0+ytoREQyXFVts4JGRETCUd/USmNLhIKBChoREQlBVV0TgFo0IiISjv1Bo8kAIiISgspatWhERCRElXXNAIzQGI2IiIShqrYJM0JbrAkKGhGRjLa+so7+fbOo2dca2mcoaEREMtTehmb+uHwHjS0RnindEtrnKGhERDLUrAVltEadGz8xnmtLxoX2OQoaEZEM9dwH2wBC3VATFDQiIhnJ3alpbOGM8YNDbc2AgkZEJCNV1TVT3xzhc6cXhtqaAQWNiEhGKqusA2BiQX7on6WgERHJQBuq6gGYODwv9M9S0IiIZKCyqnqy+/ahcHBu6J+loBERyUBllfUcPyyPrD4W+mcpaEREMtDailoAdtc3h/5ZChoRkQyzt6GZTbsbWF1RG+qOAG0UNCIiGeaDzXsB+PJZ4e4I0KZv6J8gIiJJZfGmPWT1Mb57xckMyA4/BtSiERHJINGoU7ppN6eMPq5XQgZSLGjMrMTMHkx0OUREUtG2vY187L4/sbBsN32sdyYCQIoFjbuXuvtt3b3ezNQ1KCISeOzNDdQ3RwBYsrW6VyYCQC+O0ZhZHvA0MBbIAv4FWAf8FMgHqoCb3H27mb0OvAtcBAwGbnb3N8zsQuBOd7/CzIYCvwEmAg3ALe6+1MzuBQqBIqDKzO4HHgOyiQXr5919ba9UWkQkgd5cW8mDf1lLSyQKwPLyGi6eVMCUMYMA65WJANC7kwE+A5S7+2cBzGwQ8DJwlbtXmtmXgPuBr7aVzd3PMrPLge8Bl3a6333AB+5+tZldDDwBTA3OnQmc7+6NZvYL4AF3/x8zyyYWcgcxs1uAWwDGjx/fQ1UWEUmMaNS57XcfsLuh5YDjRcPz+eZlk3q1LL0ZNMuAn5jZj4AXgD3AFOAVM4NYAGzvcP2c4OdiYq2Tzs4HPg/g7q+Z2bAgvADmuntj8Pod4B4zGwvM6ao14+6zgFkAJSUlflQ1FBFJAu7OKysr2N3QwmemjGT8kNh+ZrnZWcw8t6jXy9NrQePua8zsTOBy4N+AV4AV7n5OF29pCn5GiF/OePsmtAVEfYfP/V8zexf4LPAnM/tbd3/taOogIpLsWiNRPv3zBayvrGdgTl++f+UURhzXP6Fl6rXJAGZWCDS4+2+BnwCfAArM7JzgfD8zO/UIbrkAuCF474VAlbvXxPnciUCZuz8IzAU+dkwVERFJYi8t38H6yti/tWubWtufoplIvdl1dhrwYzOLAi3A14FW4MGgy
6sv8HNgRTfvdy/wmJktJTYZYGYX130JuNHMWoAdwPePugYiIknM3Xl43joG5/bl2jPHMSCnb68N+B+KuWs4orOSkhIvLS1NdDFERI7IwrJdXDdrIQB3z5jMrdOLe/XzzWyxu5d0Pq51JiIiKS4SjTUYHllQxuAB/fjaJycmRUumjYJGRCSFzX57I9+bu3/E4fwThnP9WeMZmpedwFIdKKV2BhARkf0iUednr65p/724II8311X12or/7lKLRkQkhcx+eyMvLi2nJRKlvrmVvQ0tXDp5BKeOGcSVUwt59aOKpOo2AwWNiEjKWL2j9oBusjZnFg3l6xfGBv6Lp+f3drEOS0EjIpIiHnmjjP59+zDznKL21em52Vl8aVpytWA6U9CIiKSAipp9PP/BVk4fO5hbLyxOqsH+w9FkABGRFPD42xtpjcLizXuTbrD/cNSiERFJIk8v2sIzi7fQ0hptP9Yvqw8fba+muCCPKz5WmHSD/YejoBERSRLbqxu5a85SonE2bOljsL6yngHZWSnVbQYKGhGRpBCNOo+9tRGAm84tIjtr/8hGbnZW0k5d7g4FjYhIgm3e1cCMBxZQ3xzhxBH53HbJiXFbLck4dbk7NBlARCTBHn2zjMaWCABrd9al3GD/4ahFIyKSAIs37eFHL6+kuTXK8vIaLjtlJJNHDQQsJbvHDiUlgsbMbgJK3P0fDnFNEfCCu08xs6lAobu/1DslFBE5Mt986gM2725s/3380AF887JJCSxReFIiaI7CVKAEUNCISMJFo060w7O/lpfXsHl3I+efMJxTRh9HbnYWM88tSlwBQ5YUQWNmzwPjgP7AA+4+y8z+Brgb2A6sAZqCax8n1nJ5Nvi9zt3zO9wrm9hTNHPN7Hzg34g9WfOB4BIHLnD32t6om4hktjUVtVz5izfZ12FdDEC/LONfr5nC+GF5CSpZ70mWyQBfdfczibVCbjOzMcB9wHnAZcAp3b2RuzcD/ww85e5T3f0p4E7g7919KvBJoLHz+8zsFjMrNbPSysrKY6+RiAjwq/nricR5knFLxHl5+Y4ElKj3JUWLhli4XBO8Hgf8NfC6u1cCmNlTwEnHcP+3gJ+a2f8Ac9x9a+cL3H0WMAtij3I+hs8SkQz1+8VbeXLR5vZV/Q4s3VrN1VMLGT9sAI3N+1s1udlZaTfo35WEB42ZXQhcCpzj7g1m9jqwCji5i7e0ErTEzMyAwy6RdfcfmtmLwOXAQjO71N1X9UDxRUQA2Fm7j28/uzRu62XUoNy0HejvjoQHDTAI2BOEzGTgbCAXuNDMhgE1wLXAkuD6jcCZwNPAVUC/OPesBQa2/WJmxe6+DFhmZucAk4mFmYjIMYlEHXfnibc3EXHnK+dMoH/frPbz6T7Q3x3JEDR/BP7OzJYCq4GFxCYA3Au8E7x+H2j75h4B/mBm7wF/Aerj3HMecJeZfUhsMsD5ZnYREAE+Al4OrTYikjHeWFPJTY8vIhJsTlZckMftl56UcnuRhc08TjMv05WUlHhpaWmiiyEiSe5TP5vPmoo6jh8+gA1VDQDcPWMyt04vTnDJEsPMFrt7SefjydCiERFJCS8tK+extzbS0holirOmoo5pE4bwwy98jLkfbiMdV/X3BAWNiEg31DW18s2nltDUaT3M2cXDKC7Iz+jB/sNR0IiIHELbqv6nFm2hqTXKF0vGMjg3Ngajgf7uUdCIiHShrLKOzz74Bo0tsVZM4aD+3DXjZA32H6Fk2RlARCTpPPJGGc2R/ROmyqv3pd0W/r1BLRoRyVjRqPPw6+t4fU1l+2r+jpZuq2bGaaM4oSCfxuZoRq3m70kKGhHJWK+v2cmP/7zmkNeMHZy+2/f3FgWNiKQ9d29fVNnRr+eXMXJgDtdOG0tzy8HnNdjfMxQ0IpLWolHn8gcXsGpHXdzzl0wewVfPm6gB/hBpMoCIpLVXV1Z0GTLFBXn8ZdVODfCHTC0aEUlJ7s6v5q/ntVU74w7k98vqw0kjB/Lyi
u3k5WRx/bTx9DFrP5+bncWVUwt59aMKDfCHTEEjIinpzXVV/OiPqw95zaJNe9pfFwzMibsHWfH0/IOOSc9S0IhI0opGPe7zXQBmLShjaF421581jpbW+AP5V04t1B5kSUBBIyJJqaG5lYt/Mp8dNfu6vGb6SQXcfP6hB/I1NTnxNBlARJLS7xdvPWTIFBfkMX9NpQbyU4BaNCKSNNZU1PDPz68gEnU2VtUz8rgcrjq9ELADrtNAfmpR0IhIUthYVc/VD71NQ3MEgD4GUYdh+fEH8UED+alCQSMivaY1EiXe0H5Ta5QbH32XhuYIUwoHcsnJo9RiSSMKGhHpFbPf3sj35q445DWXTxnFD645rX1wXy2W9KDJACISukjU+fmr+zevPH74gLjXnT5usLaCSUNq0YhIaJ7/YCu/fXcTNY0t7Glo4dLJIzh1zKD29S2NzftX9GsL/vSloBGRUOypb+bOZ5bS2mHX5DOLhvD1C08AtL4lkyhoRKRHRaJO1J0n3tlEa9S58ezxDOjXl9zsLL40bXyiiycJcERBY2Z57l4fVmFEJLWtKK/mmofean/88YShA7jjskkad8lw3ZoMYGbnmtlHwMrg99PN7JehlkxEUs6v5pfRcXHlpt0NWrkv3W7R/Az4NDAXwN2XmNkFoZVKRJLeM4u28NTizbQGG1o6sHRrNdeWjGX0oP40Nkc1wC/AEXSdufsWswO2gYj0fHFEJBVsr27kn+YsJc7TkSkYmKOBfjlAd4Nmi5mdC7iZZQO3EXSjiUjmaBvof/ytjbjDTecWkZ21vwc+NzuLmecWJa6AkpS6GzR/BzwAjAG2An8G/j6sQolI8pm3qoKbZ5e2t2JOHJHPbZecqIF+OaxuTQZw9yp3v8HdR7r7CHe/0d13hV04EUkeP3hxJVHfv6p/7c46DfRLt3SrRWNmD8Y5XA2UuvsferZIIpIM5i7ZxhPvbKS1NdZdtr6ynrOPH8r9f3WanlopR6S7XWf9gcnAM8HvnwdWADeb2UXufnsYhRORxKjZ18KdTy+lORI94PgnJg6luCBfg/1yRLobNCcAF7t7K4CZPUxsnOYyYFlIZRORkLj7AVvDdPa/726mORLl+rPGMTCnH6CBfjl63Q2aMUAese4ygteF7h4xs6ZQSiYioXB3rvzPN1m2reaQ140dksu3Pj1Zg/1yzLr7mIB/Bz40s8fM7HHgA+AnZpYHvBpW4USk581fU9llyHTcvn/rnkYN9kuP6FaLxt3/y8xeBv4aWEWs22xrsO/Zt0Isn4gco3U7a/nOc8vxqHPiyHz+tKKCAf36cP1ZE8jqs38Rdm521gHb92tVv/SU7s46+1vgG8BY4EPgbOAd4OLwiiYiPeHbzy7l/c17ASjdtKf9Ucojjsvh1unFB12vgX7pad0do/kGMA1Y6O4Xmdlk4L7wiiUiRyMadSK+f5B/b0MLK8qrOWXUQC47dVR7i0VTk6U3dTdo9rn7PjPDzHLcfZWZ6Z89IklkTUUtNzyykMq65oPO3Xf1FKYVDQXUYpHe193JAFvNbDDwPPCKmf0BKA+vWCJyJMr3NvLXj77L3saWuOff37Snl0sksl93JwNcE7y818zmAYOAP4ZWKpEMEok6v3htDW+uraIlcvDaln5ZfThxRD5rd9bGPQ+x577UN7XSEnHOKx7GqYWD2s9pUF8S7Ygf5ezu88MoiEimemnZdn7+6rpDXrO4wyB+Vz5/xhhOGjmQa0vGae2LJJUjDhoROTKRqBPpYhW+4zzyRhkThg7gqqmF7GuJHnRN52nH8bSt2lfASDJS0IiEaG1FLVf84g2aWg/dHvnO5ZO55YKDpxp3pEF8SVXdnQwgIkfh1wvKiMRvhBygtYuxF5F0oBaNSAhWlFdz79wVLN60hys/NpoJw/MO2e113Vnje7mEIr1HQSMSgjufXsLKHbUAjB48QN1ektEUNCLHqPNgf/neRlbtqOWM8YP55IkF2lpfMp6CRuQYvL95Dzc99
h41ja0HHDfg/mumcPLoQfHfKJJBFDQiR+G1lRXc+38rqKptIt7IiwML1lQpaERQ0Ih020vLyvnNmxtoiTjLtlUTdSjIz6Gyrkmr8UUOQUEj0g0Nza3c8dQS9rXub7+cUzyUH1x9Gq9+VKHV+CKHoKARCbh7l3uJPfneFva1Rrn2zLEMGZB9wEr84un5vVxSkdSioBEJfOPJD5i7ZHuX50cdl8Pdl5+slovIEdLOACLA5l0N/F+ckJk4PK/99Y6aJp4p3dKbxRJJC2rRSEb444rtPLqgrL1rrPPW+ztrmgC44RPjycuO/d+i82aWGuAXOToKGkl7+1oi3PHkEhpaIgccj7f1/vihA7h1+oGbW2pVv8ixUdBIWmmJRPFO6fHs4q00tET4q4+PYXh+DhB/6321WETCoaCRtODu3D1nGU8uij+GUjAwm3s+ezLDgqBpo9aKSPg0GUDSwr+9tOqgkOk4kF9Z28yzi7f2drFEBLVoJEW5Ow+/vp7XVlVQu6+V1RV1AO0r9DWQL5I8FDSSkt5Zv4t//9PqA45Nn1TAz7449YB1LuoaE0k8BY0kpc5b73c2640yhub14/qzxtPS6ges1BeR5KKgkaSzo3ofl/70deqaIoe87oITh3Pz+RMVLiJJTpMBJOk89vYG6g8TMsUFeSxYW6WV+iIpQC0aSQofbt7D/S+txB0+Kq+meEQeF08aGffatoH+tl2TRSS5pV3QmNntwCx3bwh+r3N3ba+b5P7xmSWsr6wHIKuPsW5nPdeemX3QKv2OtGuySGpIu6ABbgd+CzQkuiByoHir9gHWV9axvrKes48fyicmDlNrRSTNJE3QmNm/AFXu/kDw+/3ATmAsMIPY03F/4O5PmdmFwJ3ufkVw7X8CpcBxQCEwz8yq3P2iDve6AmgErnL3il6tnPDEOxv55z+s6PJ8lsH9f3UaxQWxVopaKyLpI5kmA/wXMBPAzPoA1wFbganA6cClwI/NbHRXN3D3B4Fy4KK2kAHygIXufjqwAPhavPea2S1mVmpmpZWVlT1UJQGIRp2fv7Km/feOK/bbRBxe/Uj5L5KOkqZF4+4bzWyXmX0cGAl8AJwP/M7dI0CFmc0HpgE1R3DrZuCF4PVi4LIuPn8WMAugpKSk6wUcAsAzi7bwZOlmWjs8kbJfVh9OCLbe73i8saWV3Q0tXDx5BKeNGXTQZpagDS1F0lnSBE3gUeAmYBTwG+BTXVzXyoGtsf6HuGeLe/vIQITkq3PK2Vmzj3+as5R46ynjbb3fZlrREL5+4QmAVuyLZJJk+0v3OeD7QD/gy8QC5FYzmw0MBS4AvhWcP8XMcoJrLgHeDO5RCwwEqnq36OnF3WmOROOe+81bG3GHmedOICcrq/14vK33O5770rTxoZZZRJJTUgWNuzeb2Txgr7tHzOw54BxgCbHJAN929x0AZvY0sBRYS6ybrc0s4GUz295hnEaO0JcffZd31u/q8vwJBXl845KT4q7KV2tFRDoyjzffNEGCSQDvA9e6+9pElaOkpMRLS0sT9fEJt3jTHj7/8Ntxz00cnkdZVWy9y90zJh9ynYuIZBYzW+zuJZ2PJ02LxsxOITZo/1wiQybTrCiv5t65K3CnfSB/c1UD/frAjWcX0S9r/1BYx64xMA3ei0i3JE3QuPtHwMRElyPTfOuZpXy0PTaJr/NA/qhB/eO2WNQ1JiJHImmCRnpP20D/jup9rNpRwxnjB/PJEwv0oDARCYWCJsO4O1c/9BZLtlYDYMAPrp7CKYWDALVWRKTnJdPOANIL3lxX1R4yEJvK98ZazQQXkfCoRZNhHpq3jgHZfbhu2nj69umjLjIRCZ2CJoOs3F7DwrLdAIw8Lv5Av4hIT1PQpKGuVvU/sqCM3H59uHV6sVoxItJrFDRppqG5lRsefZcPNu+Ne/70sYP4yjlFcVf0i4iEQZMB0khLJMrfzi6NGzJtW/Mv2VrNM6VbertoIpLB1KJJQfNWVfCf89a1b8Xftj3/O
+t3sWl37MGi5xUP49RgyrJW9ItIIiloUkxrJMrtT31IdWPrAcc7ruq/aFIB//HFqQd1j2mNjIgkgoImRUSiTms0yh+X76C6sZUrPjaKwkEDgINbLDPP1RiMiCQPBU0KqKpr4pL/eL29FTM4tx/3fm4KwwfmHHCdWiwikow0GSAFPPHOpgO6yvY2tvD797cmsEQiIt2nFk2SWrZ1L/e9sALcWFVRw8TheVx68kgAreYXkZTSq0FjZo8DL7j7s52OFwIPuvsXzOxC4E53vyLO+zcCJe6e9ptz3fnMUlZX1AKQZUbZvnqG5WdrNb+IpJykaNG4eznwhaN5r5kZsSeFxn/AfQratKue1RW1lBQN5rzi2Pb9r35UoVaMiKSkUIPGzL4C3Elsk+ClQAS4wMzuAEYB33b3Z82siFhLZ0qn9w8DfgcUAO8R29We4PqXgXnAOcDVZjYJuA/IAdYDf+PudUEraDbwOaAfscdErwqt0j3goXnryDLjX68+jZNGHQdA8fT8BJdKROTohDYZwMxOBe4BLnb304FvBKdGA+cDVwA/PMxtvge86e4fB+YC4zucmwQ8EZyrB74LXOruZwClwB0drq0Kjj9MLPjilfcWMys1s9LKysojqGnP2lPfzJz3txFxZ97qxJVDRKSnhDnr7GLg2bbxFHffHRx/3t2jwaObRx7mHhcAvw3e/yKwp8O5Te6+MHh9NnAK8JaZfQjMBCZ0uHZO8HMxUBTvg9x9lruXuHtJQUFBd+oXilkLymiNOjeePV5dZSKSFsLsOjM44BH0bZo6XXM48e4BsVZMx/u84u7Xd3Ft22dGSJJxqXj2tUSY/c5GAMYNGaBFlyKSFsJs0fwF+GIwzoKZDT2KeywAbgjePwMY0sV1C4HzzOyE4NoBZnbSUXxeQj3/wTYamiNcf9Y4tWZEJG2E9q97d19hZvcD880sAnxwFLe5D/idmb0PzAc2d/FZlWZ2U3Bt23L57wJrjuIzEyIadX41fz0jj8vhzk9NUmtGRNKGuXfVM5W5SkpKvLS0tFc/87VVFXz18dhn3j1jstbLiEjKMbPF7l7S+XjSjldkmofmrSM/J4ubzjte3WYiklYUNElg6da9LN4Ue1jZwJy+6jYTkbSioEkCj7yxgbzsLG6dPlGtGRFJO9q9OcF21zfz4tJypowZxI1n6zkyIpJ+FDQJtmV3A1GHdzfs5pnSLYkujohIj1PQJFhVXWwt6cxzJqjbTETSkoImwdqC5msXTFS3mYikJQVNglXVNQMwPD/nMFeKiKQmBU2CVdY2MTCnL/37ZSW6KCIioVDQJFhlXRMFA9WaEZH0paBJsKraJnWbiUhaU9AkWFVdE8MHahKAiKQvBU2CVdU1q0UjImlNQZNAza1RqhtbFDQiktYUNAm0qz62hkaTAUQknSloEmTdzlq+Njv2/JmcvvoaRCR96W+4BPmXF1ayvLwGgLUVdQkujYhIePSYgASJRJ0hA/rxlXOKmHluUaKLIyISGgVNAkSjzvLyaj59yii+edlJiS6OiEio1HWWAGVV9extaOHMCUMSXRQRkdCpRdOL5i7Zxuy3N1JVG9tIs3hEfoJLJCISPgVNL6nd18K3nl5KUyTafmzRht1q1YhI2lPQ9JKnFm2hKRLlumljOa5/NrnZWXxxmh50JiLpT0HTC1oiUR5ZUMa4Ibl8+zMn6wFnIpJRNBmgF7y0bDsVtU1s2aw8B8MAAARiSURBVNPIM6VbEl0cEZFepRZNSDbuqueeOcsYMzSXPy3fwXG5fbnpnCKuLVF3mYhkFgVNSL4zZxlvr9+FlYF77FheTl91m4lIxlHQ9DB3p7qxhWXbqjlpZD4P33gmcz/cBphaMyKSkRQ0PeiOpz/kxSXlNEViTZgHr5tMcUE+37xsUoJLJiKSOJoM0IN21TW3hwzAGm2WKSKiFk1P+tmXpjL77Q00NkfJzc5SV5mICAqaHjU0L1vdZCIinajrTEREQqWgERGRUCloREQkV
AoaEREJlYJGRERCpaAREZFQKWhERCRU5u6HvyrDmFklsOko3z4cqOrB4iSjdK9jutcP0r+O6V4/SM46TnD3gs4HFTQ9zMxK3b0k0eUIU7rXMd3rB+lfx3SvH6RWHdV1JiIioVLQiIhIqBQ0PW9WogvQC9K9juleP0j/OqZ7/SCF6qgxGhERCZVaNCIiEioFjYiIhEpB00PM7DNmttrM1pnZXYkuT08xs41mtszMPjSz0uDYUDN7xczWBj+HJLqcR8LMfmNmO81seYdjXdbJzO4OvtfVZvbpxJS6+7qo371mti34Hj80s8s7nEup+gGY2Tgzm2dmK81shZl9IzieFt/jIeqXmt+ju+vPMf4BsoD1wEQgG1gCnJLocvVQ3TYCwzsd+3fgruD1XcCPEl3OI6zTBcAZwPLD1Qk4Jfg+c4Djg+85K9F1OIr63QvcGefalKtfUO7RwBnB64HAmqAuafE9HqJ+Kfk9qkXTM84C1rl7mbs3A08CVyW4TGG6CpgdvJ4NXJ3Ashwxd18A7O50uKs6XQU86e5N7r4BWEfs+05aXdSvKylXPwB33+7u7weva4GVwBjS5Hs8RP26ktT1U9D0jDHAlg6/b+XQ/6NIJQ782cwWm9ktwbGR7r4dYv+HAEYkrHQ9p6s6pdN3+w9mtjToWmvrUkr5+plZEfBx4F3S8HvsVD9Iwe9RQdMzLM6xdJk3fp67nwHMAP7ezC5IdIF6Wbp8tw8DxcBUYDvwH8HxlK6fmeUDvwdud/eaQ10a51jS1zNO/VLye1TQ9IytwLgOv48FyhNUlh7l7uXBz53Ac8Sa4xVmNhog+LkzcSXsMV3VKS2+W3evcPeIu0eBR9jfrZKy9TOzfsT+Ev4fd58THE6b7zFe/VL1e1TQ9IxFwIlmdryZZQPXAXMTXKZjZmZ5Zjaw7TXwKWA5sbrNDC6bCfwhMSXsUV3VaS5wnZnlmNnxwInAewko3zFp+8s3cA2x7xFStH5mZsB/ASvd/acdTqXF99hV/VL2e0z0bIR0+QNcTmxmyHrgnkSXp4fqNJHYTJYlwIq2egHDgL8Aa4OfQxNd1iOs1++IdTu0EPuX4M2HqhNwT/C9rgZmJLr8R1m//waWAUuJ/aU0OlXrF5T5fGJdQ0uBD4M/l6fL93iI+qXk96gtaEREJFTqOhMRkVApaEREJFQKGhERCZWCRkREQqWgERGRUCloREQkVAoaEREJ1f8HKugd/Q/EoCEAAAAASUVORK5CYII=\n", 54 | "text/plain": [ 55 | "
" 56 | ] 57 | }, 58 | "metadata": { 59 | "needs_background": "light" 60 | }, 61 | "output_type": "display_data" 62 | } 63 | ], 64 | "source": [ 65 | "fig, ax = plt.subplots()\n", 66 | "_ = ax.plot(np.sort(df['age']), marker='o', markersize=1)\n", 67 | "_ = ax.set(title='Passengers', ylabel='age')\n", 68 | "_ = ax.set_yticks([15,25, 55, 64])\n", 69 | "_ = ax.set_yticklabels(['children', 'youth', 'adults', 'seniors'])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": { 75 | "slideshow": { 76 | "slide_type": "skip" 77 | } 78 | }, 79 | "source": [ 80 | "The above figure is an expected cumulative distribution graph. It shows that most passengers are adults, but it is a bit hard to do comparisons, especially at the tails. Instead, we compute the histogram of passenger ages. " 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "slideshow": { 87 | "slide_type": "slide" 88 | } 89 | }, 90 | "source": [ 91 | "# Let's make Histograms" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": { 97 | "slideshow": { 98 | "slide_type": "skip" 99 | } 100 | }, 101 | "source": [ 102 | "Here we call the histogram function. Bins = 'auto' tries to find the optimal number of bins using methods described in the numpy [histogram docs](https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram_bin_edges.html#numpy.histogram_bin_edges). 
" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 7, 108 | "metadata": { 109 | "slideshow": { 110 | "slide_type": "slide" 111 | } 112 | }, 113 | "outputs": [ 114 | { 115 | "data": { 116 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAM6klEQVR4nO3db4xl9V3H8fdHirZQFVYGsgJxakKwhMhSN0jFGGWL2ZYGeNIEEsw+IOEJRjBNmkUTE56tiWnqA2OyabEb29Bgi0Igsd1sS4ymoc7yp11ccKtdKXbdnWIqVpNa2q8P7lkZh13m7uzcuedb3q9kcs75zZ09n9y988lvfveee1NVSJL6+bF5B5AkrY8FLklNWeCS1JQFLklNWeCS1NTbNvNkF110US0uLm7mKSWpvYMHD367qhZWj29qgS8uLrK0tLSZp5Sk9pL8y6nGXUKRpKYscElqygKXpKYscElqygKXpKYscElqygKXpKYscElqygKXpKY29UpMaT0Wdz8x83Mc3XPzzM8hbTRn4JLUlAUuSU1Z4JLUlAUuSU1Z4JLUlAUuSU1Z4JLUlAUuSU1Z4JLUlAUuSU1Z4JLUlAUuSU35ZlY/ojbjDaDAN4GS5skZuCQ1ZYFLUlMWuCQ1ZYFLUlMWuCQ1ZYFLUlMWuCQ1ZYFLUlMWuCQ1ZYFLUlMWuCQ1ZYFLUlMWuCQ1NXWBJzknyTNJHh+OtyTZn+TIsL1wdjElSaudyQz8XuDwiuPdwIGqugI4MBxLkjbJVAWe5DLgZuDjK4ZvBfYN+/uA2zY2miTpzUw7A/8Y8BHghyvGLqmqYwDD9uINziZJehNrFniSDwInqurgek6Q5O4kS0mWlpeX1/NPSJJOYZoZ+A3ALUmOAp8BbkzyKeB4kq0Aw/bEqX64qvZW1faq2r6wsLBBsSVJaxZ4Vd1fVZdV1SJwO/DFqroTeAzYNdxsF/DozFJKkt7gbF4Hvge4KckR4KbhWJK0Sc7oU+mr6kngyWH/FWDHxkeSJE3DKzElqSkLXJKassAlqSkLXJKassAlqSkLXJKassAlqSkLXJKassAlqSkLXJKassAlqSkLXJKassAlqSkLXJKassAlqSkLXJKassAlqakz+kQeabXF3U/MO4L0luUMXJKassAlqSkLXJKassAlqSkLXJKassAlqSkLXJKassAlqSkLXJKassAlqSkLXJKassAlqSnfzEriR+tNuY7uuXneEbRJnIFLUlMWuCQ1ZYFLUlMWuCQ1tWaBJ3l7kq8keS7J80keGMa3JNmf5MiwvXD2cSVJJ00zA/8ecGNVXQNsA3YmuR7YDRyoqiuAA8OxJGmTrFngNfHd4fDc4auAW4F9w/g+4LaZJJQkndJUa+BJzknyLHAC2F9VTwGXVNUxgGF78exiSpJWm6rAq+oHVbUNuAy4LsnV054gyd1JlpIsLS8vrzenJGmVM3oVSlV9B3gS2AkcT7IVYNieOM3P7K2q7VW1fWFh4SzjSpJOmuZVKAtJLhj23wG8D3gBeAzYNdxsF/DorEJKkt5omvdC2QrsS3IOk8J/uKoeT/Jl4OEkdwEvAR+aYU5J0iprFnhVfRW49hTjrwA7ZhFKkrQ2r8SUpKYscElqygKXpKYscElqygKXpKYscElqygKXpKYscElqygKXpKYscElqygKXpKYscElqapp3I5TUyOLuJ2Z+jqN7bp75ObQ2Z+CS1JQFLklNWeCS1JQFLklNWeCS1JQFLklNWeCS1
JQFLklNWeCS1JQFLklNWeCS1JQFLklNWeCS1JQFLklNWeCS1JQFLklNWeCS1JQFLklNWeCS1JQFLklNWeCS1JSfSj8Hm/Gp4ZJ+9DkDl6Sm1izwJJcn+VKSw0meT3LvML4lyf4kR4bthbOPK0k6aZoZ+GvAh6vq3cD1wD1JrgJ2Aweq6grgwHAsSdokaxZ4VR2rqqeH/f8EDgOXArcC+4ab7QNum1VISdIbndEaeJJF4FrgKeCSqjoGk5IHLj7Nz9ydZCnJ0vLy8tmllST9n6kLPMk7gc8B91XVq9P+XFXtrartVbV9YWFhPRklSacwVYEnOZdJeX+6qh4Zho8n2Tp8fytwYjYRJUmnMs2rUAJ8AjhcVR9d8a3HgF3D/i7g0Y2PJ0k6nWku5LkB+C3ga0meHcZ+D9gDPJzkLuAl4EOziShJOpU1C7yq/hbIab69Y2PjSJKm5ZWYktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktSUBS5JTVngktTU2+YdYFqLu5/YlPMc3XPzppxH6szfx3FwBi5JTVngktSUBS5JTVngktSUBS5JTa1Z4EkeTHIiyaEVY1uS7E9yZNheONuYkqTVppmBfxLYuWpsN3Cgqq4ADgzHkqRNtGaBV9XfAP++avhWYN+wvw+4bYNzSZLWsN418Euq6hjAsL34dDdMcneSpSRLy8vL6zydJGm1mT+JWVV7q2p7VW1fWFiY9ekk6S1jvQV+PMlWgGF7YuMiSZKmsd4CfwzYNezvAh7dmDiSpGlN8zLCh4AvA1cmeTnJXcAe4KYkR4CbhmNJ0iZa890Iq+qO03xrxwZnkSSdAa/ElKSmLHBJasoCl6SmLHBJasoCl6SmLHBJasoCl6SmLHBJasoCl6SmLHBJasoCl6SmLHBJasoCl6SmLHBJasoCl6SmLHBJasoCl6SmLHBJasoCl6SmLHBJamrNDzV+q1nc/cS8I0jSVJyBS1JTFrgkNWWBS1JTFrgkNeWTmJJGazNeVHB0z80zP8esOAOXpKYscElqygKXpKZcA5f0lrZZF+/NYq3dGbgkNWWBS1JTFrgkNWWBS1JTFrgkNXVWBZ5kZ5IXk3w9ye6NCiVJWtu6CzzJOcCfAO8HrgLuSHLVRgWTJL25s5mBXwd8var+uar+B/gMcOvGxJIkreVsLuS5FPjmiuOXgV9efaMkdwN3D4ffTfLiGZzjIuDb6044O2PNBePNNtZcMN5sY80F48021lzkD88q28+davBsCjynGKs3DFTtBfau6wTJUlVtX8/PztJYc8F4s401F4w321hzwXizjTUXzCbb2SyhvAxcvuL4MuBbZxdHkjStsynwvweuSPKuJD8O3A48tjGxJElrWfcSSlW9luS3gc8D5wAPVtXzG5ZsYl1LL5tgrLlgvNnGmgvGm22suWC82caaC2aQLVVvWLaWJDXglZiS1JQFLklNjbLAx3SJfpIHk5xIcmjF2JYk+5McGbYXziHX5Um+lORwkueT3DuGbEnenuQrSZ4bcj0whlyrMp6T5Jkkj48pW5KjSb6W5NkkS2PJluSCJJ9N8sLweHvvvHMluXK4n05+vZrkvnnnWpHvd4fH/6EkDw2/FxuebXQFPsJL9D8J7Fw1ths4UFVXAAeG4832GvDhqno3cD1wz3A/zTvb94Abq+oaYBuwM8n1I8i10r3A4RXHY8r2G1W1bcXrhceQ7Y+Bv66qXwCuYXLfzTVXVb043E/bgF8C/hv4y3nnAkhyKfA7wPaquprJizxun0m2qhrVF/Be4PMrju8H7p9zpkXg0IrjF4Gtw/5W4MUR3G+PAjeNKRtwHvA0kyt0R5GLyfUKB4AbgcfH9P8JHAUuWjU212zATwHfYHjBw1hyr
crym8DfjSUXr1+lvoXJK/0eHzJueLbRzcA59SX6l84py+lcUlXHAIbtxfMMk2QRuBZ4ihFkG5YongVOAPurahS5Bh8DPgL8cMXYWLIV8IUkB4e3oBhDtp8HloE/G5adPp7k/BHkWul24KFhf+65qupfgT8CXgKOAf9RVV+YRbYxFvhUl+hrIsk7gc8B91XVq/POA1BVP6jJn7aXAdcluXremQCSfBA4UVUH553lNG6oqvcwWT68J8mvzTsQkxnke4A/raprgf9ivktM/89wEeEtwF/MO8tJw9r2rcC7gJ8Fzk9y5yzONcYC73CJ/vEkWwGG7Yl5hEhyLpPy/nRVPTKmbABV9R3gSSbPIYwh1w3ALUmOMnn3zBuTfGok2aiqbw3bE0zWc68bQbaXgZeHv6IAPsuk0Oed66T3A09X1fHheAy53gd8o6qWq+r7wCPAr8wi2xgLvMMl+o8Bu4b9XUzWnzdVkgCfAA5X1UfHki3JQpILhv13MHkwvzDvXABVdX9VXVZVi0weV1+sqjvHkC3J+Ul+8uQ+kzXTQ/POVlX/BnwzyZXD0A7gH+ada4U7eH35BMaR6yXg+iTnDb+nO5g88bvx2eb1xMMaTwJ8APhH4J+A359zloeYrGN9n8ls5C7gZ5g8EXZk2G6ZQ65fZbK09FXg2eHrA/POBvwi8MyQ6xDwB8P43O+zVTl/ndefxJx7NiZrzc8NX8+ffNyPJNs2YGn4P/0r4MKR5DoPeAX46RVjc8815HiAycTlEPDnwE/MIpuX0ktSU2NcQpEkTcECl6SmLHBJasoCl6SmLHBJasoCl6SmLHBJaup/AQN6Z7/3mtEsAAAAAElFTkSuQmCC\n", 117 | "text/plain": [ 118 | "
" 119 | ] 120 | }, 121 | "metadata": { 122 | "needs_background": "light" 123 | }, 124 | "output_type": "display_data" 125 | } 126 | ], 127 | "source": [ 128 | "fig, ax = plt.subplots()\n", 129 | "(counts, edges, _) = ax.hist('age', bins='auto', data=df)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "slideshow": { 136 | "slide_type": "slide" 137 | } 138 | }, 139 | "source": [ 140 | "## Custom Bins to match our CDF" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": { 146 | "slideshow": { 147 | "slide_type": "skip" 148 | } 149 | }, 150 | "source": [ 151 | "In our line plot, we use bins based on the census, a commonly accepted binning for ages. We can pass those bins into our histogram function via the `bins` parameter. " 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 8, 157 | "metadata": { 158 | "slideshow": { 159 | "slide_type": "slide" 160 | } 161 | }, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAQC0lEQVR4nO3df4xldXnH8fenrGLxR1yys3RlN53VrFowVuiUakkN7dZC1bD0D5ol0WwsybYNWm1q7aKJ8M8mtLXaJq0mW1hZWwrZIJZNTa10q6VNKjj8EpYV2QiFgZUdSqw/mkDBp3/M2eQ6zDAz99xhdr59vxJy7nnO99zzMLn5zHfP3HNOqgpJUlt+YqUbkCSNnuEuSQ0y3CWpQYa7JDXIcJekBq1Z6QYA1q1bV+Pj4yvdhiStKnfccceTVTU217YTItzHx8eZnJxc6TYkaVVJ8p/zbfO0jCQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNeiEuEJVmsv4ri+udAtapR6+6l0r3cKKc+YuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDFgz3JHuTHEty36z6B5I8kORQkj8ZqF+e5Ei37fzlaFqS9MIWcxHTtcBfAp87Xkjyy8A24M1V9XSS9V39DGA7cCbwGuCfk7y+qp4bdeOSpPktOHOvqluBp2aVfxe4qqqe7sYc6+rbgBuq6umqegg4Apwzwn4lSYsw7Dn31wO/lOS2JP+a5Oe7+unAowPjprra8yTZmWQyyeT09PSQbUiS5jJsuK8B1gJvBf4Q2J8kQOYYW3O9QVXtqaqJqpoYGxsbsg1J0lyGDfcp4KaacTvwI2BdV980MG4j8Hi/FiVJSzVsuP898CsASV4PvBR4EjgAbE9ycpLNwBbg9lE0KklavAW/LZPkeuA8YF2SKeAKYC+wt/t65DPAjqoq4FCS/cD9wLPAZX5TRpJefAuGe1VdMs+m98wzfjewu09TkqR+vEJVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBi0Y7kn2JjnWPZhj9rYPJ6kk6wZqlyc5kuSBJOePumFJ0sIWM3O/FrhgdjHJJuAdwCMDtTOA7cCZ3T6fTnLSSDqVJC3aguFeVbcCT82x6VPAR4AaqG0Dbqiqp6vqIeAIcM4oGpUkLd5Q59yTXAg8VlX3zNp0OvDowPpUV5vrPXYmmUwyOT09PUwbkqR5LDnck5wCfAz4+Fyb56jVHDWqak9VTVTVxNjY2FLbkCS9gAUfkD2H1wGbgXuSAGwE7kxyDjMz9U0DYzcCj/dtUpK0NEueuVfVvVW1vqrGq2qcmUA/u6q+AxwAtic5OclmYAtw+0g7liQtaDFfhbwe+A/gDUmmklw639iqOgTsB+4HvgRcVlXPjapZSdLiLHhapqouWWD7+Kz13cDufm1JkvrwClVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMW8ySmvUmOJblvoPanSb6Z5BtJvpDk1QPbLk9yJMkDSc5frsYlSfNbzMz9WuCCWbVbgDdV1ZuBbwGXAyQ5A9gOnNnt8+kkJ42sW0nSoiwY7lV1K/DUrNqXq+rZbvVrwMbu9Tbghqp6uqoeAo4A54ywX0nSIozinPtvAf/YvT4deHRg21RXe54kO5NMJpmcnp4eQRuSpON6hXuSjwHPAtcdL80xrObat6r2VNVEVU2MjY31aUOSNMuaYXdMsgN4N7C1qo4H+BSwaWDYRuDx4duTJA1jqJl7kguAPwIurKr/Gdh0ANie5OQkm4EtwO3925QkLcWCM/ck1wPnAeuSTAFXMPPtmJOBW5IAfK2qfqeqDiXZD9zPzOmay6r
queVqXpI0twXDvaoumaN8zQuM3w3s7tOUJKkfr1CVpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQQuGe5K9SY4luW+gdmqSW5I82C3XDmy7PMmRJA8kOX+5GpckzW8xM/drgQtm1XYBB6tqC3CwWyfJGcB24Mxun08nOWlk3UqSFmXBcK+qW4GnZpW3Afu61/uAiwbqN1TV01X1EHAEOGdEvUqSFmnYc+6nVdVRgG65vqufDjw6MG6qqz1Pkp1JJpNMTk9PD9mGJGkuo/6Dauao1VwDq2pPVU1U1cTY2NiI25Ck/9+GDfcnkmwA6JbHuvoUsGlg3Ebg8eHbkyQNY9hwPwDs6F7vAG4eqG9PcnKSzcAW4PZ+LUqSlmrNQgOSXA+cB6xLMgVcAVwF7E9yKfAIcDFAVR1Ksh+4H3gWuKyqnlum3iVJ81gw3Kvqknk2bZ1n/G5gd5+mJEn9eIWqJDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBvcI9ye8nOZTkviTXJ3lZklOT3JLkwW65dlTNSpIWZ+hwT3I68HvARFW9CTgJ2A7sAg5W1RbgYLcuSXoR9T0tswb4ySRrgFOAx4FtwL5u+z7gop7HkCQt0dDhXlWPAZ9g5gHZR4H/rqovA6dV1dFuzFFg/Vz7J9mZZDLJ5PT09LBtSJLm0Oe0zFpmZumbgdcAL0/ynsXuX1V7qmqiqibGxsaGbUOSNIc+p2V+FXioqqar6n+Bm4BfBJ5IsgGgWx7r36YkaSn6hPsjwFuTnJIkwFbgMHAA2NGN2QHc3K9FSdJSrRl2x6q6LcmNwJ3As8BdwB7gFcD+JJcy8wvg4lE0KklavKHDHaCqrgCumFV+mplZvCRphXiFqiQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ3qFe5JXp3kxiTfTHI4yduSnJrkliQPdsu1o2pWkrQ4fWfufwF8qareCPwsM4/Z2wUcrKotwMFuXZL0Iho63JO8Cng7cA1AVT1TVd8FtgH7umH7gIv6NilJWpo+M/fXAtPAZ5PcleTqJC8HTquqowDdcv1cOyfZmWQyyeT09HSPNiRJs/UJ9zXA2cBnquos4Ics4RRMVe2pqomqmhgbG+vRhiRptj7hPgVMVdVt3fqNzIT9E0k2AHTLY/1alCQt1dDhXlXfAR5N8oautBW4HzgA7OhqO4Cbe3UoSVqyNT33/wBwXZKXAt8G3sfML4z9SS4FHgEu7nkMSdIS9Qr3qrobmJhj09Y+7ytJ6scrVCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1qO+9ZXSCG9/1xZVuQdIKcOYuSQ0y3CWpQYa7JDXIcJekBhnuktSg3uGe5KQkdyX5h2791CS3JHmwW67t36YkaSlGMXP/IHB4YH0XcLCqtgAHu3VJ0ouoV7gn2Qi8C7h6oLwN2Ne93gdc1OcYkqSl6ztz/3PgI8CPBmqnVdVRgG65fq4dk+xMMplkcnp6umcbkqRBQ4d7kncDx6rqjmH2r6o9VTVRVRNjY2PDtiFJmkOf2w+cC1yY5J3Ay4BXJflb4IkkG6rqaJINwLFRNCpJWryhZ+5VdXlVbayqcWA78C9V9R7gALCjG7YDuLl3l5KkJVmO77lfBbwjyYPAO7p1SdKLaCR3hayqrwJf7V7/F7B1FO8rSRqOV6hKUoMMd0lqkOEuSQ0y3CWpQYa7JDXIZ6hKas5qenbww1e9a1ne15m7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1qM8zVDcl+UqSw0kOJflgVz8
1yS1JHuyWa0fXriRpMfrM3J8F/qCqfgZ4K3BZkjOAXcDBqtoCHOzWJUkvoj7PUD1aVXd2r78PHAZOB7YB+7ph+4CL+jYpSVqakZxzTzIOnAXcBpxWVUdh5hcAsH6efXYmmUwyOT09PYo2JEmd3uGe5BXA54EPVdX3FrtfVe2pqomqmhgbG+vbhiRpQK9wT/ISZoL9uqq6qSs/kWRDt30DcKxfi5KkperzbZkA1wCHq+qTA5sOADu61zuAm4dvT5I0jD4P6zgXeC9wb5K7u9pHgauA/UkuBR4BLu7XoiRpqYYO96r6dyDzbN467PsOYzU9dUWSXgxeoSpJDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatCyhXuSC5I8kORIkl3LdRxJ0vMtS7gnOQn4K+DXgTOAS5KcsRzHkiQ933LN3M8BjlTVt6vqGeAGYNsyHUuSNEufB2S/kNOBRwfWp4BfGByQZCews1v9QZIHehxvHfBkj/1btpp/Nqu5d62sVfPZyR/36vWn59uwXOE+14Oz68dWqvYAe0ZysGSyqiZG8V6tWc0/m9Xcu1bWavrsLFevy3VaZgrYNLC+EXh8mY4lSZplucL968CWJJuTvBTYDhxYpmNJkmZZltMyVfVskvcD/wScBOytqkPLcazOSE7vNGo1/2xWc+9aWavps7MsvaaqFh4lSVpVvEJVkhpkuEtSg1Z1uHuLgx+XZG+SY0nuG6hdmeSxJHd3/71zJXucT5JNSb6S5HCSQ0k+2NVXRf9aWUkeTnJv9xmZ7Gon3GcnyauT3Jjkm91n/W0D2z6cpJKsG8mxVus59+4WB98C3sHMVy+/DlxSVfevaGMrKMnbgR8An6uqN3W1K4EfVNUnVrK3hSTZAGyoqjuTvBK4A7gI+E1WQf9aWUkeBiaq6smB2pWcYJ+dJPuAf6uqq7tvEp5SVd9Nsgm4Gngj8HOD/x/DWs0zd29xMEtV3Qo8tdJ9DKOqjlbVnd3r7wOHmbnSWWpCklcBbweuAaiqZ6rqu93mTwEfYdbFnn2s5nCf6xYHhsHc3p/kG91pm7Ur3cxCkowDZwG3daVV1b9WRAFfTnJHd2uT406kz85rgWngs0nuSnJ1kpcnuRB4rKruGeXBVnO4L3iLAwHwGeB1wFuAo8CfrWw7LyzJK4DPAx+qqu+xyvrXijm3qs5m5k60l3WnKE+0z84a4GzgM1V1FvBD4ErgY8DHR32w1Rzu3uJgEarqiap6rqp+BPw1M6ezTkhJXsJMsF9XVTfB6upfK6eqHu+Wx4AvAOecgJ+dKWCqqo7/i/RGZsJ+M3BP93eDjcCdSX6q78FWc7h7i4NF6P5QedxvAPfNN3YlJQkz5yIPV9UnB+qron+tnO7UxiuPvwZ+DbjvRPvsVNV3gEeTvKErbQXurKr1VTVeVePM/AI4uxvby3LdFXLZrcAtDk54Sa4HzgPWJZkCrgDOS/IWZk5ZPQz89oo1+MLOBd4L3Jvk7q72UWYe9LIa+tfKOQ34wsz8gDXA31XVl5L8zQn42fkAcF03If028L7lOtCq/SqkJGl+q/m0jCRpHoa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatD/ARH9WPZ+B4CjAAAAAElFTkSuQmCC\n", 166 | "text/plain": [ 167 | "
" 168 | ] 169 | }, 170 | "metadata": { 171 | "needs_background": "light" 172 | }, 173 | "output_type": "display_data" 174 | } 175 | ], 176 | "source": [ 177 | "fig, ax = plt.subplots()\n", 178 | "(counts, edges, _) = ax.hist('age', bins=[0, 15,25, 55, 64], data=df)\n", 179 | "_ = ax.set_xticks([0, 15,25, 55, 64])" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "slideshow": { 186 | "slide_type": "slide" 187 | } 188 | }, 189 | "source": [ 190 | "### Probability density" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "slideshow": { 197 | "slide_type": "skip" 198 | } 199 | }, 200 | "source": [ 201 | "By default `ax.hist` returns the number in each bin and the bin edges used. The bin edges array is 1 longer than the counts because it is all of the left bin edges and then right edge of the last bin. This is useful when we need to compare absolute values between data sets, but sometime we need to be able to compare the probability density instead. In the continuous limit, probability density has the condition:\n", 202 | "\n", 203 | "$$1 = \\int_{-\\infty}^{\\infty} P(x)dx$$\n", 204 | "\n", 205 | "and in the discrete case:\n", 206 | "$$1 = \\sum_{n=0}^N w_n p_n$$\n", 207 | "\n", 208 | "where $w_n$ is the width of the nth bin. We set the `density` parameter to true to plot the density of the data. 
" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 9, 214 | "metadata": { 215 | "slideshow": { 216 | "slide_type": "slide" 217 | } 218 | }, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD4CAYAAADlwTGnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAReklEQVR4nO3df6xfdX3H8edrVbep24BxNV1pdplpnI2ZhTWAY1mcjKUti9U/TIoRCCGpZG0Gi8lSXbLpf53xx0ZC2sDohM1AnLLRQDMkzMW4DEfBiq214w47udDBVTNwIxlW3/vjezq+Xi+957b33u+Xfp6P5OR7zud8Pt/v+9ze+331nO8555uqQpLUnp8adQGSpNEwACSpUQaAJDXKAJCkRhkAktSoV426gIU499xza3JyctRlSNIryiOPPPKdqpqY3f6KCoDJyUn2798/6jIk6RUlyX/M1e4hIElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJatQr6kpg6VRM7rhvyV/j6M4rlvw1pMXmHoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY3qFQBJNiQ5kmQqyY451ifJTd36x5Jc2LWvTvLFJIeTHEpyw9CYjyR5KsmBbtq0eJslSZrPvBeCJVkB3AxcDkwDDyfZW1XfGOq2EVjTTRcDu7rH48AHq+rRJD8HPJLkgaGxn6qqjy/e5kiS+uqzB3ARMFVVT1TVi8BdwOZZfTYDd9TAQ8BZSVZW1bGqehSgqr4PHAZWLWL9kqRT1CcAVgFPDi1P85Nv4vP2STIJXAB8Zah5e3fIaE+Ss+d68SRbk+xPsn9mZqZHuZKkPvoEQOZoq4X0SfJ64PPAjVX1fNe8C3gTsA44Bnxirhevqluqan1VrZ+YmOhRriSpjz43g5sGVg8tnwc83bdPklczePP/TFXdfaJDVT1zYj7JrcC9C6pcS2o5bqAG3kRNGqU+ewAPA2uSnJ/kNcAWYO+sPnuBq7uzgS4BnquqY0kC3AYcrqpPDg9IsnJo8T3AwVPeCknSgs27B1BVx5NsB+4HVgB7qupQkuu79buBfcAmYAp4Abi2G34pcBXw9SQHurYPV9U+4GNJ1jE4VHQU+MCibZUkaV69vg+ge8PeN6tt99B8AdvmGPdl5v58gKq6akGVSpIWlVcCS1KjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEb1CoAkG5IcSTKVZMcc65Pkpm79Y0ku7NpXJ/liksNJDiW5YWjMOUkeSPJ493j24m2WJGk+8wZAkhXAzcBGYC1wZZK1s7ptBNZ001ZgV9d+HPhgVb0FuATYNjR2B/BgVa0BHuyWJUnLpM8ewEXAVFU9UVUvAncBm2f12QzcUQMPAWclWVlVx6rqUYCq+j5wGFg1NOb2bv524N2nuS2SpAXoEwCrgCeHlqd56U28d58kk8AFwFe6pjdW1TGA7vENc714kq1J9ifZPzMz06NcSVIffQIgc7TVQvokeT3weeDGqnq+f3lQVbdU1fqqWj8xMbGQoZKkk+gTANPA6qHl84Cn+/ZJ8moGb/6fqaq7h/o8k2Rl12cl8OzCSpckn
Y5X9ejzMLAmyfnAU8AW4H2z+uwFtie5C7gYeK6qjiUJcBtwuKo+OceYa4Cd3eM9p74ZeqWa3HHfqEuQmjVvAFTV8STbgfuBFcCeqjqU5Ppu/W5gH7AJmAJeAK7thl8KXAV8PcmBru3DVbWPwRv/Z5NcB3wbeO/ibZYkaT599gDo3rD3zWrbPTRfwLY5xn2ZuT8foKq+C1y2kGIlSYvHK4ElqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNarXrSAkndyZdFO7ozuvGHUJWibuAUhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmN6hUASTYkOZJkKsmOOdYnyU3d+seSXDi0bk+SZ5McnDXmI0meSnKgmzad/uZIkvqaNwCSrABuBjYCa4Erk6yd1W0jsKabtgK7htZ9GtjwMk//qapa1037Fli7JOk09NkDuAiYqqonqupF4C5g86w+m4E7auAh4KwkKwGq6kvA9xazaEnS6esTAKuAJ4eWp7u2hfaZy/bukNGeJGfP1SHJ1iT7k+yfmZnp8ZSSpD76BEDmaKtT6DPbLuBNwDrgGPCJuTpV1S1Vtb6q1k9MTMxXqySppz4BMA2sHlo+D3j6FPr8mKp6pqp+WFU/Am5lcKhJkrRMXtWjz8PAmiTnA08BW4D3zeqzl8HhnLuAi4HnqurYyZ40ycqhPu8BDp6sv6TlMbnjviV/jaM7r1jy19D85g2AqjqeZDtwP7AC2FNVh5Jc363fDewDNgFTwAvAtSfGJ7kTeAdwbpJp4E+r6jbgY0nWMThUdBT4wCJulyRpHn32AOhO0dw3q2330HwB215m7JUv035V/zIlSYvNK4ElqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRvX6TmCNl8kd9426BElnAPcAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDWqVwAk2ZDkSJKpJDvmWJ8kN3XrH0ty4dC6PUmeTXJw1phzkjyQ5PHu8ezT3xxJUl/zBkCSFcDNwEZgLXBlkrWzum0E1nTTVmDX0LpPAxvmeOodwINVtQZ4sFuWJC2TPnsAFwFTVfVEVb0I3AVsntVnM3BHDTwEnJVkJUBVfQn43hzPuxm4vZu/HXj3qWyAJOnU9AmAVcCTQ8vTXdtC+8z2xqo6BtA9vmGuTkm2JtmfZP/MzEyPciVJffQJgMzRVqfQ55RU1S1Vtb6q1k9MTCzGU0qS6BcA08DqoeXzgKdPoc9sz5w4TNQ9PtujFknSIukTAA8Da5Kcn+Q1wBZg76w+e4Gru7OBLgGeO3F45yT2Atd089cA9yygbknSaZo3AKrqOLAduB84DHy2qg4luT7J9V23fcATwBRwK/D7J8YnuRP4F+DNSaaTXNet2glcnuRx4PJuWZK0THp9JWRV7WPwJj/ctntovoBtLzP2ypdp/y5wWe9KJUmLyiuBJalRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUqF5fCn8mmNxx37K8ztGdVyzL60ivZP49jgf3ACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVG9AiDJhiRHkkwl2THH+iS5qVv/WJIL5xub5CNJnkpyoJs2Lc4mSZL6mDcAkqwAbgY2AmuBK5OsndVtI7Cmm7YCu3qO/VRVreumfae7MZKk/vrsAVwETFXVE1X1InAXsHlWn83AHTXwEHBWk
pU9x0qSRqBPAKwCnhxanu7a+vSZb+z27pDRniRn965aknTa+gRA5mirnn1ONnYX8CZgHXAM+MScL55sTbI/yf6ZmZke5UqS+ugTANPA6qHl84Cne/Z52bFV9UxV/bCqfgTcyuBw0U+oqluqan1VrZ+YmOhRriSpjz4B8DCwJsn5SV4DbAH2zuqzF7i6OxvoEuC5qjp2srHdZwQnvAc4eJrbIklagHm/EKaqjifZDtwPrAD2VNWhJNd363cD+4BNwBTwAnDtycZ2T/2xJOsYHBI6CnxgMTdMknRyvb4RrDtFc9+stt1D8wVs6zu2a79qQZVKkhaVVwJLUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhrV6wth1N/kjvtGXYIk9eIegCQ1ygCQpEYZAJLUKANAkhrlh8CSzljLcVLG0Z1XLPlrLBX3ACSpUQaAJDXKAJCkRvkZgCSdhuW6+HMpPmtwD0CSGmUASFKjDABJapQBIEmNMgAkqVG9AiDJhiRHkkwl2THH+iS5qVv/WJIL5xub5JwkDyR5vHs8e3E2SZLUx7wBkGQFcDOwEVgLXJlk7axuG4E13bQV2NVj7A7gwapaAzzYLUuSlkmfPYCLgKmqeqKqXgTuAjbP6rMZuKMGHgLOSrJynrGbgdu7+duBd5/mtkiSFqDPhWCrgCeHlqeBi3v0WTXP2DdW1TGAqjqW5A1zvXiSrQz2KgD+O8mRHjWfcC7wnQX0Xy7jWheMb23jWheMb23jWheMb23jWhf5s9Oq7ZfnauwTAJmjrXr26TP2pKrqFuCWhYz5/6KS/VW1/lTGLqVxrQvGt7ZxrQvGt7ZxrQvGt7ZxrQuWprY+h4CmgdVDy+cBT/fsc7Kxz3SHiegen+1ftiTpdPUJgIeBNUnOT/IaYAuwd1afvcDV3dlAlwDPdYd3TjZ2L3BNN38NcM9pboskaQHmPQRUVceTbAfuB1YAe6rqUJLru/W7gX3AJmAKeAG49mRju6feCXw2yXXAt4H3LuqWDZzSoaNlMK51wfjWNq51wfjWNq51wfjWNq51wRLUlqoFHZKXJJ0hvBJYkhplAEhSo87IAJjv1hXLXMueJM8mOTjUNvLbYCRZneSLSQ4nOZTkhnGoLcnPJPnXJF/r6vroONQ1q8YVSb6a5N5xqi3J0SRfT3Igyf5xqS3JWUk+l+Sb3e/b20ddV5I3dz+nE9PzSW4cdV1D9f1h9/t/MMmd3d/Fotd2xgVAz1tXLKdPAxtmtY3DbTCOAx+sqrcAlwDbup/TqGv7X+CdVfU2YB2woTuzbNR1DbsBODy0PE61/XZVrRs6X3wcavsL4B+q6leBtzH42Y20rqo60v2c1gG/zuDklb8bdV0ASVYBfwCsr6q3MjiBZsuS1FZVZ9QEvB24f2j5Q8CHRlzTJHBwaPkIsLKbXwkcGYOf2z3A5eNUG/Ba4FEGV4+PRV0MrmV5EHgncO84/XsCR4FzZ7WNtDbg54Fv0Z1wMi51zarld4F/Hpe6eOkOCucwOFPz3q7GRa/tjNsD4OVvSzFOfuw2GMCct8FYLkkmgQuArzAGtXWHWA4wuDjwgaoai7o6fw78EfCjobZxqa2ALyR5pLuFyjjU9ivADPBX3WGzv0zyujGoa9gW4M5ufuR1VdVTwMcZnB5/jMF1VV9YitrOxAA47dtPtCTJ64HPAzdW1fOjrgegqn5Yg13z84CLkrx11DUBJPk94NmqemTUtbyMS6vqQgaHP7cl+a1RF8Tgf7AXAruq6gLgfxijO/92F6i+C/jbUddyQndsfzNwPvBLwOuSvH8pXutMDIA+t64YtbG4DUaSVzN48/9MVd09TrUBVNV/Af/E4DOUcajrUuBdSY4yuLPtO5P8zZjURlU93T0+y+B49kVjUNs0MN3txQF8jkEgjLquEzYCj1bVM93yONT1O8C3qmqmqn4A3
A38xlLUdiYGQJ9bV4zayG+DkSTAbcDhqvrkuNSWZCLJWd38zzL4Y/jmqOsCqKoPVdV5VTXJ4PfqH6vq/eNQW5LXJfm5E/MMjhkfHHVtVfWfwJNJ3tw1XQZ8Y9R1DbmSlw7/wHjU9W3gkiSv7f5OL2Pwwfni1zaqD16W+EOUTcC/Af8O/PGIa7mTwXG8HzD439B1wC8y+CDx8e7xnBHU9ZsMDo09Bhzopk2jrg34NeCrXV0HgT/p2kf+M5tV5zt46UPgkdfG4Fj717rp0Inf+zGpbR2wv/s3/Xvg7DGp67XAd4FfGGobeV1dHR9l8B+fg8BfAz+9FLV5KwhJatSZeAhIktSDASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIa9X9XEPz/1+8HMwAAAABJRU5ErkJggg==\n", 223 | "text/plain": [ 224 | "
" 225 | ] 226 | }, 227 | "metadata": { 228 | "needs_background": "light" 229 | }, 230 | "output_type": "display_data" 231 | } 232 | ], 233 | "source": [ 234 | "fig, ax = plt.subplots()\n", 235 | "# only consider rows where we know the age\n", 236 | "(density, edges, _) = ax.hist('age', bins='auto', data=df, density=True)\n", 237 | "\n", 238 | "assert np.sum(np.diff(edges) * density) == 1" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": { 244 | "slideshow": { 245 | "slide_type": "slide" 246 | } 247 | }, 248 | "source": [ 249 | "## Practice:\n", 250 | "1. Make histogram of fares\n", 251 | "2. Use the bins defined in the [overview](01_overview.ipynb)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": { 258 | "slideshow": { 259 | "slide_type": "slide" 260 | } 261 | }, 262 | "outputs": [], 263 | "source": [] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": { 268 | "slideshow": { 269 | "slide_type": "notes" 270 | } 271 | }, 272 | "source": [ 273 | "# Use groupby to simplify code" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": { 279 | "slideshow": { 280 | "slide_type": "notes" 281 | } 282 | }, 283 | "source": [ 284 | "![](../images/groupby.png?)\n", 285 | "source: [pandas cheatsheet](https://github.com/pandas-dev/pandas/blob/master/doc/cheatsheet/Pandas_Cheat_Sheet.pdf)\n" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": { 291 | "slideshow": { 292 | "slide_type": "notes" 293 | } 294 | }, 295 | "source": [ 296 | "Groupby is used to gather all rows that have one observation of one variable in common. For example, a `.groupby('sex')` gathers all the rows where the sex is male and puts them in one dataframe, and puts all the rows where the sex is female into a second dataframe. 
The next step in a groupby operation is usually to aggregate these dataframes using some function (mean, median, etc.) to get aggregate statistics for each variable (step 2 in the image above) but we can also make use of the individual dataframes. " 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": { 303 | "slideshow": { 304 | "slide_type": "slide" 305 | } 306 | }, 307 | "outputs": [], 308 | "source": [ 309 | "fig, ax = plt.subplots()\n", 310 | "# only consider rows where we know the age\n", 311 | "for label, gdf in df.groupby('sex'):\n", 312 | " ax.hist('age', bins=[0, 15,25, 55, 64], data=gdf, label=label, alpha=.5)\n", 313 | "_ = ax.set_xticks([0, 15,25, 55, 64])\n", 314 | "_ = ax.legend()" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": { 320 | "slideshow": { 321 | "slide_type": "slide" 322 | } 323 | }, 324 | "source": [ 325 | "# Let's make things interactive" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": { 331 | "slideshow": { 332 | "slide_type": "skip" 333 | } 334 | }, 335 | "source": [ 336 | "Just as in the [previous notebook](02_visual_variables.ipynb), we can link multiple graphs together via the `sharex` parameter."
337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": { 343 | "slideshow": { 344 | "slide_type": "slide" 345 | } 346 | }, 347 | "outputs": [], 348 | "source": [ 349 | "%matplotlib widget\n", 350 | "fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)\n", 351 | "# only consider rows where we know the age\n", 352 | "for label, gdf in df.groupby('sex'):\n", 353 | " ax1.hist('age', bins='auto', data=gdf, label=label, alpha=.5)\n", 354 | " ax2.hist('age', bins='auto', data=gdf, label=label, alpha=.5, density=True)\n", 355 | "\n", 356 | "_ = ax1.legend()\n", 357 | "_ = ax1.set(title='counts', ylabel='N')\n", 358 | "_ = ax2.set(title='density', ylabel='P', xlabel='age')" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": { 364 | "slideshow": { 365 | "slide_type": "slide" 366 | } 367 | }, 368 | "source": [ 369 | "# What about bars and pies?" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": { 375 | "slideshow": { 376 | "slide_type": "slide" 377 | } 378 | }, 379 | "source": [ 380 | "The Titanic dataset has a bunch of categorical variables (sex, pclass, survival) that are fairly interesting. In particular, it would be useful to plot some counts of this data so that we can get a sense of things like the ratio of people in each class." 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": { 386 | "slideshow": { 387 | "slide_type": "slide" 388 | } 389 | }, 390 | "source": [ 391 | "## Let's get counts\n" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": { 397 | "slideshow": { 398 | "slide_type": "skip" 399 | } 400 | }, 401 | "source": [ 402 | "Pandas has a very useful function called `.value_counts` that returns the frequency of each unique value of a variable in a column. Here I want to compute who survived. 
" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 23, 408 | "metadata": { 409 | "slideshow": { 410 | "slide_type": "slide" 411 | } 412 | }, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "1 180\n", 418 | "0 90\n", 419 | "Name: survived, dtype: int64" 420 | ] 421 | }, 422 | "execution_count": 23, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "survived = df['survived'].value_counts()\n", 429 | "survived" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": { 435 | "slideshow": { 436 | "slide_type": "slide" 437 | } 438 | }, 439 | "source": [ 440 | "# Let's make a pie chart!" 441 | ] 442 | }, 443 | { 444 | "cell_type": "markdown", 445 | "metadata": { 446 | "slideshow": { 447 | "slide_type": "skip" 448 | } 449 | }, 450 | "source": [ 451 | "Pie charts show the ratios between different categories. The first argument to then pie chart is the wedge size (or data values). Like many of the other Matplotlib plotting routines, we can also pass in the colors and labels and [lots of other customizations](https://matplotlib.org/gallery/pie_and_polar_charts/pie_features.html). 
We access the values in the dataframe using the `.values` attribute" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 26, 457 | "metadata": { 458 | "slideshow": { 459 | "slide_type": "slide" 460 | } 461 | }, 462 | "outputs": [ 463 | { 464 | "data": { 465 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAUP0lEQVR4nO3deZQdZZnH8e9rEgIEWZwBWRRZBkGWKLeCiIIL4gzqcWZUIoy4zXCubBFhRDkOAoogcRsIkAQpBAYwCIQBlUExAQxLAsSqECCYQDp7ZyF70p2k12f+qErTadJJL7freavq+ZxzT3MS+r6/hvr1W1W3ql4nIhhj/PM27QDGmO2zchrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ6ychrjKSunMZ4arB2gP5xz/wwcLSKja/BeDSKyRw1i1VQYB7sAh6avwzq99geGAkOAXdKvQ9JvawZaOn3dAiwB5gHz06/zgEXVStSW1c9iesf5vpCRc26wiLRmMI5qOcM4OBqokBSvcxEPZOD2cFqBRWxb2DpgerUSLRigMU0PZVZO59ww4H7gXcAg4MfAT4ERIrLKOTcC+IWIfNw590OSjfIQYBVwOPAfIjIrfa+/AN8BjgNGAJcDM4HDRKTdObc7MIdk4z4YGAvsC2wCqiIy2zl3KDCBZO/hT8AlWZYzjIMDgNOAT6VfD8hq7B6aC0wGJgFPVCvROuU8pZPlbu3pwFIR+SyAc24vknJ2JwBOFpHNzrlLgC8BVznnDgAOFJHIOXccgIisd87NBD4GPAl8DnhMRFqcc7cC54nI6865E4FxwKnAGGC8iNzlnLtwYH7kN4VxsEeab2sZjxnoMfvpH9LXeUBbGAcRb5Z1arUSNWuGK4MsZ873Ao+RzJ6PiMjTzrkFdD9zioj8KP3eg4BJInK0c+7bwH4icrlz7hvp949yzn0Z+KiInOece4ikhNOAlSSz6FZDReR9zrnVwP5pgfck+cVR05kzjIOjgJEkhfwQbx4T5l0j8DRJUe+rVqJ65TyFlNnMKSKvOecC4DPAdc65P5Mc82w9ntq1y7c0dvreeufcaufccOBM4NztDPH79H3fQTLrPgEMA9aJyAe6i9XnH6gbYRwMBv4VuAD4RK3f3xPDSPaETgd+GsbB74Fx1Ur0uG6sYsmsnM65A4E1InKPc64B+AawgKRIfwS+uJO3+C3wPWAvEXm561+KSINz7gWS3dVHRKQN2OCcm++cGykiDzjnHDBcRGYCzwJnAfcAZ/f35wvj4EDgm0CV5Hi5LAYDXwC+EMbBbOAW4M5qJVqvGyv/svyc8zjgBefciyQncK4BfgSMcc49DezslP5EkjLdv4N/5z7gK+nXrc4GzkmPSWcB/5L++beBC51z04G9evmzdAjj4NQwDiYCC4GrKFcxuzoKuAFYGsZBGMZBd3sspge8/yjFR2Ec7AV8HTifZIM03XuO5Pj//molatIOkydWzl4I42A34BLgMmBP5Th5sxS4ErijWonatcPkgZWzB8I4eBvJTHk1yee0pu9eAS6rVqJHtYP4zsq5E2Ec/CPwc2C4dpaCeQK4t
FqJZmgH8ZWVsxthHLyb5Mzv57WzFFg7MB74gV2B9FZWzi7COBgC/CdwBcnneWbgvQF8t1qJ7tIO4hMrZydhHJwC/Ap4n3aWknoKOLdaiWZrB/GBlRMI48AB3yc54TNIOU7ZNQLfrFaiCdpBtJW+nGEc7APcDXxWO4vZxnjg4jJfYF/qcoZxEJBceXSIchSzfdOBkdVKtFA7iIbSPqYkjIPzSK6vPUQ5iuneCUAcxsGntYNoKN3MGcbB7iQnfb6incX0mADXAleV6eqiUpUzjIMjgQfx/0Zns32PA/9WrUQrtYNkoTS7tWEcfJ7kGMaKmV+fBGaEcfBB7SBZKMXMGcbBmcBvsI9JimIDcHq1Ek3TDjKQCj9zWjELaU/gT2EcnKQdZCAVupxWzEIrfEELW04rZikUuqCFLKcVs1QKW9DCldOKWUqFLGihymnFLLXCFbQwH6WEcXAGyeMzrZjltgE4rVqJpmsH6a9ClDOMg+OA54HdtLMYL9QDx+f9SqLc79aGcbAnySV5Vkyz1UHAhPTBbLmV6/Cp24EjtEMY75xG8tDy3Mr1bm0YB5cA/62dw3hLgM9WK9EftYP0RW7LGcbBR4C/kPPVuc2AW0Ny/LlIO0hv5bKcYRzsC8wgObYwZmemAyfn7ZEnuTvmTA/y78WKaXruBOB67RC9lbtykjwh75PaIUzuXBDGwZe1Q/RGrnZrwzg4HXgUcNpZTC41AiPy8lzc3MycnZ79Y8U0fTWMZBvKhdyUk2R5hIO1Q5jc+2gYB1/VDtETuditDePgKOAlYIh2FlMIK4Ajq5VovXaQHcnLzDkWK6apnXcC12iH2BnvZ870NrDfaucwhdMGnODz+qBez5xhHAwFRmvnMIU0CPiFdogd8bqcwChsuQQzcE71eakHb3dr09W/6oB9tLOYQnsFeL+Pyzz4PHNejhXTDLxjgW9oh9geL2fOMA7eDbwODNXOYkqhHji8WomatIN05uvMeT5WTJOdg4CR2iG68q6cYRzsApyjncOUzgXaAbryrpzAGcB+2iFM6ZwUxsEHtEN05mM5vfsNZkrDq23PqxNCYRwMB2Zq5zCl1Qgc5Ms1t77NnF795jKlMwz4unaIrbwpZ/r82bO1c5jSO187wFbelBP4GrCHdghTekeFcXCqdgjwq5ze/MYypefF4ZUXJ4TCOPg48KR2DmNSrcB7qpVoqWYIX2ZOO9Y0PhkMnKkdwpdyfko7gDFdqG+T6ru1YRwcAbymGsKYt2oE3qH5lHgfZs7TtAMYsx3DANVVsn0op/rugzHdUN02VcsZxsEg4BOaGYzZAdW9Ou2ZcwSwt3IGY7ozIowDte1Tu5x2vGl8prpnp11OO940vlPbRtXKGcaB+tkwY3pAbe9Oc+b8KLCL4vjG9MQRYRy8R2NgzXKeoDi2Mb3xQY1BNct5uOLYxvTGYRqDapZT5Qc2pg+snMZ4qjzlDONgV+AAjbGN6YPylJNk5TCnNLYxvXVweqlpprTKabu0Jk8GA+/OelArpzE9k/k2a+U0pmdKU85DlcY1pq8y32Zt5jSmZ0ozc75LaVxj+irzbVarnLsqjWtMX2W+zWqVc4jSuMb0VebbrJXTmJ7J/PbGzMsZxoEV0+RRKWZOu8Ha5FHm2+3grAc0A2fKxJXPTRi9+L3aOYrIvY0N1dZsx9QoZ4vCmIXX0tS+5d6fLT5IhHdoZykiaWNt1mNq7NZaOQfAXdcseq69LfuLs0sk8+0283JWK5GQrH9oamTNiublzz+6ZoR2joLLfEEjrY9SbPasoZsumlsH7KGdo+CKP3OmmpTGLZzZL2ycVT93y4e1c5RA5tusVjmXK41bKCIi4y6tA3uqRBYy32a1yjlPadxC+eMdK6ZuaWw/RjtHSWS+zVo5c6ppU1vj78YvtWf/ZsfKaXrmth8s+Ku0s792jhKxcpqdW7Foy+KZU9afqJ2jZEpTzvlK4xbCmFFz67F7YrPUDizMelCbOXPmxb+se3FVffOHt
HOUTL2IlOMihGolagBWaoydZ+1t0nbb5Qt2085RQiqTieZaKTZ79tJDY5dObd7SfqR2jhIqXTntuLMXGje0rv/zXSuO1s5RUirbqs2cOXHLd+fNFOHvtHOUVOlmzlhx7FxZ8tqmeXP+2mDXz+qZoTGoZjmfJDlFbXZizLfq1mBPrdBSLyKvagysVs5qJVqDzZ47Ne2R1dPXr2qxezX1PK41sObMCTBJeXyvtbZIy93XLPp77Rwlp7aNapdzsvL4XvvtzxZPbW0RW/RJl9o2ql3OZ4HNyhm8tGF1y6qn/nfVB7RzlNwrIqJ277FqOauVqAl4WjODr26+uO5vwF7aOUpOdc9Oe+YEO+58i7qXGuYseHXTR7RzGN1t04dy2nFnFzdfXLcFP/7flFkLMEUzgA8bwEzgDe0Qvnj83jemNa5ve792DsM0EWnUDKBezvQ5tmqfJfmkpal9ywPXL7EHQ/tB/XBLvZyp+7UD+ODOHy18vr3NVv32xETtAL6U8w/AEu0QmlYva1o2/bG1J2jnMAA8ISKztUN4Uc5qJWoDbtXOoenGb9XNB3bXzmEAGKcdADwpZyqkpMs0zJq24eVl8+2p7Z6oB36nHQI8Kme1Ei0HHtLOkTURkVu+N2+Qdg7TIRQRLxba8qacKS92J7L0f7ctn9q0qd2ecOCHVjw6vPKqnNVKNAWYpZ0jK5sb2zb+4dZlR2jnMB0eFpFl2iG28qqcqfHaAbJy2/fnx9LOfto5TAev9tx8LOddQIN2iIG2fMGWhS8/u8GeP+uPv4nIk9ohOvOunNVKtBG4RzvHQBszau5yYKh2DtPBuz0278qZuhkQ7RADJXp8bbx6WbOtdeKPDcD/aIfoystyVivRLAo6e7a3SdvtVyx4u3YOs43rRGSDdoiuvCxn6gfAFu0QtTZxTP2zLU1iZ2j9sRi4QTvE9nhbzmolWgTcpJ2jlhrWta6dPOGN47RzmG1cKSJeTgLeljP1E2CNdohaGX/pvJcQ9tHOYTq8RPLpgJe8Lme1Eq0DrtbOUQuLZm+qe31Ggz16xC+Xioi3Dzb3upypscDL2iH668aL5q7DntrukwdFRP2G6h3xvpzVStQKXECOP1p55uFVL2xY3Rpo5zAdGoGLtUPsjPflBKhWomfw+NhgR1pb2pt/c93id2rnMNu4WkS8v7k/F+VMfQ9Yqx2ityaMXjy1rVXeo53DdHgVuF47RE/kppzVSvQG8F3tHL2xbmXLymceXl3RzmE6tAHniUguburPTTkBqpXo1+ToyqGbL547B9hTO4fpcIWI5GaFgVyVM3Uu8Ip2iJ15fUbD3xbN3myPHvHHI8Bo7RC9kbtyVivRJuAMYKN2lh0Ze0ldCzn871tQ84GviUiuzvjncuOpVqI5wDnaOboz6Z4VUzdtbBuuncMA0ASMFJHcnUzMZTkBqpXoAeBG7RxdNW9p3/zgjfWHaOcwHb4tIpF2iL7IbTlTlwLTtEN0dsdVC15ob+NA7RwGgLtF5FfaIfoq1+WsVqIW4EvAKu0sAKuWNi2NJq/7oHYOAyQnDc/TDtEfuS4nQLUSLQG+DKhfwDxm1NyFwG7aOQwbgS+KyCbtIP2R+3ICVCvRJOD7mhlefnb9SysWNp2kmcEAyYUGXxOR17SD9FchyglQrUQ/A67UGLu9XdpvvWz+EI2xzTbagLNF5GHtILVQmHICVCvRj1Eo6B9uXTa1aXP7+7Ie12xjazHv0w5SK4UqJ2Rf0M0NbRsevW35kVmNZ7arcMWEApYTsi3ory6bN0OEfbMYy2xXIYsJBS0nZFPQZfM3L3z1uY12EkhPYYsJBS4nDHxBx4yauwLYZaDe3+xQoYsJBS8nDFxBpz+2JlqzvMUuONBR+GJCCcoJHQW9jBpdqNDWKq13/HDh3rV4L9Nrm4Czil5MKEk5oeNz0NOpwaV+D1y/ZGprsxze/1Sml14DThSRidpBslCackLHlUTH04+L5TeubVnzxH0r7Xaw7E0ERoiI9
zfa10qpygkd1+J+DBjTl+8f9515ryDYLm12WoBLRGSkiHh9g32tuZzdHF5TYRyMBH4N9GjVr4WvNr5+7VfnHAYMGtBgZqt64EsiMlU7iIbSzZydpTdsn0APn0k05qK6BqyYWZkMHF/WYkLJywkdjzw5kZ081W/Kgyufb1jbenw2qUpNgB8D/yQiK7XDaCr1bm1XYRycA/wctl0JrKW5vemiU2auaGuVg3WSlcZC4FwReUw7iA9KP3N2lj4X90jgTjqtzXLPTxY9Z8UcUM3AdcDRVsw32czZjTAOTgbGrl3R/M7LPvPKMGAP7UwF9ThwoYjM0Q7iG5s5u5EunhRMGL34v0ger2hqawFwpoicZsXcPps5e8A5tzfJY1AuAnZVjpN3a4BrgZtFpFk7jM+snL3gnDsYuAb4CuCU4+RNE3ATcK2IrNMOkwdWzj5wzh0DXEhS0h5dwFBibwC3AePzsCamT6yc/eCcezvwVZKVt49RjuObZ4BxJMu793n31Tn3Q6CBZLW2p0Rkcg+/7xDgERE5tq9ja7MTQv0gIhtFZFy6AXwMuI/kWtCyagBuAYaLyCkicm+tjitF5MqeFrMorJw1IiJPichZwMHAFcBi5UhZmgWMAg4UkfNF5OX+vJlz7nLn3Bzn3GSSz51xzt3pnDsj/efAOTfFORc55x5zzh3Q6c9nOuemkRx25JqVs8ZEZLmIXAMcCnwemEBy3FU0C4AQ+LiIHCsiY2tx14hzLgDOIrm17wsk1z53/vshJCeWzhCRALid5OwvwB3ARSJSiOc6DdYOUFQi0gY8DDzsnHPAcOA04FPAKcDuivH6Yi3wBMkF6ZNEpG6AxjkFeGjrUgrOud93+fsjgWOBScl/VgYBy5xzewF7i8iU9N+7G/j0AGXMhJUzA+mirTPT1y+dc0OBD/NmWQP824tpBqYCk0gK+VcRyWo9mh2dpXTArK6zY/pZdKHOblo5FYhIE/Bk+rrcObcPcCrJrtxhJLvEhwH7ZREHWArM6/R6geTMqMZCQE8BdzrnRpNsn58DOi/jNwfY1zl3kohMS3dz3ysis5xz651zJ4vIM8DZ2UevLSunB9JVlx9MXx2cc8NIStq5sFtf+wNDgSHp6y1vS3LmuAXYAiwhKd58ti3igvSXhRdEJHbO3Qe8SHKXytNd/r45PTF0Y7orOxi4geSk1L8DtzvnNgG5v4DePucsiHQG2YWklM0i0qocyfSTldMYT/l2EsIYk7JyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnrJyGuMpK6cxnvp/ucq+RtzkZWcAAAAASUVORK5CYII=\n", 466 | "text/plain": [ 467 | "
" 468 | ] 469 | }, 470 | "metadata": {}, 471 | "output_type": "display_data" 472 | } 473 | ], 474 | "source": [ 475 | "fig, ax =plt.subplots()\n", 476 | "_ = ax.pie(x=survived.values, \n", 477 | " labels=['survived', 'died'], colors=['yellowgreen', 'black'])" 478 | ] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "metadata": { 483 | "slideshow": { 484 | "slide_type": "slide" 485 | } 486 | }, 487 | "source": [ 488 | "## Practice\n", 489 | "What's the ratio of men to women? Display it using a pie chart" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": {}, 496 | "outputs": [], 497 | "source": [] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": { 502 | "slideshow": { 503 | "slide_type": "slide" 504 | } 505 | }, 506 | "source": [ 507 | "# Bar charts" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": { 513 | "slideshow": { 514 | "slide_type": "skip" 515 | } 516 | }, 517 | "source": [ 518 | "Often we don't want the ratio of categories, but the counts. To do this, we create bar charts. Matplotlib supports [many different types](https://matplotlib.org/gallery/index.html#lines-bars-and-markers) of bar charts. Here we will illustrate vertical bar charts." 
519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": 31, 524 | "metadata": { 525 | "slideshow": { 526 | "slide_type": "slide" 527 | } 528 | }, 529 | "outputs": [ 530 | { 531 | "data": { 532 | "text/plain": [ 533 | "male 138\n", 534 | "female 132\n", 535 | "Name: sex, dtype: int64" 536 | ] 537 | }, 538 | "execution_count": 31, 539 | "metadata": {}, 540 | "output_type": "execute_result" 541 | } 542 | ], 543 | "source": [ 544 | "pclass = df['sex'].value_counts()\n", 545 | "pclass" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": { 551 | "slideshow": { 552 | "slide_type": "skip" 553 | } 554 | }, 555 | "source": [ 556 | "Matplotlib supports strings as first class data, so we can plot the labels directly. We use the '.index' attribute to get the categories and the '.values' attribute to obtain the values." 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 33, 562 | "metadata": { 563 | "slideshow": { 564 | "slide_type": "slide" 565 | } 566 | }, 567 | "outputs": [ 568 | { 569 | "data": { 570 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAObklEQVR4nO3df6zdd13H8eeLFobbBLb0binr9A5swLEwp5eCYMhIQYaQdcYs6QQt2tgQxw9/EOxYZCZYnWJQ/nCYBuaaOLdUQFcwAk2BoAiDu42xdd1sQ3Erq+tFwg8lGY69/eN+R45353rvPd9z2vWz5yNZzvn+Oufd7Ntnv/n2ntNUFZKktjzlRA8gSRo/4y5JDTLuktQg4y5JDTLuktSg1Sd6AIA1a9bU9PT0iR5Dkk4qt9122zeqamrYtidE3Kenp5mdnT3RY0jSSSXJvy+2zdsyktQg4y5JDTLuktSgJeOe5Pokx5LcPWTb25NUkjUD665KcijJfUlePe6BJUlLW86V+w3AJQtXJjkXeBVw/8C684HNwAu6Y65Lsmosk0qSlm3JuFfVZ4FvDtn058A7gMFvHtsE3FxVD1fVYeAQsGEcg0qSlm+ke+5JLgW+XlV3Lth0DvDAwPKRbt2w19iWZDbJ7Nzc3ChjSJIWseK4JzkVuBp417DNQ9YN/U7hqtpZVTNVNTM1NfRn8CVJIxrlQ0zPBc4D7kwCsA64PckG5q/Uzx3Ydx3wYN8hJUkrs+K4V9VdwFmPLSf5GjBTVd9Isgf42yTvBZ4NrAe+OKZZFzW9/R8n/RY6SX3t2tee6BGkE2I5Pwp5E/B54HlJjiTZuti+VbUf2A3cA3wcuLKqfjCuYSVJy7PklXtVXbHE9ukFyzuAHf3GkiT14SdUJalBxl2SGmTcJalBxl2SGmTcJalBxl2SGvSE+Gf2pNb5QTstZlIftPPKXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUFLxj3J9UmOJbl7YN17ktyb5CtJ/j7Jswa2XZXkUJL7krx6UoNLkha3nCv3G4BLFqzbC1xQVS8E/g24CiDJ+cBm4AXdMdclWTW2aSVJy7Jk3Kvqs8A3F6z7ZFU90i1+AVjXPd8E3FxVD1fVYeAQsGGM80qSlmEc99x/Hfin7vk5wAMD24506x4nybYks0lm5+bmxjCGJOkxveKe5GrgEeDGx1YN2a2GHVtVO6tqpqpmpqam+owhSVpg5H9DNckW4HXAxqp6LOBHgHMHdlsHPDj6eJKkUYx05Z7kEuD3gEur6nsDm/YAm5OckuQ8YD3wxf5jSpJWYskr9yQ3ARcDa5IcAa5h/qdjTgH2JgH4QlW9qar2J9kN3MP87Zorq+oHkxpekjTcknGvqiuGrP7g/7P/DmBHn6EkSf34CVVJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJatCScU9yfZJjSe4eWHdmkr1JDnaPZwxsuyrJoST3JXn1pAaXJC1uOVfuNwCXLFi3HdhXVeuBfd0ySc4HNgMv6I65LsmqsU0rSVqWJeNeVZ8Fvrlg9SZgV/d8F3DZwPqbq+rhqjoMHAI2jGlWSdIyjXrP/eyqOgrQPZ7VrT8HeGBgvyPdusdJsi3JbJLZubm5EceQJA0z7r9QzZB1NWzHqtpZVTNVNTM1NTXmMSTpyW3UuD+UZC1A93isW38EOHdgv3XAg6OPJ0kaxahx3wNs6Z5vAW4ZWL85ySlJzgPWA1/sN6IkaaVWL7VDkpuAi4E1SY4A1wDXAruTbAXuBy4HqKr9SXYD9wCPAFdW1Q8mNLskaRFLxr2qrlhk08ZF9t8B7OgzlCSpHz+hKkkNMu6S1CD
jLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkN6hX3JL+dZH+Su5PclOTpSc5MsjfJwe7xjHENK0lanpHjnuQc4K3ATFVdAKwCNgPbgX1VtR7Y1y1Lko6jvrdlVgM/kmQ1cCrwILAJ2NVt3wVc1vM9JEkrNHLcq+rrwJ8B9wNHgW9X1SeBs6vqaLfPUeCsYccn2ZZkNsns3NzcqGNIkoboc1vmDOav0s8Dng2cluQNyz2+qnZW1UxVzUxNTY06hiRpiD63ZV4JHK6quar6H+AjwEuBh5KsBegej/UfU5K0En3ifj/wkiSnJgmwETgA7AG2dPtsAW7pN6IkaaVWj3pgVd2a5EPA7cAjwB3ATuB0YHeSrcz/AXD5OAaVJC3fyHEHqKprgGsWrH6Y+at4SdIJ4idUJalBxl2SGmTcJalBxl2SGmTcJalBxl2SGmTcJalBxl2SGmTcJalBxl2SGmTcJalBxl2SGmTcJalBxl2SGmTcJalBxl2SGmTcJalBxl2SGmTcJalBxl2SGmTcJalBveKe5FlJPpTk3iQHkvxskjOT7E1ysHs8Y1zDSpKWp++V+/uAj1fV84ELgQPAdmBfVa0H9nXLkqTjaOS4J3kG8HLggwBV9f2q+hawCdjV7bYLuKzvkJKklelz5f4cYA746yR3JPlAktOAs6vqKED3eNawg5NsSzKbZHZubq7HGJKkhfrEfTXw08D7q+oi4L9ZwS2YqtpZVTNVNTM1NdVjDEnSQn3ifgQ4UlW3dssfYj72DyVZC9A9Hus3oiRppUaOe1X9B/BAkud1qzYC9wB7gC3dui3ALb0mlCSt2Oqex78FuDHJ04CvAr/G/B8Yu5NsBe4HLu/5HpKkFeoV96r6MjAzZNPGPq8rSerHT6hKUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoN6xz3JqiR3JPlYt3xmkr1JDnaPZ/QfU5K0EuO4cn8bcGBgeTuwr6rWA/u6ZUnScdQr7knWAa8FPjCwehOwq3u+C7isz3tIklau75X7XwDvAB4dWHd2VR0F6B7PGnZgkm1JZpPMzs3N9RxDkjRo5LgneR1wrKpuG+X4qtpZVTNVNTM1NTXqGJKkIVb3OPZlwKVJfgF4OvCMJH8DPJRkbVUdTbIWODaOQSVJyzfylXtVXVVV66pqGtgMfKqq3gDsAbZ0u20Bbuk9pSRpRSbxc+7XAq9KchB4VbcsSTqO+tyW+aGq+gzwme75fwIbx/G6kqTR+AlVSWqQcZekBhl3SWqQcZekBhl3SWqQcZekBhl3SWqQcZekBhl3SWqQcZekBhl3SWqQcZekBhl3SWqQcZekBhl3SWqQcZekBhl3SWqQcZekBhl3SWqQcZekBhl3SWrQyHFPcm6STyc5kGR/krd1689MsjfJwe7xjPGNK0lajj5X7o8Av1tVPwm8BLgyyfnAdmBfVa0H9nXLkqTjaOS4V9XRqrq9e/5d4ABwDrAJ2NXttgu4rO+QkqSVGcs99yTTwEXArcDZVXUU5v8AAM5a5JhtSWaTzM7NzY1jDElSp3fck5wOfBj4rar6znKPq6qdVTVTVTNTU1N9x5AkDegV9yRPZT7sN1bVR7rVDyVZ221fCxzrN6IkaaX6/LRMgA8CB6rqvQOb9gBbuudbgFtGH0+SNIrVPY59GfArwF1JvtyteydwLbA7yVbgfuDyfiNKklZq5LhX1b8AWWTzxlFfV5LUn59QlaQGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6QGGXdJapBxl6Q
GGXdJapBxl6QGTSzuSS5Jcl+SQ0m2T+p9JEmPN5G4J1kF/CXwGuB84Iok50/ivSRJjzepK/cNwKGq+mpVfR+4Gdg0ofeSJC2wekKvew7wwMDyEeDFgzsk2QZs6xb/K8l9E5rlyWYN8I0TPcQTRf7kRE+gITxHB/Q8R398sQ2TinuGrKv/s1C1E9g5ofd/0koyW1UzJ3oOaTGeo8fHpG7LHAHOHVheBzw4ofeSJC0wqbh/CVif5LwkTwM2A3sm9F6SpAUmclumqh5J8mbgE8Aq4Pqq2j+J99LjeKtLT3Seo8dBqmrpvSRJJxU/oSpJDTLuktQg4964JBcn+diJnkPtSPLWJAeS3Dih1/+DJG+fxGs/mUzq59wltes3gddU1eETPYgW55X7SSDJdJJ7k3wgyd1JbkzyyiSfS3IwyYbuv39Nckf3+Lwhr3NakuuTfKnbz6+E0Iok+SvgOcCeJFcPO5+SvDHJPyT5aJLDSd6c5He6fb6Q5Mxuv9/ojr0zyYeTnDrk/Z6b5ONJbkvyz0mef3x/xScv437y+AngfcALgecDvwz8HPB24J3AvcDLq+oi4F3AHw15jauBT1XVi4BXAO9JctpxmF2NqKo3Mf+BxFcAp7H4+XQB8+foBmAH8L3u3Pw88KvdPh+pqhdV1YXAAWDrkLfcCbylqn6G+XP9usn8ytrjbZmTx+GqugsgyX5gX1VVkruAaeCZwK4k65n/qoenDnmNnwcuHbif+XTgx5j/jSWt1GLnE8Cnq+q7wHeTfBv4aLf+LuYvUAAuSPKHwLOA05n/XMwPJTkdeCnwd8kPv9HklEn8Qlpk3E8eDw88f3Rg+VHm/z++m/nfUL+YZBr4zJDXCPBLVeWXtGkchp5PSV7M0ucrwA3AZVV1Z5I3AhcveP2nAN+qqp8a79hPDt6Wacczga93z9+4yD6fAN6S7jIoyUXHYS61q+/59KPA0SRPBV6/cGNVfQc4nOTy7vWT5MKeMz9pGPd2/Cnwx0k+x/xXPgzzbuZv13wlyd3dsjSqvufT7wO3AnuZ/zujYV4PbE1yJ7Af/12IZfPrBySpQV65S1KDjLskNci4S1KDjLskNci4S1KDjLskNci4S1KD/hcXflHg02ss2AAAAABJRU5ErkJggg==\n", 571 | "text/plain": [ 572 | "
" 573 | ] 574 | }, 575 | "metadata": { 576 | "needs_background": "light" 577 | }, 578 | "output_type": "display_data" 579 | } 580 | ], 581 | "source": [ 582 | "fig, ax = plt.subplots()\n", 583 | "_ = ax.bar(pclass.index, pclass.values)" 584 | ] 585 | }, 586 | { 587 | "cell_type": "markdown", 588 | "metadata": { 589 | "slideshow": { 590 | "slide_type": "slide" 591 | } 592 | }, 593 | "source": [ 594 | "# Practice \n", 595 | "How many people are in each class and how do you visualize that?" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": null, 601 | "metadata": {}, 602 | "outputs": [], 603 | "source": [] 604 | } 605 | ], 606 | "metadata": { 607 | "celltoolbar": "Slideshow", 608 | "kernelspec": { 609 | "display_name": "Python 3", 610 | "language": "python", 611 | "name": "python3" 612 | }, 613 | "latex_envs": { 614 | "LaTeX_envs_menu_present": true, 615 | "autoclose": false, 616 | "autocomplete": true, 617 | "bibliofile": "biblio.bib", 618 | "cite_by": "apalike", 619 | "current_citInitial": 1, 620 | "eqLabelWithNumbers": true, 621 | "eqNumInitial": 1, 622 | "hotkeys": { 623 | "equation": "Ctrl-E", 624 | "itemize": "Ctrl-I" 625 | }, 626 | "labels_anchors": false, 627 | "latex_user_defs": false, 628 | "report_style_numbering": false, 629 | "user_envs_cfg": false 630 | }, 631 | "varInspector": { 632 | "cols": { 633 | "lenName": 16, 634 | "lenType": 16, 635 | "lenVar": 40 636 | }, 637 | "kernels_config": { 638 | "python": { 639 | "delete_cmd_postfix": "", 640 | "delete_cmd_prefix": "del ", 641 | "library": "var_list.py", 642 | "varRefreshCmd": "print(var_dic_list())" 643 | }, 644 | "r": { 645 | "delete_cmd_postfix": ") ", 646 | "delete_cmd_prefix": "rm(", 647 | "library": "var_list.r", 648 | "varRefreshCmd": "cat(var_dic_list()) " 649 | } 650 | }, 651 | "types_to_exclude": [ 652 | "module", 653 | "function", 654 | "builtin_function_or_method", 655 | "instance", 656 | "_Feature" 657 | ], 658 | "window_display": false 659 | } 660 | }, 661 | 
"nbformat": 4, 662 | "nbformat_minor": 4 663 | } 664 | -------------------------------------------------------------------------------- /notebooks/04_images.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# images and heatmaps" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import matplotlib.pyplot as plt\n", 17 | "import numpy as np" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "We are going to use a digital elevation model (DEM) from the [basemap tutorial](https://github.com/rveciana/BasemapTutorial/blob/master/code_examples/sample_files/dem.tiff) as the sample data set in this section (which is bundled in the repository as an npy file) for the zone of Montserrat at 5m resolution." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "im = np.load('data/dem.npy')" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "print(f\"type: {type(im)}, shape: {im.shape}, dtype: {im.dtype}\")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "To display 2D images the data needs to be spatially re-sampled to fit the screen and mapped from scalar values in the array to the colors we see on the screen." 
50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "plt.imshow(im)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "of course we should include a color bar to go with the image:" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "fig, ax = plt.subplots()\n", 75 | "img = ax.imshow(im)\n", 76 | "cbar = fig.colorbar(img)\n", 77 | "cbar.set_label('elevation')" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "By default the colors are scaled over the full range of the input data, however this may not always be what you want (for example, if you are interested in a small contrast on top of a background or if there are extreme outliers). To set the lower limit, use the `vmin` parameter to `imshow` " 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "fig, ax = plt.subplots()\n", 94 | "img = ax.imshow(im, vmin=700)\n", 95 | "cbar = fig.colorbar(img)\n", 96 | "cbar.set_label('elevation')" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Note how the upper left which used to have texture is now colored uniformly and the lower limit of the color bar has been changed." 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "### Exercise\n", 111 | "\n", 112 | "Use the `vmax` parameter to restrict color space from above." 
113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "## All the colors\n", 127 | "\n", 128 | "While the default color map, [which replaced 'jet' in mpl 2.0](https://bids.github.io/colormap/), has many good properties, sometimes you just are not feeling the green (or have a [domain-specific reason for needing a different color map](https://matplotlib.org/cmocean)). The color map can be controlled via the `cmap` parameter to `imshow`" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "fig, ax = plt.subplots()\n", 138 | "img = ax.imshow(im, cmap='magma')\n", 139 | "cbar = fig.colorbar(img)\n", 140 | "cbar.set_label('elevation')" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "### Exercise\n", 148 | "\n", 149 | "Have a look at the [color maps available](https://matplotlib.org/tutorials/colors/colormaps.html) and try out some different colors." 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "## Follow the lines\n", 164 | "\n", 165 | "A different way to visualize array data is via a contour plot." 
166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, sharey=True)\n", 175 | "ax1.contour(im, cmap='magma')\n", 176 | "ax1.set_aspect('equal')\n", 177 | "ax2.imshow(im, cmap='magma', origin='lower') # put (0, 0) on the lower left to match contour" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "### Exercise\n", 185 | "\n", 186 | "Try changing the `cmap` and `levels` parameters to `contour`" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "## Change the normalization\n", 201 | "\n", 202 | "The conversion of your input data to the image on the screen is a multi-step process:\n", 203 | "\n", 204 | " - Your data is mapped to the range [0, 1] via a `Normalization`\n", 205 | " - The renormalized data is re-sampled to the correct number of pixels to fit on your output\n", 206 | " - The resampled data is mapped from [0, 1] -> RGB via the color map\n", 207 | " \n", 208 | "The default normalization is linear, however there are cases where you need a more exotic normalization.\n", 209 | "\n", 210 | "One such case is if your data covers several orders of magnitude." 
211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "from matplotlib.colors import LogNorm\n", 220 | "\n", 221 | "# take the 2D fft of our test image and re-order to put 0 frequency in the center\n", 222 | "spike = np.abs(np.fft.fftshift(np.fft.fft2(im)))\n", 223 | "\n", 224 | "fig, (ax1, ax2) = plt.subplots(1, 2, tight_layout=True, figsize=(10, 5))\n", 225 | "\n", 226 | "lin_im = ax1.imshow(spike)\n", 227 | "_ = fig.colorbar(lin_im, ax=ax1)\n", 228 | "_ = ax1.set_title('linear color scale')\n", 229 | "\n", 230 | "log_im = ax2.imshow(spike, norm=LogNorm())\n", 231 | "_ = fig.colorbar(log_im, ax=ax2)\n", 232 | "_ = ax2.set_title('log color scale')" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "We can make use of a `BoundaryNorm` to quantize the data (in possibly non-uniform bins)." 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "import matplotlib.colors as mcolors\n", 249 | "import matplotlib.cm as mcm\n", 250 | "\n", 251 | "cmap = mcm.tab20b\n", 252 | "norm = mcolors.BoundaryNorm([1, 3, 4, 5, 7, 9, 12, 15, 20], cmap.N)\n", 253 | "\n", 254 | "data = np.arange(20).reshape((1, 20))\n", 255 | "\n", 256 | "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6))\n", 257 | "\n", 258 | "top = ax1.imshow(data, cmap=cmap, aspect='auto')\n", 259 | "fig.colorbar(top, ax=ax1)\n", 260 | "bottom = ax2.imshow(data, norm=norm, cmap=cmap, aspect='auto')\n", 261 | "fig.colorbar(bottom, ax=ax2)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [] 270 | } 271 | ], 272 | "metadata": { 273 | "kernelspec": { 274 | "display_name": "Python 3", 275 | "language": "python", 276 | "name": "python3" 277 | }, 278 | "language_info": { 279 | 
"codemirror_mode": { 280 | "name": "ipython", 281 | "version": 3 282 | }, 283 | "file_extension": ".py", 284 | "mimetype": "text/x-python", 285 | "name": "python", 286 | "nbconvert_exporter": "python", 287 | "pygments_lexer": "ipython3", 288 | "version": "3.7.3" 289 | }, 290 | "latex_envs": { 291 | "LaTeX_envs_menu_present": true, 292 | "autoclose": false, 293 | "autocomplete": true, 294 | "bibliofile": "biblio.bib", 295 | "cite_by": "apalike", 296 | "current_citInitial": 1, 297 | "eqLabelWithNumbers": true, 298 | "eqNumInitial": 1, 299 | "hotkeys": { 300 | "equation": "Ctrl-E", 301 | "itemize": "Ctrl-I" 302 | }, 303 | "labels_anchors": false, 304 | "latex_user_defs": false, 305 | "report_style_numbering": false, 306 | "user_envs_cfg": false 307 | }, 308 | "varInspector": { 309 | "cols": { 310 | "lenName": 16, 311 | "lenType": 16, 312 | "lenVar": 40 313 | }, 314 | "kernels_config": { 315 | "python": { 316 | "delete_cmd_postfix": "", 317 | "delete_cmd_prefix": "del ", 318 | "library": "var_list.py", 319 | "varRefreshCmd": "print(var_dic_list())" 320 | }, 321 | "r": { 322 | "delete_cmd_postfix": ") ", 323 | "delete_cmd_prefix": "rm(", 324 | "library": "var_list.r", 325 | "varRefreshCmd": "cat(var_dic_list()) " 326 | } 327 | }, 328 | "types_to_exclude": [ 329 | "module", 330 | "function", 331 | "builtin_function_or_method", 332 | "instance", 333 | "_Feature" 334 | ], 335 | "window_display": false 336 | } 337 | }, 338 | "nbformat": 4, 339 | "nbformat_minor": 4 340 | } 341 | -------------------------------------------------------------------------------- /notebooks/06-interactive.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Interactive figures\n", 12 | "\n", 13 | "In addition to the static figures we have been working with, Matplotlib can produce interactive figures. 
For use on a desktop there are bindings to produce figures using the Tk, Gtk, Wx, and Qt frameworks. These can be used as stand-alone windows or embedded in larger applications, see [the gallery](https://matplotlib.org/gallery/#embedding-matplotlib-in-graphical-user-interfaces) for details.\n", 14 | "\n", 15 | "There are two backends to work with IPython/Jupyter notebook:\n", 16 | "\n", 17 | " - NBAgg which ships with Matplotlib \n", 18 | " - `%matplotlib notebook`\n", 19 | " - ipympl which is an additional package \n", 20 | " - `%matplotlib widget`\n", 21 | " \n", 22 | " \n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "skip" 30 | } 31 | }, 32 | "source": [ 33 | "NBAgg only works in the classic IPython notebook but not in Jupyter lab (because the way it works is by injecting a fair amount of javascript into the DOM which is a security hole). ipympl is built on top of the `ipywidgets` framework and requires a jupyterlab extension to be installed in addition to the python module.\n", 34 | " \n", 35 | "If you are in the classic notebook\n", 36 | " \n", 37 | " ```python\n", 38 | "%matplotlib notebook\n", 39 | "```\n", 40 | "\n", 41 | "will enable NBAgg. In either the classic notebook or jupyter lab\n", 42 | "\n", 43 | "```python\n", 44 | "%matplotlib widget\n", 45 | "```\n", 46 | "\n", 47 | "will enable ipympl." 
48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "slideshow": { 55 | "slide_type": "slide" 56 | } 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "%matplotlib notebook\n", 61 | "import matplotlib.pyplot as plt" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": { 68 | "slideshow": { 69 | "slide_type": "slide" 70 | } 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "fig, ax = plt.subplots()\n", 75 | "ax.plot(range(42))" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "You should now have an interactive figure that you can pan / zoom around on just like on the desktop. Matplotlib is using a client-server model for the display, user input is sent from the Jupyter front-end back to Matplotlib running in your kernel and the kernel ships the rendered png to the browser. This has the advantages that the plots will look identical to using `inline` or the desktop backends and we do not need to ship all of your data to the browser. However, this approach will have higher latency than an in-browser plotting library (such as bokeh or plotly)." 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "slideshow": { 89 | "slide_type": "slide" 90 | } 91 | }, 92 | "source": [ 93 | "## print statements and tracebacks in callbacks\n", 94 | "\n", 95 | "Any print statements or tracebacks in user-callbacks will not be naively shown in the notebook (because it is not clear _where_ they should be shown; in a terminal they are printed to stdout of which there is only one, in a notebook there are many output cells, any of which may be the correct place to print). For convenience in the next few notebooks we have provided a helper to make sure that the print statements will be associated with an output area under the `Figure`." 
96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "slideshow": { 103 | "slide_type": "fragment" 104 | } 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "%run helpers/ensure_print.py" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": { 114 | "slideshow": { 115 | "slide_type": "skip" 116 | } 117 | }, 118 | "source": [ 119 | "Understanding this code is not required for this tutorial, but running the above is required for the following notebooks to run properly. We are discussing how to solve this in a cleaner way [at ipympl](https://github.com/matplotlib/jupyter-matplotlib/issues/116).\n", 120 | "\n", 121 | "To test that this is working, run the following cell and pan/zoom around." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "slideshow": { 129 | "slide_type": "slide" 130 | } 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "fig, ax = plt.subplots()\n", 135 | "ax.plot(range(52))\n", 136 | "fig.canvas.mpl_connect('draw_event', lambda event: print(\"I Drew!\"))" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [] 145 | } 146 | ], 147 | "metadata": { 148 | "celltoolbar": "Slideshow", 149 | "kernelspec": { 150 | "display_name": "Python 3", 151 | "language": "python", 152 | "name": "python3" 153 | }, 154 | "language_info": { 155 | "codemirror_mode": { 156 | "name": "ipython", 157 | "version": 3 158 | }, 159 | "file_extension": ".py", 160 | "mimetype": "text/x-python", 161 | "name": "python", 162 | "nbconvert_exporter": "python", 163 | "pygments_lexer": "ipython3", 164 | "version": "3.7.3" 165 | }, 166 | "latex_envs": { 167 | "LaTeX_envs_menu_present": true, 168 | "autoclose": false, 169 | "autocomplete": true, 170 | "bibliofile": "biblio.bib", 171 | "cite_by": "apalike", 172 | "current_citInitial": 1, 173 | "eqLabelWithNumbers": 
true, 174 | "eqNumInitial": 1, 175 | "hotkeys": { 176 | "equation": "Ctrl-E", 177 | "itemize": "Ctrl-I" 178 | }, 179 | "labels_anchors": false, 180 | "latex_user_defs": false, 181 | "report_style_numbering": false, 182 | "user_envs_cfg": false 183 | }, 184 | "varInspector": { 185 | "cols": { 186 | "lenName": 16, 187 | "lenType": 16, 188 | "lenVar": 40 189 | }, 190 | "kernels_config": { 191 | "python": { 192 | "delete_cmd_postfix": "", 193 | "delete_cmd_prefix": "del ", 194 | "library": "var_list.py", 195 | "varRefreshCmd": "print(var_dic_list())" 196 | }, 197 | "r": { 198 | "delete_cmd_postfix": ") ", 199 | "delete_cmd_prefix": "rm(", 200 | "library": "var_list.r", 201 | "varRefreshCmd": "cat(var_dic_list()) " 202 | } 203 | }, 204 | "types_to_exclude": [ 205 | "module", 206 | "function", 207 | "builtin_function_or_method", 208 | "instance", 209 | "_Feature" 210 | ], 211 | "window_display": false 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 4 216 | } 217 | -------------------------------------------------------------------------------- /notebooks/07-line-creator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Collecting user input\n", 12 | "\n", 13 | "One frequently wants to interactively collect input from the user, for example to draw a curve or region of interest on a graph or to drill down into complex data sets.\n", 14 | "\n", 15 | "Matplotlib has support for collecting mouse events (motion, clicks, and scrolling) and keyboard events (key up/down + command keys) from the UI. 
This machinery is how the pan / zoom / hotkeys configured by default in Matplotlib work, this tutorial will show you how to use this machinery for your own purposes.\n", 16 | "\n", 17 | "To start with, we will need to be using either the nbagg or ipympl and ensure that any print statements in our callbacks will make it back to the notebook." 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": { 24 | "slideshow": { 25 | "slide_type": "slide" 26 | } 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "# pick widget or notebook. Widget works in Jupyter Lab, but requires ipympl to be installed\n", 31 | "%matplotlib notebook\n", 32 | "# nbagg ships as part of Matplotlib, but does not work in Jupyter Lab\n", 33 | "# %matplotlib widget\n", 34 | "# make sure prints come to the notebook\n", 35 | "%run helpers/ensure_print.py" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "slideshow": { 42 | "slide_type": "slide" 43 | } 44 | }, 45 | "source": [ 46 | "To start, let's interactively build a curve based on the points the user clicks on. To get notified when the user clicks we register a callback with the canvas. This is the same idea as e.g. `on_click` in jquery or signal/slots in Qt, we provide a function that will be called on our behalf when the user takes some action. 
To register a callback we use the `canvas.mpl_connect` method which has the signature:\n", 47 | "\n", 48 | "```python\n", 49 | "def mpl_connect(self, s: str, func : Callable[Event]):\n", 50 | " ...\n", 51 | "\n", 52 | "```\n", 53 | "\n", 54 | "`s` can be one of the 14 strings defining when we want our function called and the exact sub-class of `Event` that is passed in depends on the value of `s`.\n", 55 | "\n" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "slideshow": { 62 | "slide_type": "slide" 63 | } 64 | }, 65 | "source": [ 66 | "\n", 67 | "## Button Press Callbacks\n", 68 | "\n", 69 | "\n", 70 | "[`MouseEvent`](https://matplotlib.org/3.1.1/api/backend_bases_api.html#matplotlib.backend_bases.MouseEvent) provides us access to what axes we are in, the mouse button that was clicked, any modifier keys the user was holding down, and the location of the mouse.\n", 71 | "The attributes on the `event` that we are going to be primarily concerned with are:\n", 72 | "\n", 73 | "```python\n", 74 | "def callback(event : MouseEvent) -> None:\n", 75 | " button = event.button # the button clicked as an Enum\n", 76 | " x = event.xdata # the x location of the click in *data space*\n", 77 | " y = event.ydata # the y location of the click in *data space*\n", 78 | " key = event.key # any keyboard key that is held down while clicking\n", 79 | "```\n", 80 | "\n", 81 | "The values of [MouseButton](https://matplotlib.org/3.1.1/api/backend_bases_api.html#matplotlib.backend_bases.MouseButton) we are interested in are\n", 82 | "\n", 83 | "```python\n", 84 | "from matplotlib.backend_bases import MouseButton\n", 85 | "MouseButton.LEFT == 1\n", 86 | "MouseButton.RIGHT == 3\n", 87 | "```\n", 88 | "\n", 89 | "In the browser both the middle and right button report as `MouseButton.RIGHT` because the right mouse button in the browser always opens a context menu. 
\n" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": { 95 | "slideshow": { 96 | "slide_type": "slide" 97 | } 98 | }, 99 | "source": [ 100 | "\n", 101 | "## Capture the Clicks\n", 102 | "\n", 103 | "As we are collecting state let's define a helper-class that is going to own the interaction and wrap around the `Line2D` artist that is drawing our curve. To start with we will just print out _where_ the user clicked and with which button." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "slideshow": { 111 | "slide_type": "slide" 112 | } 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "import matplotlib.pyplot as plt\n", 117 | "from itertools import cycle\n", 118 | "# available on mpl >= 3.1\n", 119 | "# from matplotlib.backend_bases import MouseButton\n", 120 | "\n", 121 | "class LineMaker:\n", 122 | " def __init__(self, ln):\n", 123 | " # Stash the Line2D object, we will use this later\n", 124 | " self.ln = ln\n", 125 | " # register our method to be called when the mouse button is pressed down\n", 126 | " self.button_cid = ln.figure.canvas.mpl_connect('button_press_event',\n", 127 | " self.on_button)\n", 128 | "\n", 129 | " def on_button(self, event):\n", 130 | " # print out what button and where the user clicked\n", 131 | " print(f'button: {event.button!r} @ ({event.xdata}, {event.ydata}) + key: {event.key}')" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "slideshow": { 139 | "slide_type": "slide" 140 | } 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "fig, ax = plt.subplots()\n", 145 | "ln, = ax.plot([1], [1], '-o')\n", 146 | "line_maker = LineMaker(ln)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": { 152 | "slideshow": { 153 | "slide_type": "slide" 154 | } 155 | }, 156 | "source": [ 157 | "### Exercise\n", 158 | "\n", 159 | "Only print when the left button is pressed." 
160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": { 165 | "slideshow": { 166 | "slide_type": "skip" 167 | } 168 | }, 169 | "source": [ 170 | "Now we need to record where the user clicked and update the `Line2D` artist. It is critical that we remember to call `draw_idle` at the end or the UI will not update to reflect our changes." 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "slideshow": { 178 | "slide_type": "slide" 179 | } 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "class LineMaker:\n", 184 | " def __init__(self, ln):\n", 185 | " # stash the Line2D artist\n", 186 | " self.ln = ln\n", 187 | " ln.axes.annotate('Left-click to add points', (.5, .9), \n", 188 | " ha='center', xycoords='axes fraction')\n", 189 | " # register our method to be called per-click\n", 190 | " self.button_cid = ln.figure.canvas.mpl_connect('button_press_event',\n", 191 | " self.on_button)\n", 192 | "\n", 193 | " def on_button(self, event):\n", 194 | " print(f'button: {event.button!r} @ ({event.xdata}, {event.ydata}) + key: {event.key}')\n", 195 | " \n", 196 | " # only consider events from the lines Axes or if not the left mouse button bail! 
\n", 197 | " if event.inaxes is not self.ln.axes or event.button != 1:\n", 198 | " return\n", 199 | " \n", 200 | " # append the new point to the current Line2D data\n", 201 | " xdata = list(self.ln.get_xdata()) + [event.xdata]\n", 202 | " ydata = list(self.ln.get_ydata()) + [event.ydata]\n", 203 | "\n", 204 | " # and update the data on the Line2D artist\n", 205 | " self.ln.set_data(xdata, ydata)\n", 206 | "\n", 207 | " # ask the UI to re-draw the next time it can\n", 208 | " self.ln.figure.canvas.draw_idle()\n", 209 | " \n", 210 | " @property\n", 211 | " def curve(self):\n", 212 | " # get the current (x, y) for the line\n", 213 | " return {'x': self.ln.get_xdata(), 'y': self.ln.get_ydata()}\n", 214 | "\n" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | "slideshow": { 222 | "slide_type": "slide" 223 | } 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "fig, ax = plt.subplots()\n", 228 | "ln, = ax.plot([1], [1], '-o')\n", 229 | "line_maker = LineMaker(ln)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": { 235 | "slideshow": { 236 | "slide_type": "skip" 237 | } 238 | }, 239 | "source": [ 240 | "We can use the `curve` attribute on `line_maker` to get the current (x, y) data and when we are happy with it feed it into the next step of our analysis." 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": { 247 | "slideshow": { 248 | "slide_type": "skip" 249 | } 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "print(line_maker.curve)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": { 259 | "slideshow": { 260 | "slide_type": "skip" 261 | } 262 | }, 263 | "source": [ 264 | "### Exercise\n", 265 | "\n", 266 | "Being able to add points to our line is great! However, what if we make a mistake and want to remove a point? 
One way for the user to express they would like to remove the point is to hold down 'shift' while clicking.\n", 267 | "\n", 268 | "In this exercise, implement removing the nearest point from where the user clicked when they shift-click." 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "slideshow": { 276 | "slide_type": "slide" 277 | } 278 | }, 279 | "outputs": [], 280 | "source": [ 281 | "class LineMakerRemover(LineMaker):\n", 282 | "\n", 283 | " def on_button(self, event):\n", 284 | " # print out what button and where the user clicked\n", 285 | " print(f'button: {event.button!r} @ ({event.xdata}, {event.ydata}) + key: {event.key}')\n", 286 | " \n", 287 | " # only consider events from the lines Axes or if not the left mouse button bail!\n", 288 | " if event.inaxes is not self.ln.axes or event.button != MouseButton.LEFT:\n", 289 | " return\n", 290 | " \n", 291 | " xdata = list(self.ln.get_xdata())\n", 292 | " ydata = list(self.ln.get_ydata())\n", 293 | "\n", 294 | " if event.key == 'shift':\n", 295 | " print('in shift')\n", 296 | " # TODO compute the closest (x, y) point and remove it from \n", 297 | " # xdata, ydata\n", 298 | " else:\n", 299 | " # append the new point to the current Line2D data\n", 300 | " xdata += [event.xdata]\n", 301 | " ydata += [event.ydata]\n", 302 | "\n", 303 | " # and update the data on the Line2D artist\n", 304 | " self.ln.set_data(xdata, ydata)\n", 305 | "\n", 306 | " # ask the UI to re-draw the next time it can\n", 307 | " self.ln.figure.canvas.draw_idle()\n", 308 | "\n", 309 | "\n" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": { 316 | "slideshow": { 317 | "slide_type": "slide" 318 | } 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "fig = plt.figure()\n", 323 | "ax = fig.subplots()\n", 324 | "ln, = ax.plot([1], [1], '-o')\n", 325 | "line_maker = LineMaker(ln)" 326 | ] 327 | } 328 | ], 329 | "metadata": { 330 | 
"celltoolbar": "Slideshow", 331 | "kernelspec": { 332 | "display_name": "Python 3", 333 | "language": "python", 334 | "name": "python3" 335 | }, 336 | "language_info": { 337 | "codemirror_mode": { 338 | "name": "ipython", 339 | "version": 3 340 | }, 341 | "file_extension": ".py", 342 | "mimetype": "text/x-python", 343 | "name": "python", 344 | "nbconvert_exporter": "python", 345 | "pygments_lexer": "ipython3", 346 | "version": "3.7.3" 347 | } 348 | }, 349 | "nbformat": 4, 350 | "nbformat_minor": 4 351 | } 352 | -------------------------------------------------------------------------------- /notebooks/09-interactive_annotation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Artist Picking\n", 12 | "\n", 13 | "This tutorial will go through how to configure a callback using `'pick_event'` to drill into a data set (in this case the Titanic passenger manifest). For a more thorough tutorial please see [the full interactive tutorial](https://github.com/matplotlib/interactive_tutorial) and the [API documentation](\n", 14 | "https://matplotlib.org/3.1.0/users/event_handling.html).\n", 15 | "\n", 16 | "This tutorial shows how to build an interaction customized to this visualization of this data set, for sets of high-level general tools see [mpldatacursors](https://github.com/joferkington/mpldatacursor/) and [mplcursors](https://github.com/anntzer/mplcursors)." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "slideshow": { 24 | "slide_type": "slide" 25 | } 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "%matplotlib notebook\n", 30 | "%run helpers/ensure_print.py" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "slideshow": { 38 | "slide_type": "fragment" 39 | } 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "from matplotlib.backend_bases import MouseButton\n", 44 | "import numpy as np\n", 45 | "import pandas as pd\n", 46 | "import matplotlib.pyplot as plt" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "fragment" 55 | } 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "data = pd.read_csv(\"http://bit.ly/tscv17\")\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "skip" 67 | } 68 | }, 69 | "source": [ 70 | "## Our plotting function\n", 71 | "\n", 72 | "The first step is to make ourselves a helper function that will \"do the right thing\" for our data. To start with, this is a wrapper around `scatter`, but eventually this will also define and install the interactive functions.\n", 73 | "\n", 74 | "We are using the `legend_elements` which was added in [Matplotlib 3.1](https://matplotlib.org/users/prev_whats_new/whats_new_3.1.0.html#legend-for-scatter) to get a nice legend for the data." 
75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "slideshow": { 82 | "slide_type": "slide" 83 | } 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "def make_plot(ax, data, *, x_data='Age', y_data='Fare', c_data='Survived'):\n", 88 | " # note use of 'data kwarg' new in mpl 1.5\n", 89 | " sc = ax.scatter(x_data, y_data, c=c_data, data=data, picker=5, alpha=.75) \n", 90 | " ax.set(xlabel=x_data, ylabel=y_data)\n", 91 | " # mpl 3.1 feature\n", 92 | " ax.legend(*sc.legend_elements(), title=c_data)\n", 93 | " \n", 94 | "fig, ax = plt.subplots()\n", 95 | "make_plot(ax, data)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": { 101 | "slideshow": { 102 | "slide_type": "slide" 103 | } 104 | }, 105 | "source": [ 106 | "## Print a row\n", 107 | "\n", 108 | "To get a bit more information about each point we can use the `'pick_event'` to print information about each point (aka row) to the screen. By passing `picker=5` to `ax.scatter` we enable picking on the resulting `PathCollection` artist. The units of `picker` are pixels. We can then register a callback to be called whenever the user clicks on (aka 'picks') the `Artist`.\n", 109 | "\n", 110 | "[`PickEvent`](https://matplotlib.org/3.1.1/api/backend_bases_api.html#matplotlib.backend_bases.PickEvent) gives us access to the underlying [`MouseEvent`](https://matplotlib.org/3.1.1/api/backend_bases_api.html#matplotlib.backend_bases.MouseEvent), the `Artist` instance that was picked, and depending on the artist some additional information. 
In the case of `PathCollection` (from `scatter`) and `Line2D` (from `plot`) the `PickEvent` has an `ind` attribute which is a list of the positional indexes in the input data that were picked.\n", 111 | "\n", 112 | "The attributes on the `event` that we are going to be primarily concerned with are:\n", 113 | "\n", 114 | "```python\n", 115 | "def callback(event : PickEvent) -> None:\n", 116 | " mouse_event = event.mouseevent # the underlying mouse event\n", 117 | " button = mouse_event.button # the button clicked as an Enum\n", 118 | " ind = event.ind # list of indexes of the data points picked\n", 119 | "\n", 120 | "```\n", 121 | "\n", 122 | "\n", 123 | "Additional information (which `Axes`, the (x, y) values, keyboard keys held down) can be extracted from `mouse_event` (see previous tutorial).\n", 124 | " " 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "slideshow": { 132 | "slide_type": "slide" 133 | } 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "def make_plot(ax, data, *, x_data='Age', y_data='Fare', c_data='Survived'):\n", 138 | "\n", 139 | " sc = ax.scatter(x_data, y_data, c=c_data, data=data, picker=5, alpha=.75)\n", 140 | " ax.set(xlabel=x_data, ylabel=y_data)\n", 141 | " ax.legend(*sc.legend_elements(), title=c_data)\n", 142 | " \n", 143 | " # Define an inner function. This will \"close over\" the input `data`\n", 144 | " # and our newly created artist `sc`\n", 145 | " def print_row(event):\n", 146 | " print(f'in a pick event! 
{event.ind} with mouse button {event.mouseevent.button}')\n", 147 | " # make sure we are picking the artist we care about and the left mouse button!\n", 148 | " if event.artist is not sc or event.mouseevent.button != MouseButton.LEFT: \n", 149 | " return \n", 150 | "\n", 151 | " for indx in event.ind:\n", 152 | " # use iloc to select a row by numeric index\n", 153 | " row = data.iloc[indx]\n", 154 | " # TODO: print something more interesting!\n", 155 | " print(f\"hit row {indx}\")\n", 156 | " \n", 157 | " # connect our call back to the canvas\n", 158 | " ax.figure.canvas.mpl_connect('pick_event', print_row) \n", 159 | " return sc\n", 160 | " \n" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "slideshow": { 168 | "slide_type": "slide" 169 | } 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "fig, ax = plt.subplots()\n", 174 | "make_plot(ax, data);" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": { 180 | "slideshow": { 181 | "slide_type": "skip" 182 | } 183 | }, 184 | "source": [ 185 | "## Add an annotation\n", 186 | "\n", 187 | "We can do better than printing, though: via [`ax.annotate`](https://matplotlib.org/3.1.0/api/_as_gen/matplotlib.pyplot.annotate.html) we can add annotations directly to the plot. 
The API of `annotate` is large; in this case we are:\n", 188 | "\n", 189 | "- offsetting the text by up to $\\pm$ 50pt in each direction from the data point\n", 190 | "- putting a transparent gray box around the text\n", 191 | "- connecting the text to the data point with a curved arrow" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "slideshow": { 199 | "slide_type": "slide" 200 | } 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "def make_plot(ax, data, *, x_data='Age', y_data='Fare', c_data='Survived'):\n", 205 | " sc = ax.scatter(x_data, y_data, c=c_data, data=data, picker=5) \n", 206 | " ax.set(xlabel=x_data, ylabel=y_data)\n", 207 | " ax.legend(*sc.legend_elements(), \n", 208 | " loc=\"best\", title=c_data, \n", 209 | " ncol=1)\n", 210 | " \n", 211 | " def add_annotation(event):\n", 212 | " # if this is not our artist, bail\n", 213 | " if event.artist is not sc or event.mouseevent.button != MouseButton.LEFT: \n", 214 | " return \n", 215 | " for indx in event.ind:\n", 216 | " # grab the row\n", 217 | " row = data.iloc[indx] \n", 218 | " # format everything into a multi-line string\n", 219 | " txt = '\\n'.join(['hit row', f'{indx}']) \n", 220 | " ann = ax.annotate(s=txt, \n", 221 | " # update this to point someplace more sensible!\n", 222 | " xy=(0, 0), \n", 223 | " # styling\n", 224 | " # make the box light gray and transparent\n", 225 | " bbox={'color': '.1', 'alpha': .2},\n", 226 | " # offset text randomly by a few points\n", 227 | " xytext=(np.random.rand(2) - .5) * 100,\n", 228 | " textcoords='offset points', \n", 229 | " # connect text to point with a curved arrow\n", 230 | " arrowprops=dict(\n", 231 | " arrowstyle=\"->\", \n", 232 | " connectionstyle=\"angle3,angleA=0,angleB=-90\") \n", 233 | " ) \n", 234 | " # make it movable!\n", 235 | " ann.draggable()\n", 236 | " \n", 237 | " # trigger a re-draw at the next possible time\n", 238 | " ax.figure.canvas.draw_idle() \n", 239 | " # connect our 
function\n", 240 | " ax.figure.canvas.mpl_connect('pick_event', add_annotation) \n", 241 | " \n", 242 | "fig, ax = plt.subplots()\n", 243 | "make_plot(ax, data) " 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": { 249 | "slideshow": { 250 | "slide_type": "slide" 251 | } 252 | }, 253 | "source": [ 254 | "### Exercise\n", 255 | "\n", 256 | "Adjust the annotation to point at the data point (not (0, 0))." 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": { 262 | "slideshow": { 263 | "slide_type": "slide" 264 | } 265 | }, 266 | "source": [ 267 | "## Bonus: remove the annotations\n", 268 | "\n", 269 | "If we can _add_ annotations, we should also be able to remove them. All Matplotlib artists have an `art.remove()` method that will remove them from their parent in the draw tree. Thus, with a bit of book-keeping we can remove the annotation on right click (`MouseButton.RIGHT`)." 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "slideshow": { 277 | "slide_type": "slide" 278 | } 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "def make_plot(ax, data, *, x_data='Age', y_data='Fare', c_data='Survived'):\n", 283 | "\n", 284 | " sc = ax.scatter(x_data, y_data, c=c_data, data=data, picker=5, alpha=.75) \n", 285 | " ax.set_xlabel(x_data)\n", 286 | " ax.set_ylabel(y_data)\n", 287 | " ax.legend(*sc.legend_elements(), title=c_data)\n", 288 | " # someplace to stash our Annotation artists\n", 289 | " annotations = set()\n", 290 | " def add_annotation(event):\n", 291 | " if event.artist is not sc: \n", 292 | " return \n", 293 | " if event.mouseevent.button != MouseButton.LEFT: \n", 294 | " return \n", 295 | " \n", 296 | " for indx in event.ind: \n", 297 | " row = data.iloc[indx] \n", 298 | " txt = '\\n'.join(f'{k}: {v}' for k, v in row.items()) \n", 299 | "\n", 300 | " ann = ax.annotate(txt, \n", 301 | " (row[x_data], row[y_data]), \n", 302 | " # make the annotation also pickable\n", 
303 | " picker=1,\n", 304 | " # style\n", 305 | " bbox={'color': '.1', 'alpha': .2}, \n", 306 | " xytext=(np.random.rand(2) - .5) * 100, \n", 307 | " textcoords='offset points', \n", 308 | " arrowprops=dict(\n", 309 | " arrowstyle=\"->\", \n", 310 | " connectionstyle=\"angle3,angleA=0,angleB=-90\") \n", 311 | " ) \n", 312 | " ann.draggable()\n", 313 | " # also add the artist to the stash\n", 314 | " annotations.add(ann)\n", 315 | " ax.figure.canvas.draw_idle()\n", 316 | " \n", 317 | " # call back for _removing_ the annotations\n", 318 | " def remove_annotation(event): \n", 319 | " # if the artist is not in annotations, bail!\n", 320 | " if event.artist not in annotations: \n", 321 | " return \n", 322 | " \n", 323 | " if event.mouseevent.button == MouseButton.RIGHT:\n", 324 | " # grab the artist to be efficent\n", 325 | " art = event.artist\n", 326 | " # remove the annotation from the figure\n", 327 | " art.remove()\n", 328 | " # remove the annotation from our stash\n", 329 | " annotations.remove(art)\n", 330 | " # ask the figure to re-draw\n", 331 | " ax.figure.canvas.draw_idle() \n", 332 | " \n", 333 | " # register both of the callbacks\n", 334 | " ax.figure.canvas.mpl_connect('pick_event', add_annotation)\n", 335 | " ax.figure.canvas.mpl_connect('pick_event', remove_annotation)\n" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": { 342 | "slideshow": { 343 | "slide_type": "slide" 344 | } 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "fig, ax = plt.subplots()\n", 349 | "make_plot(ax, data) " 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [] 358 | } 359 | ], 360 | "metadata": { 361 | "celltoolbar": "Slideshow", 362 | "kernelspec": { 363 | "display_name": "Python 3", 364 | "language": "python", 365 | "name": "python3" 366 | }, 367 | "language_info": { 368 | "codemirror_mode": { 369 | "name": "ipython", 370 | 
"version": 3 371 | }, 372 | "file_extension": ".py", 373 | "mimetype": "text/x-python", 374 | "name": "python", 375 | "nbconvert_exporter": "python", 376 | "pygments_lexer": "ipython3", 377 | "version": "3.7.3" 378 | }, 379 | "latex_envs": { 380 | "LaTeX_envs_menu_present": true, 381 | "autoclose": false, 382 | "autocomplete": true, 383 | "bibliofile": "biblio.bib", 384 | "cite_by": "apalike", 385 | "current_citInitial": 1, 386 | "eqLabelWithNumbers": true, 387 | "eqNumInitial": 1, 388 | "hotkeys": { 389 | "equation": "Ctrl-E", 390 | "itemize": "Ctrl-I" 391 | }, 392 | "labels_anchors": false, 393 | "latex_user_defs": false, 394 | "report_style_numbering": false, 395 | "user_envs_cfg": false 396 | }, 397 | "varInspector": { 398 | "cols": { 399 | "lenName": 16, 400 | "lenType": 16, 401 | "lenVar": 40 402 | }, 403 | "kernels_config": { 404 | "python": { 405 | "delete_cmd_postfix": "", 406 | "delete_cmd_prefix": "del ", 407 | "library": "var_list.py", 408 | "varRefreshCmd": "print(var_dic_list())" 409 | }, 410 | "r": { 411 | "delete_cmd_postfix": ") ", 412 | "delete_cmd_prefix": "rm(", 413 | "library": "var_list.r", 414 | "varRefreshCmd": "cat(var_dic_list()) " 415 | } 416 | }, 417 | "types_to_exclude": [ 418 | "module", 419 | "function", 420 | "builtin_function_or_method", 421 | "instance", 422 | "_Feature" 423 | ], 424 | "window_display": false 425 | } 426 | }, 427 | "nbformat": 4, 428 | "nbformat_minor": 4 429 | } 430 | -------------------------------------------------------------------------------- /notebooks/10-selector_widget.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#%matplotlib notebook\n", 10 | "%matplotlib widget\n", 11 | "%run helpers/ensure_print.py" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | 
"source": [ 20 | "import matplotlib.pyplot as plt\n", 21 | "import matplotlib.widgets as mwidgets\n", 22 | "from matplotlib.gridspec import GridSpec\n", 23 | "import numpy as np\n", 24 | "import pandas as pd" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "mpg = pd.read_csv('data/auto-mpg.data', delim_whitespace=True)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "def make_slider(fig, data, \n", 43 | " x_data='model_year',\n", 44 | " y_data='acceleration',\n", 45 | " c_data='horsepower' ,\n", 46 | " s_data='displacement'):\n", 47 | " h_data = 'mpg'\n", 48 | " # use gridspec to build un-evenly split axes, make what will be the \n", 49 | " # histogram axes 1/3 the height of the main axes.\n", 50 | " gs = GridSpec(3, 1, figure=fig)\n", 51 | " ax1 = fig.add_subplot(gs[0:2, :])\n", 52 | " ax2 = fig.add_subplot(gs[2, :])\n", 53 | " \n", 54 | " # histogram the the mgp\n", 55 | " hist = ax2.hist(h_data, data=data, bins='auto') \n", 56 | " ax2.set(xlabel=h_data, ylabel='N')\n", 57 | " # set up a text label to publish updates to\n", 58 | " range_label = ax2.annotate('', (1, 1), \n", 59 | " ha='right', va='top', \n", 60 | " xycoords='axes fraction', \n", 61 | " xytext=(-4, -4), \n", 62 | " textcoords='offset points') \n", 63 | " \n", 64 | " # plot the data with some alpha and extra think marker edges\n", 65 | " sc = ax1.scatter(x_data, y_data, c=c_data, s=s_data, data=data, \n", 66 | " alpha=.5, linewidth=3) \n", 67 | " ax1.set(xlabel=x_data, ylabel=y_data)\n", 68 | " \n", 69 | " # produce a legend with the unique colors from the scatter \n", 70 | " legend1 = ax1.legend(*sc.legend_elements(), \n", 71 | " loc=\"upper left\", title=c_data, \n", 72 | " ncol=3) \n", 73 | " ax1.add_artist(legend1) \n", 74 | " # produce a legend with a cross section of sizes from the scatter \n", 75 | " handles, 
labels = sc.legend_elements(prop=\"sizes\", alpha=0.6) \n", 76 | " legend2 = ax1.legend(handles, labels, loc=\"lower right\", title=s_data, ncol=3) \n", 77 | " \n", 78 | " \n", 79 | " def mark_markers(low, high):\n", 80 | " \n", 81 | " base_ec = sc.get_facecolors() \n", 82 | " ec = np.array(base_ec) \n", 83 | " mask = (low < data[h_data]) & (data[h_data] < high) \n", 84 | " mask = mask.values \n", 85 | " ec[mask, :3] = [1, 0, 0] \n", 86 | " sc.set_edgecolor(ec) \n", 87 | " range_label.set_text(f'showing {low:.2f} < {h_data} < {high:.2f}')\n", 88 | " fig.canvas.draw_idle()\n", 89 | " \n", 90 | " slider = mwidgets.SpanSelector(ax2, mark_markers, 'horizontal', useblit=True, span_stays=True)\n", 91 | " fig.tight_layout()\n", 92 | " return slider \n", 93 | " \n", 94 | "s = make_slider(plt.figure(figsize=(9, 10)), mpg)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# Exercise: Port the picker from the titanic to this example." 
104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "plt.gcf().canvas.layout.height='8in'\n", 113 | "plt.gcf().canvas.layout.width='9in'" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 3", 127 | "language": "python", 128 | "name": "python3" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 3 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython3", 140 | "version": "3.7.3" 141 | }, 142 | "latex_envs": { 143 | "LaTeX_envs_menu_present": true, 144 | "autoclose": false, 145 | "autocomplete": true, 146 | "bibliofile": "biblio.bib", 147 | "cite_by": "apalike", 148 | "current_citInitial": 1, 149 | "eqLabelWithNumbers": true, 150 | "eqNumInitial": 1, 151 | "hotkeys": { 152 | "equation": "Ctrl-E", 153 | "itemize": "Ctrl-I" 154 | }, 155 | "labels_anchors": false, 156 | "latex_user_defs": false, 157 | "report_style_numbering": false, 158 | "user_envs_cfg": false 159 | }, 160 | "varInspector": { 161 | "cols": { 162 | "lenName": 16, 163 | "lenType": 16, 164 | "lenVar": 40 165 | }, 166 | "kernels_config": { 167 | "python": { 168 | "delete_cmd_postfix": "", 169 | "delete_cmd_prefix": "del ", 170 | "library": "var_list.py", 171 | "varRefreshCmd": "print(var_dic_list())" 172 | }, 173 | "r": { 174 | "delete_cmd_postfix": ") ", 175 | "delete_cmd_prefix": "rm(", 176 | "library": "var_list.r", 177 | "varRefreshCmd": "cat(var_dic_list()) " 178 | } 179 | }, 180 | "types_to_exclude": [ 181 | "module", 182 | "function", 183 | "builtin_function_or_method", 184 | "instance", 185 | "_Feature" 186 | ], 187 | "window_display": false 188 | } 189 | }, 
190 | "nbformat": 4, 191 | "nbformat_minor": 4 192 | } 193 | -------------------------------------------------------------------------------- /notebooks/data/auto-mpg.data: -------------------------------------------------------------------------------- 1 | mpg cylinders displacement horsepower weight acceleration model_year origin car_name 2 | 18.0 8 307.0 130.0 3504. 12.0 70 1 "chevrolet chevelle malibu" 3 | 15.0 8 350.0 165.0 3693. 11.5 70 1 "buick skylark 320" 4 | 18.0 8 318.0 150.0 3436. 11.0 70 1 "plymouth satellite" 5 | 16.0 8 304.0 150.0 3433. 12.0 70 1 "amc rebel sst" 6 | 17.0 8 302.0 140.0 3449. 10.5 70 1 "ford torino" 7 | 15.0 8 429.0 198.0 4341. 10.0 70 1 "ford galaxie 500" 8 | 14.0 8 454.0 220.0 4354. 9.0 70 1 "chevrolet impala" 9 | 14.0 8 440.0 215.0 4312. 8.5 70 1 "plymouth fury iii" 10 | 14.0 8 455.0 225.0 4425. 10.0 70 1 "pontiac catalina" 11 | 15.0 8 390.0 190.0 3850. 8.5 70 1 "amc ambassador dpl" 12 | 15.0 8 383.0 170.0 3563. 10.0 70 1 "dodge challenger se" 13 | 14.0 8 340.0 160.0 3609. 8.0 70 1 "plymouth 'cuda 340" 14 | 15.0 8 400.0 150.0 3761. 9.5 70 1 "chevrolet monte carlo" 15 | 14.0 8 455.0 225.0 3086. 10.0 70 1 "buick estate wagon (sw)" 16 | 24.0 4 113.0 95.00 2372. 15.0 70 3 "toyota corona mark ii" 17 | 22.0 6 198.0 95.00 2833. 15.5 70 1 "plymouth duster" 18 | 18.0 6 199.0 97.00 2774. 15.5 70 1 "amc hornet" 19 | 21.0 6 200.0 85.00 2587. 16.0 70 1 "ford maverick" 20 | 27.0 4 97.00 88.00 2130. 14.5 70 3 "datsun pl510" 21 | 26.0 4 97.00 46.00 1835. 20.5 70 2 "volkswagen 1131 deluxe sedan" 22 | 25.0 4 110.0 87.00 2672. 17.5 70 2 "peugeot 504" 23 | 24.0 4 107.0 90.00 2430. 14.5 70 2 "audi 100 ls" 24 | 25.0 4 104.0 95.00 2375. 17.5 70 2 "saab 99e" 25 | 26.0 4 121.0 113.0 2234. 12.5 70 2 "bmw 2002" 26 | 21.0 6 199.0 90.00 2648. 15.0 70 1 "amc gremlin" 27 | 10.0 8 360.0 215.0 4615. 14.0 70 1 "ford f250" 28 | 10.0 8 307.0 200.0 4376. 15.0 70 1 "chevy c20" 29 | 11.0 8 318.0 210.0 4382. 13.5 70 1 "dodge d200" 30 | 9.0 8 304.0 193.0 4732. 
18.5 70 1 "hi 1200d" 31 | 27.0 4 97.00 88.00 2130. 14.5 71 3 "datsun pl510" 32 | 28.0 4 140.0 90.00 2264. 15.5 71 1 "chevrolet vega 2300" 33 | 25.0 4 113.0 95.00 2228. 14.0 71 3 "toyota corona" 34 | 25.0 4 98.00 nan 2046. 19.0 71 1 "ford pinto" 35 | 19.0 6 232.0 100.0 2634. 13.0 71 1 "amc gremlin" 36 | 16.0 6 225.0 105.0 3439. 15.5 71 1 "plymouth satellite custom" 37 | 17.0 6 250.0 100.0 3329. 15.5 71 1 "chevrolet chevelle malibu" 38 | 19.0 6 250.0 88.00 3302. 15.5 71 1 "ford torino 500" 39 | 18.0 6 232.0 100.0 3288. 15.5 71 1 "amc matador" 40 | 14.0 8 350.0 165.0 4209. 12.0 71 1 "chevrolet impala" 41 | 14.0 8 400.0 175.0 4464. 11.5 71 1 "pontiac catalina brougham" 42 | 14.0 8 351.0 153.0 4154. 13.5 71 1 "ford galaxie 500" 43 | 14.0 8 318.0 150.0 4096. 13.0 71 1 "plymouth fury iii" 44 | 12.0 8 383.0 180.0 4955. 11.5 71 1 "dodge monaco (sw)" 45 | 13.0 8 400.0 170.0 4746. 12.0 71 1 "ford country squire (sw)" 46 | 13.0 8 400.0 175.0 5140. 12.0 71 1 "pontiac safari (sw)" 47 | 18.0 6 258.0 110.0 2962. 13.5 71 1 "amc hornet sportabout (sw)" 48 | 22.0 4 140.0 72.00 2408. 19.0 71 1 "chevrolet vega (sw)" 49 | 19.0 6 250.0 100.0 3282. 15.0 71 1 "pontiac firebird" 50 | 18.0 6 250.0 88.00 3139. 14.5 71 1 "ford mustang" 51 | 23.0 4 122.0 86.00 2220. 14.0 71 1 "mercury capri 2000" 52 | 28.0 4 116.0 90.00 2123. 14.0 71 2 "opel 1900" 53 | 30.0 4 79.00 70.00 2074. 19.5 71 2 "peugeot 304" 54 | 30.0 4 88.00 76.00 2065. 14.5 71 2 "fiat 124b" 55 | 31.0 4 71.00 65.00 1773. 19.0 71 3 "toyota corolla 1200" 56 | 35.0 4 72.00 69.00 1613. 18.0 71 3 "datsun 1200" 57 | 27.0 4 97.00 60.00 1834. 19.0 71 2 "volkswagen model 111" 58 | 26.0 4 91.00 70.00 1955. 20.5 71 1 "plymouth cricket" 59 | 24.0 4 113.0 95.00 2278. 15.5 72 3 "toyota corona hardtop" 60 | 25.0 4 97.50 80.00 2126. 17.0 72 1 "dodge colt hardtop" 61 | 23.0 4 97.00 54.00 2254. 23.5 72 2 "volkswagen type 3" 62 | 20.0 4 140.0 90.00 2408. 19.5 72 1 "chevrolet vega" 63 | 21.0 4 122.0 86.00 2226. 
16.5 72 1 "ford pinto runabout" 64 | 13.0 8 350.0 165.0 4274. 12.0 72 1 "chevrolet impala" 65 | 14.0 8 400.0 175.0 4385. 12.0 72 1 "pontiac catalina" 66 | 15.0 8 318.0 150.0 4135. 13.5 72 1 "plymouth fury iii" 67 | 14.0 8 351.0 153.0 4129. 13.0 72 1 "ford galaxie 500" 68 | 17.0 8 304.0 150.0 3672. 11.5 72 1 "amc ambassador sst" 69 | 11.0 8 429.0 208.0 4633. 11.0 72 1 "mercury marquis" 70 | 13.0 8 350.0 155.0 4502. 13.5 72 1 "buick lesabre custom" 71 | 12.0 8 350.0 160.0 4456. 13.5 72 1 "oldsmobile delta 88 royale" 72 | 13.0 8 400.0 190.0 4422. 12.5 72 1 "chrysler newport royal" 73 | 19.0 3 70.00 97.00 2330. 13.5 72 3 "mazda rx2 coupe" 74 | 15.0 8 304.0 150.0 3892. 12.5 72 1 "amc matador (sw)" 75 | 13.0 8 307.0 130.0 4098. 14.0 72 1 "chevrolet chevelle concours (sw)" 76 | 13.0 8 302.0 140.0 4294. 16.0 72 1 "ford gran torino (sw)" 77 | 14.0 8 318.0 150.0 4077. 14.0 72 1 "plymouth satellite custom (sw)" 78 | 18.0 4 121.0 112.0 2933. 14.5 72 2 "volvo 145e (sw)" 79 | 22.0 4 121.0 76.00 2511. 18.0 72 2 "volkswagen 411 (sw)" 80 | 21.0 4 120.0 87.00 2979. 19.5 72 2 "peugeot 504 (sw)" 81 | 26.0 4 96.00 69.00 2189. 18.0 72 2 "renault 12 (sw)" 82 | 22.0 4 122.0 86.00 2395. 16.0 72 1 "ford pinto (sw)" 83 | 28.0 4 97.00 92.00 2288. 17.0 72 3 "datsun 510 (sw)" 84 | 23.0 4 120.0 97.00 2506. 14.5 72 3 "toyouta corona mark ii (sw)" 85 | 28.0 4 98.00 80.00 2164. 15.0 72 1 "dodge colt (sw)" 86 | 27.0 4 97.00 88.00 2100. 16.5 72 3 "toyota corolla 1600 (sw)" 87 | 13.0 8 350.0 175.0 4100. 13.0 73 1 "buick century 350" 88 | 14.0 8 304.0 150.0 3672. 11.5 73 1 "amc matador" 89 | 13.0 8 350.0 145.0 3988. 13.0 73 1 "chevrolet malibu" 90 | 14.0 8 302.0 137.0 4042. 14.5 73 1 "ford gran torino" 91 | 15.0 8 318.0 150.0 3777. 12.5 73 1 "dodge coronet custom" 92 | 12.0 8 429.0 198.0 4952. 11.5 73 1 "mercury marquis brougham" 93 | 13.0 8 400.0 150.0 4464. 12.0 73 1 "chevrolet caprice classic" 94 | 13.0 8 351.0 158.0 4363. 13.0 73 1 "ford ltd" 95 | 14.0 8 318.0 150.0 4237. 
14.5 73 1 "plymouth fury gran sedan" 96 | 13.0 8 440.0 215.0 4735. 11.0 73 1 "chrysler new yorker brougham" 97 | 12.0 8 455.0 225.0 4951. 11.0 73 1 "buick electra 225 custom" 98 | 13.0 8 360.0 175.0 3821. 11.0 73 1 "amc ambassador brougham" 99 | 18.0 6 225.0 105.0 3121. 16.5 73 1 "plymouth valiant" 100 | 16.0 6 250.0 100.0 3278. 18.0 73 1 "chevrolet nova custom" 101 | 18.0 6 232.0 100.0 2945. 16.0 73 1 "amc hornet" 102 | 18.0 6 250.0 88.00 3021. 16.5 73 1 "ford maverick" 103 | 23.0 6 198.0 95.00 2904. 16.0 73 1 "plymouth duster" 104 | 26.0 4 97.00 46.00 1950. 21.0 73 2 "volkswagen super beetle" 105 | 11.0 8 400.0 150.0 4997. 14.0 73 1 "chevrolet impala" 106 | 12.0 8 400.0 167.0 4906. 12.5 73 1 "ford country" 107 | 13.0 8 360.0 170.0 4654. 13.0 73 1 "plymouth custom suburb" 108 | 12.0 8 350.0 180.0 4499. 12.5 73 1 "oldsmobile vista cruiser" 109 | 18.0 6 232.0 100.0 2789. 15.0 73 1 "amc gremlin" 110 | 20.0 4 97.00 88.00 2279. 19.0 73 3 "toyota carina" 111 | 21.0 4 140.0 72.00 2401. 19.5 73 1 "chevrolet vega" 112 | 22.0 4 108.0 94.00 2379. 16.5 73 3 "datsun 610" 113 | 18.0 3 70.00 90.00 2124. 13.5 73 3 "maxda rx3" 114 | 19.0 4 122.0 85.00 2310. 18.5 73 1 "ford pinto" 115 | 21.0 6 155.0 107.0 2472. 14.0 73 1 "mercury capri v6" 116 | 26.0 4 98.00 90.00 2265. 15.5 73 2 "fiat 124 sport coupe" 117 | 15.0 8 350.0 145.0 4082. 13.0 73 1 "chevrolet monte carlo s" 118 | 16.0 8 400.0 230.0 4278. 9.50 73 1 "pontiac grand prix" 119 | 29.0 4 68.00 49.00 1867. 19.5 73 2 "fiat 128" 120 | 24.0 4 116.0 75.00 2158. 15.5 73 2 "opel manta" 121 | 20.0 4 114.0 91.00 2582. 14.0 73 2 "audi 100ls" 122 | 19.0 4 121.0 112.0 2868. 15.5 73 2 "volvo 144ea" 123 | 15.0 8 318.0 150.0 3399. 11.0 73 1 "dodge dart custom" 124 | 24.0 4 121.0 110.0 2660. 14.0 73 2 "saab 99le" 125 | 20.0 6 156.0 122.0 2807. 13.5 73 3 "toyota mark ii" 126 | 11.0 8 350.0 180.0 3664. 11.0 73 1 "oldsmobile omega" 127 | 20.0 6 198.0 95.00 3102. 16.5 74 1 "plymouth duster" 128 | 21.0 6 200.0 nan 2875. 
17.0 74 1 "ford maverick" 129 | 19.0 6 232.0 100.0 2901. 16.0 74 1 "amc hornet" 130 | 15.0 6 250.0 100.0 3336. 17.0 74 1 "chevrolet nova" 131 | 31.0 4 79.00 67.00 1950. 19.0 74 3 "datsun b210" 132 | 26.0 4 122.0 80.00 2451. 16.5 74 1 "ford pinto" 133 | 32.0 4 71.00 65.00 1836. 21.0 74 3 "toyota corolla 1200" 134 | 25.0 4 140.0 75.00 2542. 17.0 74 1 "chevrolet vega" 135 | 16.0 6 250.0 100.0 3781. 17.0 74 1 "chevrolet chevelle malibu classic" 136 | 16.0 6 258.0 110.0 3632. 18.0 74 1 "amc matador" 137 | 18.0 6 225.0 105.0 3613. 16.5 74 1 "plymouth satellite sebring" 138 | 16.0 8 302.0 140.0 4141. 14.0 74 1 "ford gran torino" 139 | 13.0 8 350.0 150.0 4699. 14.5 74 1 "buick century luxus (sw)" 140 | 14.0 8 318.0 150.0 4457. 13.5 74 1 "dodge coronet custom (sw)" 141 | 14.0 8 302.0 140.0 4638. 16.0 74 1 "ford gran torino (sw)" 142 | 14.0 8 304.0 150.0 4257. 15.5 74 1 "amc matador (sw)" 143 | 29.0 4 98.00 83.00 2219. 16.5 74 2 "audi fox" 144 | 26.0 4 79.00 67.00 1963. 15.5 74 2 "volkswagen dasher" 145 | 26.0 4 97.00 78.00 2300. 14.5 74 2 "opel manta" 146 | 31.0 4 76.00 52.00 1649. 16.5 74 3 "toyota corona" 147 | 32.0 4 83.00 61.00 2003. 19.0 74 3 "datsun 710" 148 | 28.0 4 90.00 75.00 2125. 14.5 74 1 "dodge colt" 149 | 24.0 4 90.00 75.00 2108. 15.5 74 2 "fiat 128" 150 | 26.0 4 116.0 75.00 2246. 14.0 74 2 "fiat 124 tc" 151 | 24.0 4 120.0 97.00 2489. 15.0 74 3 "honda civic" 152 | 26.0 4 108.0 93.00 2391. 15.5 74 3 "subaru" 153 | 31.0 4 79.00 67.00 2000. 16.0 74 2 "fiat x1.9" 154 | 19.0 6 225.0 95.00 3264. 16.0 75 1 "plymouth valiant custom" 155 | 18.0 6 250.0 105.0 3459. 16.0 75 1 "chevrolet nova" 156 | 15.0 6 250.0 72.00 3432. 21.0 75 1 "mercury monarch" 157 | 15.0 6 250.0 72.00 3158. 19.5 75 1 "ford maverick" 158 | 16.0 8 400.0 170.0 4668. 11.5 75 1 "pontiac catalina" 159 | 15.0 8 350.0 145.0 4440. 14.0 75 1 "chevrolet bel air" 160 | 16.0 8 318.0 150.0 4498. 14.5 75 1 "plymouth grand fury" 161 | 14.0 8 351.0 148.0 4657. 13.5 75 1 "ford ltd" 162 | 17.0 6 231.0 110.0 3907. 
21.0 75 1 "buick century" 163 | 16.0 6 250.0 105.0 3897. 18.5 75 1 "chevroelt chevelle malibu" 164 | 15.0 6 258.0 110.0 3730. 19.0 75 1 "amc matador" 165 | 18.0 6 225.0 95.00 3785. 19.0 75 1 "plymouth fury" 166 | 21.0 6 231.0 110.0 3039. 15.0 75 1 "buick skyhawk" 167 | 20.0 8 262.0 110.0 3221. 13.5 75 1 "chevrolet monza 2+2" 168 | 13.0 8 302.0 129.0 3169. 12.0 75 1 "ford mustang ii" 169 | 29.0 4 97.00 75.00 2171. 16.0 75 3 "toyota corolla" 170 | 23.0 4 140.0 83.00 2639. 17.0 75 1 "ford pinto" 171 | 20.0 6 232.0 100.0 2914. 16.0 75 1 "amc gremlin" 172 | 23.0 4 140.0 78.00 2592. 18.5 75 1 "pontiac astro" 173 | 24.0 4 134.0 96.00 2702. 13.5 75 3 "toyota corona" 174 | 25.0 4 90.00 71.00 2223. 16.5 75 2 "volkswagen dasher" 175 | 24.0 4 119.0 97.00 2545. 17.0 75 3 "datsun 710" 176 | 18.0 6 171.0 97.00 2984. 14.5 75 1 "ford pinto" 177 | 29.0 4 90.00 70.00 1937. 14.0 75 2 "volkswagen rabbit" 178 | 19.0 6 232.0 90.00 3211. 17.0 75 1 "amc pacer" 179 | 23.0 4 115.0 95.00 2694. 15.0 75 2 "audi 100ls" 180 | 23.0 4 120.0 88.00 2957. 17.0 75 2 "peugeot 504" 181 | 22.0 4 121.0 98.00 2945. 14.5 75 2 "volvo 244dl" 182 | 25.0 4 121.0 115.0 2671. 13.5 75 2 "saab 99le" 183 | 33.0 4 91.00 53.00 1795. 17.5 75 3 "honda civic cvcc" 184 | 28.0 4 107.0 86.00 2464. 15.5 76 2 "fiat 131" 185 | 25.0 4 116.0 81.00 2220. 16.9 76 2 "opel 1900" 186 | 25.0 4 140.0 92.00 2572. 14.9 76 1 "capri ii" 187 | 26.0 4 98.00 79.00 2255. 17.7 76 1 "dodge colt" 188 | 27.0 4 101.0 83.00 2202. 15.3 76 2 "renault 12tl" 189 | 17.5 8 305.0 140.0 4215. 13.0 76 1 "chevrolet chevelle malibu classic" 190 | 16.0 8 318.0 150.0 4190. 13.0 76 1 "dodge coronet brougham" 191 | 15.5 8 304.0 120.0 3962. 13.9 76 1 "amc matador" 192 | 14.5 8 351.0 152.0 4215. 12.8 76 1 "ford gran torino" 193 | 22.0 6 225.0 100.0 3233. 15.4 76 1 "plymouth valiant" 194 | 22.0 6 250.0 105.0 3353. 14.5 76 1 "chevrolet nova" 195 | 24.0 6 200.0 81.00 3012. 17.6 76 1 "ford maverick" 196 | 22.5 6 232.0 90.00 3085. 
17.6 76 1 "amc hornet" 197 | 29.0 4 85.00 52.00 2035. 22.2 76 1 "chevrolet chevette" 198 | 24.5 4 98.00 60.00 2164. 22.1 76 1 "chevrolet woody" 199 | 29.0 4 90.00 70.00 1937. 14.2 76 2 "vw rabbit" 200 | 33.0 4 91.00 53.00 1795. 17.4 76 3 "honda civic" 201 | 20.0 6 225.0 100.0 3651. 17.7 76 1 "dodge aspen se" 202 | 18.0 6 250.0 78.00 3574. 21.0 76 1 "ford granada ghia" 203 | 18.5 6 250.0 110.0 3645. 16.2 76 1 "pontiac ventura sj" 204 | 17.5 6 258.0 95.00 3193. 17.8 76 1 "amc pacer d/l" 205 | 29.5 4 97.00 71.00 1825. 12.2 76 2 "volkswagen rabbit" 206 | 32.0 4 85.00 70.00 1990. 17.0 76 3 "datsun b-210" 207 | 28.0 4 97.00 75.00 2155. 16.4 76 3 "toyota corolla" 208 | 26.5 4 140.0 72.00 2565. 13.6 76 1 "ford pinto" 209 | 20.0 4 130.0 102.0 3150. 15.7 76 2 "volvo 245" 210 | 13.0 8 318.0 150.0 3940. 13.2 76 1 "plymouth volare premier v8" 211 | 19.0 4 120.0 88.00 3270. 21.9 76 2 "peugeot 504" 212 | 19.0 6 156.0 108.0 2930. 15.5 76 3 "toyota mark ii" 213 | 16.5 6 168.0 120.0 3820. 16.7 76 2 "mercedes-benz 280s" 214 | 16.5 8 350.0 180.0 4380. 12.1 76 1 "cadillac seville" 215 | 13.0 8 350.0 145.0 4055. 12.0 76 1 "chevy c10" 216 | 13.0 8 302.0 130.0 3870. 15.0 76 1 "ford f108" 217 | 13.0 8 318.0 150.0 3755. 14.0 76 1 "dodge d100" 218 | 31.5 4 98.00 68.00 2045. 18.5 77 3 "honda accord cvcc" 219 | 30.0 4 111.0 80.00 2155. 14.8 77 1 "buick opel isuzu deluxe" 220 | 36.0 4 79.00 58.00 1825. 18.6 77 2 "renault 5 gtl" 221 | 25.5 4 122.0 96.00 2300. 15.5 77 1 "plymouth arrow gs" 222 | 33.5 4 85.00 70.00 1945. 16.8 77 3 "datsun f-10 hatchback" 223 | 17.5 8 305.0 145.0 3880. 12.5 77 1 "chevrolet caprice classic" 224 | 17.0 8 260.0 110.0 4060. 19.0 77 1 "oldsmobile cutlass supreme" 225 | 15.5 8 318.0 145.0 4140. 13.7 77 1 "dodge monaco brougham" 226 | 15.0 8 302.0 130.0 4295. 14.9 77 1 "mercury cougar brougham" 227 | 17.5 6 250.0 110.0 3520. 16.4 77 1 "chevrolet concours" 228 | 20.5 6 231.0 105.0 3425. 16.9 77 1 "buick skylark" 229 | 19.0 6 225.0 100.0 3630. 
17.7 77 1 "plymouth volare custom" 230 | 18.5 6 250.0 98.00 3525. 19.0 77 1 "ford granada" 231 | 16.0 8 400.0 180.0 4220. 11.1 77 1 "pontiac grand prix lj" 232 | 15.5 8 350.0 170.0 4165. 11.4 77 1 "chevrolet monte carlo landau" 233 | 15.5 8 400.0 190.0 4325. 12.2 77 1 "chrysler cordoba" 234 | 16.0 8 351.0 149.0 4335. 14.5 77 1 "ford thunderbird" 235 | 29.0 4 97.00 78.00 1940. 14.5 77 2 "volkswagen rabbit custom" 236 | 24.5 4 151.0 88.00 2740. 16.0 77 1 "pontiac sunbird coupe" 237 | 26.0 4 97.00 75.00 2265. 18.2 77 3 "toyota corolla liftback" 238 | 25.5 4 140.0 89.00 2755. 15.8 77 1 "ford mustang ii 2+2" 239 | 30.5 4 98.00 63.00 2051. 17.0 77 1 "chevrolet chevette" 240 | 33.5 4 98.00 83.00 2075. 15.9 77 1 "dodge colt m/m" 241 | 30.0 4 97.00 67.00 1985. 16.4 77 3 "subaru dl" 242 | 30.5 4 97.00 78.00 2190. 14.1 77 2 "volkswagen dasher" 243 | 22.0 6 146.0 97.00 2815. 14.5 77 3 "datsun 810" 244 | 21.5 4 121.0 110.0 2600. 12.8 77 2 "bmw 320i" 245 | 21.5 3 80.00 110.0 2720. 13.5 77 3 "mazda rx-4" 246 | 43.1 4 90.00 48.00 1985. 21.5 78 2 "volkswagen rabbit custom diesel" 247 | 36.1 4 98.00 66.00 1800. 14.4 78 1 "ford fiesta" 248 | 32.8 4 78.00 52.00 1985. 19.4 78 3 "mazda glc deluxe" 249 | 39.4 4 85.00 70.00 2070. 18.6 78 3 "datsun b210 gx" 250 | 36.1 4 91.00 60.00 1800. 16.4 78 3 "honda civic cvcc" 251 | 19.9 8 260.0 110.0 3365. 15.5 78 1 "oldsmobile cutlass salon brougham" 252 | 19.4 8 318.0 140.0 3735. 13.2 78 1 "dodge diplomat" 253 | 20.2 8 302.0 139.0 3570. 12.8 78 1 "mercury monarch ghia" 254 | 19.2 6 231.0 105.0 3535. 19.2 78 1 "pontiac phoenix lj" 255 | 20.5 6 200.0 95.00 3155. 18.2 78 1 "chevrolet malibu" 256 | 20.2 6 200.0 85.00 2965. 15.8 78 1 "ford fairmont (auto)" 257 | 25.1 4 140.0 88.00 2720. 15.4 78 1 "ford fairmont (man)" 258 | 20.5 6 225.0 100.0 3430. 17.2 78 1 "plymouth volare" 259 | 19.4 6 232.0 90.00 3210. 17.2 78 1 "amc concord" 260 | 20.6 6 231.0 105.0 3380. 15.8 78 1 "buick century special" 261 | 20.8 6 200.0 85.00 3070. 
16.7 78 1 "mercury zephyr" 262 | 18.6 6 225.0 110.0 3620. 18.7 78 1 "dodge aspen" 263 | 18.1 6 258.0 120.0 3410. 15.1 78 1 "amc concord d/l" 264 | 19.2 8 305.0 145.0 3425. 13.2 78 1 "chevrolet monte carlo landau" 265 | 17.7 6 231.0 165.0 3445. 13.4 78 1 "buick regal sport coupe (turbo)" 266 | 18.1 8 302.0 139.0 3205. 11.2 78 1 "ford futura" 267 | 17.5 8 318.0 140.0 4080. 13.7 78 1 "dodge magnum xe" 268 | 30.0 4 98.00 68.00 2155. 16.5 78 1 "chevrolet chevette" 269 | 27.5 4 134.0 95.00 2560. 14.2 78 3 "toyota corona" 270 | 27.2 4 119.0 97.00 2300. 14.7 78 3 "datsun 510" 271 | 30.9 4 105.0 75.00 2230. 14.5 78 1 "dodge omni" 272 | 21.1 4 134.0 95.00 2515. 14.8 78 3 "toyota celica gt liftback" 273 | 23.2 4 156.0 105.0 2745. 16.7 78 1 "plymouth sapporo" 274 | 23.8 4 151.0 85.00 2855. 17.6 78 1 "oldsmobile starfire sx" 275 | 23.9 4 119.0 97.00 2405. 14.9 78 3 "datsun 200-sx" 276 | 20.3 5 131.0 103.0 2830. 15.9 78 2 "audi 5000" 277 | 17.0 6 163.0 125.0 3140. 13.6 78 2 "volvo 264gl" 278 | 21.6 4 121.0 115.0 2795. 15.7 78 2 "saab 99gle" 279 | 16.2 6 163.0 133.0 3410. 15.8 78 2 "peugeot 604sl" 280 | 31.5 4 89.00 71.00 1990. 14.9 78 2 "volkswagen scirocco" 281 | 29.5 4 98.00 68.00 2135. 16.6 78 3 "honda accord lx" 282 | 21.5 6 231.0 115.0 3245. 15.4 79 1 "pontiac lemans v6" 283 | 19.8 6 200.0 85.00 2990. 18.2 79 1 "mercury zephyr 6" 284 | 22.3 4 140.0 88.00 2890. 17.3 79 1 "ford fairmont 4" 285 | 20.2 6 232.0 90.00 3265. 18.2 79 1 "amc concord dl 6" 286 | 20.6 6 225.0 110.0 3360. 16.6 79 1 "dodge aspen 6" 287 | 17.0 8 305.0 130.0 3840. 15.4 79 1 "chevrolet caprice classic" 288 | 17.6 8 302.0 129.0 3725. 13.4 79 1 "ford ltd landau" 289 | 16.5 8 351.0 138.0 3955. 13.2 79 1 "mercury grand marquis" 290 | 18.2 8 318.0 135.0 3830. 15.2 79 1 "dodge st. regis" 291 | 16.9 8 350.0 155.0 4360. 14.9 79 1 "buick estate wagon (sw)" 292 | 15.5 8 351.0 142.0 4054. 14.3 79 1 "ford country squire (sw)" 293 | 19.2 8 267.0 125.0 3605. 
15.0 79 1 "chevrolet malibu classic (sw)" 294 | 18.5 8 360.0 150.0 3940. 13.0 79 1 "chrysler lebaron town @ country (sw)" 295 | 31.9 4 89.00 71.00 1925. 14.0 79 2 "vw rabbit custom" 296 | 34.1 4 86.00 65.00 1975. 15.2 79 3 "maxda glc deluxe" 297 | 35.7 4 98.00 80.00 1915. 14.4 79 1 "dodge colt hatchback custom" 298 | 27.4 4 121.0 80.00 2670. 15.0 79 1 "amc spirit dl" 299 | 25.4 5 183.0 77.00 3530. 20.1 79 2 "mercedes benz 300d" 300 | 23.0 8 350.0 125.0 3900. 17.4 79 1 "cadillac eldorado" 301 | 27.2 4 141.0 71.00 3190. 24.8 79 2 "peugeot 504" 302 | 23.9 8 260.0 90.00 3420. 22.2 79 1 "oldsmobile cutlass salon brougham" 303 | 34.2 4 105.0 70.00 2200. 13.2 79 1 "plymouth horizon" 304 | 34.5 4 105.0 70.00 2150. 14.9 79 1 "plymouth horizon tc3" 305 | 31.8 4 85.00 65.00 2020. 19.2 79 3 "datsun 210" 306 | 37.3 4 91.00 69.00 2130. 14.7 79 2 "fiat strada custom" 307 | 28.4 4 151.0 90.00 2670. 16.0 79 1 "buick skylark limited" 308 | 28.8 6 173.0 115.0 2595. 11.3 79 1 "chevrolet citation" 309 | 26.8 6 173.0 115.0 2700. 12.9 79 1 "oldsmobile omega brougham" 310 | 33.5 4 151.0 90.00 2556. 13.2 79 1 "pontiac phoenix" 311 | 41.5 4 98.00 76.00 2144. 14.7 80 2 "vw rabbit" 312 | 38.1 4 89.00 60.00 1968. 18.8 80 3 "toyota corolla tercel" 313 | 32.1 4 98.00 70.00 2120. 15.5 80 1 "chevrolet chevette" 314 | 37.2 4 86.00 65.00 2019. 16.4 80 3 "datsun 310" 315 | 28.0 4 151.0 90.00 2678. 16.5 80 1 "chevrolet citation" 316 | 26.4 4 140.0 88.00 2870. 18.1 80 1 "ford fairmont" 317 | 24.3 4 151.0 90.00 3003. 20.1 80 1 "amc concord" 318 | 19.1 6 225.0 90.00 3381. 18.7 80 1 "dodge aspen" 319 | 34.3 4 97.00 78.00 2188. 15.8 80 2 "audi 4000" 320 | 29.8 4 134.0 90.00 2711. 15.5 80 3 "toyota corona liftback" 321 | 31.3 4 120.0 75.00 2542. 17.5 80 3 "mazda 626" 322 | 37.0 4 119.0 92.00 2434. 15.0 80 3 "datsun 510 hatchback" 323 | 32.2 4 108.0 75.00 2265. 15.2 80 3 "toyota corolla" 324 | 46.6 4 86.00 65.00 2110. 17.9 80 3 "mazda glc" 325 | 27.9 4 156.0 105.0 2800. 
14.4 80 1 "dodge colt" 326 | 40.8 4 85.00 65.00 2110. 19.2 80 3 "datsun 210" 327 | 44.3 4 90.00 48.00 2085. 21.7 80 2 "vw rabbit c (diesel)" 328 | 43.4 4 90.00 48.00 2335. 23.7 80 2 "vw dasher (diesel)" 329 | 36.4 5 121.0 67.00 2950. 19.9 80 2 "audi 5000s (diesel)" 330 | 30.0 4 146.0 67.00 3250. 21.8 80 2 "mercedes-benz 240d" 331 | 44.6 4 91.00 67.00 1850. 13.8 80 3 "honda civic 1500 gl" 332 | 40.9 4 85.00 nan 1835. 17.3 80 2 "renault lecar deluxe" 333 | 33.8 4 97.00 67.00 2145. 18.0 80 3 "subaru dl" 334 | 29.8 4 89.00 62.00 1845. 15.3 80 2 "vokswagen rabbit" 335 | 32.7 6 168.0 132.0 2910. 11.4 80 3 "datsun 280-zx" 336 | 23.7 3 70.00 100.0 2420. 12.5 80 3 "mazda rx-7 gs" 337 | 35.0 4 122.0 88.00 2500. 15.1 80 2 "triumph tr7 coupe" 338 | 23.6 4 140.0 nan 2905. 14.3 80 1 "ford mustang cobra" 339 | 32.4 4 107.0 72.00 2290. 17.0 80 3 "honda accord" 340 | 27.2 4 135.0 84.00 2490. 15.7 81 1 "plymouth reliant" 341 | 26.6 4 151.0 84.00 2635. 16.4 81 1 "buick skylark" 342 | 25.8 4 156.0 92.00 2620. 14.4 81 1 "dodge aries wagon (sw)" 343 | 23.5 6 173.0 110.0 2725. 12.6 81 1 "chevrolet citation" 344 | 30.0 4 135.0 84.00 2385. 12.9 81 1 "plymouth reliant" 345 | 39.1 4 79.00 58.00 1755. 16.9 81 3 "toyota starlet" 346 | 39.0 4 86.00 64.00 1875. 16.4 81 1 "plymouth champ" 347 | 35.1 4 81.00 60.00 1760. 16.1 81 3 "honda civic 1300" 348 | 32.3 4 97.00 67.00 2065. 17.8 81 3 "subaru" 349 | 37.0 4 85.00 65.00 1975. 19.4 81 3 "datsun 210 mpg" 350 | 37.7 4 89.00 62.00 2050. 17.3 81 3 "toyota tercel" 351 | 34.1 4 91.00 68.00 1985. 16.0 81 3 "mazda glc 4" 352 | 34.7 4 105.0 63.00 2215. 14.9 81 1 "plymouth horizon 4" 353 | 34.4 4 98.00 65.00 2045. 16.2 81 1 "ford escort 4w" 354 | 29.9 4 98.00 65.00 2380. 20.7 81 1 "ford escort 2h" 355 | 33.0 4 105.0 74.00 2190. 14.2 81 2 "volkswagen jetta" 356 | 34.5 4 100.0 nan 2320. 15.8 81 2 "renault 18i" 357 | 33.7 4 107.0 75.00 2210. 14.4 81 3 "honda prelude" 358 | 32.4 4 108.0 75.00 2350. 16.8 81 3 "toyota corolla" 359 | 32.9 4 119.0 100.0 2615. 
14.8 81 3 "datsun 200sx" 360 | 31.6 4 120.0 74.00 2635. 18.3 81 3 "mazda 626" 361 | 28.1 4 141.0 80.00 3230. 20.4 81 2 "peugeot 505s turbo diesel" 362 | 30.7 6 145.0 76.00 3160. 19.6 81 2 "volvo diesel" 363 | 25.4 6 168.0 116.0 2900. 12.6 81 3 "toyota cressida" 364 | 24.2 6 146.0 120.0 2930. 13.8 81 3 "datsun 810 maxima" 365 | 22.4 6 231.0 110.0 3415. 15.8 81 1 "buick century" 366 | 26.6 8 350.0 105.0 3725. 19.0 81 1 "oldsmobile cutlass ls" 367 | 20.2 6 200.0 88.00 3060. 17.1 81 1 "ford granada gl" 368 | 17.6 6 225.0 85.00 3465. 16.6 81 1 "chrysler lebaron salon" 369 | 28.0 4 112.0 88.00 2605. 19.6 82 1 "chevrolet cavalier" 370 | 27.0 4 112.0 88.00 2640. 18.6 82 1 "chevrolet cavalier wagon" 371 | 34.0 4 112.0 88.00 2395. 18.0 82 1 "chevrolet cavalier 2-door" 372 | 31.0 4 112.0 85.00 2575. 16.2 82 1 "pontiac j2000 se hatchback" 373 | 29.0 4 135.0 84.00 2525. 16.0 82 1 "dodge aries se" 374 | 27.0 4 151.0 90.00 2735. 18.0 82 1 "pontiac phoenix" 375 | 24.0 4 140.0 92.00 2865. 16.4 82 1 "ford fairmont futura" 376 | 23.0 4 151.0 nan 3035. 20.5 82 1 "amc concord dl" 377 | 36.0 4 105.0 74.00 1980. 15.3 82 2 "volkswagen rabbit l" 378 | 37.0 4 91.00 68.00 2025. 18.2 82 3 "mazda glc custom l" 379 | 31.0 4 91.00 68.00 1970. 17.6 82 3 "mazda glc custom" 380 | 38.0 4 105.0 63.00 2125. 14.7 82 1 "plymouth horizon miser" 381 | 36.0 4 98.00 70.00 2125. 17.3 82 1 "mercury lynx l" 382 | 36.0 4 120.0 88.00 2160. 14.5 82 3 "nissan stanza xe" 383 | 36.0 4 107.0 75.00 2205. 14.5 82 3 "honda accord" 384 | 34.0 4 108.0 70.00 2245 16.9 82 3 "toyota corolla" 385 | 38.0 4 91.00 67.00 1965. 15.0 82 3 "honda civic" 386 | 32.0 4 91.00 67.00 1965. 15.7 82 3 "honda civic (auto)" 387 | 38.0 4 91.00 67.00 1995. 16.2 82 3 "datsun 310 gx" 388 | 25.0 6 181.0 110.0 2945. 16.4 82 1 "buick century limited" 389 | 38.0 6 262.0 85.00 3015. 17.0 82 1 "oldsmobile cutlass ciera (diesel)" 390 | 26.0 4 156.0 92.00 2585. 
14.5 82 1 "chrysler lebaron medallion" 391 | 22.0 6 232.0 112.0 2835 14.7 82 1 "ford granada l" 392 | 32.0 4 144.0 96.00 2665. 13.9 82 3 "toyota celica gt" 393 | 36.0 4 135.0 84.00 2370. 13.0 82 1 "dodge charger 2.2" 394 | 27.0 4 151.0 90.00 2950. 17.3 82 1 "chevrolet camaro" 395 | 27.0 4 140.0 86.00 2790. 15.6 82 1 "ford mustang gl" 396 | 44.0 4 97.00 52.00 2130. 24.6 82 2 "vw pickup" 397 | 32.0 4 135.0 84.00 2295. 11.6 82 1 "dodge rampage" 398 | 28.0 4 120.0 79.00 2625. 18.6 82 1 "ford ranger" 399 | 31.0 4 119.0 82.00 2720. 19.4 82 1 "chevy s-10" 400 | -------------------------------------------------------------------------------- /notebooks/data/auto-mpg.txt: -------------------------------------------------------------------------------- 1 | Autodata from 2 | 3 | https://archive.ics.uci.edu/ml/datasets/auto+mpg 4 | 5 | modified to add column names to datafile and to convert missing data from `?` -> `nan` -------------------------------------------------------------------------------- /notebooks/data/dem.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/story645/mpl_tutorial/bdf9c547eebae76b65eb75cde62546e0b3149701/notebooks/data/dem.npy -------------------------------------------------------------------------------- /notebooks/helpers/ensure_print.py: -------------------------------------------------------------------------------- 1 | ''' 2 | make sure prints in callbacks make it to the notebook 3 | 4 | See https://ipywidgets.readthedocs.io/en/stable/examples/Output%20Widget.html 5 | 6 | By default, calling `print` in a ipywidgets callback results in the output 7 | being lost (because it is not clear _where_ it should go). You can explictily 8 | capture that the text to a given output area using at Output widget. 
9 | 10 | This is a wrapper for `plt.subplots` that makes sure 11 | a) an ipywidgets.widgets.Output is created with each Figure 12 | b) the `mpl_connect` on the canvas is monkey-patched such that all 13 | user callbacks run in a context where the stdout is captured and sent 14 | 15 | to that output area. 16 | ''' 17 | 18 | import matplotlib.pyplot as plt 19 | 20 | 21 | def _monkey_patch_pyplot(): 22 | import matplotlib.pyplot as plt 23 | import functools 24 | from IPython.display import display 25 | import ipywidgets as widgets 26 | import weakref 27 | 28 | @functools.wraps(plt.figure) 29 | def figure(*args, **kwargs): 30 | fig = figure._figure(*args, **kwargs) 31 | fig._output = output = widgets.Output() 32 | display(output) 33 | 34 | orig_mpl_connect = fig.canvas.mpl_connect 35 | 36 | @functools.wraps(orig_mpl_connect) 37 | def mpl_connect(key, cb, **kwargs): 38 | # try to use a WeakMethod to make sure we don't keep objects alive 39 | # to match the behavior of the base mpl_connect 40 | try: 41 | r = weakref.WeakMethod(cb) 42 | except TypeError: 43 | def r(): 44 | return cb 45 | 46 | def wrapper(*args, **kw): 47 | cb = r() 48 | 49 | with output: 50 | if cb is not None: 51 | cb(*args, **kw) 52 | 53 | orig_mpl_connect(key, wrapper, **kwargs) 54 | 55 | # mokeny patch the canvas 56 | fig.canvas.mpl_connect = mpl_connect 57 | return fig 58 | 59 | # stash the orginal 60 | figure._figure = plt.figure 61 | # monkey patch pyplot (!?) 62 | plt.figure = figure 63 | plt._print_hacked = True 64 | 65 | 66 | # make sure we only do this once! 67 | if getattr(plt, '_print_hacked', False): 68 | ... 69 | else: 70 | _monkey_patch_pyplot() 71 | 72 | # clean up after our selves and do not polute the user's namespace 73 | del _monkey_patch_pyplot 74 | del plt 75 | --------------------------------------------------------------------------------