├── .gitattributes
├── .gitignore
├── 01-intro-to-pandas-part-1-answers.ipynb
├── 01-intro-to-pandas-part-1-exercises.ipynb
├── 01-intro-to-pandas-part-1-slides.ipynb
├── 02-intro-to-pandas-part-2-answers.ipynb
├── 02-intro-to-pandas-part-2-exercises.ipynb
├── 02-intro-to-pandas-part-2-slides.ipynb
├── 03-group-apply-answers.ipynb
├── 03-group-apply-exercises.ipynb
├── 03-group-apply-slides.ipynb
├── 04-time-series-answers.ipynb
├── 04-time-series-exercises.ipynb
├── 04-time-series-slides.ipynb
├── 05-merge-pivot-answers.ipynb
├── 05-merge-pivot-exercises.ipynb
├── 05-merge-pivot-slides.ipynb
├── 06-advanced-merge-reshape-answers.ipynb
├── 06-advanced-merge-reshape-exercises.ipynb
├── 06-advanced-merge-reshape-slides.ipynb
├── LICENSE
├── README.md
├── assets
    ├── data-label-arrays.png
    ├── enplus-logo-colored.png
    ├── enplus-logo-colored.svg
    ├── full-join.png
    ├── inner-join.png
    ├── lag.png
    ├── lead.png
    ├── left-join.png
    ├── lesson-01-key-value.png
    ├── right-join.png
    ├── rolling.png
    ├── split-apply-combine.png
    ├── stock-trading-1600x1200.jpg
    ├── vectorized-multiplication.png
    └── venn-diagrams.sketch
├── binder
    └── environment.yml
├── build.py
├── build.sh
├── build
    ├── .gitignore
    ├── custom.css
    ├── favicon.ico
    └── reveal.js
    │   └── .gitignore
├── config.py
├── data
    ├── 726505-04845-2009
    └── weather-6m.csv
├── docker-compose.yml
├── environment-dev.yml
├── environment.yml
├── scripts
    └── combine-envs.sh
└── sp500.csv


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.jpg filter=lfs diff=lfs merge=lfs -text
2 | 
3 | *.ipynb	diff=jupyternotebook
4 | 
5 | *.ipynb	merge=jupyternotebook
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # OS X
  7 | .DS_Store
  8 | 
  9 | # intellij, vscode
 10 | .idea/
 11 | .vscode/
 12 | 
 13 | # C extensions
 14 | *.so
 15 | 
 16 | # Distribution / packaging
 17 | .Python
 18 | env/
 19 | develop-eggs/
 20 | dist/
 21 | downloads/
 22 | eggs/
 23 | .eggs/
 24 | lib/
 25 | lib64/
 26 | parts/
 27 | sdist/
 28 | var/
 29 | *.egg-info/
 30 | .installed.cfg
 31 | *.egg
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | log.txt
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *,cover
 53 | .hypothesis/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | 
 63 | # Flask stuff:
 64 | instance/
 65 | .webassets-cache
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | docs/_build/
 72 | 
 73 | # PyBuilder
 74 | target/
 75 | 
 76 | # IPython Notebook
 77 | .ipynb_checkpoints
 78 | 
 79 | # pyenv
 80 | .python-version
 81 | 
 82 | # celery beat schedule file
 83 | celerybeat-schedule
 84 | 
 85 | # dotenv
 86 | .env
 87 | 
 88 | # virtualenv
 89 | venv/
 90 | ENV/
 91 | 
 92 | # Spyder project settings
 93 | .spyderproject
 94 | 
 95 | # Rope project settings
 96 | .ropeproject
 97 | 
 98 | *.html
 99 | reveal.js
100 | !build.sh
101 | 


--------------------------------------------------------------------------------
/01-intro-to-pandas-part-1-answers.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": [
  7 |      "setup"
  8 |     ]
  9 |    },
 10 |    "source": [
 11 |     "(c) 2016 - present. Enplus Advisors, Inc."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 1,
 17 |    "metadata": {
 18 |     "tags": [
 19 |      "setup"
 20 |     ]
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import numpy as np\n",
 25 |     "import pandas as pd"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {
 31 |     "tags": [
 32 |      "exercise"
 33 |     ]
 34 |    },
 35 |    "source": [
 36 |     "**Exercise:**\n",
 37 |     "\n",
 38 |     "* Create a `Series` and assign it to `s1a` from the integers 8, 6, 7, 5\n",
 39 |     "* From a `dict`, create an integer `Series` and assign it to `s1b` with values 8, 6, 7, 5 named `s1b` and a string index 'a', 'b', 'c', and 'd'\n",
 40 |     "* Convert the `s1b` values to 64-bit floating point values and assign it to `s1c`\n",
 41 |     "* Extract only the values from the `s1c` as a `PandasArray`"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 2,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "s1a = pd.Series([8, 6, 7, 5])"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 3,
 56 |    "metadata": {},
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "s1b = pd.Series({'a': 8, 'b': 6, 'c': 7, 'd': 5})"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 4,
 65 |    "metadata": {},
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "s1c = s1b.astype(np.float64)"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 5,
 74 |    "metadata": {},
 75 |    "outputs": [
 76 |     {
 77 |      "data": {
 78 |       "text/plain": [
 79 |        "<PandasArray>\n",
 80 |        "[8.0, 6.0, 7.0, 5.0]\n",
 81 |        "Length: 4, dtype: float64"
 82 |       ]
 83 |      },
 84 |      "execution_count": 5,
 85 |      "metadata": {},
 86 |      "output_type": "execute_result"
 87 |     }
 88 |    ],
 89 |    "source": [
 90 |     "s1c.array # s1c.values for pandas < 0.24.0"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 6,
 96 |    "metadata": {},
 97 |    "outputs": [
 98 |     {
 99 |      "data": {
100 |       "text/plain": [
101 |        "array([8., 6., 7., 5.])"
102 |       ]
103 |      },
104 |      "execution_count": 6,
105 |      "metadata": {},
106 |      "output_type": "execute_result"
107 |     }
108 |    ],
109 |    "source": [
110 |     "s1c.to_numpy() # to convert to a `numpy.ndarray`"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "markdown",
115 |    "metadata": {
116 |     "tags": [
117 |      "exercise"
118 |     ]
119 |    },
120 |    "source": [
121 |     "**Exercise:**\n",
122 |     "\n",
123 |     "* Select the first element of `s2` using integer based lookup (`iloc`).\n",
124 |     "* Select the first element of the `s2` using label indexing (`loc`).\n",
125 |     "* Select all elements greater than 6 in `s2` using a boolean `Series`"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 7,
131 |    "metadata": {
132 |     "tags": [
133 |      "exercise"
134 |     ]
135 |    },
136 |    "outputs": [],
137 |    "source": [
138 |     "s2 = pd.Series([6, 8, 7, 5], index=list('abcd'), dtype='Int64')"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": 8,
144 |    "metadata": {},
145 |    "outputs": [
146 |     {
147 |      "data": {
148 |       "text/plain": [
149 |        "6"
150 |       ]
151 |      },
152 |      "execution_count": 8,
153 |      "metadata": {},
154 |      "output_type": "execute_result"
155 |     }
156 |    ],
157 |    "source": [
158 |     "s2.iloc[0] # also s2.iat[0]"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": 9,
164 |    "metadata": {},
165 |    "outputs": [
166 |     {
167 |      "data": {
168 |       "text/plain": [
169 |        "6"
170 |       ]
171 |      },
172 |      "execution_count": 9,
173 |      "metadata": {},
174 |      "output_type": "execute_result"
175 |     }
176 |    ],
177 |    "source": [
178 |     "s2.loc['a']"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": 10,
184 |    "metadata": {},
185 |    "outputs": [
186 |     {
187 |      "data": {
188 |       "text/plain": [
189 |        "b    8\n",
190 |        "c    7\n",
191 |        "dtype: Int64"
192 |       ]
193 |      },
194 |      "execution_count": 10,
195 |      "metadata": {},
196 |      "output_type": "execute_result"
197 |     }
198 |    ],
199 |    "source": [
200 |     "s2[s2 > 6]"
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "markdown",
205 |    "metadata": {
206 |     "tags": [
207 |      "exercise"
208 |     ]
209 |    },
210 |    "source": [
211 |     "**Exercise**\n",
212 |     "\n",
213 |     "* Select all non-NaN values in `s3`\n",
214 |     "* What will the result of adding `s2` and `s3` together be? \n",
215 |     "  Figure it out on paper then check in the notebook with `s2 + s3`"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": 11,
221 |    "metadata": {
222 |     "tags": [
223 |      "exercise"
224 |     ]
225 |    },
226 |    "outputs": [],
227 |    "source": [
228 |     "s3 = pd.Series([9., 100., np.nan], index=list('ayz'), dtype='Int64')"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": 12,
234 |    "metadata": {},
235 |    "outputs": [
236 |     {
237 |      "data": {
238 |       "text/plain": [
239 |        "a      9\n",
240 |        "y    100\n",
241 |        "dtype: Int64"
242 |       ]
243 |      },
244 |      "execution_count": 12,
245 |      "metadata": {},
246 |      "output_type": "execute_result"
247 |     }
248 |    ],
249 |    "source": [
250 |     "s3[s3.notnull()]"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": 13,
256 |    "metadata": {},
257 |    "outputs": [
258 |     {
259 |      "data": {
260 |       "text/plain": [
261 |        "a      15\n",
262 |        "b    <NA>\n",
263 |        "c    <NA>\n",
264 |        "d    <NA>\n",
265 |        "y    <NA>\n",
266 |        "z    <NA>\n",
267 |        "dtype: Int64"
268 |       ]
269 |      },
270 |      "execution_count": 13,
271 |      "metadata": {},
272 |      "output_type": "execute_result"
273 |     }
274 |    ],
275 |    "source": [
276 |     "s2 + s3"
277 |    ]
278 |   }
279 |  ],
280 |  "metadata": {
281 |   "celltoolbar": "Tags",
282 |   "kernelspec": {
283 |    "display_name": "Python 3",
284 |    "language": "python",
285 |    "name": "python3"
286 |   },
287 |   "language_info": {
288 |    "codemirror_mode": {
289 |     "name": "ipython",
290 |     "version": 3
291 |    },
292 |    "file_extension": ".py",
293 |    "mimetype": "text/x-python",
294 |    "name": "python",
295 |    "nbconvert_exporter": "python",
296 |    "pygments_lexer": "ipython3",
297 |    "version": "3.7.7"
298 |   },
299 |   "toc": {
300 |    "base_numbering": 1,
301 |    "nav_menu": {},
302 |    "number_sections": false,
303 |    "sideBar": true,
304 |    "skip_h1_title": true,
305 |    "title_cell": "Table of Contents",
306 |    "title_sidebar": "Contents",
307 |    "toc_cell": false,
308 |    "toc_position": {},
309 |    "toc_section_display": true,
310 |    "toc_window_display": false
311 |   }
312 |  },
313 |  "nbformat": 4,
314 |  "nbformat_minor": 4
315 | }
316 | 


--------------------------------------------------------------------------------
/01-intro-to-pandas-part-1-exercises.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": [
  7 |      "setup"
  8 |     ]
  9 |    },
 10 |    "source": [
 11 |     "(c) 2016 - present. Enplus Advisors, Inc."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "metadata": {
 18 |     "tags": [
 19 |      "setup"
 20 |     ]
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import numpy as np\n",
 25 |     "import pandas as pd"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {
 31 |     "tags": [
 32 |      "exercise"
 33 |     ]
 34 |    },
 35 |    "source": [
 36 |     "**Exercise:**\n",
 37 |     "\n",
 38 |     "* Create a `Series` and assign it to `s1a` from the integers 8, 6, 7, 5\n",
 39 |     "* From a `dict`, create an integer `Series` and assign it to `s1b` with values 8, 6, 7, 5 named `s1b` and a string index 'a', 'b', 'c', and 'd'\n",
 40 |     "* Convert the `s1b` values to 64-bit floating point values and assign it to `s1c`\n",
 41 |     "* Extract only the values from the `s1c` as a `PandasArray`"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {
 47 |     "tags": [
 48 |      "exercise"
 49 |     ]
 50 |    },
 51 |    "source": [
 52 |     "**Exercise:**\n",
 53 |     "\n",
 54 |     "* Select the first element of `s2` using integer based lookup (`iloc`).\n",
 55 |     "* Select the first element of the `s2` using label indexing (`loc`).\n",
 56 |     "* Select all elements greater than 6 in `s2` using a boolean `Series`"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": null,
 62 |    "metadata": {
 63 |     "tags": [
 64 |      "exercise"
 65 |     ]
 66 |    },
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "s2 = pd.Series([6, 8, 7, 5], index=list('abcd'), dtype='Int64')"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {
 75 |     "tags": [
 76 |      "exercise"
 77 |     ]
 78 |    },
 79 |    "source": [
 80 |     "**Exercise**\n",
 81 |     "\n",
 82 |     "* Select all non-NaN values in `s3`\n",
 83 |     "* What will the result of adding `s2` and `s3` together be? \n",
 84 |     "  Figure it out on paper then check in the notebook with `s2 + s3`"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": null,
 90 |    "metadata": {
 91 |     "tags": [
 92 |      "exercise"
 93 |     ]
 94 |    },
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "s3 = pd.Series([9., 100., np.nan], index=list('ayz'), dtype='Int64')"
 98 |    ]
 99 |   }
100 |  ],
101 |  "metadata": {
102 |   "kernelspec": {
103 |    "display_name": "Python 3",
104 |    "language": "python",
105 |    "name": "python3"
106 |   },
107 |   "language_info": {
108 |    "codemirror_mode": {
109 |     "name": "ipython",
110 |     "version": 3
111 |    },
112 |    "file_extension": ".py",
113 |    "mimetype": "text/x-python",
114 |    "name": "python",
115 |    "nbconvert_exporter": "python",
116 |    "pygments_lexer": "ipython3",
117 |    "version": "3.7.7"
118 |   }
119 |  },
120 |  "nbformat": 4,
121 |  "nbformat_minor": 4
122 | }
123 | 


--------------------------------------------------------------------------------
/02-intro-to-pandas-part-2-answers.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "slideshow": {
  7 |      "slide_type": "notes"
  8 |     },
  9 |     "tags": [
 10 |      "setup"
 11 |     ]
 12 |    },
 13 |    "source": [
 14 |     "(c) 2016 - present. Enplus Advisors, Inc."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "slideshow": {
 22 |      "slide_type": "skip"
 23 |     },
 24 |     "tags": [
 25 |      "setup"
 26 |     ]
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "import numpy as np\n",
 31 |     "import pandas as pd"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 2,
 37 |    "metadata": {
 38 |     "slideshow": {
 39 |      "slide_type": "slide"
 40 |     },
 41 |     "tags": [
 42 |      "setup"
 43 |     ]
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "df = pd.DataFrame({\n",
 48 |     "    'ticker': ['AAPL', 'AAPL', 'MSFT', 'IBM', 'YHOO'],\n",
 49 |     "    'date': ['2015-12-30', '2015-12-31', '2015-12-30', '2015-12-30', '2015-12-30'],\n",
 50 |     "    'open': [426.23, 427.81, 42.3, 101.65, 35.53]\n",
 51 |     "})"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {
 57 |     "tags": [
 58 |      "exercise"
 59 |     ]
 60 |    },
 61 |    "source": [
 62 |     "**Exercise:**\n",
 63 |     "\n",
 64 |     "* Select the `open` column as a `Series` using attribute lookup\n",
 65 |     "* Select the `open` column as a `Series` using `dict`-style lookup\n",
 66 |     "* Select the `date` column as a `DataFrame`"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 3,
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "res1a = df.open\n",
 76 |     "res1b = df['open']\n",
 77 |     "res1c = df[['open']]"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {
 83 |     "tags": [
 84 |      "exercise"
 85 |     ]
 86 |    },
 87 |    "source": [
 88 |     "**Exercise:**\n",
 89 |     "\n",
 90 |     "* Select all rows with the `AAPL` ticker and the `date` and `open`\n",
 91 |     "  columns.\n",
 92 |     "* Assign to the variable `df1` a new `DataFrame` with `ticker` as\n",
 93 |     "  the index.\n",
 94 |     "* Assign to the variable `df2` a new `DataFrame` with `date` as\n",
 95 |     "  the index. Create this `DataFrame` from `df1` with a single\n",
 96 |     "  statement.\n",
 97 |     "* Sort `df2` by the index values."
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 4,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "res2a = df.loc[df.ticker == 'AAPL', ['date', 'open']]\n",
107 |     "df1 = df.set_index('ticker')\n",
108 |     "df2 = df1.reset_index().set_index('date')"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 5,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "df2_sorted = df2.sort_index()"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "markdown",
122 |    "metadata": {
123 |     "tags": [
124 |      "exercise"
125 |     ]
126 |    },
127 |    "source": [
128 |     "**Exercise:**\n",
129 |     "\n",
130 |     "* Create a copy of `df` called `df3`. Add a new column of `NaNs` \n",
131 |     "  to `df3` called `close`. Assign `close` the same value as `open`\n",
132 |     "  for all `open` values greater than 100.\n",
133 |     "* Sort `df3` by its `close` values."
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 6,
139 |    "metadata": {},
140 |    "outputs": [
141 |     {
142 |      "data": {
143 |       "text/html": [
144 |        "<div>\n",
145 |        "<style scoped>\n",
146 |        "    .dataframe tbody tr th:only-of-type {\n",
147 |        "        vertical-align: middle;\n",
148 |        "    }\n",
149 |        "\n",
150 |        "    .dataframe tbody tr th {\n",
151 |        "        vertical-align: top;\n",
152 |        "    }\n",
153 |        "\n",
154 |        "    .dataframe thead th {\n",
155 |        "        text-align: right;\n",
156 |        "    }\n",
157 |        "</style>\n",
158 |        "<table border=\"1\" class=\"dataframe\">\n",
159 |        "  <thead>\n",
160 |        "    <tr style=\"text-align: right;\">\n",
161 |        "      <th></th>\n",
162 |        "      <th>ticker</th>\n",
163 |        "      <th>date</th>\n",
164 |        "      <th>open</th>\n",
165 |        "      <th>close</th>\n",
166 |        "    </tr>\n",
167 |        "  </thead>\n",
168 |        "  <tbody>\n",
169 |        "    <tr>\n",
170 |        "      <th>0</th>\n",
171 |        "      <td>AAPL</td>\n",
172 |        "      <td>2015-12-30</td>\n",
173 |        "      <td>426.23</td>\n",
174 |        "      <td>426.23</td>\n",
175 |        "    </tr>\n",
176 |        "    <tr>\n",
177 |        "      <th>1</th>\n",
178 |        "      <td>AAPL</td>\n",
179 |        "      <td>2015-12-31</td>\n",
180 |        "      <td>427.81</td>\n",
181 |        "      <td>427.81</td>\n",
182 |        "    </tr>\n",
183 |        "    <tr>\n",
184 |        "      <th>2</th>\n",
185 |        "      <td>MSFT</td>\n",
186 |        "      <td>2015-12-30</td>\n",
187 |        "      <td>42.30</td>\n",
188 |        "      <td>NaN</td>\n",
189 |        "    </tr>\n",
190 |        "    <tr>\n",
191 |        "      <th>3</th>\n",
192 |        "      <td>IBM</td>\n",
193 |        "      <td>2015-12-30</td>\n",
194 |        "      <td>101.65</td>\n",
195 |        "      <td>101.65</td>\n",
196 |        "    </tr>\n",
197 |        "    <tr>\n",
198 |        "      <th>4</th>\n",
199 |        "      <td>YHOO</td>\n",
200 |        "      <td>2015-12-30</td>\n",
201 |        "      <td>35.53</td>\n",
202 |        "      <td>NaN</td>\n",
203 |        "    </tr>\n",
204 |        "  </tbody>\n",
205 |        "</table>\n",
206 |        "</div>"
207 |       ],
208 |       "text/plain": [
209 |        "  ticker        date    open   close\n",
210 |        "0   AAPL  2015-12-30  426.23  426.23\n",
211 |        "1   AAPL  2015-12-31  427.81  427.81\n",
212 |        "2   MSFT  2015-12-30   42.30     NaN\n",
213 |        "3    IBM  2015-12-30  101.65  101.65\n",
214 |        "4   YHOO  2015-12-30   35.53     NaN"
215 |       ]
216 |      },
217 |      "execution_count": 6,
218 |      "metadata": {},
219 |      "output_type": "execute_result"
220 |     }
221 |    ],
222 |    "source": [
223 |     "df3 = df.copy()\n",
224 |     "\n",
225 |     "# this could be skipped from a functional standpoint, though\n",
226 |     "# the instructions say to do it\n",
227 |     "df3['close'] = np.nan \n",
228 |     "\n",
229 |     "gt100 = df3.open[df3.open > 100]\n",
230 |     "df3.close = gt100 # you can use dot syntax b/c `close` already exists\n",
231 |     "df3"
232 |    ]
233 |   }
234 |  ],
235 |  "metadata": {
236 |   "celltoolbar": "Tags",
237 |   "kernelspec": {
238 |    "display_name": "Python 3",
239 |    "language": "python",
240 |    "name": "python3"
241 |   },
242 |   "language_info": {
243 |    "codemirror_mode": {
244 |     "name": "ipython",
245 |     "version": 3
246 |    },
247 |    "file_extension": ".py",
248 |    "mimetype": "text/x-python",
249 |    "name": "python",
250 |    "nbconvert_exporter": "python",
251 |    "pygments_lexer": "ipython3",
252 |    "version": "3.7.3"
253 |   },
254 |   "toc": {
255 |    "base_numbering": 1,
256 |    "nav_menu": {},
257 |    "number_sections": false,
258 |    "sideBar": false,
259 |    "skip_h1_title": true,
260 |    "title_cell": "Table of Contents",
261 |    "title_sidebar": "Contents",
262 |    "toc_cell": false,
263 |    "toc_position": {},
264 |    "toc_section_display": false,
265 |    "toc_window_display": false
266 |   }
267 |  },
268 |  "nbformat": 4,
269 |  "nbformat_minor": 1
270 | }
271 | 


--------------------------------------------------------------------------------
/02-intro-to-pandas-part-2-exercises.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "slideshow": {
  7 |      "slide_type": "notes"
  8 |     },
  9 |     "tags": [
 10 |      "setup"
 11 |     ]
 12 |    },
 13 |    "source": [
 14 |     "(c) 2016 - present. Enplus Advisors, Inc."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": null,
 20 |    "metadata": {
 21 |     "slideshow": {
 22 |      "slide_type": "skip"
 23 |     },
 24 |     "tags": [
 25 |      "setup"
 26 |     ]
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "import numpy as np\n",
 31 |     "import pandas as pd"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "slideshow": {
 39 |      "slide_type": "slide"
 40 |     },
 41 |     "tags": [
 42 |      "setup"
 43 |     ]
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "df = pd.DataFrame({\n",
 48 |     "    'ticker': ['AAPL', 'AAPL', 'MSFT', 'IBM', 'YHOO'],\n",
 49 |     "    'date': ['2015-12-30', '2015-12-31', '2015-12-30', '2015-12-30', '2015-12-30'],\n",
 50 |     "    'open': [426.23, 427.81, 42.3, 101.65, 35.53]\n",
 51 |     "})"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {
 57 |     "tags": [
 58 |      "exercise"
 59 |     ]
 60 |    },
 61 |    "source": [
 62 |     "**Exercise:**\n",
 63 |     "\n",
 64 |     "* Select the `open` column as a `Series` using attribute lookup\n",
 65 |     "* Select the `open` column as a `Series` using `dict`-style lookup\n",
 66 |     "* Select the `date` column as a `DataFrame`"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {
 72 |     "tags": [
 73 |      "exercise"
 74 |     ]
 75 |    },
 76 |    "source": [
 77 |     "**Exercise:**\n",
 78 |     "\n",
 79 |     "* Select all rows with the `AAPL` ticker and the `date` and `open`\n",
 80 |     "  columns.\n",
 81 |     "* Assign to the variable `df1` a new `DataFrame` with `ticker` as\n",
 82 |     "  the index.\n",
 83 |     "* Assign to the variable `df2` a new `DataFrame` with `date` as\n",
 84 |     "  the index. Create this `DataFrame` from `df1` with a single\n",
 85 |     "  statement.\n",
 86 |     "* Sort `df2` by the index values."
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "metadata": {
 92 |     "tags": [
 93 |      "exercise"
 94 |     ]
 95 |    },
 96 |    "source": [
 97 |     "**Exercise:**\n",
 98 |     "\n",
 99 |     "* Create a copy of `df` called `df3`. Add a new column of `NaNs` \n",
100 |     "  to `df3` called `close`. Assign `close` the same value as `open`\n",
101 |     "  for all `open` values greater than 100.\n",
102 |     "* Sort `df3` by its `close` values."
103 |    ]
104 |   }
105 |  ],
106 |  "metadata": {
107 |   "kernelspec": {
108 |    "display_name": "Python 3",
109 |    "language": "python",
110 |    "name": "python3"
111 |   },
112 |   "language_info": {
113 |    "codemirror_mode": {
114 |     "name": "ipython",
115 |     "version": 3
116 |    },
117 |    "file_extension": ".py",
118 |    "mimetype": "text/x-python",
119 |    "name": "python",
120 |    "nbconvert_exporter": "python",
121 |    "pygments_lexer": "ipython3",
122 |    "version": "3.7.3"
123 |   }
124 |  },
125 |  "nbformat": 4,
126 |  "nbformat_minor": 1
127 | }
128 | 


--------------------------------------------------------------------------------
/03-group-apply-answers.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": [
  7 |      "setup"
  8 |     ]
  9 |    },
 10 |    "source": [
 11 |     "(c) 2016 - present. Enplus Advisors, Inc."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 1,
 17 |    "metadata": {
 18 |     "tags": [
 19 |      "setup"
 20 |     ]
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import numpy as np\n",
 25 |     "import pandas as pd\n",
 26 |     "\n",
 27 |     "pd.set_option('display.float_format', '{:,.1f}'.format)"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 2,
 33 |    "metadata": {
 34 |     "tags": [
 35 |      "setup"
 36 |     ]
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "dat = pd.read_csv('data/weather-6m.csv')"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {
 46 |     "tags": [
 47 |      "exercise"
 48 |     ]
 49 |    },
 50 |    "source": [
 51 |     "**Exercise:**\n",
 52 |     "\n",
 53 |     "Calculate the average `air_temp` by `month`."
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 3,
 59 |    "metadata": {},
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "grp = dat.groupby('month')"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 4,
 68 |    "metadata": {
 69 |     "scrolled": false
 70 |    },
 71 |    "outputs": [
 72 |     {
 73 |      "data": {
 74 |       "text/plain": [
 75 |        "month\n",
 76 |        "1   -10.0\n",
 77 |        "2    -3.0\n",
 78 |        "3     2.1\n",
 79 |        "4     7.0\n",
 80 |        "5    14.0\n",
 81 |        "6    18.1\n",
 82 |        "Name: air_temp, dtype: float64"
 83 |       ]
 84 |      },
 85 |      "execution_count": 4,
 86 |      "metadata": {},
 87 |      "output_type": "execute_result"
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "grp['air_temp'].mean()"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "markdown",
 96 |    "metadata": {
 97 |     "tags": [
 98 |      "exercise"
 99 |     ]
100 |    },
101 |    "source": [
102 |     "**Exercise:**\n",
103 |     "\n",
104 |     "Compute summary statistics on `air_temp` and `dew_point` using \n",
105 |     "the `describe` method."
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 5,
111 |    "metadata": {
112 |     "scrolled": true
113 |    },
114 |    "outputs": [
115 |     {
116 |      "data": {
117 |       "text/html": [
118 |        "<div>\n",
119 |        "<style scoped>\n",
120 |        "    .dataframe tbody tr th:only-of-type {\n",
121 |        "        vertical-align: middle;\n",
122 |        "    }\n",
123 |        "\n",
124 |        "    .dataframe tbody tr th {\n",
125 |        "        vertical-align: top;\n",
126 |        "    }\n",
127 |        "\n",
128 |        "    .dataframe thead tr th {\n",
129 |        "        text-align: left;\n",
130 |        "    }\n",
131 |        "\n",
132 |        "    .dataframe thead tr:last-of-type th {\n",
133 |        "        text-align: right;\n",
134 |        "    }\n",
135 |        "</style>\n",
136 |        "<table border=\"1\" class=\"dataframe\">\n",
137 |        "  <thead>\n",
138 |        "    <tr>\n",
139 |        "      <th></th>\n",
140 |        "      <th colspan=\"8\" halign=\"left\">air_temp</th>\n",
141 |        "      <th colspan=\"8\" halign=\"left\">dew_point</th>\n",
142 |        "    </tr>\n",
143 |        "    <tr>\n",
144 |        "      <th></th>\n",
145 |        "      <th>count</th>\n",
146 |        "      <th>mean</th>\n",
147 |        "      <th>std</th>\n",
148 |        "      <th>min</th>\n",
149 |        "      <th>25%</th>\n",
150 |        "      <th>50%</th>\n",
151 |        "      <th>75%</th>\n",
152 |        "      <th>max</th>\n",
153 |        "      <th>count</th>\n",
154 |        "      <th>mean</th>\n",
155 |        "      <th>std</th>\n",
156 |        "      <th>min</th>\n",
157 |        "      <th>25%</th>\n",
158 |        "      <th>50%</th>\n",
159 |        "      <th>75%</th>\n",
160 |        "      <th>max</th>\n",
161 |        "    </tr>\n",
162 |        "    <tr>\n",
163 |        "      <th>month</th>\n",
164 |        "      <th></th>\n",
165 |        "      <th></th>\n",
166 |        "      <th></th>\n",
167 |        "      <th></th>\n",
168 |        "      <th></th>\n",
169 |        "      <th></th>\n",
170 |        "      <th></th>\n",
171 |        "      <th></th>\n",
172 |        "      <th></th>\n",
173 |        "      <th></th>\n",
174 |        "      <th></th>\n",
175 |        "      <th></th>\n",
176 |        "      <th></th>\n",
177 |        "      <th></th>\n",
178 |        "      <th></th>\n",
179 |        "      <th></th>\n",
180 |        "    </tr>\n",
181 |        "  </thead>\n",
182 |        "  <tbody>\n",
183 |        "    <tr>\n",
184 |        "      <th>1</th>\n",
185 |        "      <td>712.0</td>\n",
186 |        "      <td>-10.0</td>\n",
187 |        "      <td>6.2</td>\n",
188 |        "      <td>-29.4</td>\n",
189 |        "      <td>-13.3</td>\n",
190 |        "      <td>-10.0</td>\n",
191 |        "      <td>-5.6</td>\n",
192 |        "      <td>2.8</td>\n",
193 |        "      <td>712.0</td>\n",
194 |        "      <td>-14.1</td>\n",
195 |        "      <td>6.8</td>\n",
196 |        "      <td>-32.8</td>\n",
197 |        "      <td>-18.3</td>\n",
198 |        "      <td>-13.9</td>\n",
199 |        "      <td>-8.9</td>\n",
200 |        "      <td>1.0</td>\n",
201 |        "    </tr>\n",
202 |        "    <tr>\n",
203 |        "      <th>2</th>\n",
204 |        "      <td>644.0</td>\n",
205 |        "      <td>-3.0</td>\n",
206 |        "      <td>6.8</td>\n",
207 |        "      <td>-19.4</td>\n",
208 |        "      <td>-7.2</td>\n",
209 |        "      <td>-2.2</td>\n",
210 |        "      <td>1.7</td>\n",
211 |        "      <td>15.0</td>\n",
212 |        "      <td>644.0</td>\n",
213 |        "      <td>-7.3</td>\n",
214 |        "      <td>7.3</td>\n",
215 |        "      <td>-22.8</td>\n",
216 |        "      <td>-12.2</td>\n",
217 |        "      <td>-7.2</td>\n",
218 |        "      <td>-2.2</td>\n",
219 |        "      <td>8.3</td>\n",
220 |        "    </tr>\n",
221 |        "    <tr>\n",
222 |        "      <th>3</th>\n",
223 |        "      <td>713.0</td>\n",
224 |        "      <td>2.1</td>\n",
225 |        "      <td>6.7</td>\n",
226 |        "      <td>-13.3</td>\n",
227 |        "      <td>-1.7</td>\n",
228 |        "      <td>2.2</td>\n",
229 |        "      <td>5.6</td>\n",
230 |        "      <td>22.8</td>\n",
231 |        "      <td>713.0</td>\n",
232 |        "      <td>-3.4</td>\n",
233 |        "      <td>6.1</td>\n",
234 |        "      <td>-17.2</td>\n",
235 |        "      <td>-7.8</td>\n",
236 |        "      <td>-2.8</td>\n",
237 |        "      <td>0.6</td>\n",
238 |        "      <td>13.3</td>\n",
239 |        "    </tr>\n",
240 |        "    <tr>\n",
241 |        "      <th>4</th>\n",
242 |        "      <td>691.0</td>\n",
243 |        "      <td>7.0</td>\n",
244 |        "      <td>6.0</td>\n",
245 |        "      <td>-2.8</td>\n",
246 |        "      <td>2.8</td>\n",
247 |        "      <td>5.6</td>\n",
248 |        "      <td>9.4</td>\n",
249 |        "      <td>28.3</td>\n",
250 |        "      <td>691.0</td>\n",
251 |        "      <td>0.3</td>\n",
252 |        "      <td>6.1</td>\n",
253 |        "      <td>-13.3</td>\n",
254 |        "      <td>-3.9</td>\n",
255 |        "      <td>-1.1</td>\n",
256 |        "      <td>3.9</td>\n",
257 |        "      <td>16.7</td>\n",
258 |        "    </tr>\n",
259 |        "    <tr>\n",
260 |        "      <th>5</th>\n",
261 |        "      <td>713.0</td>\n",
262 |        "      <td>14.0</td>\n",
263 |        "      <td>5.1</td>\n",
264 |        "      <td>1.1</td>\n",
265 |        "      <td>10.6</td>\n",
266 |        "      <td>13.9</td>\n",
267 |        "      <td>17.2</td>\n",
268 |        "      <td>28.3</td>\n",
269 |        "      <td>713.0</td>\n",
270 |        "      <td>6.2</td>\n",
271 |        "      <td>4.6</td>\n",
272 |        "      <td>-6.1</td>\n",
273 |        "      <td>2.8</td>\n",
274 |        "      <td>6.7</td>\n",
275 |        "      <td>10.0</td>\n",
276 |        "      <td>18.3</td>\n",
277 |        "    </tr>\n",
278 |        "    <tr>\n",
279 |        "      <th>6</th>\n",
280 |        "      <td>688.0</td>\n",
281 |        "      <td>18.1</td>\n",
282 |        "      <td>6.0</td>\n",
283 |        "      <td>3.3</td>\n",
284 |        "      <td>13.8</td>\n",
285 |        "      <td>17.8</td>\n",
286 |        "      <td>22.8</td>\n",
287 |        "      <td>33.3</td>\n",
288 |        "      <td>688.0</td>\n",
289 |        "      <td>12.3</td>\n",
290 |        "      <td>5.5</td>\n",
291 |        "      <td>-3.3</td>\n",
292 |        "      <td>8.9</td>\n",
293 |        "      <td>11.7</td>\n",
294 |        "      <td>17.2</td>\n",
295 |        "      <td>23.3</td>\n",
296 |        "    </tr>\n",
297 |        "  </tbody>\n",
298 |        "</table>\n",
299 |        "</div>"
300 |       ],
301 |       "text/plain": [
302 |        "      air_temp                                       dew_point            \\\n",
303 |        "         count  mean std   min   25%   50%  75%  max     count  mean std   \n",
304 |        "month                                                                      \n",
305 |        "1        712.0 -10.0 6.2 -29.4 -13.3 -10.0 -5.6  2.8     712.0 -14.1 6.8   \n",
306 |        "2        644.0  -3.0 6.8 -19.4  -7.2  -2.2  1.7 15.0     644.0  -7.3 7.3   \n",
307 |        "3        713.0   2.1 6.7 -13.3  -1.7   2.2  5.6 22.8     713.0  -3.4 6.1   \n",
308 |        "4        691.0   7.0 6.0  -2.8   2.8   5.6  9.4 28.3     691.0   0.3 6.1   \n",
309 |        "5        713.0  14.0 5.1   1.1  10.6  13.9 17.2 28.3     713.0   6.2 4.6   \n",
310 |        "6        688.0  18.1 6.0   3.3  13.8  17.8 22.8 33.3     688.0  12.3 5.5   \n",
311 |        "\n",
312 |        "                                   \n",
313 |        "        min   25%   50%  75%  max  \n",
314 |        "month                              \n",
315 |        "1     -32.8 -18.3 -13.9 -8.9  1.0  \n",
316 |        "2     -22.8 -12.2  -7.2 -2.2  8.3  \n",
317 |        "3     -17.2  -7.8  -2.8  0.6 13.3  \n",
318 |        "4     -13.3  -3.9  -1.1  3.9 16.7  \n",
319 |        "5      -6.1   2.8   6.7 10.0 18.3  \n",
320 |        "6      -3.3   8.9  11.7 17.2 23.3  "
321 |       ]
322 |      },
323 |      "execution_count": 5,
324 |      "metadata": {},
325 |      "output_type": "execute_result"
326 |     }
327 |    ],
328 |    "source": [
329 |     "grp[['air_temp', 'dew_point']].describe()"
330 |    ]
331 |   },
332 |   {
333 |    "cell_type": "markdown",
334 |    "metadata": {
335 |     "tags": [
336 |      "exercise"
337 |     ]
338 |    },
339 |    "source": [
340 |     "**Exercise:**\n",
341 |     "\n",
342 |     "For January and February and 0 - 11 hours, calculate the average and standard deviation of `air_temp` grouping by month and hour of the day. Name your result columns `air_temp_mean` and `air_temp_sd`.\n",
343 |     "\n",
344 |     "Your result `DataFrame` should have 24 rows, the number of months (2) times the number of hours (12). \n",
345 |     "\n",
346 |     "$2 * 12 = 24$\n",
347 |     "\n"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "code",
352 |    "execution_count": 6,
353 |    "metadata": {},
354 |    "outputs": [],
355 |    "source": [
356 |     "idx = dat.month.isin([1, 2]) & (dat.hour < 12)\n",
357 |     "grp2 = dat[idx].groupby(['month', 'hour'])\n",
358 |     "hourly_temp = grp2.agg(\n",
359 |     "    air_temp_mean=('air_temp', 'mean'),\n",
360 |     "    air_temp_sd=('air_temp', 'std')\n",
361 |     ")"
362 |    ]
363 |   },
364 |   {
365 |    "cell_type": "markdown",
366 |    "metadata": {
367 |     "tags": [
368 |      "exercise"
369 |     ]
370 |    },
371 |    "source": [
372 |     "**Exercise:**\n",
373 |     "\n",
374 |     "By month, calculate quantiles for `air_temp` using the quantiles defined in `breaks`. \n",
375 |     "\n",
376 |     "Hint: Use the `quantile` method defined on a `Series` (`pd.Series.quantile`).\n"
377 |    ]
378 |   },
379 |   {
380 |    "cell_type": "code",
381 |    "execution_count": 7,
382 |    "metadata": {
383 |     "tags": [
384 |      "exercise"
385 |     ]
386 |    },
387 |    "outputs": [],
388 |    "source": [
389 |     "breaks = [0.01, 0.25, 0.5, 0.75, 0.99]"
390 |    ]
391 |   },
392 |   {
393 |    "cell_type": "code",
394 |    "execution_count": 8,
395 |    "metadata": {},
396 |    "outputs": [
397 |     {
398 |      "data": {
399 |       "text/html": [
400 |        "<div>\n",
401 |        "<style scoped>\n",
402 |        "    .dataframe tbody tr th:only-of-type {\n",
403 |        "        vertical-align: middle;\n",
404 |        "    }\n",
405 |        "\n",
406 |        "    .dataframe tbody tr th {\n",
407 |        "        vertical-align: top;\n",
408 |        "    }\n",
409 |        "\n",
410 |        "    .dataframe thead th {\n",
411 |        "        text-align: right;\n",
412 |        "    }\n",
413 |        "</style>\n",
414 |        "<table border=\"1\" class=\"dataframe\">\n",
415 |        "  <thead>\n",
416 |        "    <tr style=\"text-align: right;\">\n",
417 |        "      <th>air_temp</th>\n",
418 |        "      <th>0.0</th>\n",
419 |        "      <th>0.2</th>\n",
420 |        "      <th>0.5</th>\n",
421 |        "      <th>0.8</th>\n",
422 |        "      <th>1.0</th>\n",
423 |        "    </tr>\n",
424 |        "    <tr>\n",
425 |        "      <th>month</th>\n",
426 |        "      <th></th>\n",
427 |        "      <th></th>\n",
428 |        "      <th></th>\n",
429 |        "      <th></th>\n",
430 |        "      <th></th>\n",
431 |        "    </tr>\n",
432 |        "  </thead>\n",
433 |        "  <tbody>\n",
434 |        "    <tr>\n",
435 |        "      <th>1</th>\n",
436 |        "      <td>-25.0</td>\n",
437 |        "      <td>-13.3</td>\n",
438 |        "      <td>-10.0</td>\n",
439 |        "      <td>-5.6</td>\n",
440 |        "      <td>1.1</td>\n",
441 |        "    </tr>\n",
442 |        "    <tr>\n",
443 |        "      <th>2</th>\n",
444 |        "      <td>-18.3</td>\n",
445 |        "      <td>-7.2</td>\n",
446 |        "      <td>-2.2</td>\n",
447 |        "      <td>1.7</td>\n",
448 |        "      <td>12.8</td>\n",
449 |        "    </tr>\n",
450 |        "    <tr>\n",
451 |        "      <th>3</th>\n",
452 |        "      <td>-11.0</td>\n",
453 |        "      <td>-1.7</td>\n",
454 |        "      <td>2.2</td>\n",
455 |        "      <td>5.6</td>\n",
456 |        "      <td>19.3</td>\n",
457 |        "    </tr>\n",
458 |        "    <tr>\n",
459 |        "      <th>4</th>\n",
460 |        "      <td>-2.2</td>\n",
461 |        "      <td>2.8</td>\n",
462 |        "      <td>5.6</td>\n",
463 |        "      <td>9.4</td>\n",
464 |        "      <td>23.4</td>\n",
465 |        "    </tr>\n",
466 |        "    <tr>\n",
467 |        "      <th>5</th>\n",
468 |        "      <td>2.8</td>\n",
469 |        "      <td>10.6</td>\n",
470 |        "      <td>13.9</td>\n",
471 |        "      <td>17.2</td>\n",
472 |        "      <td>27.8</td>\n",
473 |        "    </tr>\n",
474 |        "    <tr>\n",
475 |        "      <th>6</th>\n",
476 |        "      <td>6.1</td>\n",
477 |        "      <td>13.8</td>\n",
478 |        "      <td>17.8</td>\n",
479 |        "      <td>22.8</td>\n",
480 |        "      <td>32.2</td>\n",
481 |        "    </tr>\n",
482 |        "  </tbody>\n",
483 |        "</table>\n",
484 |        "</div>"
485 |       ],
486 |       "text/plain": [
487 |        "air_temp   0.0   0.2   0.5  0.8  1.0\n",
488 |        "month                               \n",
489 |        "1        -25.0 -13.3 -10.0 -5.6  1.1\n",
490 |        "2        -18.3  -7.2  -2.2  1.7 12.8\n",
491 |        "3        -11.0  -1.7   2.2  5.6 19.3\n",
492 |        "4         -2.2   2.8   5.6  9.4 23.4\n",
493 |        "5          2.8  10.6  13.9 17.2 27.8\n",
494 |        "6          6.1  13.8  17.8 22.8 32.2"
495 |       ]
496 |      },
497 |      "execution_count": 8,
498 |      "metadata": {},
499 |      "output_type": "execute_result"
500 |     }
501 |    ],
502 |    "source": [
503 |     "grp3 = dat.groupby('month')\n",
504 |     "grp3.apply(lambda x: x.air_temp.quantile(breaks))"
505 |    ]
506 |   }
507 |  ],
508 |  "metadata": {
509 |   "celltoolbar": "Tags",
510 |   "kernelspec": {
511 |    "display_name": "Python 3",
512 |    "language": "python",
513 |    "name": "python3"
514 |   },
515 |   "language_info": {
516 |    "codemirror_mode": {
517 |     "name": "ipython",
518 |     "version": 3
519 |    },
520 |    "file_extension": ".py",
521 |    "mimetype": "text/x-python",
522 |    "name": "python",
523 |    "nbconvert_exporter": "python",
524 |    "pygments_lexer": "ipython3",
525 |    "version": "3.7.7"
526 |   },
527 |   "toc": {
528 |    "base_numbering": 1,
529 |    "nav_menu": {},
530 |    "number_sections": false,
531 |    "sideBar": true,
532 |    "skip_h1_title": true,
533 |    "title_cell": "Table of Contents",
534 |    "title_sidebar": "Contents",
535 |    "toc_cell": false,
536 |    "toc_position": {},
537 |    "toc_section_display": true,
538 |    "toc_window_display": false
539 |   }
540 |  },
541 |  "nbformat": 4,
542 |  "nbformat_minor": 1
543 | }
544 | 


--------------------------------------------------------------------------------
/03-group-apply-exercises.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": [
  7 |      "setup"
  8 |     ]
  9 |    },
 10 |    "source": [
 11 |     "(c) 2016 - present. Enplus Advisors, Inc."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "metadata": {
 18 |     "tags": [
 19 |      "setup"
 20 |     ]
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import numpy as np\n",
 25 |     "import pandas as pd\n",
 26 |     "\n",
 27 |     "pd.set_option('display.float_format', '{:,.1f}'.format)"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": null,
 33 |    "metadata": {
 34 |     "tags": [
 35 |      "setup"
 36 |     ]
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "dat = pd.read_csv('data/weather-6m.csv')"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {
 46 |     "tags": [
 47 |      "exercise"
 48 |     ]
 49 |    },
 50 |    "source": [
 51 |     "**Exercise:**\n",
 52 |     "\n",
 53 |     "Calculate the average `air_temp` by `month`."
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "metadata": {
 59 |     "tags": [
 60 |      "exercise"
 61 |     ]
 62 |    },
 63 |    "source": [
 64 |     "**Exercise:**\n",
 65 |     "\n",
 66 |     "Compute summary statistics on `air_temp` and `dew_point` using \n",
 67 |     "the `describe` method."
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {
 73 |     "tags": [
 74 |      "exercise"
 75 |     ]
 76 |    },
 77 |    "source": [
 78 |     "**Exercise:**\n",
 79 |     "\n",
 80 |     "For January and February and 0 - 11 hours, calculate the average and standard deviation of `air_temp` grouping by month and hour of the day. Name your result columns `air_temp_mean` and `air_temp_sd`.\n",
 81 |     "\n",
 82 |     "Your result `DataFrame` should have 24 rows, the number of months (2) times the number of hours (12). \n",
 83 |     "\n",
 84 |     "$2 * 12 = 24$\n",
 85 |     "\n"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {
 91 |     "tags": [
 92 |      "exercise"
 93 |     ]
 94 |    },
 95 |    "source": [
 96 |     "**Exercise:**\n",
 97 |     "\n",
 98 |     "By month, calculate quantiles for `air_temp` using the quantiles defined in `breaks`. \n",
 99 |     "\n",
100 |     "Hint: Use the `quantile` method defined on a `Series` (`pd.Series.quantile`).\n"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {
107 |     "tags": [
108 |      "exercise"
109 |     ]
110 |    },
111 |    "outputs": [],
112 |    "source": [
113 |     "breaks = [0.01, 0.25, 0.5, 0.75, 0.99]"
114 |    ]
115 |   }
116 |  ],
117 |  "metadata": {
118 |   "kernelspec": {
119 |    "display_name": "Python 3",
120 |    "language": "python",
121 |    "name": "python3"
122 |   },
123 |   "language_info": {
124 |    "codemirror_mode": {
125 |     "name": "ipython",
126 |     "version": 3
127 |    },
128 |    "file_extension": ".py",
129 |    "mimetype": "text/x-python",
130 |    "name": "python",
131 |    "nbconvert_exporter": "python",
132 |    "pygments_lexer": "ipython3",
133 |    "version": "3.7.7"
134 |   }
135 |  },
136 |  "nbformat": 4,
137 |  "nbformat_minor": 1
138 | }
139 | 


--------------------------------------------------------------------------------
/04-time-series-answers.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": [
  7 |      "setup"
  8 |     ]
  9 |    },
 10 |    "source": [
 11 |     "(c) 2016 - present. Enplus Advisors, Inc."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {
 17 |     "tags": [
 18 |      "setup"
 19 |     ]
 20 |    },
 21 |    "source": [
 22 |     "This module uses:\n",
 23 |     "* SP500 returns"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 1,
 29 |    "metadata": {
 30 |     "tags": [
 31 |      "setup"
 32 |     ]
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "import datetime as dt\n",
 37 |     "\n",
 38 |     "import numpy as np\n",
 39 |     "import pandas as pd\n",
 40 |     "\n",
 41 |     "pd.set_option('display.precision', 2)"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 2,
 47 |    "metadata": {
 48 |     "tags": [
 49 |      "setup"
 50 |     ]
 51 |    },
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "sp5_df = pd.read_csv(\n",
 55 |     "    'sp500.csv', usecols=['date', 'adj_close'], \n",
 56 |     "    parse_dates=['date'])"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {
 62 |     "tags": [
 63 |      "exercise"
 64 |     ]
 65 |    },
 66 |    "source": [
 67 |     "**Exercise:**\n",
 68 |     "\n",
 69 |     "Create a `pandas` Timestamp for January 1st, 1993 16:00 (don't worry about timezone)."
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 3,
 75 |    "metadata": {},
 76 |    "outputs": [
 77 |     {
 78 |      "data": {
 79 |       "text/plain": [
 80 |        "Timestamp('1993-01-01 16:00:00')"
 81 |       ]
 82 |      },
 83 |      "execution_count": 3,
 84 |      "metadata": {},
 85 |      "output_type": "execute_result"
 86 |     }
 87 |    ],
 88 |    "source": [
 89 |     "pd.Timestamp('1993-01-01 16:00') # __"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "markdown",
 94 |    "metadata": {
 95 |     "tags": [
 96 |      "exercise"
 97 |     ]
 98 |    },
 99 |    "source": [
100 |     "**Exercise:**\n",
101 |     "\n",
102 |     "Generate a an Index of:\n",
103 |     "* 5 calendar days starting on January 1, 2010.\n",
104 |     "* All US business days (weekdays) starting on January 1, 2010\n",
105 |     "  and ending on January 15, 2010.\n",
106 |     "  \n",
107 |     "__Hint:__ You can view the help for a function by running `help(function_name)`, e.g. `help(pd.Timestamp)`. Try looking at the help for `pd.date_range`."
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 4,
113 |    "metadata": {},
114 |    "outputs": [
115 |     {
116 |      "data": {
117 |       "text/plain": [
118 |        "DatetimeIndex(['2010-01-01', '2010-01-02', '2010-01-03', '2010-01-04',\n",
119 |        "               '2010-01-05'],\n",
120 |        "              dtype='datetime64[ns]', freq='D')"
121 |       ]
122 |      },
123 |      "execution_count": 4,
124 |      "metadata": {},
125 |      "output_type": "execute_result"
126 |     }
127 |    ],
128 |    "source": [
129 |     "pd.date_range(start='2010-01-01', periods=5, freq='D') # __"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 5,
135 |    "metadata": {},
136 |    "outputs": [
137 |     {
138 |      "data": {
139 |       "text/plain": [
140 |        "DatetimeIndex(['2010-01-01', '2010-01-04', '2010-01-05', '2010-01-06',\n",
141 |        "               '2010-01-07', '2010-01-08', '2010-01-11', '2010-01-12',\n",
142 |        "               '2010-01-13', '2010-01-14', '2010-01-15'],\n",
143 |        "              dtype='datetime64[ns]', freq='B')"
144 |       ]
145 |      },
146 |      "execution_count": 5,
147 |      "metadata": {},
148 |      "output_type": "execute_result"
149 |     }
150 |    ],
151 |    "source": [
152 |     "pd.date_range(start='2010-01-01', end='2010-01-15', freq='B') # __"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "markdown",
157 |    "metadata": {
158 |     "tags": [
159 |      "exercise"
160 |     ]
161 |    },
162 |    "source": [
163 |     "**Exercise:**\n",
164 |     "\n",
165 |     "Create a Series named `sp5` from the `adj_close` column `sp5_df`, using `date` as the\n",
166 |     "index. Make sure you call `sort_index()` to make sure the index is sorted.\n",
167 |     "\n",
168 |     "__Hint:__ The first two parameters of `pd.Series` are `data` and `index`. When both `data` and `index` are `Series`, the `index` of `data` is aligned against the values in `Series`. You can always force positional alignment by converting a `Series` to an `PandasArray` (`pd.Series.array`)\n"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 6,
174 |    "metadata": {},
175 |    "outputs": [],
176 |    "source": [
177 |     "sp5 = pd.Series( # __\n",
178 |     "    sp5_df.adj_close.array, index=sp5_df.date, \n",
179 |     "    name='adj_close').sort_index()"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {
185 |     "tags": [
186 |      "exercise"
187 |     ]
188 |    },
189 |    "source": [
190 |     "**Exercise:**\n",
191 |     "\n",
192 |     "Write 2 different ways to select January 3, 1995 from the `sp5` series. \n",
193 |     "\n",
194 |     "_There are more than 2 ways to do this, but you only need 2!_"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 7,
200 |    "metadata": {},
201 |    "outputs": [],
202 |    "source": [
203 |     "d1a = sp5['19950103'] # __\n",
204 |     "d1b = sp5['1995-01-03'] # __\n",
205 |     "d1c = sp5[dt.datetime(1995, 1, 3)] # __"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "markdown",
210 |    "metadata": {
211 |     "tags": [
212 |      "exercise"
213 |     ]
214 |    },
215 |    "source": [
216 |     "**Exercise:**\n",
217 |     "\n",
218 |     "Select from `sp5` all observations for:\n",
219 |     "* March 1995\n",
220 |     "* Year of 1995"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 8,
226 |    "metadata": {},
227 |    "outputs": [],
228 |    "source": [
229 |     "mar_95 = sp5['1995-03'] # __"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": 9,
235 |    "metadata": {},
236 |    "outputs": [],
237 |    "source": [
238 |     "y_95 = sp5['1995'] # __"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "markdown",
243 |    "metadata": {
244 |     "tags": [
245 |      "exercise"
246 |     ]
247 |    },
248 |    "source": [
249 |     "**Exercise**\n",
250 |     "\n",
251 |     "For `sp5`:\n",
252 |     "\n",
253 |     "Calculate the day-over-day percent change in the values and to assign the result to the variable `sp5_rtn`.\n",
254 |     "\n",
255 |     "Hint: Use `shift`"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": 10,
261 |    "metadata": {},
262 |    "outputs": [],
263 |    "source": [
264 |     "sp5_rtn = sp5 / sp5.shift(1) - 1 # __"
265 |    ]
266 |   },
267 |   {
268 |    "cell_type": "code",
269 |    "execution_count": 11,
270 |    "metadata": {},
271 |    "outputs": [],
272 |    "source": [
273 |     "# alternative solution\n",
274 |     "# sp5.pct_change()"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "markdown",
279 |    "metadata": {
280 |     "tags": [
281 |      "exercise"
282 |     ]
283 |    },
284 |    "source": [
285 |     "**Exercise**\n",
286 |     "\n",
287 |     "Resample the data from daily to monthly to calculate average day-over-day percent change."
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": 12,
293 |    "metadata": {},
294 |    "outputs": [],
295 |    "source": [
296 |     "rtn_mnth = sp5_rtn.resample('M').mean() # __"
297 |    ]
298 |   }
299 |  ],
300 |  "metadata": {
301 |   "celltoolbar": "Tags",
302 |   "kernelspec": {
303 |    "display_name": "Python 3 (ipykernel)",
304 |    "language": "python",
305 |    "name": "python3"
306 |   },
307 |   "language_info": {
308 |    "codemirror_mode": {
309 |     "name": "ipython",
310 |     "version": 3
311 |    },
312 |    "file_extension": ".py",
313 |    "mimetype": "text/x-python",
314 |    "name": "python",
315 |    "nbconvert_exporter": "python",
316 |    "pygments_lexer": "ipython3",
317 |    "version": "3.10.8"
318 |   },
319 |   "toc": {
320 |    "base_numbering": 1,
321 |    "nav_menu": {},
322 |    "number_sections": false,
323 |    "sideBar": true,
324 |    "skip_h1_title": true,
325 |    "title_cell": "Table of Contents",
326 |    "title_sidebar": "Contents",
327 |    "toc_cell": false,
328 |    "toc_position": {},
329 |    "toc_section_display": true,
330 |    "toc_window_display": false
331 |   }
332 |  },
333 |  "nbformat": 4,
334 |  "nbformat_minor": 4
335 | }
336 | 


--------------------------------------------------------------------------------
/04-time-series-exercises.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": [
  7 |      "setup"
  8 |     ]
  9 |    },
 10 |    "source": [
 11 |     "(c) 2016 - present. Enplus Advisors, Inc."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {
 17 |     "tags": [
 18 |      "setup"
 19 |     ]
 20 |    },
 21 |    "source": [
 22 |     "This module uses:\n",
 23 |     "* SP500 returns"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": null,
 29 |    "metadata": {
 30 |     "tags": [
 31 |      "setup"
 32 |     ]
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "import datetime as dt\n",
 37 |     "\n",
 38 |     "import numpy as np\n",
 39 |     "import pandas as pd\n",
 40 |     "\n",
 41 |     "pd.set_option('display.precision', 2)"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": null,
 47 |    "metadata": {
 48 |     "tags": [
 49 |      "setup"
 50 |     ]
 51 |    },
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "sp5_df = pd.read_csv(\n",
 55 |     "    'sp500.csv', usecols=['date', 'adj_close'], \n",
 56 |     "    parse_dates=['date'])"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {
 62 |     "tags": [
 63 |      "exercise"
 64 |     ]
 65 |    },
 66 |    "source": [
 67 |     "**Exercise:**\n",
 68 |     "\n",
 69 |     "Create a `pandas` Timestamp for January 1st, 1993 16:00 (don't worry about timezone)."
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {
 75 |     "tags": [
 76 |      "exercise"
 77 |     ]
 78 |    },
 79 |    "source": [
 80 |     "**Exercise:**\n",
 81 |     "\n",
 82 |     "Generate a an Index of:\n",
 83 |     "* 5 calendar days starting on January 1, 2010.\n",
 84 |     "* All US business days (weekdays) starting on January 1, 2010\n",
 85 |     "  and ending on January 15, 2010.\n",
 86 |     "  \n",
 87 |     "__Hint:__ You can view the help for a function by running `help(function_name)`, e.g. `help(pd.Timestamp)`. Try looking at the help for `pd.date_range`."
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "metadata": {
 93 |     "tags": [
 94 |      "exercise"
 95 |     ]
 96 |    },
 97 |    "source": [
 98 |     "**Exercise:**\n",
 99 |     "\n",
100 |     "Create a Series named `sp5` from the `adj_close` column `sp5_df`, using `date` as the\n",
101 |     "index. Make sure you call `sort_index()` to make sure the index is sorted.\n",
102 |     "\n",
103 |     "__Hint:__ The first two parameters of `pd.Series` are `data` and `index`. When both `data` and `index` are `Series`, the `index` of `data` is aligned against the values in `Series`. You can always force positional alignment by converting a `Series` to an `PandasArray` (`pd.Series.array`)\n"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {
109 |     "tags": [
110 |      "exercise"
111 |     ]
112 |    },
113 |    "source": [
114 |     "**Exercise:**\n",
115 |     "\n",
116 |     "Write 2 different ways to select January 3, 1995 from the `sp5` series. \n",
117 |     "\n",
118 |     "_There are more than 2 ways to do this, but you only need 2!_"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {
124 |     "tags": [
125 |      "exercise"
126 |     ]
127 |    },
128 |    "source": [
129 |     "**Exercise:**\n",
130 |     "\n",
131 |     "Select from `sp5` all observations for:\n",
132 |     "* March 1995\n",
133 |     "* Year of 1995"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "markdown",
138 |    "metadata": {
139 |     "tags": [
140 |      "exercise"
141 |     ]
142 |    },
143 |    "source": [
144 |     "**Exercise**\n",
145 |     "\n",
146 |     "For `sp5`:\n",
147 |     "\n",
148 |     "Calculate the day-over-day percent change in the values and to assign the result to the variable `sp5_rtn`.\n",
149 |     "\n",
150 |     "Hint: Use `shift`"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "metadata": {
156 |     "tags": [
157 |      "exercise"
158 |     ]
159 |    },
160 |    "source": [
161 |     "**Exercise**\n",
162 |     "\n",
163 |     "Resample the data from daily to monthly to calculate average day-over-day percent change."
164 |    ]
165 |   }
166 |  ],
167 |  "metadata": {
168 |   "kernelspec": {
169 |    "display_name": "Python 3 (ipykernel)",
170 |    "language": "python",
171 |    "name": "python3"
172 |   },
173 |   "language_info": {
174 |    "codemirror_mode": {
175 |     "name": "ipython",
176 |     "version": 3
177 |    },
178 |    "file_extension": ".py",
179 |    "mimetype": "text/x-python",
180 |    "name": "python",
181 |    "nbconvert_exporter": "python",
182 |    "pygments_lexer": "ipython3",
183 |    "version": "3.10.8"
184 |   }
185 |  },
186 |  "nbformat": 4,
187 |  "nbformat_minor": 4
188 | }
189 | 


--------------------------------------------------------------------------------
/05-merge-pivot-answers.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": [
  7 |      "setup"
  8 |     ]
  9 |    },
 10 |    "source": [
 11 |     "(c) 2016 - present. Enplus Advisors, Inc."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "metadata": {
 18 |     "tags": [
 19 |      "setup"
 20 |     ]
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import io\n",
 25 |     "\n",
 26 |     "import numpy as np\n",
 27 |     "import pandas as pd\n",
 28 |     "\n",
 29 |     "pd.set_option('display.precision', 2)"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {
 36 |     "tags": [
 37 |      "setup"
 38 |     ]
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "prc = pd.read_csv(\n",
 43 |     "    io.StringIO('ticker,open,date,close\\nAAPL,426.23,2018-01-04,435.23\\nMSFT,42.3,2018-01-04,51.3\\nAAPL,436.23,2018-01-05,\\nMSFT,52.3,2018-01-05,\\n'),\n",
 44 |     "    parse_dates=['date']\n",
 45 |     ")\n",
 46 |     "prc2 = prc.assign(\n",
 47 |     "    date=pd.to_datetime('2018-01-06'),\n",
 48 |     "    close=prc.open + np.random.randn(len(prc.open))\n",
 49 |     ").drop('open', axis=1)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {
 56 |     "tags": [
 57 |      "setup"
 58 |     ]
 59 |    },
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "volume = pd.DataFrame({\n",
 63 |     "    'ticker': ['AAPL', 'MSFT', 'IBM', 'YHOO', 'GOOG'],\n",
 64 |     "    'volume': [1954.73,  335.83,  362.79,  858.18,  629.79]\n",
 65 |     "}).assign(date=pd.to_datetime('2018-01-05'))"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {},
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "prc"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "volume"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {
 89 |     "tags": [
 90 |      "exercise"
 91 |     ]
 92 |    },
 93 |    "source": [
 94 |     "**Exercise**\n",
 95 |     "\n",
 96 |     "Merge `prc` and `volume` on `ticker, date`:\n",
 97 |     "\n",
 98 |     "* Preserving only the records with common `ticker`s and `date`s\n",
 99 |     "* Preserving all the records in `prc`\n",
100 |     "* Preserving the records in both `prc` and `volume`\n",
101 |     "\n",
102 |     "_All of these merges should be performed on `ticker` and `date`_"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "cols = ['ticker', 'date']\n",
112 |     "pd.merge(prc, volume, on=cols)"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": [
121 |     "pd.merge(prc, volume, on=cols, how='left')"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": null,
127 |    "metadata": {},
128 |    "outputs": [],
129 |    "source": [
130 |     "pd.merge(prc, volume, on=cols, how='outer')"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "markdown",
135 |    "metadata": {
136 |     "tags": [
137 |      "exercise"
138 |     ]
139 |    },
140 |    "source": [
141 |     "**Exercise:**\n",
142 |     "\n",
143 |     "Using `pd.concat`, concatenate the rows of `prc` and `prc2`, making\n",
144 |     "a single call to `pd.concat` for each bulleted sub-exercise:\n",
145 |     "\n",
146 |     "* Make sure your result generates a new index like in the previous\n",
147 |     "  exercise\n",
148 |     "* Only include the columns in both `prc` and `prc2` in the result,\n",
149 |     "  additionally generating a new index\n",
150 |     "* Make your result include a `MultiIndex` with a value of `prc`\n",
151 |     "  or `prc2` to indicate which `DataFrame` provided the values"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "metadata": {},
158 |    "outputs": [],
159 |    "source": [
160 |     "pd.concat([prc, prc2], ignore_index=True)"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": null,
166 |    "metadata": {},
167 |    "outputs": [],
168 |    "source": [
169 |     "pd.concat([prc, prc2], join='inner', ignore_index=True)"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": null,
175 |    "metadata": {},
176 |    "outputs": [],
177 |    "source": [
178 |     "pd.concat([prc, prc2], keys=['prc', 'prc2'])"
179 |    ]
180 |   }
181 |  ],
182 |  "metadata": {
183 |   "celltoolbar": "Tags",
184 |   "kernelspec": {
185 |    "display_name": "Python 3 (ipykernel)",
186 |    "language": "python",
187 |    "name": "python3"
188 |   },
189 |   "language_info": {
190 |    "codemirror_mode": {
191 |     "name": "ipython",
192 |     "version": 3
193 |    },
194 |    "file_extension": ".py",
195 |    "mimetype": "text/x-python",
196 |    "name": "python",
197 |    "nbconvert_exporter": "python",
198 |    "pygments_lexer": "ipython3",
199 |    "version": "3.10.8"
200 |   },
201 |   "toc": {
202 |    "base_numbering": 1,
203 |    "nav_menu": {},
204 |    "number_sections": false,
205 |    "sideBar": true,
206 |    "skip_h1_title": true,
207 |    "title_cell": "Table of Contents",
208 |    "title_sidebar": "Contents",
209 |    "toc_cell": false,
210 |    "toc_position": {},
211 |    "toc_section_display": true,
212 |    "toc_window_display": false
213 |   }
214 |  },
215 |  "nbformat": 4,
216 |  "nbformat_minor": 4
217 | }
218 | 


--------------------------------------------------------------------------------
/05-merge-pivot-exercises.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": [
  7 |      "setup"
  8 |     ]
  9 |    },
 10 |    "source": [
 11 |     "(c) 2016 - present. Enplus Advisors, Inc."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "metadata": {
 18 |     "tags": [
 19 |      "setup"
 20 |     ]
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import io\n",
 25 |     "\n",
 26 |     "import numpy as np\n",
 27 |     "import pandas as pd\n",
 28 |     "\n",
 29 |     "pd.set_option('display.precision', 2)"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {
 36 |     "tags": [
 37 |      "setup"
 38 |     ]
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "prc = pd.read_csv(\n",
 43 |     "    io.StringIO('ticker,open,date,close\\nAAPL,426.23,2018-01-04,435.23\\nMSFT,42.3,2018-01-04,51.3\\nAAPL,436.23,2018-01-05,\\nMSFT,52.3,2018-01-05,\\n'),\n",
 44 |     "    parse_dates=['date']\n",
 45 |     ")\n",
 46 |     "prc2 = prc.assign(\n",
 47 |     "    date=pd.to_datetime('2018-01-06'),\n",
 48 |     "    close=prc.open + np.random.randn(len(prc.open))\n",
 49 |     ").drop('open', axis=1)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {
 56 |     "tags": [
 57 |      "setup"
 58 |     ]
 59 |    },
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "volume = pd.DataFrame({\n",
 63 |     "    'ticker': ['AAPL', 'MSFT', 'IBM', 'YHOO', 'GOOG'],\n",
 64 |     "    'volume': [1954.73,  335.83,  362.79,  858.18,  629.79]\n",
 65 |     "}).assign(date=pd.to_datetime('2018-01-05'))"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {
 71 |     "tags": [
 72 |      "exercise"
 73 |     ]
 74 |    },
 75 |    "source": [
 76 |     "**Exercise**\n",
 77 |     "\n",
 78 |     "Merge `prc` and `volume` on `ticker, date`:\n",
 79 |     "\n",
 80 |     "* Preserving only the records with common `ticker`s and `date`s\n",
 81 |     "* Preserving all the records in `prc`\n",
 82 |     "* Preserving the records in both `prc` and `volume`\n",
 83 |     "\n",
 84 |     "_All of these merges should be performed on `ticker` and `date`_"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "metadata": {
 90 |     "tags": [
 91 |      "exercise"
 92 |     ]
 93 |    },
 94 |    "source": [
 95 |     "**Exercise:**\n",
 96 |     "\n",
 97 |     "Using `pd.concat`, concatenate the rows of `prc` and `prc2`, making\n",
 98 |     "a single call to `pd.concat` for each bulleted sub-exercise:\n",
 99 |     "\n",
100 |     "* Make sure your result generates a new index like in the previous\n",
101 |     "  exercise\n",
102 |     "* Only include the columns in both `prc` and `prc2` in the result,\n",
103 |     "  additionally generating a new index\n",
104 |     "* Make your result include a `MultiIndex` with a value of `prc`\n",
105 |     "  or `prc2` to indicate which `DataFrame` provided the values"
106 |    ]
107 |   }
108 |  ],
109 |  "metadata": {
110 |   "kernelspec": {
111 |    "display_name": "Python 3 (ipykernel)",
112 |    "language": "python",
113 |    "name": "python3"
114 |   },
115 |   "language_info": {
116 |    "codemirror_mode": {
117 |     "name": "ipython",
118 |     "version": 3
119 |    },
120 |    "file_extension": ".py",
121 |    "mimetype": "text/x-python",
122 |    "name": "python",
123 |    "nbconvert_exporter": "python",
124 |    "pygments_lexer": "ipython3",
125 |    "version": "3.10.8"
126 |   }
127 |  },
128 |  "nbformat": 4,
129 |  "nbformat_minor": 4
130 | }
131 | 


--------------------------------------------------------------------------------
/06-advanced-merge-reshape-answers.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": [
  7 |      "setup"
  8 |     ]
  9 |    },
 10 |    "source": [
 11 |     "(c) 2016 - present. Enplus Advisors, Inc."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "metadata": {
 18 |     "tags": [
 19 |      "setup"
 20 |     ]
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import numpy as np\n",
 25 |     "import pandas as pd\n",
 26 |     "\n",
 27 |     "pd.set_option('display.precision', 2)"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {
 33 |     "tags": [
 34 |      "setup"
 35 |     ]
 36 |    },
 37 |    "source": [
 38 |     "**Data**\n",
 39 |     "\n",
 40 |     "* `sp5_jan` is SP500 market close prices and trading volume for\n",
 41 |     "  January 2015.\n",
 42 |     "* `sales` is weekly sales data for Acme Widgets Co. for January\n",
 43 |     "  2015 in thousands of widgets sold and \\$ millions in revenue"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {
 50 |     "tags": [
 51 |      "setup"
 52 |     ]
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "np.random.seed(100)\n",
 57 |     "\n",
 58 |     "sp5 = pd.read_csv(\n",
 59 |     "    'sp500.csv', parse_dates=['date'], index_col=['date'],\n",
 60 |     "    usecols=['date', 'close', 'volume'])\\\n",
 61 |     "    .sort_index()\n",
 62 |     "\n",
 63 |     "sp5_jan = sp5.loc['2015-01', :].copy()\n",
 64 |     "sp5_jan['volume'] = sp5_jan['volume'] / 1e6\n",
 65 |     "sales = pd.DataFrame({\n",
 66 |     "    'date': pd.date_range('2015-01-01', '2015-01-31', freq='W'),\n",
 67 |     "})\n",
 68 |     "sales['widgets_sold'] = abs(10 * np.random.randn(sales.shape[0])).round()\n",
 69 |     "sales['revenue'] = sales.widgets_sold * 20"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {
 75 |     "tags": [
 76 |      "exercise"
 77 |     ]
 78 |    },
 79 |    "source": [
 80 |     "**Exercise:**\n",
 81 |     "\n",
 82 |     "Merge `sp5_jan` with `sales`, filling sales data forward. Save\n",
 83 |     "the result as `res_1`. Your result should have the same number of records\n",
 84 |     "as `sp5_jan`."
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": null,
 90 |    "metadata": {},
 91 |    "outputs": [],
 92 |    "source": [
 93 |     "sales_jan = sales.set_index(\"date\").reindex(sp5_jan.index, method=\"ffill\")\n",
 94 |     "res_1 = sp5_jan.join(sales_jan)\n",
 95 |     "\n",
 96 |     "assert res_1.shape[0] == sp5_jan.shape[0]"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {
102 |     "tags": [
103 |      "exercise"
104 |     ]
105 |    },
106 |    "source": [
107 |     "**Exercise:**\n",
108 |     "\n",
109 |     "Convert the output from the previous exercise to long format with\n",
110 |     "`date` as the ID variable, saving the result as `res_2`"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": null,
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "res_2 = pd.melt(res_1.reset_index(), id_vars='date')\n",
120 |     "res_2.head()"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "metadata": {
126 |     "tags": [
127 |      "exercise"
128 |     ]
129 |    },
130 |    "source": [
131 |     "**Exercise**\n",
132 |     "\n",
133 |     "Convert `res_2` back to wide format using the `pivot` method."
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": null,
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": [
142 |     "res_3a = res_2.pivot(index='date', columns='variable')\n",
143 |     "res_3a.head()"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "markdown",
148 |    "metadata": {
149 |     "tags": [
150 |      "exercise"
151 |     ]
152 |    },
153 |    "source": [
154 |     "**Exercise**\n",
155 |     "\n",
156 |     "Convert `res_2` back to wide format using the `unstack` method."
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": null,
162 |    "metadata": {},
163 |    "outputs": [],
164 |    "source": [
165 |     "res_3b = res_2.set_index(['date', 'variable']).unstack()\n",
166 |     "res_3b.head()"
167 |    ]
168 |   }
169 |  ],
170 |  "metadata": {
171 |   "celltoolbar": "Tags",
172 |   "kernelspec": {
173 |    "display_name": "Python 3 (ipykernel)",
174 |    "language": "python",
175 |    "name": "python3"
176 |   },
177 |   "language_info": {
178 |    "codemirror_mode": {
179 |     "name": "ipython",
180 |     "version": 3
181 |    },
182 |    "file_extension": ".py",
183 |    "mimetype": "text/x-python",
184 |    "name": "python",
185 |    "nbconvert_exporter": "python",
186 |    "pygments_lexer": "ipython3",
187 |    "version": "3.10.8"
188 |   },
189 |   "toc": {
190 |    "base_numbering": 1,
191 |    "nav_menu": {},
192 |    "number_sections": false,
193 |    "sideBar": true,
194 |    "skip_h1_title": true,
195 |    "title_cell": "Table of Contents",
196 |    "title_sidebar": "Contents",
197 |    "toc_cell": false,
198 |    "toc_position": {},
199 |    "toc_section_display": true,
200 |    "toc_window_display": false
201 |   }
202 |  },
203 |  "nbformat": 4,
204 |  "nbformat_minor": 4
205 | }
206 | 


--------------------------------------------------------------------------------
/06-advanced-merge-reshape-exercises.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "tags": [
  7 |      "setup"
  8 |     ]
  9 |    },
 10 |    "source": [
 11 |     "(c) 2016 - present. Enplus Advisors, Inc."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "metadata": {
 18 |     "tags": [
 19 |      "setup"
 20 |     ]
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import numpy as np\n",
 25 |     "import pandas as pd\n",
 26 |     "\n",
 27 |     "pd.set_option('display.precision', 2)"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {
 33 |     "tags": [
 34 |      "setup"
 35 |     ]
 36 |    },
 37 |    "source": [
 38 |     "**Data**\n",
 39 |     "\n",
 40 |     "* `sp5_jan` is SP500 market close prices and trading volume for\n",
 41 |     "  January 2015.\n",
 42 |     "* `sales` is weekly sales data for Acme Widgets Co. for January\n",
 43 |     "  2015 in thousands of widgets sold and \\$ millions in revenue"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {
 50 |     "tags": [
 51 |      "setup"
 52 |     ]
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "np.random.seed(100)\n",
 57 |     "\n",
 58 |     "sp5 = pd.read_csv(\n",
 59 |     "    'sp500.csv', parse_dates=['date'], index_col=['date'],\n",
 60 |     "    usecols=['date', 'close', 'volume'])\\\n",
 61 |     "    .sort_index()\n",
 62 |     "\n",
 63 |     "sp5_jan = sp5.loc['2015-01', :].copy()\n",
 64 |     "sp5_jan['volume'] = sp5_jan['volume'] / 1e6\n",
 65 |     "sales = pd.DataFrame({\n",
 66 |     "    'date': pd.date_range('2015-01-01', '2015-01-31', freq='W'),\n",
 67 |     "})\n",
 68 |     "sales['widgets_sold'] = abs(10 * np.random.randn(sales.shape[0])).round()\n",
 69 |     "sales['revenue'] = sales.widgets_sold * 20"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {
 75 |     "tags": [
 76 |      "exercise"
 77 |     ]
 78 |    },
 79 |    "source": [
 80 |     "**Exercise:**\n",
 81 |     "\n",
 82 |     "Merge `sp5_jan` with `sales`, filling sales data forward. Save\n",
 83 |     "the result as `res_1`. Your result should have the same number of records\n",
 84 |     "as `sp5_jan`."
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "metadata": {
 90 |     "tags": [
 91 |      "exercise"
 92 |     ]
 93 |    },
 94 |    "source": [
 95 |     "**Exercise:**\n",
 96 |     "\n",
 97 |     "Convert the output from the previous exercise to long format with\n",
 98 |     "`date` as the ID variable, saving the result as `res_2`"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "markdown",
103 |    "metadata": {
104 |     "tags": [
105 |      "exercise"
106 |     ]
107 |    },
108 |    "source": [
109 |     "**Exercise**\n",
110 |     "\n",
111 |     "Convert `res_2` back to wide format using the `pivot` method."
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {
117 |     "tags": [
118 |      "exercise"
119 |     ]
120 |    },
121 |    "source": [
122 |     "**Exercise**\n",
123 |     "\n",
124 |     "Convert `res_2` back to wide format using the `unstack` method."
125 |    ]
126 |   }
127 |  ],
128 |  "metadata": {
129 |   "kernelspec": {
130 |    "display_name": "Python 3 (ipykernel)",
131 |    "language": "python",
132 |    "name": "python3"
133 |   },
134 |   "language_info": {
135 |    "codemirror_mode": {
136 |     "name": "ipython",
137 |     "version": 3
138 |    },
139 |    "file_extension": ".py",
140 |    "mimetype": "text/x-python",
141 |    "name": "python",
142 |    "nbconvert_exporter": "python",
143 |    "pygments_lexer": "ipython3",
144 |    "version": "3.10.8"
145 |   }
146 |  },
147 |  "nbformat": 4,
148 |  "nbformat_minor": 4
149 | }
150 | 


--------------------------------------------------------------------------------
/06-advanced-merge-reshape-slides.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {
   6 |     "slideshow": {
   7 |      "slide_type": "skip"
   8 |     }
   9 |    },
  10 |    "source": [
  11 |     "(c) 2016 - present. Enplus Advisors, Inc."
  12 |    ]
  13 |   },
  14 |   {
  15 |    "cell_type": "code",
  16 |    "execution_count": 1,
  17 |    "metadata": {
  18 |     "slideshow": {
  19 |      "slide_type": "skip"
  20 |     }
  21 |    },
  22 |    "outputs": [],
  23 |    "source": [
  24 |     "import datetime as dt\n",
  25 |     "\n",
  26 |     "import numpy as np\n",
  27 |     "import pandas as pd\n",
  28 |     "\n",
  29 |     "pd.set_option('display.precision', 2)"
  30 |    ]
  31 |   },
  32 |   {
  33 |    "cell_type": "code",
  34 |    "execution_count": 2,
  35 |    "metadata": {
  36 |     "slideshow": {
  37 |      "slide_type": "skip"
  38 |     }
  39 |    },
  40 |    "outputs": [],
  41 |    "source": [
  42 |     "# Sample dataset w/ 5 days of data\n",
  43 |     "\n",
  44 |     "def make_dataset(n_days=4):\n",
  45 |     "    if n_days not in (4, 5):\n",
  46 |     "        raise ValueError(f\"n_days must be 4 or 5, got {n_days:d}.\")\n",
  47 |     "    \n",
  48 |     "    data = {\n",
  49 |     "        'date': ['2015-12-28', '2015-12-29', '2015-12-30', \n",
  50 |     "                 '2015-12-31', '2016-01-04'],\n",
  51 |     "        'goog': [762.51, 776.60, 771.00, 758.88, 741.84],\n",
  52 |     "        'aapl': [106.82, 108.74, 107.32, 105.26, 105.35]\n",
  53 |     "    }\n",
  54 |     "    \n",
  55 |     "    n_max = len(data['date'])\n",
  56 |     "    slice_ = slice(n_max - n_days, n_max)\n",
  57 |     "    \n",
  58 |     "    sub = {k: v[slice_] for k,v in data.items()}\n",
  59 |     "    dates = sub['date']\n",
  60 |     "    n = len(dates)\n",
  61 |     "    \n",
  62 |     "    # breakpoint()\n",
  63 |     "    rv = pd.DataFrame({\n",
  64 |     "        'ticker': ['GOOG'] * n + ['AAPL'] * n,\n",
  65 |     "        'date': [pd.to_datetime(x) for x in dates] * 2,\n",
  66 |     "        'close': sub['goog'] + sub['aapl']\n",
  67 |     "    })\n",
  68 |     "    \n",
  69 |     "    return rv"
  70 |    ]
  71 |   },
  72 |   {
  73 |    "cell_type": "code",
  74 |    "execution_count": 3,
  75 |    "metadata": {
  76 |     "slideshow": {
  77 |      "slide_type": "skip"
  78 |     }
  79 |    },
  80 |    "outputs": [],
  81 |    "source": [
  82 |     "def make_long():\n",
  83 |     "    day_1 = dt.date(2015, 12, 29)\n",
  84 |     "    day_2 = dt.date(2015, 12, 30)\n",
  85 |     "    \n",
  86 |     "    col_close = 'close'\n",
  87 |     "    col_open = 'open'\n",
  88 |     "    \n",
  89 |     "    cols = ['date', 'ticker', 'variable', 'value']\n",
  90 |     "    \n",
  91 |     "    rv = pd.DataFrame([\n",
  92 |     "      {'ticker': 'GOOG', 'date': day_1, 'variable': col_close, 'value': 776.60},\n",
  93 |     "      {'ticker': 'GOOG', 'date': day_2, 'variable': col_close, 'value': 771.00},\n",
  94 |     "      {'ticker': 'AAPL', 'date': day_1, 'variable': col_open,  'value': 107.01},\n",
  95 |     "      {'ticker': 'AAPL', 'date': day_1, 'variable': col_close, 'value': 105.26}\n",
  96 |     "    ], columns=cols)\n",
  97 |     "    \n",
  98 |     "    return rv"
  99 |    ]
 100 |   },
 101 |   {
 102 |    "cell_type": "markdown",
 103 |    "metadata": {
 104 |     "slideshow": {
 105 |      "slide_type": "slide"
 106 |     }
 107 |    },
 108 |    "source": [
 109 |     "# Programming with Data:<br>Advanced Python and Pandas\n",
 110 |     "\n",
 111 |     "# Advanced Merging & Reshaping"
 112 |    ]
 113 |   },
 114 |   {
 115 |    "cell_type": "markdown",
 116 |    "metadata": {
 117 |     "slideshow": {
 118 |      "slide_type": "slide"
 119 |     }
 120 |    },
 121 |    "source": [
 122 |     "## Grouped and Ordered Data"
 123 |    ]
 124 |   },
 125 |   {
 126 |    "cell_type": "markdown",
 127 |    "metadata": {
 128 |     "slideshow": {
 129 |      "slide_type": "fragment"
 130 |     }
 131 |    },
 132 |    "source": [
 133 |     "Working again with securities market data. In quant finance, this is a\n",
 134 |     "common data type, daily stock prices."
 135 |    ]
 136 |   },
 137 |   {
 138 |    "cell_type": "markdown",
 139 |    "metadata": {
 140 |     "slideshow": {
 141 |      "slide_type": "slide"
 142 |     }
 143 |    },
 144 |    "source": [
 145 |     "### Display the data"
 146 |    ]
 147 |   },
 148 |   {
 149 |    "cell_type": "code",
 150 |    "execution_count": 4,
 151 |    "metadata": {
 152 |     "slideshow": {
 153 |      "slide_type": "fragment"
 154 |     }
 155 |    },
 156 |    "outputs": [
 157 |     {
 158 |      "data": {
 159 |       "text/html": [
 160 |        "<div>\n",
 161 |        "<style scoped>\n",
 162 |        "    .dataframe tbody tr th:only-of-type {\n",
 163 |        "        vertical-align: middle;\n",
 164 |        "    }\n",
 165 |        "\n",
 166 |        "    .dataframe tbody tr th {\n",
 167 |        "        vertical-align: top;\n",
 168 |        "    }\n",
 169 |        "\n",
 170 |        "    .dataframe thead th {\n",
 171 |        "        text-align: right;\n",
 172 |        "    }\n",
 173 |        "</style>\n",
 174 |        "<table border=\"1\" class=\"dataframe\">\n",
 175 |        "  <thead>\n",
 176 |        "    <tr style=\"text-align: right;\">\n",
 177 |        "      <th></th>\n",
 178 |        "      <th>ticker</th>\n",
 179 |        "      <th>date</th>\n",
 180 |        "      <th>close</th>\n",
 181 |        "    </tr>\n",
 182 |        "  </thead>\n",
 183 |        "  <tbody>\n",
 184 |        "    <tr>\n",
 185 |        "      <th>0</th>\n",
 186 |        "      <td>GOOG</td>\n",
 187 |        "      <td>2015-12-29</td>\n",
 188 |        "      <td>776.60</td>\n",
 189 |        "    </tr>\n",
 190 |        "    <tr>\n",
 191 |        "      <th>1</th>\n",
 192 |        "      <td>GOOG</td>\n",
 193 |        "      <td>2015-12-30</td>\n",
 194 |        "      <td>771.00</td>\n",
 195 |        "    </tr>\n",
 196 |        "    <tr>\n",
 197 |        "      <th>2</th>\n",
 198 |        "      <td>GOOG</td>\n",
 199 |        "      <td>2015-12-31</td>\n",
 200 |        "      <td>758.88</td>\n",
 201 |        "    </tr>\n",
 202 |        "    <tr>\n",
 203 |        "      <th>3</th>\n",
 204 |        "      <td>GOOG</td>\n",
 205 |        "      <td>2016-01-04</td>\n",
 206 |        "      <td>741.84</td>\n",
 207 |        "    </tr>\n",
 208 |        "    <tr>\n",
 209 |        "      <th>4</th>\n",
 210 |        "      <td>AAPL</td>\n",
 211 |        "      <td>2015-12-29</td>\n",
 212 |        "      <td>108.74</td>\n",
 213 |        "    </tr>\n",
 214 |        "    <tr>\n",
 215 |        "      <th>5</th>\n",
 216 |        "      <td>AAPL</td>\n",
 217 |        "      <td>2015-12-30</td>\n",
 218 |        "      <td>107.32</td>\n",
 219 |        "    </tr>\n",
 220 |        "    <tr>\n",
 221 |        "      <th>6</th>\n",
 222 |        "      <td>AAPL</td>\n",
 223 |        "      <td>2015-12-31</td>\n",
 224 |        "      <td>105.26</td>\n",
 225 |        "    </tr>\n",
 226 |        "    <tr>\n",
 227 |        "      <th>7</th>\n",
 228 |        "      <td>AAPL</td>\n",
 229 |        "      <td>2016-01-04</td>\n",
 230 |        "      <td>105.35</td>\n",
 231 |        "    </tr>\n",
 232 |        "  </tbody>\n",
 233 |        "</table>\n",
 234 |        "</div>"
 235 |       ],
 236 |       "text/plain": [
 237 |        "  ticker       date   close\n",
 238 |        "0   GOOG 2015-12-29  776.60\n",
 239 |        "1   GOOG 2015-12-30  771.00\n",
 240 |        "2   GOOG 2015-12-31  758.88\n",
 241 |        "3   GOOG 2016-01-04  741.84\n",
 242 |        "4   AAPL 2015-12-29  108.74\n",
 243 |        "5   AAPL 2015-12-30  107.32\n",
 244 |        "6   AAPL 2015-12-31  105.26\n",
 245 |        "7   AAPL 2016-01-04  105.35"
 246 |       ]
 247 |      },
 248 |      "execution_count": 4,
 249 |      "metadata": {},
 250 |      "output_type": "execute_result"
 251 |     }
 252 |    ],
 253 |    "source": [
 254 |     "_dts = ['2015-12-29', '2015-12-30', '2015-12-31', '2016-01-04']\n",
 255 |     "_goog = [776.60, 771.00, 758.88, 741.84]\n",
 256 |     "_aapl = [108.74, 107.32, 105.26, 105.35]\n",
 257 |     "\n",
 258 |     "df = pd.DataFrame({\n",
 259 |     "    'ticker': ['GOOG'] * 4 + ['AAPL'] * 4,\n",
 260 |     "    'date': [pd.to_datetime(x) for x in _dts] * 2,\n",
 261 |     "    'close': _goog + _aapl\n",
 262 |     "})\n",
 263 |     "df\n"
 264 |    ]
 265 |   },
 266 |   {
 267 |    "cell_type": "markdown",
 268 |    "metadata": {
 269 |     "slideshow": {
 270 |      "slide_type": "slide"
 271 |     }
 272 |    },
 273 |    "source": [
 274 |     "### A single, ordered series"
 275 |    ]
 276 |   },
 277 |   {
 278 |    "cell_type": "code",
 279 |    "execution_count": 5,
 280 |    "metadata": {
 281 |     "slideshow": {
 282 |      "slide_type": "fragment"
 283 |     }
 284 |    },
 285 |    "outputs": [
 286 |     {
 287 |      "data": {
 288 |       "text/html": [
 289 |        "<div>\n",
 290 |        "<style scoped>\n",
 291 |        "    .dataframe tbody tr th:only-of-type {\n",
 292 |        "        vertical-align: middle;\n",
 293 |        "    }\n",
 294 |        "\n",
 295 |        "    .dataframe tbody tr th {\n",
 296 |        "        vertical-align: top;\n",
 297 |        "    }\n",
 298 |        "\n",
 299 |        "    .dataframe thead th {\n",
 300 |        "        text-align: right;\n",
 301 |        "    }\n",
 302 |        "</style>\n",
 303 |        "<table border=\"1\" class=\"dataframe\">\n",
 304 |        "  <thead>\n",
 305 |        "    <tr style=\"text-align: right;\">\n",
 306 |        "      <th></th>\n",
 307 |        "      <th>date</th>\n",
 308 |        "      <th>rate</th>\n",
 309 |        "    </tr>\n",
 310 |        "  </thead>\n",
 311 |        "  <tbody>\n",
 312 |        "    <tr>\n",
 313 |        "      <th>0</th>\n",
 314 |        "      <td>2015-12-30</td>\n",
 315 |        "      <td>2.40</td>\n",
 316 |        "    </tr>\n",
 317 |        "    <tr>\n",
 318 |        "      <th>1</th>\n",
 319 |        "      <td>2016-01-04</td>\n",
 320 |        "      <td>2.56</td>\n",
 321 |        "    </tr>\n",
 322 |        "  </tbody>\n",
 323 |        "</table>\n",
 324 |        "</div>"
 325 |       ],
 326 |       "text/plain": [
 327 |        "        date  rate\n",
 328 |        "0 2015-12-30  2.40\n",
 329 |        "1 2016-01-04  2.56"
 330 |       ]
 331 |      },
 332 |      "execution_count": 5,
 333 |      "metadata": {},
 334 |      "output_type": "execute_result"
 335 |     }
 336 |    ],
 337 |    "source": [
 338 |     "tbill = pd.DataFrame({\n",
 339 |     "    'date': [pd.to_datetime(x) for x in ['2015-12-30', '2016-01-04']],\n",
 340 |     "    'rate': [2.40, 2.56]\n",
 341 |     "})\n",
 342 |     "tbill"
 343 |    ]
 344 |   },
 345 |   {
 346 |    "cell_type": "markdown",
 347 |    "metadata": {
 348 |     "slideshow": {
 349 |      "slide_type": "slide"
 350 |     }
 351 |    },
 352 |    "source": [
 353 |     "## Merge data that is grouped and ordered\n",
 354 |     "\n",
 355 |     "* Left panel is irregularly spaced, e.g. business days\n",
 356 |     "* Right time series also irregularly spaced, e.g. a sparse subset of the first\n",
 357 |     "  series"
 358 |    ]
 359 |   },
 360 |   {
 361 |    "cell_type": "markdown",
 362 |    "metadata": {
 363 |     "slideshow": {
 364 |      "slide_type": "slide"
 365 |     }
 366 |    },
 367 |    "source": [
 368 |     "### How not to do the merge"
 369 |    ]
 370 |   },
 371 |   {
 372 |    "cell_type": "markdown",
 373 |    "metadata": {
 374 |     "slideshow": {
 375 |      "slide_type": "fragment"
 376 |     }
 377 |    },
 378 |    "source": [
 379 |     "Don't use plain `pd.merge` and fill forward across groups."
 380 |    ]
 381 |   },
 382 |   {
 383 |    "cell_type": "code",
 384 |    "execution_count": 6,
 385 |    "metadata": {
 386 |     "slideshow": {
 387 |      "slide_type": "fragment"
 388 |     }
 389 |    },
 390 |    "outputs": [
 391 |     {
 392 |      "data": {
 393 |       "text/html": [
 394 |        "<div>\n",
 395 |        "<style scoped>\n",
 396 |        "    .dataframe tbody tr th:only-of-type {\n",
 397 |        "        vertical-align: middle;\n",
 398 |        "    }\n",
 399 |        "\n",
 400 |        "    .dataframe tbody tr th {\n",
 401 |        "        vertical-align: top;\n",
 402 |        "    }\n",
 403 |        "\n",
 404 |        "    .dataframe thead th {\n",
 405 |        "        text-align: right;\n",
 406 |        "    }\n",
 407 |        "</style>\n",
 408 |        "<table border=\"1\" class=\"dataframe\">\n",
 409 |        "  <thead>\n",
 410 |        "    <tr style=\"text-align: right;\">\n",
 411 |        "      <th></th>\n",
 412 |        "      <th>ticker</th>\n",
 413 |        "      <th>date</th>\n",
 414 |        "      <th>close</th>\n",
 415 |        "      <th>rate</th>\n",
 416 |        "    </tr>\n",
 417 |        "  </thead>\n",
 418 |        "  <tbody>\n",
 419 |        "    <tr>\n",
 420 |        "      <th>0</th>\n",
 421 |        "      <td>GOOG</td>\n",
 422 |        "      <td>2015-12-29</td>\n",
 423 |        "      <td>776.60</td>\n",
 424 |        "      <td>NaN</td>\n",
 425 |        "    </tr>\n",
 426 |        "    <tr>\n",
 427 |        "      <th>1</th>\n",
 428 |        "      <td>GOOG</td>\n",
 429 |        "      <td>2015-12-30</td>\n",
 430 |        "      <td>771.00</td>\n",
 431 |        "      <td>2.40</td>\n",
 432 |        "    </tr>\n",
 433 |        "    <tr>\n",
 434 |        "      <th>2</th>\n",
 435 |        "      <td>GOOG</td>\n",
 436 |        "      <td>2015-12-31</td>\n",
 437 |        "      <td>758.88</td>\n",
 438 |        "      <td>2.40</td>\n",
 439 |        "    </tr>\n",
 440 |        "    <tr>\n",
 441 |        "      <th>3</th>\n",
 442 |        "      <td>GOOG</td>\n",
 443 |        "      <td>2016-01-04</td>\n",
 444 |        "      <td>741.84</td>\n",
 445 |        "      <td>2.56</td>\n",
 446 |        "    </tr>\n",
 447 |        "    <tr>\n",
 448 |        "      <th>4</th>\n",
 449 |        "      <td>AAPL</td>\n",
 450 |        "      <td>2015-12-29</td>\n",
 451 |        "      <td>108.74</td>\n",
 452 |        "      <td>2.56</td>\n",
 453 |        "    </tr>\n",
 454 |        "    <tr>\n",
 455 |        "      <th>5</th>\n",
 456 |        "      <td>AAPL</td>\n",
 457 |        "      <td>2015-12-30</td>\n",
 458 |        "      <td>107.32</td>\n",
 459 |        "      <td>2.40</td>\n",
 460 |        "    </tr>\n",
 461 |        "    <tr>\n",
 462 |        "      <th>6</th>\n",
 463 |        "      <td>AAPL</td>\n",
 464 |        "      <td>2015-12-31</td>\n",
 465 |        "      <td>105.26</td>\n",
 466 |        "      <td>2.40</td>\n",
 467 |        "    </tr>\n",
 468 |        "    <tr>\n",
 469 |        "      <th>7</th>\n",
 470 |        "      <td>AAPL</td>\n",
 471 |        "      <td>2016-01-04</td>\n",
 472 |        "      <td>105.35</td>\n",
 473 |        "      <td>2.56</td>\n",
 474 |        "    </tr>\n",
 475 |        "  </tbody>\n",
 476 |        "</table>\n",
 477 |        "</div>"
 478 |       ],
 479 |       "text/plain": [
 480 |        "  ticker       date   close  rate\n",
 481 |        "0   GOOG 2015-12-29  776.60   NaN\n",
 482 |        "1   GOOG 2015-12-30  771.00  2.40\n",
 483 |        "2   GOOG 2015-12-31  758.88  2.40\n",
 484 |        "3   GOOG 2016-01-04  741.84  2.56\n",
 485 |        "4   AAPL 2015-12-29  108.74  2.56\n",
 486 |        "5   AAPL 2015-12-30  107.32  2.40\n",
 487 |        "6   AAPL 2015-12-31  105.26  2.40\n",
 488 |        "7   AAPL 2016-01-04  105.35  2.56"
 489 |       ]
 490 |      },
 491 |      "execution_count": 6,
 492 |      "metadata": {},
 493 |      "output_type": "execute_result"
 494 |     }
 495 |    ],
 496 |    "source": [
 497 |     "pd.merge(df, tbill, on='date', how='left').ffill()"
 498 |    ]
 499 |   },
 500 |   {
 501 |    "cell_type": "markdown",
 502 |    "metadata": {
 503 |     "slideshow": {
 504 |      "slide_type": "slide"
 505 |     }
 506 |    },
 507 |    "source": [
 508 |     "### Merge Ordered V2"
 509 |    ]
 510 |   },
 511 |   {
 512 |    "cell_type": "code",
 513 |    "execution_count": 7,
 514 |    "metadata": {
 515 |     "slideshow": {
 516 |      "slide_type": "fragment"
 517 |     }
 518 |    },
 519 |    "outputs": [
 520 |     {
 521 |      "data": {
 522 |       "text/html": [
 523 |        "<div>\n",
 524 |        "<style scoped>\n",
 525 |        "    .dataframe tbody tr th:only-of-type {\n",
 526 |        "        vertical-align: middle;\n",
 527 |        "    }\n",
 528 |        "\n",
 529 |        "    .dataframe tbody tr th {\n",
 530 |        "        vertical-align: top;\n",
 531 |        "    }\n",
 532 |        "\n",
 533 |        "    .dataframe thead th {\n",
 534 |        "        text-align: right;\n",
 535 |        "    }\n",
 536 |        "</style>\n",
 537 |        "<table border=\"1\" class=\"dataframe\">\n",
 538 |        "  <thead>\n",
 539 |        "    <tr style=\"text-align: right;\">\n",
 540 |        "      <th></th>\n",
 541 |        "      <th>ticker</th>\n",
 542 |        "      <th>date</th>\n",
 543 |        "      <th>close</th>\n",
 544 |        "      <th>rate</th>\n",
 545 |        "    </tr>\n",
 546 |        "  </thead>\n",
 547 |        "  <tbody>\n",
 548 |        "    <tr>\n",
 549 |        "      <th>0</th>\n",
 550 |        "      <td>GOOG</td>\n",
 551 |        "      <td>2015-12-29</td>\n",
 552 |        "      <td>776.60</td>\n",
 553 |        "      <td>NaN</td>\n",
 554 |        "    </tr>\n",
 555 |        "    <tr>\n",
 556 |        "      <th>1</th>\n",
 557 |        "      <td>GOOG</td>\n",
 558 |        "      <td>2015-12-30</td>\n",
 559 |        "      <td>771.00</td>\n",
 560 |        "      <td>2.40</td>\n",
 561 |        "    </tr>\n",
 562 |        "    <tr>\n",
 563 |        "      <th>2</th>\n",
 564 |        "      <td>GOOG</td>\n",
 565 |        "      <td>2015-12-31</td>\n",
 566 |        "      <td>758.88</td>\n",
 567 |        "      <td>2.40</td>\n",
 568 |        "    </tr>\n",
 569 |        "    <tr>\n",
 570 |        "      <th>3</th>\n",
 571 |        "      <td>GOOG</td>\n",
 572 |        "      <td>2016-01-04</td>\n",
 573 |        "      <td>741.84</td>\n",
 574 |        "      <td>2.56</td>\n",
 575 |        "    </tr>\n",
 576 |        "    <tr>\n",
 577 |        "      <th>4</th>\n",
 578 |        "      <td>AAPL</td>\n",
 579 |        "      <td>2015-12-29</td>\n",
 580 |        "      <td>108.74</td>\n",
 581 |        "      <td>NaN</td>\n",
 582 |        "    </tr>\n",
 583 |        "    <tr>\n",
 584 |        "      <th>5</th>\n",
 585 |        "      <td>AAPL</td>\n",
 586 |        "      <td>2015-12-30</td>\n",
 587 |        "      <td>107.32</td>\n",
 588 |        "      <td>2.40</td>\n",
 589 |        "    </tr>\n",
 590 |        "    <tr>\n",
 591 |        "      <th>6</th>\n",
 592 |        "      <td>AAPL</td>\n",
 593 |        "      <td>2015-12-31</td>\n",
 594 |        "      <td>105.26</td>\n",
 595 |        "      <td>2.40</td>\n",
 596 |        "    </tr>\n",
 597 |        "    <tr>\n",
 598 |        "      <th>7</th>\n",
 599 |        "      <td>AAPL</td>\n",
 600 |        "      <td>2016-01-04</td>\n",
 601 |        "      <td>105.35</td>\n",
 602 |        "      <td>2.56</td>\n",
 603 |        "    </tr>\n",
 604 |        "  </tbody>\n",
 605 |        "</table>\n",
 606 |        "</div>"
 607 |       ],
 608 |       "text/plain": [
 609 |        "  ticker       date   close  rate\n",
 610 |        "0   GOOG 2015-12-29  776.60   NaN\n",
 611 |        "1   GOOG 2015-12-30  771.00  2.40\n",
 612 |        "2   GOOG 2015-12-31  758.88  2.40\n",
 613 |        "3   GOOG 2016-01-04  741.84  2.56\n",
 614 |        "4   AAPL 2015-12-29  108.74   NaN\n",
 615 |        "5   AAPL 2015-12-30  107.32  2.40\n",
 616 |        "6   AAPL 2015-12-31  105.26  2.40\n",
 617 |        "7   AAPL 2016-01-04  105.35  2.56"
 618 |       ]
 619 |      },
 620 |      "execution_count": 7,
 621 |      "metadata": {},
 622 |      "output_type": "execute_result"
 623 |     }
 624 |    ],
 625 |    "source": [
 626 |     "mkt = pd.merge_ordered(df, tbill, on='date', left_by='ticker', fill_method='ffill')\n",
 627 |     "mkt"
 628 |    ]
 629 |   },
 630 |   {
 631 |    "cell_type": "markdown",
 632 |    "metadata": {
 633 |     "slideshow": {
 634 |      "slide_type": "slide"
 635 |     }
 636 |    },
 637 |    "source": [
 638 |     "# Reshaping & Pivoting"
 639 |    ]
 640 |   },
 641 |   {
 642 |    "cell_type": "markdown",
 643 |    "metadata": {
 644 |     "slideshow": {
 645 |      "slide_type": "slide"
 646 |     }
 647 |    },
 648 |    "source": [
 649 |     "## Wide and Long Formats\n",
 650 |     "\n",
 651 |     "* Depending on the operation or the data storage location, data stored\n",
 652 |     "  in a \"wide\" or \"long\" format"
 653 |    ]
 654 |   },
 655 |   {
 656 |    "cell_type": "markdown",
 657 |    "metadata": {
 658 |     "slideshow": {
 659 |      "slide_type": "slide"
 660 |     }
 661 |    },
 662 |    "source": [
 663 |     "### Long Format\n",
 664 |     "\n",
 665 |     "* Common format for data in relational databases because allows\n",
 666 |     "  new attributes without a schema change\n",
 667 |     "* \"Long\" format is also called \"stacked\" or \"record\" format in the\n",
 668 |     "  `pandas` documentation. Also called `Entity-Attribute-Value (EAV)`\n",
 669 |     "* \"Sparse\" by design"
 670 |    ]
 671 |   },
 672 |   {
 673 |    "cell_type": "markdown",
 674 |    "metadata": {
 675 |     "slideshow": {
 676 |      "slide_type": "skip"
 677 |     }
 678 |    },
 679 |    "source": [
 680 |     "TODO: Include diagram of Long Format"
 681 |    ]
 682 |   },
 683 |   {
 684 |    "cell_type": "code",
 685 |    "execution_count": 8,
 686 |    "metadata": {
 687 |     "slideshow": {
 688 |      "slide_type": "skip"
 689 |     }
 690 |    },
 691 |    "outputs": [],
 692 |    "source": [
 693 |     "def make_long_aapl():\n",
 694 |     "    day_1 = dt.date(2015, 12, 29)\n",
 695 |     "    day_2 = dt.date(2015, 12, 30)\n",
 696 |     "    \n",
 697 |     "    col_close = 'close'\n",
 698 |     "    col_open = 'open'\n",
 699 |     "    \n",
 700 |     "    cols = ['date', 'ticker', 'variable', 'value']\n",
 701 |     "    \n",
 702 |     "    rv = pd.DataFrame([\n",
 703 |     "      {'ticker': 'AAPL', 'date': day_1, 'variable': col_open,  'value': 106.96},\n",
 704 |     "      {'ticker': 'AAPL', 'date': day_1, 'variable': col_close, 'value': 108.74},\n",
 705 |     "      {'ticker': 'AAPL', 'date': day_2, 'variable': col_open,  'value': 108.58},\n",
 706 |     "      {'ticker': 'AAPL', 'date': day_2, 'variable': col_close, 'value': 107.32}\n",
 707 |     "    ], columns=cols)\n",
 708 |     "    \n",
 709 |     "    return rv"
 710 |    ]
 711 |   },
 712 |   {
 713 |    "cell_type": "markdown",
 714 |    "metadata": {
 715 |     "slideshow": {
 716 |      "slide_type": "slide"
 717 |     }
 718 |    },
 719 |    "source": [
 720 |     "### Simplest Long Format\n",
 721 |     "\n",
 722 |     "* Multiple attributes for a single entity (AAPL)\n",
 723 |     "* Row for every period (12/29 & 12/30) x (number of attributes)"
 724 |    ]
 725 |   },
 726 |   {
 727 |    "cell_type": "code",
 728 |    "execution_count": 9,
 729 |    "metadata": {
 730 |     "slideshow": {
 731 |      "slide_type": "fragment"
 732 |     }
 733 |    },
 734 |    "outputs": [
 735 |     {
 736 |      "data": {
 737 |       "text/html": [
 738 |        "<div>\n",
 739 |        "<style scoped>\n",
 740 |        "    .dataframe tbody tr th:only-of-type {\n",
 741 |        "        vertical-align: middle;\n",
 742 |        "    }\n",
 743 |        "\n",
 744 |        "    .dataframe tbody tr th {\n",
 745 |        "        vertical-align: top;\n",
 746 |        "    }\n",
 747 |        "\n",
 748 |        "    .dataframe thead th {\n",
 749 |        "        text-align: right;\n",
 750 |        "    }\n",
 751 |        "</style>\n",
 752 |        "<table border=\"1\" class=\"dataframe\">\n",
 753 |        "  <thead>\n",
 754 |        "    <tr style=\"text-align: right;\">\n",
 755 |        "      <th></th>\n",
 756 |        "      <th>date</th>\n",
 757 |        "      <th>ticker</th>\n",
 758 |        "      <th>variable</th>\n",
 759 |        "      <th>value</th>\n",
 760 |        "    </tr>\n",
 761 |        "  </thead>\n",
 762 |        "  <tbody>\n",
 763 |        "    <tr>\n",
 764 |        "      <th>0</th>\n",
 765 |        "      <td>2015-12-29</td>\n",
 766 |        "      <td>AAPL</td>\n",
 767 |        "      <td>open</td>\n",
 768 |        "      <td>106.96</td>\n",
 769 |        "    </tr>\n",
 770 |        "    <tr>\n",
 771 |        "      <th>1</th>\n",
 772 |        "      <td>2015-12-29</td>\n",
 773 |        "      <td>AAPL</td>\n",
 774 |        "      <td>close</td>\n",
 775 |        "      <td>108.74</td>\n",
 776 |        "    </tr>\n",
 777 |        "    <tr>\n",
 778 |        "      <th>2</th>\n",
 779 |        "      <td>2015-12-30</td>\n",
 780 |        "      <td>AAPL</td>\n",
 781 |        "      <td>open</td>\n",
 782 |        "      <td>108.58</td>\n",
 783 |        "    </tr>\n",
 784 |        "    <tr>\n",
 785 |        "      <th>3</th>\n",
 786 |        "      <td>2015-12-30</td>\n",
 787 |        "      <td>AAPL</td>\n",
 788 |        "      <td>close</td>\n",
 789 |        "      <td>107.32</td>\n",
 790 |        "    </tr>\n",
 791 |        "  </tbody>\n",
 792 |        "</table>\n",
 793 |        "</div>"
 794 |       ],
 795 |       "text/plain": [
 796 |        "         date ticker variable   value\n",
 797 |        "0  2015-12-29   AAPL     open  106.96\n",
 798 |        "1  2015-12-29   AAPL    close  108.74\n",
 799 |        "2  2015-12-30   AAPL     open  108.58\n",
 800 |        "3  2015-12-30   AAPL    close  107.32"
 801 |       ]
 802 |      },
 803 |      "execution_count": 9,
 804 |      "metadata": {},
 805 |      "output_type": "execute_result"
 806 |     }
 807 |    ],
 808 |    "source": [
 809 |     "aapl_long = make_long_aapl()\n",
 810 |     "aapl_long"
 811 |    ]
 812 |   },
 813 |   {
 814 |    "cell_type": "markdown",
 815 |    "metadata": {
 816 |     "slideshow": {
 817 |      "slide_type": "slide"
 818 |     }
 819 |    },
 820 |    "source": [
 821 |     "### Wide Format\n",
 822 |     "\n",
 823 |     "* Identifiers stored in the index\n",
 824 |     "* Each attribute has its own column\n",
 825 |     "* Common format for use by machine learning algorithms"
 826 |    ]
 827 |   },
 828 |   {
 829 |    "cell_type": "markdown",
 830 |    "metadata": {
 831 |     "slideshow": {
 832 |      "slide_type": "skip"
 833 |     }
 834 |    },
 835 |    "source": [
 836 |     "TODO: Include diagram of wide format"
 837 |    ]
 838 |   },
 839 |   {
 840 |    "cell_type": "markdown",
 841 |    "metadata": {
 842 |     "slideshow": {
 843 |      "slide_type": "slide"
 844 |     }
 845 |    },
 846 |    "source": [
 847 |     "### Long-to-Wide"
 848 |    ]
 849 |   },
 850 |   {
 851 |    "cell_type": "code",
 852 |    "execution_count": 10,
 853 |    "metadata": {
 854 |     "slideshow": {
 855 |      "slide_type": "fragment"
 856 |     }
 857 |    },
 858 |    "outputs": [
 859 |     {
 860 |      "data": {
 861 |       "text/html": [
 862 |        "<div>\n",
 863 |        "<style scoped>\n",
 864 |        "    .dataframe tbody tr th:only-of-type {\n",
 865 |        "        vertical-align: middle;\n",
 866 |        "    }\n",
 867 |        "\n",
 868 |        "    .dataframe tbody tr th {\n",
 869 |        "        vertical-align: top;\n",
 870 |        "    }\n",
 871 |        "\n",
 872 |        "    .dataframe thead th {\n",
 873 |        "        text-align: right;\n",
 874 |        "    }\n",
 875 |        "</style>\n",
 876 |        "<table border=\"1\" class=\"dataframe\">\n",
 877 |        "  <thead>\n",
 878 |        "    <tr style=\"text-align: right;\">\n",
 879 |        "      <th></th>\n",
 880 |        "      <th>date</th>\n",
 881 |        "      <th>ticker</th>\n",
 882 |        "      <th>variable</th>\n",
 883 |        "      <th>value</th>\n",
 884 |        "    </tr>\n",
 885 |        "  </thead>\n",
 886 |        "  <tbody>\n",
 887 |        "    <tr>\n",
 888 |        "      <th>0</th>\n",
 889 |        "      <td>2015-12-29</td>\n",
 890 |        "      <td>AAPL</td>\n",
 891 |        "      <td>open</td>\n",
 892 |        "      <td>106.96</td>\n",
 893 |        "    </tr>\n",
 894 |        "    <tr>\n",
 895 |        "      <th>1</th>\n",
 896 |        "      <td>2015-12-29</td>\n",
 897 |        "      <td>AAPL</td>\n",
 898 |        "      <td>close</td>\n",
 899 |        "      <td>108.74</td>\n",
 900 |        "    </tr>\n",
 901 |        "    <tr>\n",
 902 |        "      <th>2</th>\n",
 903 |        "      <td>2015-12-30</td>\n",
 904 |        "      <td>AAPL</td>\n",
 905 |        "      <td>open</td>\n",
 906 |        "      <td>108.58</td>\n",
 907 |        "    </tr>\n",
 908 |        "    <tr>\n",
 909 |        "      <th>3</th>\n",
 910 |        "      <td>2015-12-30</td>\n",
 911 |        "      <td>AAPL</td>\n",
 912 |        "      <td>close</td>\n",
 913 |        "      <td>107.32</td>\n",
 914 |        "    </tr>\n",
 915 |        "  </tbody>\n",
 916 |        "</table>\n",
 917 |        "</div>"
 918 |       ],
 919 |       "text/plain": [
 920 |        "         date ticker variable   value\n",
 921 |        "0  2015-12-29   AAPL     open  106.96\n",
 922 |        "1  2015-12-29   AAPL    close  108.74\n",
 923 |        "2  2015-12-30   AAPL     open  108.58\n",
 924 |        "3  2015-12-30   AAPL    close  107.32"
 925 |       ]
 926 |      },
 927 |      "execution_count": 10,
 928 |      "metadata": {},
 929 |      "output_type": "execute_result"
 930 |     }
 931 |    ],
 932 |    "source": [
 933 |     "aapl_long"
 934 |    ]
 935 |   },
 936 |   {
 937 |    "cell_type": "code",
 938 |    "execution_count": 11,
 939 |    "metadata": {
 940 |     "slideshow": {
 941 |      "slide_type": "fragment"
 942 |     }
 943 |    },
 944 |    "outputs": [
 945 |     {
 946 |      "data": {
 947 |       "text/html": [
 948 |        "<div>\n",
 949 |        "<style scoped>\n",
 950 |        "    .dataframe tbody tr th:only-of-type {\n",
 951 |        "        vertical-align: middle;\n",
 952 |        "    }\n",
 953 |        "\n",
 954 |        "    .dataframe tbody tr th {\n",
 955 |        "        vertical-align: top;\n",
 956 |        "    }\n",
 957 |        "\n",
 958 |        "    .dataframe thead th {\n",
 959 |        "        text-align: right;\n",
 960 |        "    }\n",
 961 |        "</style>\n",
 962 |        "<table border=\"1\" class=\"dataframe\">\n",
 963 |        "  <thead>\n",
 964 |        "    <tr style=\"text-align: right;\">\n",
 965 |        "      <th>variable</th>\n",
 966 |        "      <th>close</th>\n",
 967 |        "      <th>open</th>\n",
 968 |        "    </tr>\n",
 969 |        "    <tr>\n",
 970 |        "      <th>date</th>\n",
 971 |        "      <th></th>\n",
 972 |        "      <th></th>\n",
 973 |        "    </tr>\n",
 974 |        "  </thead>\n",
 975 |        "  <tbody>\n",
 976 |        "    <tr>\n",
 977 |        "      <th>2015-12-29</th>\n",
 978 |        "      <td>108.74</td>\n",
 979 |        "      <td>106.96</td>\n",
 980 |        "    </tr>\n",
 981 |        "    <tr>\n",
 982 |        "      <th>2015-12-30</th>\n",
 983 |        "      <td>107.32</td>\n",
 984 |        "      <td>108.58</td>\n",
 985 |        "    </tr>\n",
 986 |        "  </tbody>\n",
 987 |        "</table>\n",
 988 |        "</div>"
 989 |       ],
 990 |       "text/plain": [
 991 |        "variable     close    open\n",
 992 |        "date                      \n",
 993 |        "2015-12-29  108.74  106.96\n",
 994 |        "2015-12-30  107.32  108.58"
 995 |       ]
 996 |      },
 997 |      "execution_count": 11,
 998 |      "metadata": {},
 999 |      "output_type": "execute_result"
1000 |     }
1001 |    ],
1002 |    "source": [
1003 |     "aapl_long.pivot(index='date', columns='variable', values='value')"
1004 |    ]
1005 |   },
1006 |   {
1007 |    "cell_type": "markdown",
1008 |    "metadata": {
1009 |     "slideshow": {
1010 |      "slide_type": "slide"
1011 |     }
1012 |    },
1013 |    "source": [
1014 |     "### Long-to-Wide with multiple ID columns"
1015 |    ]
1016 |   },
1017 |   {
1018 |    "cell_type": "code",
1019 |    "execution_count": 12,
1020 |    "metadata": {
1021 |     "slideshow": {
1022 |      "slide_type": "fragment"
1023 |     }
1024 |    },
1025 |    "outputs": [
1026 |     {
1027 |      "data": {
1028 |       "text/html": [
1029 |        "<div>\n",
1030 |        "<style scoped>\n",
1031 |        "    .dataframe tbody tr th:only-of-type {\n",
1032 |        "        vertical-align: middle;\n",
1033 |        "    }\n",
1034 |        "\n",
1035 |        "    .dataframe tbody tr th {\n",
1036 |        "        vertical-align: top;\n",
1037 |        "    }\n",
1038 |        "\n",
1039 |        "    .dataframe thead tr th {\n",
1040 |        "        text-align: left;\n",
1041 |        "    }\n",
1042 |        "\n",
1043 |        "    .dataframe thead tr:last-of-type th {\n",
1044 |        "        text-align: right;\n",
1045 |        "    }\n",
1046 |        "</style>\n",
1047 |        "<table border=\"1\" class=\"dataframe\">\n",
1048 |        "  <thead>\n",
1049 |        "    <tr>\n",
1050 |        "      <th></th>\n",
1051 |        "      <th></th>\n",
1052 |        "      <th colspan=\"2\" halign=\"left\">value</th>\n",
1053 |        "    </tr>\n",
1054 |        "    <tr>\n",
1055 |        "      <th></th>\n",
1056 |        "      <th>variable</th>\n",
1057 |        "      <th>close</th>\n",
1058 |        "      <th>open</th>\n",
1059 |        "    </tr>\n",
1060 |        "    <tr>\n",
1061 |        "      <th>date</th>\n",
1062 |        "      <th>ticker</th>\n",
1063 |        "      <th></th>\n",
1064 |        "      <th></th>\n",
1065 |        "    </tr>\n",
1066 |        "  </thead>\n",
1067 |        "  <tbody>\n",
1068 |        "    <tr>\n",
1069 |        "      <th>2015-12-29</th>\n",
1070 |        "      <th>AAPL</th>\n",
1071 |        "      <td>108.74</td>\n",
1072 |        "      <td>106.96</td>\n",
1073 |        "    </tr>\n",
1074 |        "    <tr>\n",
1075 |        "      <th>2015-12-30</th>\n",
1076 |        "      <th>AAPL</th>\n",
1077 |        "      <td>107.32</td>\n",
1078 |        "      <td>108.58</td>\n",
1079 |        "    </tr>\n",
1080 |        "  </tbody>\n",
1081 |        "</table>\n",
1082 |        "</div>"
1083 |       ],
1084 |       "text/plain": [
1085 |        "                    value        \n",
1086 |        "variable            close    open\n",
1087 |        "date       ticker                \n",
1088 |        "2015-12-29 AAPL    108.74  106.96\n",
1089 |        "2015-12-30 AAPL    107.32  108.58"
1090 |       ]
1091 |      },
1092 |      "execution_count": 12,
1093 |      "metadata": {},
1094 |      "output_type": "execute_result"
1095 |     }
1096 |    ],
1097 |    "source": [
1098 |     "aapl_wide = aapl_long.set_index(['date', 'ticker', 'variable']).unstack()\n",
1099 |     "aapl_wide"
1100 |    ]
1101 |   },
1102 |   {
1103 |    "cell_type": "markdown",
1104 |    "metadata": {
1105 |     "slideshow": {
1106 |      "slide_type": "slide"
1107 |     }
1108 |    },
1109 |    "source": [
1110 |     "### Wide-to-Long"
1111 |    ]
1112 |   },
1113 |   {
1114 |    "cell_type": "code",
1115 |    "execution_count": 13,
1116 |    "metadata": {
1117 |     "slideshow": {
1118 |      "slide_type": "fragment"
1119 |     }
1120 |    },
1121 |    "outputs": [
1122 |     {
1123 |      "data": {
1124 |       "text/html": [
1125 |        "<div>\n",
1126 |        "<style scoped>\n",
1127 |        "    .dataframe tbody tr th:only-of-type {\n",
1128 |        "        vertical-align: middle;\n",
1129 |        "    }\n",
1130 |        "\n",
1131 |        "    .dataframe tbody tr th {\n",
1132 |        "        vertical-align: top;\n",
1133 |        "    }\n",
1134 |        "\n",
1135 |        "    .dataframe thead th {\n",
1136 |        "        text-align: right;\n",
1137 |        "    }\n",
1138 |        "</style>\n",
1139 |        "<table border=\"1\" class=\"dataframe\">\n",
1140 |        "  <thead>\n",
1141 |        "    <tr style=\"text-align: right;\">\n",
1142 |        "      <th></th>\n",
1143 |        "      <th>date</th>\n",
1144 |        "      <th>ticker</th>\n",
1145 |        "      <th>variable</th>\n",
1146 |        "      <th>value</th>\n",
1147 |        "    </tr>\n",
1148 |        "  </thead>\n",
1149 |        "  <tbody>\n",
1150 |        "    <tr>\n",
1151 |        "      <th>0</th>\n",
1152 |        "      <td>2015-12-29</td>\n",
1153 |        "      <td>AAPL</td>\n",
1154 |        "      <td>close</td>\n",
1155 |        "      <td>108.74</td>\n",
1156 |        "    </tr>\n",
1157 |        "    <tr>\n",
1158 |        "      <th>1</th>\n",
1159 |        "      <td>2015-12-29</td>\n",
1160 |        "      <td>AAPL</td>\n",
1161 |        "      <td>open</td>\n",
1162 |        "      <td>106.96</td>\n",
1163 |        "    </tr>\n",
1164 |        "    <tr>\n",
1165 |        "      <th>2</th>\n",
1166 |        "      <td>2015-12-30</td>\n",
1167 |        "      <td>AAPL</td>\n",
1168 |        "      <td>close</td>\n",
1169 |        "      <td>107.32</td>\n",
1170 |        "    </tr>\n",
1171 |        "    <tr>\n",
1172 |        "      <th>3</th>\n",
1173 |        "      <td>2015-12-30</td>\n",
1174 |        "      <td>AAPL</td>\n",
1175 |        "      <td>open</td>\n",
1176 |        "      <td>108.58</td>\n",
1177 |        "    </tr>\n",
1178 |        "  </tbody>\n",
1179 |        "</table>\n",
1180 |        "</div>"
1181 |       ],
1182 |       "text/plain": [
1183 |        "         date ticker variable   value\n",
1184 |        "0  2015-12-29   AAPL    close  108.74\n",
1185 |        "1  2015-12-29   AAPL     open  106.96\n",
1186 |        "2  2015-12-30   AAPL    close  107.32\n",
1187 |        "3  2015-12-30   AAPL     open  108.58"
1188 |       ]
1189 |      },
1190 |      "execution_count": 13,
1191 |      "metadata": {},
1192 |      "output_type": "execute_result"
1193 |     }
1194 |    ],
1195 |    "source": [
1196 |     "aapl_wide.stack().reset_index()"
1197 |    ]
1198 |   },
1199 |   {
1200 |    "cell_type": "markdown",
1201 |    "metadata": {
1202 |     "slideshow": {
1203 |      "slide_type": "slide"
1204 |     }
1205 |    },
1206 |    "source": [
1207 |     "## Pivot Tables"
1208 |    ]
1209 |   },
1210 |   {
1211 |    "cell_type": "code",
1212 |    "execution_count": 14,
1213 |    "metadata": {
1214 |     "slideshow": {
1215 |      "slide_type": "fragment"
1216 |     }
1217 |    },
1218 |    "outputs": [
1219 |     {
1220 |      "data": {
1221 |       "text/html": [
1222 |        "<div>\n",
1223 |        "<style scoped>\n",
1224 |        "    .dataframe tbody tr th:only-of-type {\n",
1225 |        "        vertical-align: middle;\n",
1226 |        "    }\n",
1227 |        "\n",
1228 |        "    .dataframe tbody tr th {\n",
1229 |        "        vertical-align: top;\n",
1230 |        "    }\n",
1231 |        "\n",
1232 |        "    .dataframe thead th {\n",
1233 |        "        text-align: right;\n",
1234 |        "    }\n",
1235 |        "</style>\n",
1236 |        "<table border=\"1\" class=\"dataframe\">\n",
1237 |        "  <thead>\n",
1238 |        "    <tr style=\"text-align: right;\">\n",
1239 |        "      <th></th>\n",
1240 |        "      <th>ticker</th>\n",
1241 |        "      <th>date</th>\n",
1242 |        "      <th>close</th>\n",
1243 |        "      <th>rate</th>\n",
1244 |        "    </tr>\n",
1245 |        "  </thead>\n",
1246 |        "  <tbody>\n",
1247 |        "    <tr>\n",
1248 |        "      <th>0</th>\n",
1249 |        "      <td>GOOG</td>\n",
1250 |        "      <td>2015-12-29</td>\n",
1251 |        "      <td>776.60</td>\n",
1252 |        "      <td>NaN</td>\n",
1253 |        "    </tr>\n",
1254 |        "    <tr>\n",
1255 |        "      <th>1</th>\n",
1256 |        "      <td>GOOG</td>\n",
1257 |        "      <td>2015-12-30</td>\n",
1258 |        "      <td>771.00</td>\n",
1259 |        "      <td>2.40</td>\n",
1260 |        "    </tr>\n",
1261 |        "    <tr>\n",
1262 |        "      <th>2</th>\n",
1263 |        "      <td>GOOG</td>\n",
1264 |        "      <td>2015-12-31</td>\n",
1265 |        "      <td>758.88</td>\n",
1266 |        "      <td>2.40</td>\n",
1267 |        "    </tr>\n",
1268 |        "    <tr>\n",
1269 |        "      <th>3</th>\n",
1270 |        "      <td>GOOG</td>\n",
1271 |        "      <td>2016-01-04</td>\n",
1272 |        "      <td>741.84</td>\n",
1273 |        "      <td>2.56</td>\n",
1274 |        "    </tr>\n",
1275 |        "    <tr>\n",
1276 |        "      <th>4</th>\n",
1277 |        "      <td>AAPL</td>\n",
1278 |        "      <td>2015-12-29</td>\n",
1279 |        "      <td>108.74</td>\n",
1280 |        "      <td>NaN</td>\n",
1281 |        "    </tr>\n",
1282 |        "    <tr>\n",
1283 |        "      <th>5</th>\n",
1284 |        "      <td>AAPL</td>\n",
1285 |        "      <td>2015-12-30</td>\n",
1286 |        "      <td>107.32</td>\n",
1287 |        "      <td>2.40</td>\n",
1288 |        "    </tr>\n",
1289 |        "    <tr>\n",
1290 |        "      <th>6</th>\n",
1291 |        "      <td>AAPL</td>\n",
1292 |        "      <td>2015-12-31</td>\n",
1293 |        "      <td>105.26</td>\n",
1294 |        "      <td>2.40</td>\n",
1295 |        "    </tr>\n",
1296 |        "    <tr>\n",
1297 |        "      <th>7</th>\n",
1298 |        "      <td>AAPL</td>\n",
1299 |        "      <td>2016-01-04</td>\n",
1300 |        "      <td>105.35</td>\n",
1301 |        "      <td>2.56</td>\n",
1302 |        "    </tr>\n",
1303 |        "  </tbody>\n",
1304 |        "</table>\n",
1305 |        "</div>"
1306 |       ],
1307 |       "text/plain": [
1308 |        "  ticker       date   close  rate\n",
1309 |        "0   GOOG 2015-12-29  776.60   NaN\n",
1310 |        "1   GOOG 2015-12-30  771.00  2.40\n",
1311 |        "2   GOOG 2015-12-31  758.88  2.40\n",
1312 |        "3   GOOG 2016-01-04  741.84  2.56\n",
1313 |        "4   AAPL 2015-12-29  108.74   NaN\n",
1314 |        "5   AAPL 2015-12-30  107.32  2.40\n",
1315 |        "6   AAPL 2015-12-31  105.26  2.40\n",
1316 |        "7   AAPL 2016-01-04  105.35  2.56"
1317 |       ]
1318 |      },
1319 |      "execution_count": 14,
1320 |      "metadata": {},
1321 |      "output_type": "execute_result"
1322 |     }
1323 |    ],
1324 |    "source": [
1325 |     "mkt"
1326 |    ]
1327 |   },
1328 |   {
1329 |    "cell_type": "markdown",
1330 |    "metadata": {
1331 |     "slideshow": {
1332 |      "slide_type": "slide"
1333 |     }
1334 |    },
1335 |    "source": [
1336 |     "### Simple Pivot Table"
1337 |    ]
1338 |   },
1339 |   {
1340 |    "cell_type": "code",
1341 |    "execution_count": 15,
1342 |    "metadata": {
1343 |     "slideshow": {
1344 |      "slide_type": "fragment"
1345 |     }
1346 |    },
1347 |    "outputs": [
1348 |     {
1349 |      "data": {
1350 |       "text/html": [
1351 |        "<div>\n",
1352 |        "<style scoped>\n",
1353 |        "    .dataframe tbody tr th:only-of-type {\n",
1354 |        "        vertical-align: middle;\n",
1355 |        "    }\n",
1356 |        "\n",
1357 |        "    .dataframe tbody tr th {\n",
1358 |        "        vertical-align: top;\n",
1359 |        "    }\n",
1360 |        "\n",
1361 |        "    .dataframe thead th {\n",
1362 |        "        text-align: right;\n",
1363 |        "    }\n",
1364 |        "</style>\n",
1365 |        "<table border=\"1\" class=\"dataframe\">\n",
1366 |        "  <thead>\n",
1367 |        "    <tr style=\"text-align: right;\">\n",
1368 |        "      <th></th>\n",
1369 |        "      <th>close</th>\n",
1370 |        "      <th>rate</th>\n",
1371 |        "    </tr>\n",
1372 |        "    <tr>\n",
1373 |        "      <th>ticker</th>\n",
1374 |        "      <th></th>\n",
1375 |        "      <th></th>\n",
1376 |        "    </tr>\n",
1377 |        "  </thead>\n",
1378 |        "  <tbody>\n",
1379 |        "    <tr>\n",
1380 |        "      <th>AAPL</th>\n",
1381 |        "      <td>106.67</td>\n",
1382 |        "      <td>2.45</td>\n",
1383 |        "    </tr>\n",
1384 |        "    <tr>\n",
1385 |        "      <th>GOOG</th>\n",
1386 |        "      <td>762.08</td>\n",
1387 |        "      <td>2.45</td>\n",
1388 |        "    </tr>\n",
1389 |        "  </tbody>\n",
1390 |        "</table>\n",
1391 |        "</div>"
1392 |       ],
1393 |       "text/plain": [
1394 |        "         close  rate\n",
1395 |        "ticker              \n",
1396 |        "AAPL    106.67  2.45\n",
1397 |        "GOOG    762.08  2.45"
1398 |       ]
1399 |      },
1400 |      "execution_count": 15,
1401 |      "metadata": {},
1402 |      "output_type": "execute_result"
1403 |     }
1404 |    ],
1405 |    "source": [
1406 |     "select_cols = ['ticker', 'close', 'rate']\n",
1407 |     "pd.pivot_table(mkt.loc[:, select_cols], index='ticker', aggfunc='mean')"
1408 |    ]
1409 |   }
1410 |  ],
1411 |  "metadata": {
1412 |   "celltoolbar": "Slideshow",
1413 |   "kernelspec": {
1414 |    "display_name": "Python 3 (ipykernel)",
1415 |    "language": "python",
1416 |    "name": "python3"
1417 |   },
1418 |   "language_info": {
1419 |    "codemirror_mode": {
1420 |     "name": "ipython",
1421 |     "version": 3
1422 |    },
1423 |    "file_extension": ".py",
1424 |    "mimetype": "text/x-python",
1425 |    "name": "python",
1426 |    "nbconvert_exporter": "python",
1427 |    "pygments_lexer": "ipython3",
1428 |    "version": "3.10.8"
1429 |   },
1430 |   "toc": {
1431 |    "base_numbering": 1,
1432 |    "nav_menu": {},
1433 |    "number_sections": false,
1434 |    "sideBar": true,
1435 |    "skip_h1_title": true,
1436 |    "title_cell": "Table of Contents",
1437 |    "title_sidebar": "Contents",
1438 |    "toc_cell": false,
1439 |    "toc_position": {
1440 |     "height": "calc(100% - 180px)",
1441 |     "left": "10px",
1442 |     "top": "150px",
1443 |     "width": "351px"
1444 |    },
1445 |    "toc_section_display": true,
1446 |    "toc_window_display": false
1447 |   }
1448 |  },
1449 |  "nbformat": 4,
1450 |  "nbformat_minor": 4
1451 | }
1452 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | (c) 2020 Daniel J. Gerlanc
  2 | 
  3 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
  4 | Public License
  5 | 
  6 | By exercising the Licensed Rights (defined below), You accept and agree
  7 | to be bound by the terms and conditions of this Creative Commons
  8 | Attribution-NonCommercial-ShareAlike 4.0 International Public License
  9 | ("Public License"). To the extent this Public License may be
 10 | interpreted as a contract, You are granted the Licensed Rights in
 11 | consideration of Your acceptance of these terms and conditions, and the
 12 | Licensor grants You such rights in consideration of benefits the
 13 | Licensor receives from making the Licensed Material available under
 14 | these terms and conditions.
 15 | 
 16 | 
 17 | Section 1 -- Definitions.
 18 | 
 19 |   a. Adapted Material means material subject to Copyright and Similar
 20 |      Rights that is derived from or based upon the Licensed Material
 21 |      and in which the Licensed Material is translated, altered,
 22 |      arranged, transformed, or otherwise modified in a manner requiring
 23 |      permission under the Copyright and Similar Rights held by the
 24 |      Licensor. For purposes of this Public License, where the Licensed
 25 |      Material is a musical work, performance, or sound recording,
 26 |      Adapted Material is always produced where the Licensed Material is
 27 |      synched in timed relation with a moving image.
 28 | 
 29 |   b. Adapter's License means the license You apply to Your Copyright
 30 |      and Similar Rights in Your contributions to Adapted Material in
 31 |      accordance with the terms and conditions of this Public License.
 32 | 
 33 |   c. BY-NC-SA Compatible License means a license listed at
 34 |      creativecommons.org/compatiblelicenses, approved by Creative
 35 |      Commons as essentially the equivalent of this Public License.
 36 | 
 37 |   d. Copyright and Similar Rights means copyright and/or similar rights
 38 |      closely related to copyright including, without limitation,
 39 |      performance, broadcast, sound recording, and Sui Generis Database
 40 |      Rights, without regard to how the rights are labeled or
 41 |      categorized. For purposes of this Public License, the rights
 42 |      specified in Section 2(b)(1)-(2) are not Copyright and Similar
 43 |      Rights.
 44 | 
 45 |   e. Effective Technological Measures means those measures that, in the
 46 |      absence of proper authority, may not be circumvented under laws
 47 |      fulfilling obligations under Article 11 of the WIPO Copyright
 48 |      Treaty adopted on December 20, 1996, and/or similar international
 49 |      agreements.
 50 | 
 51 |   f. Exceptions and Limitations means fair use, fair dealing, and/or
 52 |      any other exception or limitation to Copyright and Similar Rights
 53 |      that applies to Your use of the Licensed Material.
 54 | 
 55 |   g. License Elements means the license attributes listed in the name
 56 |      of a Creative Commons Public License. The License Elements of this
 57 |      Public License are Attribution, NonCommercial, and ShareAlike.
 58 | 
 59 |   h. Licensed Material means the artistic or literary work, database,
 60 |      or other material to which the Licensor applied this Public
 61 |      License.
 62 | 
 63 |   i. Licensed Rights means the rights granted to You subject to the
 64 |      terms and conditions of this Public License, which are limited to
 65 |      all Copyright and Similar Rights that apply to Your use of the
 66 |      Licensed Material and that the Licensor has authority to license.
 67 | 
 68 |   j. Licensor means the individual(s) or entity(ies) granting rights
 69 |      under this Public License.
 70 | 
 71 |   k. NonCommercial means not primarily intended for or directed towards
 72 |      commercial advantage or monetary compensation. For purposes of
 73 |      this Public License, the exchange of the Licensed Material for
 74 |      other material subject to Copyright and Similar Rights by digital
 75 |      file-sharing or similar means is NonCommercial provided there is
 76 |      no payment of monetary compensation in connection with the
 77 |      exchange.
 78 | 
 79 |   l. Share means to provide material to the public by any means or
 80 |      process that requires permission under the Licensed Rights, such
 81 |      as reproduction, public display, public performance, distribution,
 82 |      dissemination, communication, or importation, and to make material
 83 |      available to the public including in ways that members of the
 84 |      public may access the material from a place and at a time
 85 |      individually chosen by them.
 86 | 
 87 |   m. Sui Generis Database Rights means rights other than copyright
 88 |      resulting from Directive 96/9/EC of the European Parliament and of
 89 |      the Council of 11 March 1996 on the legal protection of databases,
 90 |      as amended and/or succeeded, as well as other essentially
 91 |      equivalent rights anywhere in the world.
 92 | 
 93 |   n. You means the individual or entity exercising the Licensed Rights
 94 |      under this Public License. Your has a corresponding meaning.
 95 | 
 96 | 
 97 | Section 2 -- Scope.
 98 | 
 99 |   a. License grant.
100 | 
101 |        1. Subject to the terms and conditions of this Public License,
102 |           the Licensor hereby grants You a worldwide, royalty-free,
103 |           non-sublicensable, non-exclusive, irrevocable license to
104 |           exercise the Licensed Rights in the Licensed Material to:
105 | 
106 |             a. reproduce and Share the Licensed Material, in whole or
107 |                in part, for NonCommercial purposes only; and
108 | 
109 |             b. produce, reproduce, and Share Adapted Material for
110 |                NonCommercial purposes only.
111 | 
112 |        2. Exceptions and Limitations. For the avoidance of doubt, where
113 |           Exceptions and Limitations apply to Your use, this Public
114 |           License does not apply, and You do not need to comply with
115 |           its terms and conditions.
116 | 
117 |        3. Term. The term of this Public License is specified in Section
118 |           6(a).
119 | 
120 |        4. Media and formats; technical modifications allowed. The
121 |           Licensor authorizes You to exercise the Licensed Rights in
122 |           all media and formats whether now known or hereafter created,
123 |           and to make technical modifications necessary to do so. The
124 |           Licensor waives and/or agrees not to assert any right or
125 |           authority to forbid You from making technical modifications
126 |           necessary to exercise the Licensed Rights, including
127 |           technical modifications necessary to circumvent Effective
128 |           Technological Measures. For purposes of this Public License,
129 |           simply making modifications authorized by this Section 2(a)
130 |           (4) never produces Adapted Material.
131 | 
132 |        5. Downstream recipients.
133 | 
134 |             a. Offer from the Licensor -- Licensed Material. Every
135 |                recipient of the Licensed Material automatically
136 |                receives an offer from the Licensor to exercise the
137 |                Licensed Rights under the terms and conditions of this
138 |                Public License.
139 | 
140 |             b. Additional offer from the Licensor -- Adapted Material.
141 |                Every recipient of Adapted Material from You
142 |                automatically receives an offer from the Licensor to
143 |                exercise the Licensed Rights in the Adapted Material
144 |                under the conditions of the Adapter's License You apply.
145 | 
146 |             c. No downstream restrictions. You may not offer or impose
147 |                any additional or different terms or conditions on, or
148 |                apply any Effective Technological Measures to, the
149 |                Licensed Material if doing so restricts exercise of the
150 |                Licensed Rights by any recipient of the Licensed
151 |                Material.
152 | 
153 |        6. No endorsement. Nothing in this Public License constitutes or
154 |           may be construed as permission to assert or imply that You
155 |           are, or that Your use of the Licensed Material is, connected
156 |           with, or sponsored, endorsed, or granted official status by,
157 |           the Licensor or others designated to receive attribution as
158 |           provided in Section 3(a)(1)(A)(i).
159 | 
160 |   b. Other rights.
161 | 
162 |        1. Moral rights, such as the right of integrity, are not
163 |           licensed under this Public License, nor are publicity,
164 |           privacy, and/or other similar personality rights; however, to
165 |           the extent possible, the Licensor waives and/or agrees not to
166 |           assert any such rights held by the Licensor to the limited
167 |           extent necessary to allow You to exercise the Licensed
168 |           Rights, but not otherwise.
169 | 
170 |        2. Patent and trademark rights are not licensed under this
171 |           Public License.
172 | 
173 |        3. To the extent possible, the Licensor waives any right to
174 |           collect royalties from You for the exercise of the Licensed
175 |           Rights, whether directly or through a collecting society
176 |           under any voluntary or waivable statutory or compulsory
177 |           licensing scheme. In all other cases the Licensor expressly
178 |           reserves any right to collect such royalties, including when
179 |           the Licensed Material is used other than for NonCommercial
180 |           purposes.
181 | 
182 | 
183 | Section 3 -- License Conditions.
184 | 
185 | Your exercise of the Licensed Rights is expressly made subject to the
186 | following conditions.
187 | 
188 |   a. Attribution.
189 | 
190 |        1. If You Share the Licensed Material (including in modified
191 |           form), You must:
192 | 
193 |             a. retain the following if it is supplied by the Licensor
194 |                with the Licensed Material:
195 | 
196 |                  i. identification of the creator(s) of the Licensed
197 |                     Material and any others designated to receive
198 |                     attribution, in any reasonable manner requested by
199 |                     the Licensor (including by pseudonym if
200 |                     designated);
201 | 
202 |                 ii. a copyright notice;
203 | 
204 |                iii. a notice that refers to this Public License;
205 | 
206 |                 iv. a notice that refers to the disclaimer of
207 |                     warranties;
208 | 
209 |                  v. a URI or hyperlink to the Licensed Material to the
210 |                     extent reasonably practicable;
211 | 
212 |             b. indicate if You modified the Licensed Material and
213 |                retain an indication of any previous modifications; and
214 | 
215 |             c. indicate the Licensed Material is licensed under this
216 |                Public License, and include the text of, or the URI or
217 |                hyperlink to, this Public License.
218 | 
219 |        2. You may satisfy the conditions in Section 3(a)(1) in any
220 |           reasonable manner based on the medium, means, and context in
221 |           which You Share the Licensed Material. For example, it may be
222 |           reasonable to satisfy the conditions by providing a URI or
223 |           hyperlink to a resource that includes the required
224 |           information.
225 |        3. If requested by the Licensor, You must remove any of the
226 |           information required by Section 3(a)(1)(A) to the extent
227 |           reasonably practicable.
228 | 
229 |   b. ShareAlike.
230 | 
231 |      In addition to the conditions in Section 3(a), if You Share
232 |      Adapted Material You produce, the following conditions also apply.
233 | 
234 |        1. The Adapter's License You apply must be a Creative Commons
235 |           license with the same License Elements, this version or
236 |           later, or a BY-NC-SA Compatible License.
237 | 
238 |        2. You must include the text of, or the URI or hyperlink to, the
239 |           Adapter's License You apply. You may satisfy this condition
240 |           in any reasonable manner based on the medium, means, and
241 |           context in which You Share Adapted Material.
242 | 
243 |        3. You may not offer or impose any additional or different terms
244 |           or conditions on, or apply any Effective Technological
245 |           Measures to, Adapted Material that restrict exercise of the
246 |           rights granted under the Adapter's License You apply.
247 | 
248 | 
249 | Section 4 -- Sui Generis Database Rights.
250 | 
251 | Where the Licensed Rights include Sui Generis Database Rights that
252 | apply to Your use of the Licensed Material:
253 | 
254 |   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
255 |      to extract, reuse, reproduce, and Share all or a substantial
256 |      portion of the contents of the database for NonCommercial purposes
257 |      only;
258 | 
259 |   b. if You include all or a substantial portion of the database
260 |      contents in a database in which You have Sui Generis Database
261 |      Rights, then the database in which You have Sui Generis Database
262 |      Rights (but not its individual contents) is Adapted Material,
263 |      including for purposes of Section 3(b); and
264 | 
265 |   c. You must comply with the conditions in Section 3(a) if You Share
266 |      all or a substantial portion of the contents of the database.
267 | 
268 | For the avoidance of doubt, this Section 4 supplements and does not
269 | replace Your obligations under this Public License where the Licensed
270 | Rights include other Copyright and Similar Rights.
271 | 
272 | 
273 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
274 | 
275 |   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
276 |      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
277 |      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
278 |      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
279 |      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
280 |      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
281 |      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
282 |      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
283 |      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
284 |      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
285 | 
286 |   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
287 |      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
288 |      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
289 |      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
290 |      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
291 |      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
292 |      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
293 |      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
294 |      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
295 | 
296 |   c. The disclaimer of warranties and limitation of liability provided
297 |      above shall be interpreted in a manner that, to the extent
298 |      possible, most closely approximates an absolute disclaimer and
299 |      waiver of all liability.
300 | 
301 | 
302 | Section 6 -- Term and Termination.
303 | 
304 |   a. This Public License applies for the term of the Copyright and
305 |      Similar Rights licensed here. However, if You fail to comply with
306 |      this Public License, then Your rights under this Public License
307 |      terminate automatically.
308 | 
309 |   b. Where Your right to use the Licensed Material has terminated under
310 |      Section 6(a), it reinstates:
311 | 
312 |        1. automatically as of the date the violation is cured, provided
313 |           it is cured within 30 days of Your discovery of the
314 |           violation; or
315 | 
316 |        2. upon express reinstatement by the Licensor.
317 | 
318 |      For the avoidance of doubt, this Section 6(b) does not affect any
319 |      right the Licensor may have to seek remedies for Your violations
320 |      of this Public License.
321 | 
322 |   c. For the avoidance of doubt, the Licensor may also offer the
323 |      Licensed Material under separate terms or conditions or stop
324 |      distributing the Licensed Material at any time; however, doing so
325 |      will not terminate this Public License.
326 | 
327 |   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
328 |      License.
329 | 
330 | 
331 | Section 7 -- Other Terms and Conditions.
332 | 
333 |   a. The Licensor shall not be bound by any additional or different
334 |      terms or conditions communicated by You unless expressly agreed.
335 | 
336 |   b. Any arrangements, understandings, or agreements regarding the
337 |      Licensed Material not stated herein are separate from and
338 |      independent of the terms and conditions of this Public License.
339 | 
340 | 
341 | Section 8 -- Interpretation.
342 | 
343 |   a. For the avoidance of doubt, this Public License does not, and
344 |      shall not be interpreted to, reduce, limit, restrict, or impose
345 |      conditions on any use of the Licensed Material that could lawfully
346 |      be made without permission under this Public License.
347 | 
348 |   b. To the extent possible, if any provision of this Public License is
349 |      deemed unenforceable, it shall be automatically reformed to the
350 |      minimum extent necessary to make it enforceable. If the provision
351 |      cannot be reformed, it shall be severed from this Public License
352 |      without affecting the enforceability of the remaining terms and
353 |      conditions.
354 | 
355 |   c. No term or condition of this Public License will be waived and no
356 |      failure to comply consented to unless expressly agreed to by the
357 |      Licensor.
358 | 
359 |   d. Nothing in this Public License constitutes or may be interpreted
360 |      as a limitation upon, or waiver of, any privileges and immunities
361 |      that apply to the Licensor or You, including from the legal
362 |      processes of any jurisdiction or authority.
363 | 
364 | =======================================================================
365 | 
366 | Creative Commons is not a party to its public
367 | licenses. Notwithstanding, Creative Commons may elect to apply one of
368 | its public licenses to material it publishes and in those instances
369 | will be considered the “Licensor.” The text of the Creative Commons
370 | public licenses is dedicated to the public domain under the CC0 Public
371 | Domain Dedication. Except for the limited purpose of indicating that
372 | material is shared under a Creative Commons public license or as
373 | otherwise permitted by the Creative Commons policies published at
374 | creativecommons.org/policies, Creative Commons does not authorize the
375 | use of the trademark "Creative Commons" or any other trademark or logo
376 | of Creative Commons without its prior written consent including,
377 | without limitation, in connection with any unauthorized modifications
378 | to any of its public licenses or any other arrangements,
379 | understandings, or agreements concerning use of licensed material. For
380 | the avoidance of doubt, this paragraph does not form part of the
381 | public licenses.
382 | 
383 | Creative Commons may be contacted at creativecommons.org.
384 | 
385 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Programming with Data: Python and Pandas
  2 | 
  3 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/dgerlanc/programming-with-data/main?urlpath=lab)
  4 | 
  5 | This repository contains the slides, exercises, and answers for *Programming
  6 | with Data: Python and Pandas*. The goal of this tutorial is to teach you,
  7 | someone with experience programming in Python, most of the features available in
  8 | Pandas. The material from this course has been presented at conferences
  9 | including ODSC and Battlefin Discovery Data and online through the O'Reilly
 10 | platform.
 11 | 
 12 | ## Why this course exists
 13 | Whether in R, MATLAB, Stata, or python, modern data analysis, for many
 14 | researchers, requires some kind of programming. The preponderance of tools and
 15 | specialized languages for data analysis suggests that general purpose
 16 | programming languages like C and Java do not readily address the needs of data
 17 | scientists; something more is needed.
 18 | 
 19 | In this workshop, you will learn how to accelerate your data analyses using the
 20 | Python language and Pandas, a library specifically designed for interactive data
 21 | analysis. Pandas is a massive library, so we will focus on its core
 22 | functionality, specifically, loading, filtering, grouping, and transforming
 23 | data. Having completed this workshop, you will understand the fundamentals of
 24 | Pandas, be aware of common pitfalls, and be ready to perform your own analyses.
 25 | 
 26 | ### Prerequisites:
 27 | 
 28 | Workshop assumes that participants have intermediate-level programming ability
 29 | in Python. Participants should know the difference between a `dict`, `list`, and
 30 | `tuple`. Familiarity with control-flow (`if/else/for/while`) and error handling
 31 | (`try/catch`) are required.
 32 | 
 33 | No statistics background is required.
 34 | 
 35 | ## Installation
 36 | 
 37 | ### Binder
 38 | 
 39 | If you have a stable Internet connection and the free Binder service isn't under
 40 | too much load, the easiest way to interactively run the slides and try the
 41 | exercises is to click the Binder badge (make sure you open in a new window).
 42 | Keep in mind that Binder aggresively shuts down idle instances so you'll need to
 43 | refresh the link if you're idle for too long.
 44 | 
 45 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/dgerlanc/programming-with-data/main)
 46 | 
 47 | ### Prerendered Notebooks
 48 | 
 49 | You may view the HTML versions of slides and the answers directly in your browser on Github
 50 | though you will not be able to run them interactively:
 51 | 
 52 | * [Lesson 1 - Series](https://github.com/dgerlanc/programming-with-data/blob/main/01-intro-to-pandas-part-1-slides.ipynb)
 53 | * [Lesson 2 - DataFrames](https://github.com/dgerlanc/programming-with-data/blob/main/02-intro-to-pandas-part-2-slides.ipynb)
 54 | * [Lesson 3 - Split, Apply, Combine](https://github.com/dgerlanc/programming-with-data/blob/main/03-group-apply-slides.ipynb)
 55 | * [Lesson 4 - Time Series](https://github.com/dgerlanc/programming-with-data/blob/main/04-time-series-slides.ipynb)
 56 | * [Lesson 5 - Merge and Concat](https://github.com/dgerlanc/programming-with-data/blob/main/05-merge-pivot-slides.ipynb)
 57 | * [Lesson 6 - Advanced Merge and Reshape](https://github.com/dgerlanc/programming-with-data/blob/main/06-advanced-merge-reshape-slides.ipynb)
 58 | 
 59 | ### Local Installation
 60 | 
 61 | If you're taking the course, want to follow along with the slides and do the
 62 | exercises, and may not have Internet access, download and
 63 | install the Anaconda Python 3 distribution and `conda` package manager
 64 | ahead of time:
 65 | 
 66 | ```
 67 | https://www.anaconda.com/download/
 68 | ```
 69 | 
 70 | Download the latest version of the course materials
 71 | [here](https://github.com/dgerlanc/programming-with-data/archive/main.zip).
 72 | 
 73 | Alternatively, you may clone the course repository using `git`:
 74 | 
 75 | ```
 76 | $ git clone https://github.com/dgerlanc/programming-with-data.git
 77 | ```
 78 | 
 79 | The remainder of the installation requires that you use the command line.
 80 | 
 81 | To complete the course exercises, you must use `conda` to install the
 82 | dependencies specified in the `environment.yml` file in the repository:
 83 | 
 84 | ```
 85 | $ conda env create -f environment.yml
 86 | ```
 87 | 
 88 | This will create an `conda` environment called `progwd` which may be
 89 | "activated" with the following commands:
 90 | 
 91 | * Windows: `activate progwd`
 92 | * Linux and Mac: `conda activate progwd`
 93 | 
 94 | Once you've activated the environment your prompt will probably
 95 | look something like this:
 96 | 
 97 | ```
 98 | (progwd) $
 99 | ```
100 | 
101 | The entire course is designed to use `jupyter` notebooks. Start the
102 | notebook server to get started:
103 | 
104 | ```
105 | (progwd) $ jupyter lab
106 | ```
107 | 
108 | ## Feedback
109 | 
110 | Your feedback on the course helps to improve it for future students.
111 | Please leave feedback [here](https://danielgerlanc.typeform.com/to/RyB6AJ).
112 | 


--------------------------------------------------------------------------------
/assets/data-label-arrays.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/data-label-arrays.png


--------------------------------------------------------------------------------
/assets/enplus-logo-colored.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/enplus-logo-colored.png


--------------------------------------------------------------------------------
/assets/enplus-logo-colored.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 3 | <svg width="100%" height="100%" viewBox="0 0 117 25" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;">
 4 |     <g transform="matrix(1,0,0,1,-41.6421,-44.5151)">
 5 |         <g transform="matrix(1,0,0,1,82.1699,60.7266)">
 6 |             <path d="M0,-8.106L0,-16.211L-8.105,-16.211L-8.105,-8.106L-16.211,-8.106L-16.211,-0.001L-8.105,-0.001L-8.105,8.105L0,8.105L0,-0.001L8.105,-0.001L8.105,-8.106L0,-8.106Z" style="fill:rgb(65,98,173);fill-rule:nonzero;"/>
 7 |         </g>
 8 |         <g transform="matrix(1,0,0,1,0,-694.863)">
 9 |             <rect x="57.853" y="739.379" width="8.106" height="8.105" style="fill:rgb(65,98,173);"/>
10 |         </g>
11 |         <g transform="matrix(1,0,0,1,41.6421,68.8311)">
12 |             <path d="M0,-24.316L0,0L8.105,0L8.105,-12.158L16.211,0L24.317,0L8.105,-24.316L0,-24.316Z" style="fill:rgb(65,98,173);fill-rule:nonzero;"/>
13 |         </g>
14 |         <g transform="matrix(1,0,0,1,105.234,46.8559)">
15 |             <path d="M0,6.689L-3.057,6.689L-3.057,4.002L0.275,4.002L0.275,2.356L-3.057,2.356L-3.057,0.007L-0.027,0.007L0.061,1.184L1.695,1.184L1.695,-1.639L-6.203,-1.639L-6.203,-0.335L-5.155,-0.132L-5.155,6.828L-6.203,7.031L-6.203,8.328L1.708,8.328L1.708,5.513L0.087,5.513L0,6.689Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
16 |         </g>
17 |         <g transform="matrix(1,0,0,1,115.256,52.2412)">
18 |             <path d="M0,-1.689C0,-2.692 -0.214,-3.441 -0.638,-3.914C-1.063,-4.39 -1.661,-4.631 -2.415,-4.631C-2.864,-4.631 -3.277,-4.524 -3.642,-4.312C-3.957,-4.128 -4.232,-3.877 -4.462,-3.564L-4.541,-4.496L-7.546,-4.496L-7.546,-3.192L-6.499,-2.99L-6.499,1.444L-7.445,1.647L-7.445,2.942L-3.603,2.942L-3.603,1.648L-4.4,1.446L-4.4,-2.405C-4.27,-2.582 -4.106,-2.722 -3.914,-2.82C-3.715,-2.921 -3.488,-2.972 -3.239,-2.972C-2.858,-2.972 -2.57,-2.875 -2.385,-2.683C-2.199,-2.49 -2.105,-2.154 -2.105,-1.682L-2.105,1.445L-2.95,1.648L-2.95,2.942L0.946,2.942L0.946,1.647L0,1.444L0,-1.689Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
19 |         </g>
20 |         <g transform="matrix(1,0,0,1,122.236,53.6125)">
21 |             <path d="M0,-1.945C0,-1.32 -0.109,-0.819 -0.326,-0.456C-0.539,-0.099 -0.865,0.075 -1.323,0.075C-1.617,0.075 -1.868,0.022 -2.068,-0.081C-2.261,-0.18 -2.417,-0.326 -2.531,-0.515L-2.531,-3.686C-2.416,-3.894 -2.26,-4.056 -2.066,-4.169C-1.866,-4.284 -1.621,-4.343 -1.336,-4.343C-0.884,-4.343 -0.557,-4.144 -0.337,-3.735C-0.113,-3.317 0,-2.763 0,-2.087L0,-1.945ZM-0.823,-6.003C-1.233,-6.003 -1.599,-5.913 -1.908,-5.737C-2.173,-5.587 -2.409,-5.379 -2.61,-5.119L-2.693,-5.867L-5.678,-5.867L-5.678,-4.564L-4.623,-4.361L-4.623,2.552L-5.576,2.755L-5.576,4.058L-1.579,4.058L-1.579,2.755L-2.531,2.552L-2.531,0.986C-2.337,1.202 -2.111,1.373 -1.857,1.493C-1.553,1.639 -1.2,1.713 -0.81,1.713C0.104,1.713 0.825,1.373 1.335,0.702C1.842,0.036 2.099,-0.854 2.099,-1.945L2.099,-2.087C2.099,-3.248 1.842,-4.2 1.337,-4.914C0.827,-5.636 0.1,-6.003 -0.823,-6.003" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
22 |         </g>
23 |         <g transform="matrix(1,0,0,1,127.897,55.1831)">
24 |             <path d="M0,-10.668L-3.153,-10.668L-3.153,-9.365L-2.099,-9.162L-2.099,-1.498L-3.052,-1.295L-3.052,0L0.953,0L0.953,-1.295L0,-1.498L0,-10.668Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
25 |         </g>
26 |         <g transform="matrix(1,0,0,1,136.467,55.3256)">
27 |             <path d="M0,-7.58L-2.963,-7.58L-2.963,-6.279L-2.098,-6.077L-2.098,-2.206C-2.233,-2.034 -2.409,-1.9 -2.619,-1.806C-2.837,-1.708 -3.101,-1.659 -3.401,-1.659C-3.735,-1.659 -3.979,-1.765 -4.146,-1.982C-4.314,-2.203 -4.4,-2.576 -4.4,-3.09L-4.4,-7.58L-7.303,-7.58L-7.303,-6.28L-6.499,-6.077L-6.499,-3.104C-6.499,-2.052 -6.272,-1.264 -5.828,-0.761C-5.38,-0.256 -4.755,0 -3.969,0C-3.519,0 -3.112,-0.096 -2.76,-0.285C-2.452,-0.45 -2.184,-0.679 -1.961,-0.968L-1.847,-0.142L0.886,-0.142L0.886,-1.437L0,-1.64L0,-7.58Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
28 |         </g>
29 |         <g transform="matrix(1,0,0,1,143.689,51.4152)">
30 |             <path d="M0,0.119C-0.443,-0.235 -1.13,-0.506 -2.041,-0.688C-2.61,-0.798 -2.993,-0.925 -3.178,-1.065C-3.357,-1.2 -3.443,-1.364 -3.443,-1.567C-3.443,-1.782 -3.352,-1.957 -3.164,-2.101C-2.973,-2.247 -2.708,-2.321 -2.377,-2.321C-2.138,-2.321 -1.917,-2.286 -1.719,-2.217C-1.529,-2.15 -1.369,-2.067 -1.244,-1.97L-1.028,-1.03L0.409,-1.03L0.389,-2.911L0.388,-2.941L0.367,-2.959C0.071,-3.203 -0.338,-3.405 -0.849,-3.559C-1.36,-3.713 -1.908,-3.792 -2.479,-3.792C-3.423,-3.792 -4.162,-3.562 -4.676,-3.11C-5.193,-2.656 -5.454,-2.107 -5.454,-1.478C-5.454,-0.866 -5.225,-0.376 -4.771,-0.02C-4.327,0.329 -3.662,0.596 -2.796,0.773C-2.241,0.891 -1.855,1.028 -1.65,1.181C-1.454,1.328 -1.358,1.495 -1.358,1.692C-1.358,1.92 -1.452,2.095 -1.646,2.228C-1.843,2.365 -2.14,2.433 -2.526,2.433C-2.742,2.433 -2.951,2.41 -3.147,2.364C-3.332,2.322 -3.479,2.256 -3.587,2.17L-3.864,1.19L-5.354,1.19L-5.333,2.896L-5.332,2.925L-5.31,2.943C-4.94,3.253 -4.512,3.494 -4.037,3.661C-3.563,3.826 -3.034,3.911 -2.465,3.911C-1.532,3.911 -0.768,3.696 -0.194,3.273C0.385,2.846 0.679,2.286 0.679,1.61C0.679,0.982 0.451,0.479 0,0.119" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
31 |         </g>
32 |         <g transform="matrix(1,0,0,1,101.737,62.3814)">
33 |             <path d="M0,2.663L3.887,2.663L1.974,-2.541L1.933,-2.541L0,2.663ZM-2.65,5.658L-1.926,5.603L1.568,-3.589L2.332,-3.589L5.792,5.603L6.529,5.658L6.529,6.252L4.103,6.252L4.103,5.658L4.961,5.589L4.15,3.386L-0.271,3.386L-1.088,5.589L-0.23,5.658L-0.23,6.252L-2.65,6.252L-2.65,5.658Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
34 |         </g>
35 |         <g transform="matrix(1,0,0,1,110.227,62.4767)">
36 |             <path d="M0,2.703C0,3.586 0.177,4.289 0.53,4.812C0.884,5.334 1.417,5.596 2.129,5.596C2.647,5.596 3.081,5.479 3.43,5.245C3.779,5.01 4.051,4.687 4.245,4.278L4.245,0.817C4.064,0.403 3.804,0.065 3.464,-0.197C3.124,-0.458 2.684,-0.589 2.143,-0.589C1.431,-0.589 0.896,-0.297 0.537,0.286C0.179,0.87 0,1.629 0,2.561L0,2.703ZM4.265,5.102C4.022,5.485 3.708,5.781 3.325,5.988C2.942,6.195 2.492,6.299 1.974,6.299C1.099,6.299 0.416,5.973 -0.078,5.319C-0.571,4.665 -0.818,3.793 -0.818,2.703L-0.818,2.561C-0.818,1.394 -0.571,0.46 -0.078,-0.241C0.416,-0.942 1.104,-1.292 1.987,-1.292C2.496,-1.292 2.938,-1.185 3.312,-0.97C3.686,-0.756 3.997,-0.451 4.245,-0.055L4.245,-2.908L3.143,-2.995L3.143,-3.596L5.056,-3.596L5.056,5.474L6.158,5.562L6.158,6.157L4.333,6.157L4.265,5.102Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
37 |         </g>
38 |         <g transform="matrix(1,0,0,1,119.529,68.0314)">
39 |             <path d="M0,-6.109L-0.913,-6.042L0.913,-1.23L1.149,-0.371L1.189,-0.371L1.446,-1.23L3.244,-6.042L2.332,-6.109L2.332,-6.711L4.792,-6.711L4.792,-6.109L4.076,-6.049L1.494,0.602L0.845,0.602L-1.744,-6.049L-2.46,-6.109L-2.46,-6.711L0,-6.711L0,-6.109Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
40 |         </g>
41 |         <g transform="matrix(1,0,0,1,0,-664.71)">
42 |             <path d="M127.065,724.502L126.261,724.502L126.261,723.367L127.065,723.367L127.065,724.502ZM125.16,732.749L126.261,732.661L126.261,726.72L125.16,726.632L125.16,726.03L127.065,726.03L127.065,732.661L128.167,732.749L128.167,733.343L125.16,733.343L125.16,732.749Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
43 |         </g>
44 |         <g transform="matrix(1,0,0,1,134.832,66.3821)">
45 |             <path d="M0,-2.805L-0.662,-2.805L-0.757,-3.839C-0.959,-4.05 -1.216,-4.215 -1.524,-4.332C-1.833,-4.45 -2.188,-4.508 -2.589,-4.508C-3.179,-4.508 -3.622,-4.38 -3.917,-4.126C-4.212,-3.871 -4.359,-3.573 -4.359,-3.231C-4.359,-2.892 -4.232,-2.613 -3.978,-2.392C-3.723,-2.172 -3.215,-1.978 -2.453,-1.811C-1.584,-1.618 -0.933,-1.352 -0.5,-1.016C-0.067,-0.681 0.149,-0.224 0.149,0.353C0.149,0.961 -0.098,1.452 -0.591,1.829C-1.084,2.205 -1.737,2.393 -2.548,2.393C-3.088,2.393 -3.581,2.318 -4.025,2.168C-4.469,2.016 -4.853,1.803 -5.177,1.528L-5.177,0.054L-4.521,0.054L-4.407,1.096C-4.19,1.312 -3.917,1.469 -3.585,1.566C-3.254,1.663 -2.909,1.711 -2.548,1.711C-1.962,1.711 -1.5,1.582 -1.163,1.326C-0.824,1.069 -0.655,0.758 -0.655,0.392C-0.655,0.05 -0.796,-0.249 -1.078,-0.507C-1.359,-0.763 -1.868,-0.972 -2.602,-1.135C-3.472,-1.324 -4.117,-1.576 -4.539,-1.888C-4.96,-2.202 -5.17,-2.64 -5.17,-3.203C-5.17,-3.762 -4.934,-4.234 -4.46,-4.619C-3.988,-5.005 -3.364,-5.198 -2.589,-5.198C-2.052,-5.198 -1.562,-5.115 -1.118,-4.95C-0.674,-4.786 -0.313,-4.564 -0.033,-4.284L0,-2.805Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
46 |         </g>
47 |         <g transform="matrix(1,0,0,1,137.265,64.8673)">
48 |             <path d="M0,0.225C0,1.072 0.214,1.784 0.642,2.36C1.07,2.937 1.667,3.225 2.434,3.225C3.186,3.225 3.776,2.937 4.204,2.36C4.632,1.784 4.847,1.072 4.847,0.225L4.847,0.002C4.847,-0.832 4.631,-1.539 4.201,-2.121C3.771,-2.703 3.177,-2.993 2.42,-2.993C1.663,-2.993 1.07,-2.703 0.642,-2.121C0.214,-1.539 0,-0.832 0,0.002L0,0.225ZM-0.811,0.002C-0.811,-1.076 -0.516,-1.958 0.075,-2.648C0.665,-3.338 1.447,-3.683 2.42,-3.683C3.397,-3.683 4.182,-3.338 4.772,-2.648C5.363,-1.958 5.658,-1.076 5.658,0.002L5.658,0.225C5.658,1.306 5.364,2.191 4.776,2.878C4.187,3.564 3.407,3.908 2.434,3.908C1.451,3.908 0.665,3.564 0.075,2.878C-0.516,2.191 -0.811,1.306 -0.811,0.225L-0.811,0.002Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
49 |         </g>
50 |         <g transform="matrix(1,0,0,1,145.991,68.4973)">
51 |             <path d="M0,-7.177L0.101,-5.961L0.101,-5.873C0.309,-6.323 0.591,-6.676 0.95,-6.931C1.308,-7.185 1.732,-7.313 2.224,-7.313C2.327,-7.313 2.428,-7.305 2.524,-7.288C2.622,-7.273 2.701,-7.256 2.765,-7.238L2.656,-6.481L2.055,-6.522C1.545,-6.522 1.127,-6.378 0.801,-6.092C0.475,-5.806 0.241,-5.413 0.101,-4.913L0.101,-0.547L1.203,-0.458L1.203,0.136L-1.805,0.136L-1.805,-0.458L-0.703,-0.547L-0.703,-6.488L-1.805,-6.575L-1.805,-7.177L0,-7.177Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
52 |         </g>
53 |         <g transform="matrix(1,0,0,1,155.109,66.3821)">
54 |             <path d="M0,-2.805L-0.663,-2.805L-0.757,-3.839C-0.96,-4.05 -1.216,-4.215 -1.524,-4.332C-1.833,-4.45 -2.188,-4.508 -2.589,-4.508C-3.18,-4.508 -3.622,-4.38 -3.917,-4.126C-4.212,-3.871 -4.36,-3.573 -4.36,-3.231C-4.36,-2.892 -4.232,-2.613 -3.978,-2.392C-3.724,-2.172 -3.215,-1.978 -2.454,-1.811C-1.584,-1.618 -0.933,-1.352 -0.5,-1.016C-0.068,-0.681 0.148,-0.224 0.148,0.353C0.148,0.961 -0.098,1.452 -0.592,1.829C-1.085,2.205 -1.737,2.393 -2.548,2.393C-3.089,2.393 -3.582,2.318 -4.025,2.168C-4.469,2.016 -4.853,1.803 -5.178,1.528L-5.178,0.054L-4.522,0.054L-4.407,1.096C-4.191,1.312 -3.917,1.469 -3.586,1.566C-3.254,1.663 -2.909,1.711 -2.548,1.711C-1.963,1.711 -1.501,1.582 -1.163,1.326C-0.825,1.069 -0.656,0.758 -0.656,0.392C-0.656,0.05 -0.797,-0.249 -1.078,-0.507C-1.36,-0.763 -1.868,-0.972 -2.603,-1.135C-3.472,-1.324 -4.118,-1.576 -4.539,-1.888C-4.96,-2.202 -5.171,-2.64 -5.171,-3.203C-5.171,-3.762 -4.935,-4.234 -4.461,-4.619C-3.988,-5.005 -3.364,-5.198 -2.589,-5.198C-2.053,-5.198 -1.563,-5.115 -1.119,-4.95C-0.675,-4.786 -0.313,-4.564 -0.034,-4.284L0,-2.805Z" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
55 |         </g>
56 |         <g transform="matrix(1,0,0,1,157.223,62.4956)">
57 |             <path d="M0,-0.104C0.149,-0.104 0.229,-0.184 0.229,-0.295L0.229,-0.302C0.229,-0.431 0.139,-0.497 -0.007,-0.497L-0.296,-0.497L-0.296,-0.104L0,-0.104ZM-0.592,-0.758L0.003,-0.758C0.184,-0.758 0.323,-0.706 0.417,-0.612C0.49,-0.539 0.528,-0.438 0.528,-0.32L0.528,-0.313C0.528,-0.097 0.41,0.039 0.24,0.101L0.566,0.581L0.222,0.581L-0.063,0.15L-0.296,0.15L-0.296,0.581L-0.592,0.581L-0.592,-0.758ZM1.04,-0.052L1.04,-0.059C1.04,-0.674 0.563,-1.172 -0.07,-1.172C-0.706,-1.172 -1.187,-0.667 -1.187,-0.052L-1.187,-0.045C-1.187,0.57 -0.71,1.068 -0.077,1.068C0.56,1.068 1.04,0.563 1.04,-0.052M-1.333,-0.045L-1.333,-0.052C-1.333,-0.737 -0.776,-1.311 -0.07,-1.311C0.636,-1.311 1.186,-0.744 1.186,-0.059L1.186,-0.052C1.186,0.633 0.629,1.207 -0.077,1.207C-0.783,1.207 -1.333,0.64 -1.333,-0.045" style="fill:rgb(35,31,32);fill-rule:nonzero;"/>
58 |         </g>
59 |     </g>
60 | </svg>
61 | 


--------------------------------------------------------------------------------
/assets/full-join.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/full-join.png


--------------------------------------------------------------------------------
/assets/inner-join.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/inner-join.png


--------------------------------------------------------------------------------
/assets/lag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/lag.png


--------------------------------------------------------------------------------
/assets/lead.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/lead.png


--------------------------------------------------------------------------------
/assets/left-join.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/left-join.png


--------------------------------------------------------------------------------
/assets/lesson-01-key-value.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/lesson-01-key-value.png


--------------------------------------------------------------------------------
/assets/right-join.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/right-join.png


--------------------------------------------------------------------------------
/assets/rolling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/rolling.png


--------------------------------------------------------------------------------
/assets/split-apply-combine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/split-apply-combine.png


--------------------------------------------------------------------------------
/assets/stock-trading-1600x1200.jpg:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:bbdaf1957fa2eb0f2cd77f989bdcbad40630f947451baa95b19cfc030b484c28
3 | size 144575
4 | 


--------------------------------------------------------------------------------
/assets/vectorized-multiplication.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/vectorized-multiplication.png


--------------------------------------------------------------------------------
/assets/venn-diagrams.sketch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/assets/venn-diagrams.sketch


--------------------------------------------------------------------------------
/binder/environment.yml:
--------------------------------------------------------------------------------
1 | name: progwd
2 | channels:
3 |   - conda-forge
4 | dependencies:
5 |   - python=3.10.8
6 |   - pandas=1.5.2
7 |   - numpy
8 | 


--------------------------------------------------------------------------------
/build.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import subprocess
 4 | from functools import partial
 5 | from multiprocessing import cpu_count, Pool
 6 | from pathlib import Path
 7 | 
 8 | # TODO: Only update files changed since last run
 9 | # TODO: Add conversion tool from existing answer notebooks
10 | # TODO: add test for errors when individual tasks fail
11 | 
12 | 
13 | def answer2exercise(infile, outfile):
14 |     """
15 |     Convert answer notebooks to exercise notebooks
16 | 
17 |     TODO: Fail if output notebook is empty?
18 | 
19 |     """
20 |     cmd = "jupyter nbconvert --config config.py --to notebook --output".split()
21 |     cmd.extend([outfile, infile])
22 |     subprocess.run(cmd)
23 | 
24 | 
25 | def slide2html(infile):
26 |     """
27 |     Convert slide notebooks to reveal.js
28 | 
29 |     """
30 |     cmd = (
31 |         "jupyter nbconvert"
32 |         " --to slides"
33 |         " --reveal-prefix=reveal.js"
34 |         " --SlidesExporter.file_extension=.html"
35 |         " --output-dir build"
36 |     ).split()
37 |     cmd.append(str(infile))
38 |     subprocess.run(cmd)
39 | 
40 | 
41 | def run_slide(infile):
42 |     cmd = "jupyter nbconvert --to notebook --inplace --execute".split()
43 |     cmd.append(str(infile))
44 | 
45 |     devnull = subprocess.DEVNULL
46 |     subprocess.run(cmd, check=True, stdout=devnull, stderr=devnull)
47 | 
48 | 
49 | def main():
50 |     p = Path(".")
51 | 
52 |     slide_fns = sorted(str(x) for x in p.glob("*slides.ipynb"))
53 |     answer_nbs = sorted(str(x) for x in p.glob("*answers.ipynb"))
54 |     exercise_nbs = [x.replace("answer", "exercise") for x in answer_nbs]
55 | 
56 |     n_cpus = cpu_count()
57 |     with Pool(n_cpus) as pool:
58 |         print("Running notebooks")
59 |         pool.map(run_slide, slide_fns)
60 | 
61 |         print("ipynb slides -> reveal.js html")
62 |         pool.map(slide2html, slide_fns)
63 | 
64 |         print("Convert answers to exercises")
65 |         # print(f'{answer_nb} -> {exercise_nb}')
66 |         pool.starmap(answer2exercise, zip(answer_nbs, exercise_nbs))
67 | 
68 |     # copy over assets
69 | 
70 |     # html slides -> pdf
71 | 
72 | 
73 | if __name__ == "__main__":
74 |     main()
75 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | # 1) Run all notebooks and check for errors
 6 | # jupyter nbconvert --inplace --to notebook --execute *-slides.ipynb
 7 | 
 8 | # 2) Convert answers to exercises.
 9 | # jupyter nbconvert --config config.py --to notebook \
10 | #	--output 04-merge-pivot-exercises.ipynb 04-merge-pivot-answers.ipynb
11 | 
12 | # ls *slides.ipynb | parallel jupyter nbconvert --to slides --output-dir build
13 | jupyter nbconvert --to slides --reveal-prefix=reveal.js --output-dir build *slides.ipynb
14 | cp -a assets build/assets
15 | 
16 | # add converting to pdf
17 | # set query string to be ?print-pdf&pdfSeparateFragments=false&pdfMaxPagesPerSlide=1"
18 | 
19 | # add combining for classes
20 | # pdfconcat --output programming-with-data-foundations.pdf 0[1-3]*pdf
21 | 


--------------------------------------------------------------------------------
/build/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in this directory
2 | *
3 | # Except this file
4 | !.gitignore
5 | !reveal.js/
6 | !custom.css
7 | !favicon.ico
8 | 


--------------------------------------------------------------------------------
/build/custom.css:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/build/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dgerlanc/programming-with-data/39e26d54ca885ffb39a10591b7c314db6186e60a/build/favicon.ico


--------------------------------------------------------------------------------
/build/reveal.js/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea/
 2 | *.iml
 3 | *.iws
 4 | *.eml
 5 | out/
 6 | .DS_Store
 7 | .svn
 8 | log/*.log
 9 | tmp/**
10 | node_modules/
11 | package-lock.json
12 | .sass-cache
13 | css/reveal.min.css
14 | js/reveal.min.js
15 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | 
 3 | from nbconvert.preprocessors import Preprocessor
 4 | 
 5 | 
 6 | def _all_tags(nb):
 7 |     rv = set()
 8 |     for cell in nb.cells:
 9 |         tags = cell.metadata.get("tags")
10 |         if tags:
11 |             for tag in tags:
12 |                 rv.add(tag)
13 |     return rv
14 | 
15 | 
16 | class ExercisePreprocessor(Preprocessor):
17 |     keep_tags = {"setup", "exercise"}
18 | 
19 |     def preprocess(self, nb, resources):
20 |         cells = []
21 |         for cell in nb.cells:
22 |             tags = cell.metadata.get("tags", tuple())
23 | 
24 |             if any(tag in self.keep_tags for tag in tags):
25 |                 # must check if cell.cell_type == 'code'
26 |                 #                cell.execution_count = None
27 |                 #                cell.outputs = []
28 |                 cells.append(cell)
29 | 
30 |         nb.cells = cells
31 | 
32 |         nb.metadata.pop("celltoolbar", None)
33 |         nb.metadata.pop("toc", None)
34 | 
35 |         return nb, resources
36 | 
37 | 
38 | c = get_config()  # noqa
39 | c.Exporter.preprocessors = [
40 |     ExercisePreprocessor,
41 |     "nbconvert.preprocessors.TagRemovePreprocessor",
42 |     "nbconvert.preprocessors.ClearOutputPreprocessor",
43 | ]
44 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3.7"
 2 | 
 3 | services:
 4 | 
 5 |   # This works but currently only for a single slide
 6 |   # slides:
 7 |   #   image: astefanutti/decktape:2.11
 8 |   #   command: "-s 2048x1280 http://nginx/03-group-apply-slides.html slides-03.pdf"
 9 |   #   volumes:
10 |   #     - "./build:/slides"
11 |   #   depends_on:
12 |   #     - nginx
13 | 
14 |   nginx:
15 |     image: nginx:1.17
16 |     ports:
17 |       - "80:80"
18 |     restart: always
19 |     volumes:
20 |       - "./build:/usr/share/nginx/html:ro"
21 | 


--------------------------------------------------------------------------------
/environment-dev.yml:
--------------------------------------------------------------------------------
1 | channels:
2 |   - conda-forge
3 | dependencies:
4 |   - conda-merge
5 |   - isort
6 |   - jupyterlab
7 |   - nbdime
8 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | channels:
 2 | - conda-forge
 3 | dependencies:
 4 | - conda-merge
 5 | - isort
 6 | - jupyterlab
 7 | - nbdime
 8 | - numpy
 9 | - pandas=1.5.2
10 | - python=3.10.8
11 | name: progwd
12 | 


--------------------------------------------------------------------------------
/scripts/combine-envs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Merge the binder and dev dependencies into a single environment
4 | 
5 | conda-merge binder/environment.yml environment-dev.yml > environment.yml
6 | 


--------------------------------------------------------------------------------