├── 01-ipywidgets.ipynb
├── 02-ipywidgets-takes-on-the-datasaurus.ipynb
├── 03-bqplot-takes-on-the-datasaurus.ipynb
├── 04-plotly-and-dash-take-on-the-datasaurus.ipynb
├── 05-altair-takes-on-the-datasaurus.ipynb
├── 4fun-altair.ipynb
├── 4fun-bqplot.ipynb
├── 4fun-ipywidgets.ipynb
├── README.md
├── data
│   ├── DatasaurusDozen.tsv
│   └── latimes-agency-totals.csv
├── exercises
│   ├── altaircars.py
│   ├── altairowidgetasaurus.py
│   ├── ipywidgets1.py
│   ├── ipywidgets2.py
│   ├── plotlysaurus.py
│   └── widgetosaurus.py
└── requirements.txt
/01-ipywidgets.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# ipywidgets"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "The interact function (ipywidgets.interact) automatically creates user interface (UI) controls for exploring code and data interactively."
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import ipywidgets"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "def f(x):\n",
33 | " return x"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "f(10)"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "f('a string')"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
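    | "# an int default gives an IntSlider; the next cells show a checkbox (bool) and a FloatSlider (float)\n",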
60 | "ipywidgets.interact(f, x=10);"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "ipywidgets.interact(f, x=True);"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": null,
75 | "metadata": {},
76 | "outputs": [],
77 | "source": [
78 | "ipywidgets.interact(f, x=10.6);"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": null,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "def h(p, q):\n",
88 | " return (p, q)"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {},
95 | "outputs": [],
96 | "source": [
97 | "ipywidgets.interact(h, p=6, q=5);"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {},
104 | "outputs": [],
105 | "source": [
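    | "# pass a widget instance instead of an abbreviation to control min, max, step, and the initial value\n",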
106 | "ipywidgets.interact(h,\n",
107 | " p=6,\n",
108 | " q=ipywidgets.IntSlider(min=0,max=100,step=5,value=10));"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
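    | "# a (min, max, step) tuple is shorthand for the IntSlider above\n",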
117 | "ipywidgets.interact(h, p=6, q=(0,100,5));"
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": null,
123 | "metadata": {},
124 | "outputs": [],
125 | "source": [
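    | "# a list of options produces a dropdown menu\n",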
126 | "ipywidgets.interact(f, x=['apples','oranges']);"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": null,
132 | "metadata": {},
133 | "outputs": [],
134 | "source": [
135 | "ipywidgets.interact(f, x=[('apples',10),('oranges',20)]);"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "from IPython.display import display\n",
145 | "def f(a, b):\n",
146 | " display(a + b)\n",
147 | " return a+b"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": [
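    | "# interactive() builds the widget without displaying it; the cells below display it and inspect its children, kwargs, and result\n",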
156 | "w = ipywidgets.interactive(f, a=10, b=20)"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": null,
162 | "metadata": {},
163 | "outputs": [],
164 | "source": [
165 | "w"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "type(w)"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {},
181 | "outputs": [],
182 | "source": [
183 | "w.children"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": null,
189 | "metadata": {},
190 | "outputs": [],
191 | "source": [
192 | "w.kwargs"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": null,
198 | "metadata": {},
199 | "outputs": [],
200 | "source": [
201 | "w.result"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": null,
207 | "metadata": {},
208 | "outputs": [],
209 | "source": [
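    | "# a deliberately slow function: it lists the numbers below i that are palindromes whose squares are also palindromes\n",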
210 | "def slow_function(i):\n",
211 | " print(int(i),list(x for x in range(int(i)) if\n",
212 | " str(x)==str(x)[::-1] and\n",
213 | " str(x**2)==str(x**2)[::-1]))\n",
214 | " return"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": null,
220 | "metadata": {},
221 | "outputs": [],
222 | "source": [
223 | "ipywidgets.interact(slow_function,i=ipywidgets.FloatSlider(min=1e5, max=1e7, step=1e5));"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "metadata": {},
230 | "outputs": [],
231 | "source": [
232 | "ipywidgets.interact_manual(slow_function,i=ipywidgets.FloatSlider(min=1e5, max=1e7, step=1e5));"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": null,
238 | "metadata": {},
239 | "outputs": [],
240 | "source": [
241 | "ipywidgets.interact(slow_function,i=ipywidgets.FloatSlider(min=1e5, max=1e7, step=1e5,continuous_update=False));"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": null,
247 | "metadata": {},
248 | "outputs": [],
249 | "source": [
250 | "#\n",
251 | "# Exercise: \n",
252 | "# Execute this cell to see what the function does.\n",
253 | "# Then use interact to make an interactive control for this function\n",
254 | "#\n",
255 | "\n",
256 | "def reverse(x):\n",
257 | " return x[::-1]\n",
258 | "\n",
259 | "reverse('I am printed backwards.')\n"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": null,
265 | "metadata": {},
266 | "outputs": [],
267 | "source": [
268 | "# Execute this cell to see an example solution\n",
269 | "%load 'exercises/ipywidgets1.py'"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {},
275 | "source": [
276 | "# Usefulness in exploring data and visualization"
277 | ]
278 | },
279 | {
280 | "cell_type": "markdown",
281 | "metadata": {},
282 | "source": [
283 | "## Let's take the Mean and Standard Deviation\n",
284 | "\n",
285 | "How can we get an intuitive grasp of these concepts?"
286 | ]
287 | },
288 | {
289 | "cell_type": "markdown",
290 | "metadata": {},
291 | "source": [
292 | "Gaussian distribution function:\n",
293 | "\n",
294 | "$$y(x) = \\frac{1}{\\sigma\\sqrt{2\\pi}}\\text{exp}\\left(-\\frac{1}{2}\\frac{(x-\\mu)^2}{\\sigma^2}\\right)$$\n",
295 | "\n",
296 | "* $\\mu$ is the mean\n",
297 | "* $\\sigma$ is the standard deviation"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": null,
303 | "metadata": {},
304 | "outputs": [],
305 | "source": [
306 | "import matplotlib.pyplot as plt\n",
307 | "import numpy as np"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": null,
313 | "metadata": {},
314 | "outputs": [],
315 | "source": [
316 | "mu = 0\n",
317 | "sigma = 1\n",
318 | "\n",
319 | "x = np.linspace(-10,10,200)\n",
320 | "y = 1/sigma/np.sqrt(2*np.pi) * np.exp(-0.5*((x-mu)**2/sigma**2))\n",
321 | "\n",
322 | "plt.figure(figsize=(8,4))\n",
323 | "plt.plot(x,y,'k-')\n",
324 | "plt.grid()\n",
325 | "plt.ylim([-0.01,0.5])\n",
326 | "plt.show()"
327 | ]
328 | },
329 | {
330 | "cell_type": "code",
331 | "execution_count": null,
332 | "metadata": {},
333 | "outputs": [],
334 | "source": [
335 | "def gaussian(mu=0,sigma=1):\n",
336 | " x = np.linspace(-10,10,200)\n",
337 | " y = 1/sigma/np.sqrt(2*np.pi) * np.exp(-0.5*((x-mu)**2/sigma**2))\n",
338 | "\n",
339 | " plt.figure(figsize=(8,4))\n",
340 | " plt.plot(x,y,'k-')\n",
341 | " plt.grid()\n",
342 | " plt.ylim([-0.01,0.5])\n",
343 | " plt.show()"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": null,
349 | "metadata": {},
350 | "outputs": [],
351 | "source": [
352 | "gaussian(0,1)"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": null,
358 | "metadata": {},
359 | "outputs": [],
360 | "source": [
361 | "ipywidgets.interactive(gaussian,mu=(-10,10),sigma=(0.1,10))"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": null,
367 | "metadata": {},
368 | "outputs": [],
369 | "source": [
370 | "def plotgaus(mu=0,sigma=1):\n",
371 | "\n",
372 | " x = np.linspace(-5,5,100)\n",
373 | " y = 1 / (sigma*np.sqrt(2*np.pi)) * np.exp(-1/2 * (x-mu)**2 / sigma**2)\n",
374 | "\n",
375 | " plt.figure(figsize=(6,5))\n",
376 | " plt.plot(x,y,lw=2)\n",
377 | " plt.ylim([0,0.5])\n",
378 | " plt.xlim([-5,5])\n",
379 | " plt.xlabel('x',fontsize=16)\n",
380 | " plt.ylabel('y',fontsize=16)\n",
381 | " plt.xticks(np.linspace(-5,5,11))\n",
382 | "\n",
383 | " plt.text(6,0.45,f'$\\mu = {mu}$',fontsize=16,color='red')\n",
384 | " plt.plot([mu,mu],\n",
385 | " [0,1 / (sigma*np.sqrt(2*np.pi)) * np.exp(-1/2 * (mu-mu)**2 / sigma**2)],\n",
386 | " 'r--')\n",
387 | " plt.plot([mu,mu+sigma*np.sqrt(2*np.log(2))],\n",
388 | " [1 / (sigma*np.sqrt(2*np.pi)) / 2, 1 / (sigma*np.sqrt(2*np.pi)) / 2],\n",
389 | " 'g--')\n",
390 | " plt.text(6,0.4,f'$\\sigma = {sigma}$',fontsize=16)\n",
391 | " plt.text(6,0.35,'{:s}{:.2f}'.format('half-width half max = $\\sigma\\sqrt{2\\ln2} = $',\n",
392 | " sigma*np.sqrt(2*np.log(2))),color='green',fontsize=16)\n",
393 | " plt.show()\n",
394 | " \n",
395 | "ipywidgets.interactive(plotgaus,mu=(-5,5),sigma=(0.1,3))"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": null,
401 | "metadata": {},
402 | "outputs": [],
403 | "source": [
404 | "#\n",
405 | "# Exercise: Here is a function that plots sin(k*x + p)\n",
406 | "# Execute the cell to see an example plot\n",
407 | "# Then use interact to make sliders for the parameters $k$ and $p$, \n",
408 | "# where 0.5 <= k <= 2 and 0 <= p <= 2*pi (hint: use `np.pi` for pi).\n",
409 | "#\n",
410 | "#\n",
411 | "def plot_f(k, p):\n",
412 | " x = np.linspace(0, 4 * np.pi)\n",
413 | " y = np.sin(k*x + p)\n",
414 | " plt.plot(x, y)\n",
415 | " \n",
416 | "plot_f(1.3, 3)"
417 | ]
418 | },
419 | {
420 | "cell_type": "code",
421 | "execution_count": null,
422 | "metadata": {},
423 | "outputs": [],
424 | "source": [
425 | "# Execute this cell to see an example solution\n",
426 | "%load 'exercises/ipywidgets2.py'"
427 | ]
428 | },
429 | {
430 | "cell_type": "code",
431 | "execution_count": null,
432 | "metadata": {},
433 | "outputs": [],
434 | "source": []
435 | }
436 | ],
437 | "metadata": {
438 | "kernelspec": {
439 | "display_name": "Python 3",
440 | "language": "python",
441 | "name": "python3"
442 | },
443 | "language_info": {
444 | "codemirror_mode": {
445 | "name": "ipython",
446 | "version": 3
447 | },
448 | "file_extension": ".py",
449 | "mimetype": "text/x-python",
450 | "name": "python",
451 | "nbconvert_exporter": "python",
452 | "pygments_lexer": "ipython3",
453 | "version": "3.8.8"
454 | },
455 | "toc": {
456 | "base_numbering": 1,
457 | "nav_menu": {},
458 | "number_sections": true,
459 | "sideBar": true,
460 | "skip_h1_title": false,
461 | "title_cell": "Table of Contents",
462 | "title_sidebar": "Contents",
463 | "toc_cell": false,
464 | "toc_position": {},
465 | "toc_section_display": true,
466 | "toc_window_display": false
467 | }
468 | },
469 | "nbformat": 4,
470 | "nbformat_minor": 2
471 | }
472 |
--------------------------------------------------------------------------------
/02-ipywidgets-takes-on-the-datasaurus.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "This dataset is taken from The Datasaurus Dozen, a research study by Autodesk Research that builds on the original Datasaurus provided by Alberto Cairo."
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "import matplotlib.pyplot as plt\n",
18 | "import ipywidgets"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "dinodf = pd.read_csv('data/DatasaurusDozen.tsv', delimiter='\\t')"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "dinodf"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
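    | "# 2x2 layout: x histogram (top left), summary stats (top right), scatter (bottom left), y histogram (bottom right)\n",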
45 | "fig,ax = plt.subplots(2,2,figsize=(8,8))\n",
46 | "\n",
47 | "x = dinodf[dinodf.dataset=='dino'].x\n",
48 | "y = dinodf[dinodf.dataset=='dino'].y\n",
49 | "\n",
50 | "ax[1,0].scatter(x, y)\n",
51 | "\n",
52 | "ax[0,0].hist(x, bins=10, rwidth=0.9)\n",
53 | "ax[1,1].hist(y, bins=10, rwidth=0.9, orientation='horizontal')\n",
54 | "\n",
55 | "ax[0,1].text(0.2,0.8,'x_mean = {:.2f}'.format(x.mean()))\n",
56 | "ax[0,1].text(0.2,0.7,'x_stddev = {:.2f}'.format(x.std()))\n",
57 | "ax[0,1].text(0.2,0.6,'y_mean = {:.2f}'.format(y.mean()))\n",
58 | "ax[0,1].text(0.2,0.5,'y_stddev = {:.2f}'.format(y.std()))\n",
59 | "ax[0,1].text(0.2,0.4,'corr = {:.2f}'.format(x.corr(y)))\n",
60 | "\n",
61 | "fig.show()"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "#\n",
71 | "# Exercise: use ipywidgets to make a menu for cycling through all of the dataset values\n",
72 | "# \n",
73 | "\n"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "# Execute this cell to see one possible solution\n",
83 | "%load 'exercises/widgetosaurus.py'"
84 | ]
85 | }
86 | ],
87 | "metadata": {
88 | "kernelspec": {
89 | "display_name": "Python 3",
90 | "language": "python",
91 | "name": "python3"
92 | },
93 | "language_info": {
94 | "codemirror_mode": {
95 | "name": "ipython",
96 | "version": 3
97 | },
98 | "file_extension": ".py",
99 | "mimetype": "text/x-python",
100 | "name": "python",
101 | "nbconvert_exporter": "python",
102 | "pygments_lexer": "ipython3",
103 | "version": "3.8.8"
104 | },
105 | "toc": {
106 | "base_numbering": 1,
107 | "nav_menu": {},
108 | "number_sections": true,
109 | "sideBar": true,
110 | "skip_h1_title": false,
111 | "title_cell": "Table of Contents",
112 | "title_sidebar": "Contents",
113 | "toc_cell": false,
114 | "toc_position": {},
115 | "toc_section_display": true,
116 | "toc_window_display": false
117 | }
118 | },
119 | "nbformat": 4,
120 | "nbformat_minor": 2
121 | }
122 |
--------------------------------------------------------------------------------
/03-bqplot-takes-on-the-datasaurus.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Note: This notebook is taken from Chakri Cherukuri's [GitHub repo](https://github.com/ChakriCherukuri/mlviz) showing a variety of examples on using bqplot to visualize theoretical and applied machine learning algorithms/models.\n",
8 | "*****"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "In this notebook we'll look at how data visualization can help us understand the relationship between two features (x and y). This is based on The Datasaurus Dozen, a research study by Autodesk Research that builds on the original Datasaurus provided by Alberto Cairo.\n",
16 | "\n",
17 | "Takeaway: Never trust summary statistics alone; always visualize your data\n",
18 | "\n",
19 | "Use the dropdown to select different datasets. Note that the basic stats (first and second moments and the correlation) are almost the same for all the datasets even though the relationships between `x` and `y` are quite different (as is evident from the scatter plot and histograms)."
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": null,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "import pandas as pd\n",
29 | "\n",
30 | "import ipywidgets as widgets\n",
31 | "import bqplot.pyplot as plt"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "pd.options.display.float_format = '{:.2f}'.format"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "datasaurus_data = pd.read_csv('data/DatasaurusDozen.tsv', delimiter='\\t')\n",
50 | "\n",
51 | "# group by dataset and compute first two moments and corr\n",
52 | "dataset_gby = datasaurus_data.groupby('dataset')\n",
53 | "\n",
54 | "# basic stats for all datasets: mean and std\n",
55 | "stats = dataset_gby.agg(['mean', 'std'])\n",
56 | "\n",
57 | "# correlation between x and y for all datasets\n",
58 | "corr = dataset_gby.apply(lambda g: g['x'].corr(g['y']))\n",
59 | "\n",
60 | "# stats for all datasets\n",
61 | "stats_df = pd.concat([stats, corr], axis=1)\n",
62 | "stats_df.columns = ['x_mean', 'x_std', 'y_mean', 'y_std', 'corr']"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "type_dropdown = widgets.Dropdown(description='Dataset', options=list(dataset_gby.groups.keys()))\n",
72 | "stats_table_placeholder = widgets.Box()"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {
79 | "scrolled": true
80 | },
81 | "outputs": [],
82 | "source": [
83 | "scat_fig = plt.figure(animation_duration=1000, preserve_aspect=True)\n",
84 | "scat_fig.layout.width = '800px'\n",
85 | "scat_fig.layout.height = '650px'\n",
86 | "scat = plt.scatter([], [], colors=['deepskyblue'], default_size=40, stroke='black')\n",
87 | "plt.xlabel('X')\n",
88 | "plt.ylabel('Y')\n",
89 | "\n",
90 | "# histograms of X and Y\n",
91 | "hist_layout = widgets.Layout(height='320px', width='400px')\n",
92 | "hist_title_tmpl = 'Histogram of {dataset}[{var}]'\n",
93 | "x_hist_fig = plt.figure(layout=hist_layout)\n",
94 | "x_hist = plt.hist([], colors=['orangered'], bins=30)\n",
95 | "\n",
96 | "y_hist_fig = plt.figure(layout=hist_layout)\n",
97 | "y_hist = plt.hist([], colors=['lightgreen'], bins=30)\n",
98 | "\n",
99 | "for axis in x_hist_fig.axes:\n",
100 | " axis.grid_lines = 'none'\n",
101 | "\n",
102 | "for axis in y_hist_fig.axes:\n",
103 | " axis.grid_lines = 'none'\n",
104 | " \n",
105 | "# create a callback to update the scatter and the stats table\n",
106 | "def update(*args):\n",
107 | " dataset = type_dropdown.value\n",
108 | " scat_fig.title = dataset\n",
109 | " with scat.hold_sync():\n",
110 | " x, y = (dataset_gby\n",
111 | " .get_group(dataset)[['x', 'y']]\n",
112 | " .values.T)\n",
113 | " scat.x, scat.y = x, y\n",
114 | " \n",
115 | " x_hist.sample = x\n",
116 | " x_hist_fig.title = hist_title_tmpl.format(dataset=dataset,\n",
117 | " var='x')\n",
118 | " y_hist.sample = y\n",
119 | " y_hist_fig.title = hist_title_tmpl.format(dataset=dataset,\n",
120 | " var='y')\n",
121 | "\n",
122 | " out = widgets.Output()\n",
123 | " with out:\n",
124 | " display(stats_df.loc[dataset].to_frame())\n",
125 | " stats_table_placeholder.children = [out]\n",
126 | "\n",
127 | "type_dropdown.observe(update, 'value')\n",
128 | "\n",
129 | "# invoke the callback on startup\n",
130 | "update(None)\n",
131 | "\n",
132 | "histograms = widgets.VBox([x_hist_fig, y_hist_fig])\n",
133 | "widgets.VBox([type_dropdown, \n",
134 | " widgets.HBox([scat_fig, \n",
135 | " histograms, \n",
136 | " stats_table_placeholder])])"
137 | ]
138 | }
139 | ],
140 | "metadata": {
141 | "kernelspec": {
142 | "display_name": "Python 3",
143 | "language": "python",
144 | "name": "python3"
145 | },
146 | "language_info": {
147 | "codemirror_mode": {
148 | "name": "ipython",
149 | "version": 3
150 | },
151 | "file_extension": ".py",
152 | "mimetype": "text/x-python",
153 | "name": "python",
154 | "nbconvert_exporter": "python",
155 | "pygments_lexer": "ipython3",
156 | "version": "3.8.8"
157 | },
158 | "toc": {
159 | "base_numbering": 1,
160 | "nav_menu": {},
161 | "number_sections": true,
162 | "sideBar": true,
163 | "skip_h1_title": false,
164 | "title_cell": "Table of Contents",
165 | "title_sidebar": "Contents",
166 | "toc_cell": false,
167 | "toc_position": {},
168 | "toc_section_display": true,
169 | "toc_window_display": false
170 | }
171 | },
172 | "nbformat": 4,
173 | "nbformat_minor": 2
174 | }
175 |
--------------------------------------------------------------------------------
/04-plotly-and-dash-take-on-the-datasaurus.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "varied-syndicate",
7 | "metadata": {
8 | "jupyter": {
9 | "outputs_hidden": true
10 | }
11 | },
12 | "outputs": [],
13 | "source": [
14 | "# need to install jupyter-dash before running this notebook\n",
15 | "# !pip install jupyter-dash"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "id": "dated-stamp",
21 | "metadata": {},
22 | "source": [
23 | "# Plotly and Dash"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "id": "annual-chamber",
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "import pandas as pd\n",
34 | "import plotly.express as px"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "id": "activated-worthy",
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "dinodf = pd.read_csv('data/DatasaurusDozen.tsv',sep='\\t')"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "id": "animated-engineer",
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "px.scatter(dinodf[dinodf.dataset=='dino'], x=\"x\", y=\"y\")"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "id": "asian-kingston",
61 | "metadata": {},
62 | "outputs": [],
63 | "source": [
64 | "px.scatter(dinodf[dinodf.dataset=='dino'], x=\"x\", y=\"y\",\n",
65 | " width=500, height=500)"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "id": "matched-myrtle",
72 | "metadata": {},
73 | "outputs": [],
74 | "source": [
75 | "dinodf"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": null,
81 | "id": "informed-channel",
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "import numpy as np"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "id": "infinite-screen",
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
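    | "# give each dataset name its own integer z value so the datasets can be separated along z in the 3D scatter below\n",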
95 | "dinodf['arbitrary_z'] = 0.\n",
96 | "for i in dinodf.index:\n",
97 | " dinodf.loc[i,'arbitrary_z'] = np.where(dinodf.dataset.unique()==dinodf.loc[i,'dataset'])[0][0]"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "id": "respiratory-paragraph",
104 | "metadata": {},
105 | "outputs": [],
106 | "source": [
107 | "px.scatter_3d(dinodf, x=\"x\", y=\"y\", z='arbitrary_z', color='dataset',\n",
108 | " width=1000, height=1000)"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "id": "tamil-department",
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "import plotly.graph_objects as go"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "id": "intended-prairie",
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "fig = go.Figure()\n",
129 | "\n",
130 | "localdf = dinodf[dinodf.dataset=='dino']\n",
131 | "\n",
132 | "fig.add_trace(go.Scatter(\n",
133 | " x=localdf.x,\n",
134 | " y=localdf.y\n",
135 | "))"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "id": "nutritional-respondent",
142 | "metadata": {},
143 | "outputs": [],
144 | "source": [
145 | "fig = go.Figure()\n",
146 | "\n",
147 | "localdf = dinodf[dinodf.dataset=='dino']\n",
148 | "\n",
149 | "fig.add_trace(go.Scatter(\n",
150 | " x=localdf.x,\n",
151 | " y=localdf.y,\n",
152 | " mode='markers',\n",
153 | " marker=dict(\n",
154 | " size=16,\n",
155 | " color=np.random.randn(500), #set color equal to a variable\n",
156 | " colorscale='rainbow', # one of plotly colorscales\n",
157 | " showscale=True\n",
158 | " )\n",
159 | "))"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "id": "spectacular-yugoslavia",
166 | "metadata": {},
167 | "outputs": [],
168 | "source": [
169 | "#\n",
170 | "# Exercise: Use the ipywidget library to make a dropdown menu that can plot the above for different dataset values\n",
171 | "#\n",
172 | "\n"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "id": "familiar-relaxation",
179 | "metadata": {},
180 | "outputs": [],
181 | "source": [
182 | "# For one solution, execute this cell\n",
183 | "%load 'exercises/plotlysaurus.py'"
184 | ]
185 | },
186 | {
187 | "cell_type": "markdown",
188 | "id": "latter-chicken",
189 | "metadata": {},
190 | "source": [
191 | "# JupyterDash\n",
192 | "The `jupyter-dash` package makes it easy to develop Plotly Dash apps from the Jupyter Notebook and JupyterLab.\n",
193 | "\n",
194 | "Just replace the standard `dash.Dash` class with the `jupyter_dash.JupyterDash` subclass."
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": null,
200 | "id": "nominated-chain",
201 | "metadata": {},
202 | "outputs": [],
203 | "source": [
204 | "from jupyter_dash import JupyterDash\n",
205 | "import dash_core_components as dcc\n",
206 | "import dash_html_components as html\n",
207 | "from dash.dependencies import Input, Output"
208 | ]
209 | },
210 | {
211 | "cell_type": "markdown",
212 | "id": "institutional-malpractice",
213 | "metadata": {},
214 | "source": [
215 | "When running in JupyterHub or Binder, call the `infer_jupyter_proxy_config` function to detect the proxy configuration."
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": null,
221 | "id": "curious-plane",
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "# Get proxy to run server within this JupyterHub environment\n",
226 | "JupyterDash.infer_jupyter_proxy_config()"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": null,
232 | "id": "domestic-nursery",
233 | "metadata": {},
234 | "outputs": [],
235 | "source": [
236 | "# Build App\n",
237 | "app = JupyterDash(__name__)\n",
238 | "\n",
239 | "# Create server variable with Flask server object for use with gunicorn\n",
240 | "server = app.server\n",
241 | "\n",
242 | "app.layout = html.Div([\n",
243 | "    html.H1(\"JupyterDash Demo\"),\n",
244 | " html.Label([\n",
245 | "        \"Dataset\",\n",
246 | " dcc.Dropdown(\n",
247 | " id='colorscale-dropdown', clearable=False,\n",
248 | " value='dino', options=[\n",
249 | " {'label': c, 'value': c}\n",
250 | " for c in dinodf.dataset.unique()\n",
251 | " ])\n",
252 | " ]),\n",
253 | " dcc.Graph(id='graph'),\n",
254 | "])\n",
255 | "\n",
256 | "# Define callback to update graph\n",
257 | "@app.callback(\n",
258 | " Output('graph', 'figure'),\n",
259 | " [Input(\"colorscale-dropdown\", \"value\")]\n",
260 | ")\n",
261 | "\n",
262 | "def update_figure(dinoshape):\n",
263 | " return px.scatter(\n",
264 | " dinodf[dinodf.dataset==dinoshape], x=\"x\", y=\"y\",\n",
265 | " marginal_y=\"histogram\",marginal_x=\"histogram\",\n",
266 | " width=500, height=500\n",
267 | " )"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": null,
273 | "id": "progressive-clarity",
274 | "metadata": {},
275 | "outputs": [],
276 | "source": [
277 | "# Run app and display result inline in the notebook\n",
278 | "app.run_server(mode='inline')"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": null,
284 | "id": "attractive-footwear",
285 | "metadata": {},
286 | "outputs": [],
287 | "source": [
288 | "app.run_server()"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "id": "physical-guarantee",
294 | "metadata": {},
295 | "source": [
296 | "# An example taken from the JupyterDash repo\n",
297 | "\n",
298 | "https://github.com/plotly/jupyter-dash"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": null,
304 | "id": "economic-haven",
305 | "metadata": {},
306 | "outputs": [],
307 | "source": [
308 | "import dash"
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "execution_count": null,
314 | "id": "retained-group",
315 | "metadata": {},
316 | "outputs": [],
317 | "source": [
318 | "df = pd.read_csv('https://plotly.github.io/datasets/country_indicators.csv')\n",
319 | "available_indicators = df['Indicator Name'].unique()"
320 | ]
321 | },
322 | {
323 | "cell_type": "markdown",
324 | "id": "running-program",
325 | "metadata": {},
326 | "source": [
327 | "Construct the app and callbacks"
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "execution_count": null,
333 | "id": "floppy-cigarette",
334 | "metadata": {},
335 | "outputs": [],
336 | "source": [
337 | "external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']\n",
338 | "\n",
339 | "app = JupyterDash(__name__, external_stylesheets=external_stylesheets)\n",
340 | "\n",
341 | "# Create server variable with Flask server object for use with gunicorn\n",
342 | "server = app.server\n",
343 | "\n",
344 | "app.layout = html.Div([\n",
345 | " html.Div([\n",
346 | "\n",
347 | " html.Div([\n",
348 | " dcc.Dropdown(\n",
349 | " id='crossfilter-xaxis-column',\n",
350 | " options=[{'label': i, 'value': i} for i in available_indicators],\n",
351 | " value='Agriculture, value added (% of GDP)'\n",
352 | " ),\n",
353 | " dcc.RadioItems(\n",
354 | " id='crossfilter-xaxis-type',\n",
355 | " options=[{'label': i, 'value': i} for i in ['Linear', 'Log']],\n",
356 | " value='Linear',\n",
357 | " labelStyle={'display': 'inline-block'}\n",
358 | " )\n",
359 | " ],\n",
360 | " style={'width': '49%', 'display': 'inline-block'}),\n",
361 | "\n",
362 | " html.Div([\n",
363 | " dcc.Dropdown(\n",
364 | " id='crossfilter-yaxis-column',\n",
365 | " options=[{'label': i, 'value': i} for i in available_indicators],\n",
366 | " value='Life expectancy at birth, total (years)'\n",
367 | " ),\n",
368 | " dcc.RadioItems(\n",
369 | " id='crossfilter-yaxis-type',\n",
370 | " options=[{'label': i, 'value': i} for i in ['Linear', 'Log']],\n",
371 | " value='Linear',\n",
372 | " labelStyle={'display': 'inline-block'}\n",
373 | " )\n",
374 | " ], style={'width': '49%', 'float': 'right', 'display': 'inline-block'})\n",
375 | " ], style={\n",
376 | " 'borderBottom': 'thin lightgrey solid',\n",
377 | " 'backgroundColor': 'rgb(250, 250, 250)',\n",
378 | " 'padding': '10px 5px'\n",
379 | " }),\n",
380 | "\n",
381 | " html.Div([\n",
382 | " dcc.Graph(\n",
383 | " id='crossfilter-indicator-scatter',\n",
384 | " hoverData={'points': [{'customdata': 'Japan'}]}\n",
385 | " )\n",
386 | " ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),\n",
387 | " html.Div([\n",
388 | " dcc.Graph(id='x-time-series'),\n",
389 | " dcc.Graph(id='y-time-series'),\n",
390 | " ], style={'display': 'inline-block', 'width': '49%'}),\n",
391 | "\n",
392 | " html.Div(dcc.Slider(\n",
393 | " id='crossfilter-year--slider',\n",
394 | " min=df['Year'].min(),\n",
395 | " max=df['Year'].max(),\n",
396 | " value=df['Year'].max(),\n",
397 | " marks={str(year): str(year) for year in df['Year'].unique()},\n",
398 | " step=None\n",
399 | " ), style={'width': '49%', 'padding': '0px 20px 20px 20px'})\n",
400 | "])\n",
401 | "\n",
402 | "\n",
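    | "# main scatter: redrawn whenever an axis column, an axis type, or the year slider changes\n",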
403 | "@app.callback(\n",
404 | " dash.dependencies.Output('crossfilter-indicator-scatter', 'figure'),\n",
405 | " [dash.dependencies.Input('crossfilter-xaxis-column', 'value'),\n",
406 | " dash.dependencies.Input('crossfilter-yaxis-column', 'value'),\n",
407 | " dash.dependencies.Input('crossfilter-xaxis-type', 'value'),\n",
408 | " dash.dependencies.Input('crossfilter-yaxis-type', 'value'),\n",
409 | " dash.dependencies.Input('crossfilter-year--slider', 'value')])\n",
410 | "def update_graph(xaxis_column_name, yaxis_column_name,\n",
411 | " xaxis_type, yaxis_type,\n",
412 | " year_value):\n",
413 | " dff = df[df['Year'] == year_value]\n",
414 | "\n",
415 | " return {\n",
416 | " 'data': [dict(\n",
417 | " x=dff[dff['Indicator Name'] == xaxis_column_name]['Value'],\n",
418 | " y=dff[dff['Indicator Name'] == yaxis_column_name]['Value'],\n",
419 | " text=dff[dff['Indicator Name'] == yaxis_column_name]['Country Name'],\n",
420 | " customdata=dff[dff['Indicator Name'] == yaxis_column_name]['Country Name'],\n",
421 | " mode='markers',\n",
422 | " marker={\n",
423 | " 'size': 25,\n",
424 | " 'opacity': 0.7,\n",
425 | " 'color': 'orange',\n",
426 | " 'line': {'width': 2, 'color': 'purple'}\n",
427 | " }\n",
428 | " )],\n",
429 | " 'layout': dict(\n",
430 | " xaxis={\n",
431 | " 'title': xaxis_column_name,\n",
432 | " 'type': 'linear' if xaxis_type == 'Linear' else 'log'\n",
433 | " },\n",
434 | " yaxis={\n",
435 | " 'title': yaxis_column_name,\n",
436 | " 'type': 'linear' if yaxis_type == 'Linear' else 'log'\n",
437 | " },\n",
438 | " margin={'l': 40, 'b': 30, 't': 10, 'r': 0},\n",
439 | " height=450,\n",
440 | " hovermode='closest'\n",
441 | " )\n",
442 | " }\n",
443 | "\n",
444 | "\n",
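    | "# shared helper used by both time-series callbacks below to build a small line chart\n",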
445 | "def create_time_series(dff, axis_type, title):\n",
446 | " return {\n",
447 | " 'data': [dict(\n",
448 | " x=dff['Year'],\n",
449 | " y=dff['Value'],\n",
450 | " mode='lines+markers'\n",
451 | " )],\n",
452 | " 'layout': {\n",
453 | " 'height': 225,\n",
454 | " 'margin': {'l': 20, 'b': 30, 'r': 10, 't': 10},\n",
455 | " 'annotations': [{\n",
456 | " 'x': 0, 'y': 0.85, 'xanchor': 'left', 'yanchor': 'bottom',\n",
457 | " 'xref': 'paper', 'yref': 'paper', 'showarrow': False,\n",
458 | " 'align': 'left', 'bgcolor': 'rgba(255, 255, 255, 0.5)',\n",
459 | " 'text': title\n",
460 | " }],\n",
461 | " 'yaxis': {'type': 'linear' if axis_type == 'Linear' else 'log'},\n",
462 | " 'xaxis': {'showgrid': False}\n",
463 | " }\n",
464 | " }\n",
465 | "\n",
466 | "\n",
467 | "@app.callback(\n",
468 | " dash.dependencies.Output('x-time-series', 'figure'),\n",
469 | " [dash.dependencies.Input('crossfilter-indicator-scatter', 'hoverData'),\n",
470 | " dash.dependencies.Input('crossfilter-xaxis-column', 'value'),\n",
471 | " dash.dependencies.Input('crossfilter-xaxis-type', 'value')])\n",
472 | "def update_y_timeseries(hoverData, xaxis_column_name, axis_type):\n",
473 | " country_name = hoverData['points'][0]['customdata']\n",
474 | " dff = df[df['Country Name'] == country_name]\n",
475 | " dff = dff[dff['Indicator Name'] == xaxis_column_name]\n",
476 |     "    title = '{}<br>{}'.format(country_name, xaxis_column_name)\n",
477 | " return create_time_series(dff, axis_type, title)\n",
478 | "\n",
479 | "\n",
480 | "@app.callback(\n",
481 | " dash.dependencies.Output('y-time-series', 'figure'),\n",
482 | " [dash.dependencies.Input('crossfilter-indicator-scatter', 'hoverData'),\n",
483 | " dash.dependencies.Input('crossfilter-yaxis-column', 'value'),\n",
484 | " dash.dependencies.Input('crossfilter-yaxis-type', 'value')])\n",
485 | "def update_x_timeseries(hoverData, yaxis_column_name, axis_type):\n",
486 | " dff = df[df['Country Name'] == hoverData['points'][0]['customdata']]\n",
487 | " dff = dff[dff['Indicator Name'] == yaxis_column_name]\n",
488 | " return create_time_series(dff, axis_type, yaxis_column_name)"
489 | ]
490 | },
491 | {
492 | "cell_type": "markdown",
493 | "id": "worst-speaker",
494 | "metadata": {},
495 | "source": [
496 | "Serve the app using `run_server`. Unlike the standard `Dash.run_server` method, the `JupyterDash.run_server` method doesn't block execution of the notebook. It serves the app in a background thread, making it possible to run other notebook calculations while the app is running.\n",
497 | "\n",
498 | "This makes it possible to iteratively update the app without rerunning the potentially expensive data processing steps."
499 | ]
500 | },
501 | {
502 | "cell_type": "code",
503 | "execution_count": null,
504 | "id": "backed-advance",
505 | "metadata": {},
506 | "outputs": [],
507 | "source": [
508 | "app.run_server()\n",
509 | "# app.run_server(mode=\"inline\")"
510 | ]
511 | },
512 | {
513 | "cell_type": "markdown",
514 | "id": "according-restriction",
515 | "metadata": {},
516 | "source": [
517 | "By default, `run_server` displays a URL that you can click on to open the app in a browser tab. The `mode` argument to `run_server` can be used to change this behavior. Setting `mode=\"inline\"` will display the app directly in the notebook output cell."
518 | ]
519 | },
520 | {
521 | "cell_type": "code",
522 | "execution_count": null,
523 | "id": "alpha-commodity",
524 | "metadata": {},
525 | "outputs": [],
526 | "source": [
527 | "#app.run_server()\n",
528 | "app.run_server(mode=\"inline\")"
529 | ]
530 | },
531 | {
532 | "cell_type": "markdown",
533 | "id": "figured-singapore",
534 | "metadata": {},
535 | "source": [
536 | "When running in JupyterLab, with the `jupyterlab-dash` extension, setting `mode=\"jupyterlab\"` will open the app in a tab in JupyterLab.\n",
537 | "\n",
538 | "```python\n",
539 | "app.run_server(mode=\"jupyterlab\")\n",
540 | "```"
541 | ]
542 | },
543 | {
544 | "cell_type": "code",
545 | "execution_count": null,
546 | "id": "connected-livestock",
547 | "metadata": {},
548 | "outputs": [],
549 | "source": [
550 | "app.run_server(mode=\"jupyterlab\")"
551 | ]
552 | },
553 | {
554 | "cell_type": "code",
555 | "execution_count": null,
556 | "id": "trying-scottish",
557 | "metadata": {},
558 | "outputs": [],
559 | "source": []
560 | }
561 | ],
562 | "metadata": {
563 | "kernelspec": {
564 | "display_name": "Python 3",
565 | "language": "python",
566 | "name": "python3"
567 | },
568 | "language_info": {
569 | "codemirror_mode": {
570 | "name": "ipython",
571 | "version": 3
572 | },
573 | "file_extension": ".py",
574 | "mimetype": "text/x-python",
575 | "name": "python",
576 | "nbconvert_exporter": "python",
577 | "pygments_lexer": "ipython3",
578 | "version": "3.8.8"
579 | },
580 | "toc": {
581 | "base_numbering": 1,
582 | "nav_menu": {},
583 | "number_sections": true,
584 | "sideBar": true,
585 | "skip_h1_title": false,
586 | "title_cell": "Table of Contents",
587 | "title_sidebar": "Contents",
588 | "toc_cell": false,
589 | "toc_position": {},
590 | "toc_section_display": true,
591 | "toc_window_display": false
592 | }
593 | },
594 | "nbformat": 4,
595 | "nbformat_minor": 5
596 | }
597 |
--------------------------------------------------------------------------------
/05-altair-takes-on-the-datasaurus.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "beautiful-premium",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import altair as alt\n",
11 | "import pandas as pd\n",
12 | "import ipywidgets as widgets"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "id": "marine-anchor",
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "dinodf = pd.read_csv('data/DatasaurusDozen.tsv',sep='\\t')"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "id": "theoretical-allen",
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "dinodf = dinodf[dinodf.dataset=='dino']"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "id": "cellular-portrait",
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "dinodf"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "id": "historical-portsmouth",
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "points = alt.Chart(dinodf).mark_point().encode(\n",
53 | " x='x',\n",
54 | " y='y'\n",
55 | ")\n",
56 | "points"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": null,
62 | "id": "laden-individual",
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "bars = alt.Chart(dinodf).mark_bar().encode(\n",
67 | " x='count(x)',\n",
68 | " y='x'\n",
69 | ")\n",
70 | "bars"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "id": "twelve-jewelry",
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "barsX = alt.Chart(dinodf).mark_bar().encode(\n",
81 | " alt.X('x',bin=True),\n",
82 | " y='count()'\n",
83 | ")\n",
84 | "barsX"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": null,
90 | "id": "empty-pollution",
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "bars = alt.Chart(dinodf).mark_bar().encode(\n",
95 | " alt.X('y',bin=True),\n",
96 | " y='count()'\n",
97 | ")\n",
98 | "bars"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "id": "obvious-globe",
105 | "metadata": {},
106 | "outputs": [],
107 | "source": [
108 | "barsY = alt.Chart(dinodf).mark_bar().encode(\n",
109 | " alt.Y('y',bin=True),\n",
110 | " x='count()'\n",
111 | ")\n",
112 | "barsY"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": null,
118 | "id": "conditional-canadian",
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "barsX & barsY"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "id": "psychological-fossil",
129 | "metadata": {},
130 | "outputs": [],
131 | "source": [
132 | "alt.vconcat(barsX,\n",
133 | " alt.hconcat(points,barsY))"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "id": "missing-lewis",
140 | "metadata": {},
141 | "outputs": [],
142 | "source": [
143 | "my_si = alt.selection_interval()"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": null,
149 | "id": "sorted-learning",
150 | "metadata": {},
151 | "outputs": [],
152 | "source": [
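    | "# drag a selection box on the scatter; both histograms are filtered to the selected points\n",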
153 | "points = alt.Chart(dinodf).mark_point().encode(\n",
154 | " x='x',\n",
155 | " y='y'\n",
156 | ").add_selection(\n",
157 | " my_si\n",
158 | ")\n",
159 | "\n",
160 | "barsX = alt.Chart(dinodf).mark_bar().encode(\n",
161 | " alt.X('x',bin=True),\n",
162 | " y='count()'\n",
163 | ").transform_filter(\n",
164 | " my_si\n",
165 | ")\n",
166 | "\n",
167 | "barsY = alt.Chart(dinodf).mark_bar().encode(\n",
168 | " alt.Y('y',bin=True),\n",
169 | " x='count()'\n",
170 | ").transform_filter(\n",
171 | " my_si\n",
172 | ")\n",
173 | "\n",
174 | "chart = alt.vconcat(barsX,\n",
175 | " alt.hconcat(points,barsY))"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "id": "monthly-alloy",
182 | "metadata": {},
183 | "outputs": [],
184 | "source": [
185 | "chart"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "id": "embedded-farmer",
192 | "metadata": {},
193 | "outputs": [],
194 | "source": [
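    | "# same linked charts, with the binned x and y scales pinned to a 0-100 domain so the histogram axes stay fixed while filtering\n",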
195 | "points = alt.Chart(dinodf).mark_point().encode(\n",
196 | " x='x',\n",
197 | " y='y'\n",
198 | ").add_selection(\n",
199 | " my_si\n",
200 | ")\n",
201 | "\n",
202 | "barsX = alt.Chart(dinodf).mark_bar().encode(\n",
203 | " alt.X('x',bin=True,scale=alt.Scale(domain=[0, 100])),\n",
204 | " y='count()'\n",
205 | ").transform_filter(\n",
206 | " my_si\n",
207 | ")\n",
208 | "\n",
209 | "barsY = alt.Chart(dinodf).mark_bar().encode(\n",
210 | " alt.Y('y',bin=True,scale=alt.Scale(domain=[0, 100])),\n",
211 | " x='count()'\n",
212 | ").transform_filter(\n",
213 | " my_si\n",
214 | ")\n",
215 | "\n",
216 | "chart = alt.vconcat(barsX,\n",
217 | " alt.hconcat(points,barsY))\n",
218 | "\n",
219 | "chart"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "id": "underlying-expense",
226 | "metadata": {},
227 | "outputs": [],
228 | "source": [
229 | "chart.save('altairasaurus_chart.html')"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": null,
235 | "id": "neutral-granny",
236 | "metadata": {},
237 | "outputs": [],
238 | "source": [
239 | "#\n",
240 | "# Exercise: Use ipywidgets to enable this chart for any dataset in the datasaurus dataframe\n",
241 | "#\n",
242 | "\n"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": null,
248 | "id": "sticky-junction",
249 | "metadata": {
250 | "scrolled": false
251 | },
252 | "outputs": [],
253 | "source": [
254 | "# Execute this cell to see a solution\n",
255 | "%load 'exercises/altairowidgetasaurus.py'"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": null,
261 | "id": "heavy-harvey",
262 | "metadata": {},
263 | "outputs": [],
264 | "source": [
265 | "#\n",
266 | "# Exercise: If you have ipywidgets down, try this:\n",
267 | "# 1. Execute this cell to see the plots\n",
268 | "# 2. Link the two plots with a selection interval like the above\n",
269 | "#\n",
270 | "\n",
271 | "from vega_datasets import data\n",
272 | "cars = data.cars()\n",
273 | "\n",
274 | "points = alt.Chart(cars).mark_point().encode(\n",
275 | " x='Horsepower',\n",
276 | " y='Miles_per_Gallon',\n",
277 | " color='Origin'\n",
278 | ")\n",
279 | "\n",
280 | "bars = alt.Chart(cars).mark_bar().encode(\n",
281 | " x='count(Origin)',\n",
282 | " y='Origin',\n",
283 | " color='Origin'\n",
284 | ")\n",
285 | "\n",
286 | "points & bars"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": null,
292 | "id": "expired-people",
293 | "metadata": {},
294 | "outputs": [],
295 | "source": [
296 | "# Execute this cell to see a solution\n",
297 | "%load 'exercises/altaircars.py'"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": null,
303 | "id": "small-queensland",
304 | "metadata": {},
305 | "outputs": [],
306 | "source": []
307 | }
308 | ],
309 | "metadata": {
310 | "kernelspec": {
311 | "display_name": "Python 3",
312 | "language": "python",
313 | "name": "python3"
314 | },
315 | "language_info": {
316 | "codemirror_mode": {
317 | "name": "ipython",
318 | "version": 3
319 | },
320 | "file_extension": ".py",
321 | "mimetype": "text/x-python",
322 | "name": "python",
323 | "nbconvert_exporter": "python",
324 | "pygments_lexer": "ipython3",
325 | "version": "3.8.8"
326 | },
327 | "toc": {
328 | "base_numbering": 1,
329 | "nav_menu": {},
330 | "number_sections": true,
331 | "sideBar": true,
332 | "skip_h1_title": false,
333 | "title_cell": "Table of Contents",
334 | "title_sidebar": "Contents",
335 | "toc_cell": false,
336 | "toc_position": {},
337 | "toc_section_display": true,
338 | "toc_window_display": false
339 | }
340 | },
341 | "nbformat": 4,
342 | "nbformat_minor": 5
343 | }
344 |
--------------------------------------------------------------------------------
/4fun-altair.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# NOTE: This notebook is taken from the GitHub repo of the LA Times datadesk:\n",
8 | "\n",
9 | "Original Title: california-coronavirus-data examples\n",
10 | "\n",
11 | "By [Ben Welsh](https://palewi.re/who-is-ben-welsh)\n",
12 | "\n",
13 | "A demonstration of how to use Python to work with the Los Angeles Times' independent tally of coronavirus cases in California published on GitHub at [datadesk/california-coronavirus-data](https://github.com/datadesk/california-coronavirus-data#state-cdph-totalscsv). To run this notebook immediately in the cloud, click the [Binder](https://mybinder.org/) launcher below.\n",
14 | "\n",
15 | "[](https://mybinder.org/v2/gh/datadesk/california-coronavirus-data/master?urlpath=lab/tree/notebooks/examples.ipynb)\n",
16 | "\n",
17 | "## Subnotes: \n",
18 | "\n",
19 | "* Notebook and data were retrieved on April 23, 2021\n",
20 | "* Modifications for the IDRE workshop are made at the very bottom below the header \"IDRE Workshop Additions\"\n",
21 | "* The Binder link for interacting with the IDRE workshop materials is [Here](https://mybinder.org/v2/gh/benjum/idre-spring21-python-data-viz-2/HEAD)"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "# commented out for IDRE class\n",
31 | "# %load_ext lab_black"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "## Import Python tools"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "Our data analysis and plotting tools"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": null,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "import pandas as pd\n",
55 | "import altair as alt"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "Customizations to the Altair theme"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "import altair_latimes as lat"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": null,
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "alt.themes.register(\"latimes\", lat.theme)\n",
81 | "alt.themes.enable(\"latimes\")"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "alt.data_transformers.disable_max_rows()"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "## Import data"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "Read in the agency totals"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": [
113 | "agency_df = pd.read_csv(\"data/latimes-agency-totals.csv\", parse_dates=[\"date\"])"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "agency_df.head()"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "agency_df.info()"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "## Aggregate data"
139 | ]
140 | },
141 | {
142 | "cell_type": "markdown",
143 | "metadata": {},
144 | "source": [
145 | "### By state"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "Lump all the agencies together and you get the statewide totals."
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "metadata": {},
159 | "outputs": [],
160 | "source": [
161 | "state_df = (\n",
162 | " agency_df.groupby([\"date\"])\n",
163 | " .agg({\"confirmed_cases\": \"sum\", \"deaths\": \"sum\"})\n",
164 | " .reset_index()\n",
165 | ")"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "state_df.head()"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {},
181 | "outputs": [],
182 | "source": [
183 | "state_df.info()"
184 | ]
185 | },
186 | {
187 | "cell_type": "markdown",
188 | "metadata": {},
189 | "source": [
190 | "### By county"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {},
196 | "source": [
197 | "Three cities — Berkeley, Long Beach and Pasadena — run independent public health departments. Calculating county-level totals requires grouping them with their local peers."
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {},
204 | "outputs": [],
205 | "source": [
206 | "county_df = (\n",
207 | " agency_df.groupby([\"date\", \"county\"])\n",
208 | " .agg({\"confirmed_cases\": \"sum\", \"deaths\": \"sum\"})\n",
209 | " .reset_index()\n",
210 | ")"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": null,
216 | "metadata": {},
217 | "outputs": [],
218 | "source": [
219 | "county_df.head()"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {},
226 | "outputs": [],
227 | "source": [
228 | "county_df.info()"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "metadata": {},
234 | "source": [
235 | "## Chart the statewide totals over time"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": null,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "# Create a base chart with the common x-axis\n",
245 | "chart = alt.Chart(state_df).encode(x=alt.X(\"date:T\", title=None))\n",
246 | "\n",
247 | "# Create the cases line\n",
248 | "cases = chart.mark_line(color=lat.palette[\"default\"]).encode(\n",
249 | " y=alt.Y(\"confirmed_cases:Q\", title=\"Confirmed cases\")\n",
250 | ")\n",
251 | "\n",
252 | "# Create the deaths line\n",
253 | "deaths = chart.mark_line(color=lat.palette[\"schemes\"][\"ice-7\"][3]).encode(\n",
254 | " y=alt.Y(\"deaths:Q\", title=\"Deaths\")\n",
255 | ")\n",
256 | "\n",
257 | "# Combine them into a single chart\n",
258 | "(cases & deaths).properties(title=\"Statewide cumulative totals\")"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 | "## Chart the county totals"
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {},
271 | "source": [
272 | "First on a linear scale"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": null,
278 | "metadata": {},
279 | "outputs": [],
280 | "source": [
281 | "# Create the base chart\n",
282 | "chart = (\n",
283 | " alt.Chart(county_df)\n",
284 | " .mark_line()\n",
285 | " .encode(\n",
286 | " x=alt.X(\"date:T\", title=None),\n",
287 | " color=alt.Color(\"county:N\", title=\"County\", legend=None),\n",
288 | " )\n",
289 | ")\n",
290 | "\n",
291 | "# The cases line\n",
292 | "cases = chart.encode(y=alt.Y(\"confirmed_cases:Q\", title=\"Confirmed cases\"),)\n",
293 | "\n",
294 | "# The deaths line\n",
295 | "deaths = chart.mark_line().encode(y=alt.Y(\"deaths:Q\", title=\"Deaths\"),)\n",
296 | "\n",
297 | "# Combined into a chart\n",
298 | "(cases & deaths).properties(title=\"Cumulative totals by county\")"
299 | ]
300 | },
301 | {
302 | "cell_type": "markdown",
303 | "metadata": {},
304 | "source": [
305 | "Again on a logarithmic scale"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": null,
311 | "metadata": {},
312 | "outputs": [],
313 | "source": [
314 | "# Make a base chart\n",
315 | "chart = (\n",
316 | " alt.Chart(county_df)\n",
317 | " .mark_line()\n",
318 | " .encode(\n",
319 | " x=alt.X(\"date:T\", title=None),\n",
320 | " color=alt.Color(\"county:N\", title=\"County\", legend=None),\n",
321 | " )\n",
322 | ")\n",
323 | "\n",
324 | "# The cases lines\n",
325 | "cases = chart.transform_filter(alt.datum.confirmed_cases > 0).encode(\n",
326 | " y=alt.Y(\"confirmed_cases:Q\", scale=alt.Scale(type=\"log\"), title=\"Confirmed cases\"),\n",
327 | ")\n",
328 | "\n",
329 | "# The deaths lines\n",
330 | "deaths = chart.transform_filter(alt.datum.deaths > 0).encode(\n",
331 | " y=alt.Y(\"deaths:Q\", scale=alt.Scale(type=\"log\"), title=\"Deaths\"),\n",
332 | ")\n",
333 | "\n",
334 | "# Slapping them together\n",
335 | "(cases & deaths).properties(title=\"Cumulative totals by county\")"
336 | ]
337 | },
338 | {
339 | "cell_type": "markdown",
340 | "metadata": {},
341 | "source": [
342 | "A common technique for clarifying these charts is to begin each line on the day the county hit a minimum number of cases. Let's try it with 10."
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": null,
348 | "metadata": {},
349 | "outputs": [],
350 | "source": [
351 | "day_10_df = (\n",
352 | " county_df[\n",
353 | " # Filter down to only days with 10 or more cumulative cases\n",
354 | " county_df.confirmed_cases\n",
355 | " >= 10\n",
356 | " ]\n",
357 | " .groupby(\n",
358 | " # And then get the minimum date for each county\n",
359 | " \"county\"\n",
360 | " )\n",
361 | " .date.min()\n",
362 | " .reset_index()\n",
363 | ")"
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {},
369 | "source": [
370 | "Merge that date to each row in the data."
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "execution_count": null,
376 | "metadata": {},
377 | "outputs": [],
378 | "source": [
379 | "county_date_diff_df = county_df.merge(\n",
380 | " day_10_df, how=\"inner\", on=\"county\", suffixes=[\"\", \"_gte_10_cases\"]\n",
381 | ")"
382 | ]
383 | },
384 | {
385 | "cell_type": "markdown",
386 | "metadata": {},
387 | "source": [
388 | "Calculate each day's distance from its tenth day."
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": null,
394 | "metadata": {},
395 | "outputs": [],
396 | "source": [
397 | "county_date_diff_df[\"days_since_10\"] = (\n",
398 | " county_date_diff_df.date - county_date_diff_df.date_gte_10_cases\n",
399 | ").dt.days"
400 | ]
401 | },
402 | {
403 | "cell_type": "markdown",
404 | "metadata": {},
405 | "source": [
406 | "Chart it."
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": null,
412 | "metadata": {},
413 | "outputs": [],
414 | "source": [
415 | "alt.Chart(county_date_diff_df).transform_filter(\n",
416 | " # Only keep everything once they hit 10 cases\n",
417 | " alt.datum.days_since_10\n",
418 | " >= 0\n",
419 | ").mark_line().encode(\n",
420 | " x=alt.X(\"days_since_10:O\", title=\"Days since 10th case\"),\n",
421 | " y=alt.Y(\"confirmed_cases:Q\", scale=alt.Scale(type=\"log\"), title=\"Confirmed cases\"),\n",
422 | " color=alt.Color(\"county:N\", title=\"County\", legend=None),\n",
423 | ").properties(\n",
424 | " title=\"Cumulative totals by county\"\n",
425 | ")"
426 | ]
427 | },
428 | {
429 | "cell_type": "markdown",
430 | "metadata": {},
431 | "source": [
432 | "## County trends on a linear 'Pez' plot"
433 | ]
434 | },
435 | {
436 | "cell_type": "markdown",
437 | "metadata": {},
438 | "source": [
439 | "Fill in any date gaps so that every county has a row for every date."
440 | ]
441 | },
442 | {
443 | "cell_type": "code",
444 | "execution_count": null,
445 | "metadata": {},
446 | "outputs": [],
447 | "source": [
448 | "backfilled_county_df = (\n",
449 | " county_df.set_index([\"county\", \"date\"])\n",
450 | " .unstack(\"county\")\n",
451 | " .fillna(0)\n",
452 | " .stack(\"county\")\n",
453 | " .reset_index()\n",
454 | ")"
455 | ]
456 | },
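{
"cell_type": "markdown",
"metadata": {},
"source": [
"To see what that reshape does, here is a minimal sketch with a tiny made-up frame (the `toy` frame below is hypothetical and assumes pandas is imported as `pd` earlier in the notebook): `unstack` pivots counties into columns, which exposes the missing county/date pairs as NaN, `fillna(0)` zeroes them out, and `stack` restores the long format."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical example: county B has no row for the second date\n",
"toy = pd.DataFrame({\n",
"    \"county\": [\"A\", \"A\", \"B\"],\n",
"    \"date\": [\"2020-03-01\", \"2020-03-02\", \"2020-03-01\"],\n",
"    \"confirmed_cases\": [1, 2, 5],\n",
"})\n",
"toy.set_index([\"county\", \"date\"]).unstack(\"county\").fillna(0).stack(\"county\").reset_index()"
]
},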
457 | {
458 | "cell_type": "markdown",
459 | "metadata": {},
460 | "source": [
461 | "Calculate the rolling change in each county."
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "execution_count": null,
467 | "metadata": {},
468 | "outputs": [],
469 | "source": [
470 | "chronological_county_df = backfilled_county_df.sort_values([\"county\", \"date\"])"
471 | ]
472 | },
473 | {
474 | "cell_type": "markdown",
475 | "metadata": {},
476 | "source": [
477 | "Calculate the daily change in each county."
478 | ]
479 | },
480 | {
481 | "cell_type": "code",
482 | "execution_count": null,
483 | "metadata": {},
484 | "outputs": [],
485 | "source": [
486 | "chronological_county_df[\"new_confirmed_cases\"] = chronological_county_df.groupby(\n",
487 | " \"county\"\n",
488 | ").confirmed_cases.diff()"
489 | ]
490 | },
491 | {
492 | "cell_type": "markdown",
493 | "metadata": {},
494 | "source": [
495 | "Let's chill that out as a seven-day average."
496 | ]
497 | },
498 | {
499 | "cell_type": "code",
500 | "execution_count": null,
501 | "metadata": {},
502 | "outputs": [],
503 | "source": [
504 | "chronological_county_df[\"new_confirmed_cases_rolling_average\"] = (\n",
505 | " chronological_county_df.groupby(\"county\")\n",
506 | " .new_confirmed_cases.rolling(7)\n",
507 | " .mean()\n",
508 | " .droplevel(0)\n",
509 | ")"
510 | ]
511 | },
512 | {
513 | "cell_type": "markdown",
514 | "metadata": {},
515 | "source": [
516 | "Make the chart."
517 | ]
518 | },
519 | {
520 | "cell_type": "code",
521 | "execution_count": null,
522 | "metadata": {},
523 | "outputs": [],
524 | "source": [
525 | "alt.Chart(chronological_county_df, title=\"New cases by day\").mark_rect(\n",
526 | " stroke=None\n",
527 | ").encode(\n",
528 | " x=alt.X(\n",
529 | " \"date:O\", axis=alt.Axis(ticks=False, grid=False, labels=False,), title=None\n",
530 | " ),\n",
531 | " y=alt.Y(\n",
532 | " \"county:N\",\n",
533 | " title=\"County\",\n",
534 | " axis=alt.Axis(ticks=False, grid=False, labelPadding=5),\n",
535 | " ),\n",
536 | " color=alt.Color(\n",
537 | " \"new_confirmed_cases_rolling_average:Q\",\n",
538 | " scale=alt.Scale(\n",
539 | " type=\"threshold\", domain=[0, 3, 10, 25, 50, 100, 500], scheme=\"blues\"\n",
540 | " ),\n",
541 | " title=\"New cases (7-day average)\",\n",
542 | " ),\n",
543 | ").properties(\n",
544 | " height=800\n",
545 | ")"
546 | ]
547 | },
548 | {
549 | "cell_type": "markdown",
550 | "metadata": {},
551 | "source": [
552 | "## Chart new cases and deaths"
553 | ]
554 | },
555 | {
556 | "cell_type": "markdown",
557 | "metadata": {},
558 | "source": [
559 | "Calculate the number of new cases each day using panda's [diff](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.diff.html) method."
560 | ]
561 | },
562 | {
563 | "cell_type": "code",
564 | "execution_count": null,
565 | "metadata": {},
566 | "outputs": [],
567 | "source": [
568 | "state_df[\"new_confirmed_cases\"] = state_df.confirmed_cases.diff()"
569 | ]
570 | },
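{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a minimal sketch of what `diff` does (toy numbers, assuming pandas is imported as `pd` earlier in the notebook): each value is compared with the one before it, so a cumulative total becomes a day-over-day count."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cumulative totals 1, 3, 6, 10 become daily changes NaN, 2, 3, 4\n",
"pd.Series([1, 3, 6, 10]).diff()"
]
},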
571 | {
572 | "cell_type": "markdown",
573 | "metadata": {},
574 | "source": [
575 | "Do the same for deaths"
576 | ]
577 | },
578 | {
579 | "cell_type": "code",
580 | "execution_count": null,
581 | "metadata": {},
582 | "outputs": [],
583 | "source": [
584 | "state_df[\"new_deaths\"] = state_df.deaths.diff()"
585 | ]
586 | },
587 | {
588 | "cell_type": "markdown",
589 | "metadata": {},
590 | "source": [
591 | "Now calculate the moving seven-day average of each using panda's [rolling](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rolling.html) method."
592 | ]
593 | },
594 | {
595 | "cell_type": "code",
596 | "execution_count": null,
597 | "metadata": {},
598 | "outputs": [],
599 | "source": [
600 | "state_df[\"new_confirmed_cases_rolling_average\"] = state_df.new_confirmed_cases.rolling(\n",
601 | " 7\n",
602 | ").mean()"
603 | ]
604 | },
605 | {
606 | "cell_type": "code",
607 | "execution_count": null,
608 | "metadata": {},
609 | "outputs": [],
610 | "source": [
611 | "state_df[\"new_deaths_rolling_average\"] = state_df.new_deaths.rolling(7).mean()"
612 | ]
613 | },
614 | {
615 | "cell_type": "markdown",
616 | "metadata": {},
617 | "source": [
618 | "Put it all together on the chart "
619 | ]
620 | },
621 | {
622 | "cell_type": "code",
623 | "execution_count": null,
624 | "metadata": {},
625 | "outputs": [],
626 | "source": [
627 | "# One base chart object with the data they all share\n",
628 | "chart = alt.Chart(state_df).encode(x=alt.X(\"date:T\", title=None),)\n",
629 | "\n",
630 | "# The new cases bars\n",
631 | "cases_bars = chart.mark_bar(color=lat.palette[\"default\"]).encode(\n",
632 | " y=alt.Y(\"new_confirmed_cases:Q\", title=\"New confirmed cases\")\n",
633 | ")\n",
634 | "\n",
635 | "# The cases rolling average\n",
636 | "cases_line = chart.mark_line(color=lat.palette[\"accent\"]).encode(\n",
637 | " y=alt.Y(\"new_confirmed_cases_rolling_average:Q\", title=\"7-day average\")\n",
638 | ")\n",
639 | "\n",
640 | "# The new deaths bars\n",
641 | "deaths_bars = chart.mark_bar(color=lat.palette[\"schemes\"][\"ice-7\"][3]).encode(\n",
642 | " y=alt.Y(\"new_deaths:Q\", title=\"New deaths\")\n",
643 | ")\n",
644 | "\n",
645 | "# The deaths rolling average\n",
646 | "deaths_line = chart.mark_line(color=lat.palette[\"schemes\"][\"ice-7\"][6]).encode(\n",
647 | " y=alt.Y(\"new_deaths_rolling_average:Q\", title=\"7-day average\")\n",
648 | ")\n",
649 | "\n",
650 | "# Combine it all together into one paired chart\n",
651 | "((cases_bars + cases_line) & (deaths_bars + deaths_line)).properties(\n",
652 | " title=\"New case and deaths statewide by day\"\n",
653 | ")"
654 | ]
655 | },
656 | {
657 | "cell_type": "markdown",
658 | "metadata": {},
659 | "source": [
660 | "Now do it by county"
661 | ]
662 | },
663 | {
664 | "cell_type": "code",
665 | "execution_count": null,
666 | "metadata": {},
667 | "outputs": [],
668 | "source": [
669 | "chronological_county_df.head()"
670 | ]
671 | },
672 | {
673 | "cell_type": "markdown",
674 | "metadata": {},
675 | "source": [
676 | "Try it by county"
677 | ]
678 | },
679 | {
680 | "cell_type": "code",
681 | "execution_count": null,
682 | "metadata": {},
683 | "outputs": [],
684 | "source": [
685 | "alt.Chart(chronological_county_df, title=\"New cases by day\").mark_line().encode(\n",
686 | " x=alt.X(\"date:O\", axis=alt.Axis(ticks=False, grid=False, labels=False), title=None),\n",
687 | " y=alt.Y(\"new_confirmed_cases_rolling_average:Q\", title=\"7-day average\"),\n",
688 | " color=alt.Color(\"county:N\", title=\"County\", legend=None),\n",
689 | ")"
690 | ]
691 | },
692 | {
693 | "cell_type": "markdown",
694 | "metadata": {},
695 | "source": [
696 | "Create a statistic to measure recent changes in new cases"
697 | ]
698 | },
699 | {
700 | "cell_type": "code",
701 | "execution_count": null,
702 | "metadata": {},
703 | "outputs": [],
704 | "source": [
705 | "chronological_county_df.tail(14)"
706 | ]
707 | },
708 | {
709 | "cell_type": "code",
710 | "execution_count": null,
711 | "metadata": {},
712 | "outputs": [],
713 | "source": [
714 | "chronological_county_df[\n",
715 | " \"new_confirmed_cases_rolling_average_two_week_pct_change\"\n",
716 | "] = chronological_county_df.groupby(\n",
717 | " \"county\"\n",
718 | ").new_confirmed_cases_rolling_average.pct_change(\n",
719 | " 14\n",
720 | ")"
721 | ]
722 | },
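{
"cell_type": "markdown",
"metadata": {},
"source": [
"`pct_change(14)` compares each value with the value 14 rows (here, days) earlier: `(current - prior) / prior`. A minimal sketch with toy numbers and a shorter two-row window, assuming pandas is imported as `pd` earlier in the notebook:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# With periods=2: 30 vs. 10 -> 2.0 (a 200% jump) and 40 vs. 20 -> 1.0\n",
"pd.Series([10, 20, 30, 40]).pct_change(2)"
]
},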
723 | {
724 | "cell_type": "code",
725 | "execution_count": null,
726 | "metadata": {},
727 | "outputs": [],
728 | "source": [
729 | "latest_county_df = chronological_county_df[\n",
730 | " chronological_county_df.date == chronological_county_df.date.max()\n",
731 | "]"
732 | ]
733 | },
734 | {
735 | "cell_type": "code",
736 | "execution_count": null,
737 | "metadata": {},
738 | "outputs": [],
739 | "source": [
740 | "biggest_county_jumps = latest_county_df[\n",
741 | " latest_county_df.new_confirmed_cases_rolling_average >= 25\n",
742 | "].sort_values(\n",
743 | " \"new_confirmed_cases_rolling_average_two_week_pct_change\", ascending=False\n",
744 | ")"
745 | ]
746 | },
747 | {
748 | "cell_type": "code",
749 | "execution_count": null,
750 | "metadata": {},
751 | "outputs": [],
752 | "source": [
753 | "def facet_wrap(subplts, plots_per_row):\n",
754 | " rows = [\n",
755 | " subplts[i : i + plots_per_row] for i in range(0, len(subplts), plots_per_row)\n",
756 | " ]\n",
757 | " compound_chart = alt.hconcat()\n",
758 | " for r in rows:\n",
759 | " rowplot = alt.vconcat() # start a new row\n",
760 | " for item in r:\n",
761 | " rowplot |= item # add suplot to current row as a new column\n",
762 | " compound_chart &= rowplot # add the entire row of plots as a new row\n",
763 | " return compound_chart"
764 | ]
765 | },
766 | {
767 | "cell_type": "code",
768 | "execution_count": null,
769 | "metadata": {},
770 | "outputs": [],
771 | "source": []
772 | },
773 | {
774 | "cell_type": "code",
775 | "execution_count": null,
776 | "metadata": {},
777 | "outputs": [],
778 | "source": [
779 | "chart_list = []\n",
780 | "for county in list(biggest_county_jumps.head(12).county):\n",
781 | " this_df = chronological_county_df[chronological_county_df.county == county]\n",
782 | " chart = alt.Chart(this_df, title=county).encode(\n",
783 | " x=alt.X(\"date:T\", title=None, axis=None),\n",
784 | " )\n",
785 | " lines = chart.mark_line(color=lat.palette[\"accent\"]).encode(\n",
786 | " y=alt.Y(\"new_confirmed_cases_rolling_average:Q\", title=None,),\n",
787 | " )\n",
788 | " bars = chart.mark_bar(color=lat.palette[\"default\"], opacity=0.33).encode(\n",
789 | " y=alt.Y(\"new_confirmed_cases:Q\", title=\"New confirmed cases\",),\n",
790 | " )\n",
791 | " chart_list.append((bars + lines).properties(height=200, width=250))\n",
792 | "facet_wrap(chart_list, plots_per_row=4)"
793 | ]
794 | },
795 | {
796 | "cell_type": "code",
797 | "execution_count": null,
798 | "metadata": {},
799 | "outputs": [],
800 | "source": [
801 | "chart_list = []\n",
802 | "for county in list(biggest_county_jumps.tail(12).county):\n",
803 | " this_df = chronological_county_df[chronological_county_df.county == county]\n",
804 | " chart = alt.Chart(this_df, title=county).encode(\n",
805 | " x=alt.X(\"date:T\", title=None, axis=None),\n",
806 | " )\n",
807 | " lines = chart.mark_line(color=lat.palette[\"accent\"]).encode(\n",
808 | " y=alt.Y(\"new_confirmed_cases_rolling_average:Q\", title=None,),\n",
809 | " )\n",
810 | " bars = chart.mark_bar(color=lat.palette[\"default\"], opacity=0.33).encode(\n",
811 | " y=alt.Y(\"new_confirmed_cases:Q\", title=\"New confirmed cases\",),\n",
812 | " )\n",
813 | " chart_list.append((bars + lines).properties(height=200, width=250))\n",
814 | "facet_wrap(chart_list, plots_per_row=4)"
815 | ]
816 | },
817 | {
818 | "cell_type": "code",
819 | "execution_count": null,
820 | "metadata": {},
821 | "outputs": [],
822 | "source": [
823 | "biggest_county_jumps.new_confirmed_cases_rolling_average_two_week_pct_change.describe()"
824 | ]
825 | },
826 | {
827 | "cell_type": "code",
828 | "execution_count": null,
829 | "metadata": {},
830 | "outputs": [],
831 | "source": [
832 | "biggest_county_jumps[\n",
833 | " biggest_county_jumps.new_confirmed_cases_rolling_average_two_week_pct_change < 0\n",
834 | "]"
835 | ]
836 | },
837 | {
838 | "cell_type": "markdown",
839 | "metadata": {},
840 | "source": [
841 | "# IDRE Workshop Additions"
842 | ]
843 | },
844 | {
845 | "cell_type": "code",
846 | "execution_count": null,
847 | "metadata": {},
848 | "outputs": [],
849 | "source": [
850 | "alt.Chart(chronological_county_df, title=\"New cases by day\").mark_line().encode(\n",
851 | " x=alt.X(\"date:O\", axis=alt.Axis(ticks=False, grid=False, labels=False), title=None),\n",
852 | " y=alt.Y(\"new_confirmed_cases_rolling_average:Q\", title=\"7-day average\"),\n",
853 | " color=alt.Color(\"county:N\", title=\"County\", legend=None),\n",
854 | ")"
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "execution_count": null,
860 | "metadata": {},
861 | "outputs": [],
862 | "source": [
863 | "selopac = alt.selection_single(fields=['county'],bind='legend')\n",
864 | "how_to_select = 'CLICK ON COUNTY IN LEGEND TO SELECT'\n",
865 | "chronological_county_df_minusLA = chronological_county_df[chronological_county_df['county']!='Los Angeles']\n",
866 | "\n",
867 | "alt.Chart(chronological_county_df_minusLA, title=\"New cases by day\"+how_to_select).mark_line().encode(\n",
868 | " x=alt.X(\"date:O\", axis=alt.Axis(ticks=False, grid=False, labels=False), title=None),\n",
869 | " y=alt.Y(\"new_confirmed_cases_rolling_average:Q\", title=\"7-day average\"),\n",
870 | " color=alt.Color(\"county:N\", title=\"County\"),\n",
871 | " opacity=alt.condition(selopac, alt.value(1), alt.value(0.1))\n",
872 | ").add_selection(selopac)"
873 | ]
874 | },
875 | {
876 | "cell_type": "code",
877 | "execution_count": null,
878 | "metadata": {},
879 | "outputs": [],
880 | "source": [
881 | "# top 20 counties in terms of mean new confirmed cases\n",
882 | "top20=list(chronological_county_df.groupby('county')['new_confirmed_cases'].mean().sort_values(ascending=False)[:20].index)"
883 | ]
884 | },
885 | {
886 | "cell_type": "code",
887 | "execution_count": null,
888 | "metadata": {},
889 | "outputs": [],
890 | "source": [
891 | "selopac = alt.selection_single(on='mouseover',fields=['county'],bind='legend') \n",
892 | "how_to_select = 'MOVE MOUSE OVER LINE TO SELECT LINE'\n",
893 | "chronological_county_df_top20 = chronological_county_df[chronological_county_df['county'].isin(top20)]\n",
894 | "\n",
895 | "alt.Chart(chronological_county_df_top20, title=\"New cases by day\"+how_to_select).mark_line().encode(\n",
896 | " x=alt.X(\"date:O\", axis=alt.Axis(ticks=False, grid=False, labels=False), title=None),\n",
897 | " y=alt.Y(\"new_confirmed_cases_rolling_average:Q\", title=\"7-day average\"),\n",
898 | " color=alt.Color(\"county:N\", title=\"County\"),\n",
899 | " opacity=alt.condition(selopac, alt.value(1), alt.value(0.1))\n",
900 | ").add_selection(selopac)"
901 | ]
902 | },
903 | {
904 | "cell_type": "code",
905 | "execution_count": null,
906 | "metadata": {},
907 | "outputs": [],
908 | "source": [
909 | "selopac = alt.selection_single(fields=['county'],bind='legend') \n",
910 | "how_to_select = 'CLICK ON COUNTY IN LEGEND TO SELECT'\n",
911 | "chronological_county_df_top20 = chronological_county_df[chronological_county_df['county'].isin(top20)]\n",
912 | "chronological_county_df_top20 = chronological_county_df_top20[chronological_county_df_top20.date > '2021-03-01']\n",
913 | "\n",
914 | "alt.Chart(chronological_county_df_top20, title=\"New cases by day\"+how_to_select).mark_line().encode(\n",
915 | " x=alt.X(\"date:O\", axis=alt.Axis(ticks=False, grid=False, labels=False), title=None),\n",
916 | " y=alt.Y(\"new_confirmed_cases_rolling_average:Q\", title=\"7-day average\"),\n",
917 | " color=alt.Color(\"county:N\", title=\"County\"),\n",
918 | " opacity=alt.condition(selopac, alt.value(1), alt.value(0.1))\n",
919 | ").add_selection(selopac)"
920 | ]
921 | },
922 | {
923 | "cell_type": "code",
924 | "execution_count": null,
925 | "metadata": {},
926 | "outputs": [],
927 | "source": [
928 | "chart_list = []\n",
929 | "for county in list(biggest_county_jumps.tail(12).county):\n",
930 | " this_df = chronological_county_df[chronological_county_df.county == county]\n",
931 | " chart = alt.Chart(this_df, title=county).encode(\n",
932 | " x=alt.X(\"date:T\", title=None, axis=None),\n",
933 | " )\n",
934 | " lines = chart.mark_line(color=lat.palette[\"accent\"]).encode(\n",
935 | " y=alt.Y(\"new_confirmed_cases_rolling_average:Q\", title=None,),\n",
936 | " )\n",
937 | " bars = chart.mark_bar(color=lat.palette[\"default\"], opacity=0.33).encode(\n",
938 | " y=alt.Y(\"new_confirmed_cases:Q\", title=\"New confirmed cases\",),\n",
939 | " )\n",
940 | " chart_list.append((bars + lines).properties(height=200, width=250))\n",
941 | "facet_wrap(chart_list, plots_per_row=4)"
942 | ]
943 | },
944 | {
945 | "cell_type": "code",
946 | "execution_count": null,
947 | "metadata": {},
948 | "outputs": [],
949 | "source": [
950 | "import ipywidgets"
951 | ]
952 | },
953 | {
954 | "cell_type": "code",
955 | "execution_count": null,
956 | "metadata": {},
957 | "outputs": [],
958 | "source": [
959 | "# for county in list(biggest_county_jumps.tail(12).county):\n",
960 | "def countyplot(county='Los Angeles'):\n",
961 | " this_df = chronological_county_df[chronological_county_df.county == county]\n",
962 | " chart = alt.Chart(this_df, title=county).encode(\n",
963 | " x=alt.X(\"date:T\", title=None, axis=None),\n",
964 | " )\n",
965 | " lines = chart.mark_line(color=lat.palette[\"accent\"]).encode(\n",
966 | " y=alt.Y(\"new_confirmed_cases_rolling_average:Q\", title=None,),\n",
967 | " )\n",
968 | " bars = chart.mark_bar(color=lat.palette[\"default\"], opacity=0.33).encode(\n",
969 | " y=alt.Y(\"new_confirmed_cases:Q\", title=\"New confirmed cases\",),\n",
970 | " )\n",
971 | " #chart_list.append((bars + lines).properties(height=200, width=250))\n",
972 | " return (bars + lines).properties(height=200, width=250)\n",
973 | " \n",
974 | "#facet_wrap(chart_list, plots_per_row=4)\n",
975 | "ipywidgets.interact(countyplot,county=biggest_county_jumps.county.unique());"
976 | ]
977 | },
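{
"cell_type": "markdown",
"metadata": {},
"source": [
"Passing a list or array of counties makes `interact` build a dropdown automatically. For more control over the widget, one option (a sketch, not the only way to do it) is to construct the `Dropdown` explicitly and hand it to `interact`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# An explicit dropdown widget; sorting the options is just one possible choice\n",
"county_picker = ipywidgets.Dropdown(\n",
"    options=sorted(biggest_county_jumps.county.unique()),\n",
"    description=\"County\",\n",
")\n",
"ipywidgets.interact(countyplot, county=county_picker);"
]
},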
978 | {
979 | "cell_type": "code",
980 | "execution_count": null,
981 | "metadata": {},
982 | "outputs": [],
983 | "source": []
984 | }
985 | ],
986 | "metadata": {
987 | "kernelspec": {
988 | "display_name": "Python 3",
989 | "language": "python",
990 | "name": "python3"
991 | },
992 | "language_info": {
993 | "codemirror_mode": {
994 | "name": "ipython",
995 | "version": 3
996 | },
997 | "file_extension": ".py",
998 | "mimetype": "text/x-python",
999 | "name": "python",
1000 | "nbconvert_exporter": "python",
1001 | "pygments_lexer": "ipython3",
1002 | "version": "3.8.8"
1003 | },
1004 | "toc": {
1005 | "base_numbering": 1,
1006 | "nav_menu": {},
1007 | "number_sections": true,
1008 | "sideBar": true,
1009 | "skip_h1_title": false,
1010 | "title_cell": "Table of Contents",
1011 | "title_sidebar": "Contents",
1012 | "toc_cell": false,
1013 | "toc_position": {},
1014 | "toc_section_display": true,
1015 | "toc_window_display": false
1016 | }
1017 | },
1018 | "nbformat": 4,
1019 | "nbformat_minor": 4
1020 | }
1021 |
--------------------------------------------------------------------------------
/4fun-bqplot.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Note: This notebook is taken from Chakri Cherukuri's [GitHub repo](https://github.com/ChakriCherukuri/mlviz) showing a variety of examples on using bqplot to visualize theoretical and applied machine learning algorithms/models.\n",
8 | "*****"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "