├── combined.png
├── datasets.yml
├── README.md
├── environment.yml
├── nyc_parambokeh.py
├── osm_parambokeh.py
├── download_sample_data.py
└── SciPy2017-Slides.ipynb


/combined.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/philippjfr/scipy-talk-2017/HEAD/combined.png


--------------------------------------------------------------------------------
/datasets.yml:
--------------------------------------------------------------------------------
1 | ---
2 | 
3 | data:
4 | 
5 |   - url: https://s3-eu-west-1.amazonaws.com/assets.holoviews.org/nyc_taxi.parq.zip
6 |     title: 'NYC Taxi Data'
7 |     files:
8 |       - nyc_taxi.parq
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | To get started:
 2 | 
 3 | ```
 4 | git clone https://github.com/philippjfr/scipy-talk-2017.git
 5 | cd scipy-talk-2017
 6 | conda env create -f environment.yml
 7 | source activate scipy2017
 8 | python download_sample_data.py
 9 | ```
10 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: scipy2017
 2 | channels:
 3 |   - ioam
 4 |   - bokeh
 5 |   - conda-forge
 6 |   - damianavila82
 7 |   - defaults
 8 | dependencies:
 9 |   - python=3
10 |   - notebook
11 |   - holoviews
12 |   - geoviews
13 |   - pandas
14 |   - xarray
15 |   - datashader
16 |   - paramnb
17 |   - rise
18 |   - jupyter_dashboards
19 |   - fastparquet
20 |   - python-snappy
21 |   - parambokeh
22 | 


--------------------------------------------------------------------------------
/nyc_parambokeh.py:
--------------------------------------------------------------------------------
 1 | import holoviews as hv, geoviews as gv, param, parambokeh, dask.dataframe as dd
 2 | 
 3 | from colorcet import cm
 4 | from bokeh.models import WMTSTileSource
 5 | from holoviews.operation.datashader import datashade
 6 | from holoviews.streams import RangeXY, PlotSize
 7 | 
# Activate the Bokeh plotting backend for HoloViews.
hv.extension('bokeh')

# Read the taxi parquet directory with dask and call .persist() on the result.
df = dd.read_parquet('./data/nyc_taxi.parq/').persist()
# URL template (with {Z}/{Y}/{X} placeholders) for the ArcGIS World_Imagery
# tile service, wrapped as a GeoViews WMTS element used as the map layer.
url='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg'
tiles = gv.WMTS(WMTSTileSource(url=url))
# Plot options passed to the tile layer in NYCTaxiExplorer.make_view.
tile_options = dict(width=800,height=475,xaxis=None,yaxis=None,bgcolor='black',show_grid=False)

# (0, max passenger_count + 1), computed eagerly from the dataframe; used as
# both the default and the bounds of NYCTaxiExplorer.passengers.
passenger_counts = (0, df.passenger_count.max().compute()+1)
16 | 
class NYCTaxiExplorer(hv.streams.Stream):
    """Stream whose parameters drive the NYC taxi map.

    The parameter values are handed to ``make_view`` as keyword arguments
    when this object is used as a stream of a ``DynamicMap``.
    """

    alpha      = param.Magnitude(default=0.75, doc="Alpha value for the map opacity")
    # Only colormaps whose key contains no underscore are offered.
    colormap   = param.ObjectSelector(
        default=cm["fire"],
        objects=[cm[key] for key in cm.keys() if '_' not in key])
    plot       = param.ObjectSelector(default="pickup",   objects=["pickup","dropoff"])
    passengers = param.Range(default=passenger_counts, bounds=passenger_counts)
    output     = parambokeh.view.Plot()

    def make_view(self, x_range, y_range, alpha, colormap, plot, passengers, **kwargs):
        """Build the map tiles overlaid with the datashaded taxi points.

        ``plot`` selects which coordinate columns are used (``<plot>_x`` and
        ``<plot>_y``). ``passengers`` filters on ``passenger_count`` unless it
        equals the full ``passenger_counts`` range. Any additional stream
        values arrive in ``kwargs`` and are ignored.
        """
        coordinate_dims = [plot+'_x', plot+'_y']
        points = hv.Points(df, kdims=coordinate_dims, vdims=['passenger_count'])
        # Skip the selection entirely when the full range is requested.
        if passengers != passenger_counts:
            points = points.select(passenger_count=passengers)
        taxi_trips = datashade(
            points,
            x_sampling=1,
            y_sampling=1,
            cmap=colormap,
            dynamic=False,
            x_range=x_range,
            y_range=y_range,
        )
        map_tiles = tiles(style=dict(alpha=alpha), plot=tile_options)
        return map_tiles * taxi_trips
31 | 
# Create the explorer and use it as a stream (together with RangeXY and
# PlotSize) of the DynamicMap that calls selector.make_view.
selector = NYCTaxiExplorer(name="NYC Taxi Trips")
selector.output = hv.DynamicMap(selector.make_view, streams=[selector, RangeXY(), PlotSize()])

# Build the parambokeh widgets in 'server' mode; widget changes are forwarded
# to selector.event through the callback argument.
doc = parambokeh.Widgets(selector, view_position='right', callback=selector.event, mode='server')
36 | 


--------------------------------------------------------------------------------
/osm_parambokeh.py:
--------------------------------------------------------------------------------
 1 | import holoviews as hv, geoviews as gv, param, parambokeh, dask.dataframe as dd
 2 | 
 3 | from copy import deepcopy
 4 | import numpy as np
 5 | from colorcet import cm
 6 | from bokeh.models import WMTSTileSource
 7 | from holoviews.operation.datashader import aggregate, shade
 8 | from holoviews.operation import histogram
 9 | from holoviews.streams import RangeXY, PlotSize
10 | 
# Activate the Bokeh plotting backend for HoloViews.
hv.extension('bokeh')

# Read the OSM parquet directory with dask and call .persist() on the result.
df = dd.read_parquet('./data/osm-1billion.snappy.parq/').persist()

# URL template (with {Z}/{Y}/{X} placeholders) for the ArcGIS World_Imagery
# tile service, wrapped as a GeoViews WMTS element used as the map layer.
url='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg'
tiles = gv.WMTS(WMTSTileSource(url=url))

# Default options for the WMTS, Histogram and VLine elements.
# NOTE(review): the two adjacent string literals are concatenated with no
# separator, giving "...bgcolor='black']Histogram [logy=True]..." - confirm the
# opts parser accepts this without a space.
hv.opts("WMTS [width=800 height=475 xaxis=None yaxis=None bgcolor='black']"
        "Histogram [logy=True] (fill_color='white') {+framewise} VLine (color='black')")
20 | 
class OSMExplorer(hv.streams.Stream):
    # Parameters of the OpenStreetMap explorer. An instance is passed as a
    # stream to tiles_fn, filter_count, shade and hline_fn below.

    # Opacity of the map tiles (consumed by tiles_fn).
    alpha      = param.Magnitude(default=0.75, doc="Alpha value for the map opacity")
    # Colormap choice; every value of colorcet's `cm` mapping is offered.
    cmap        = param.ObjectSelector(default=cm["fire"], objects=cm.values())
    # Threshold used by filter_count and hline_fn.
    min_count   = param.Number(default=0, bounds=(0, 100))
    # Holds the plot assigned to `explorer.output` further down.
    output      = parambokeh.view.Plot()
26 | 
def filter_count(agg, min_count, **kwargs):
    """Zero out every count below ``min_count`` in a copy of ``agg``.

    When ``min_count`` is falsy the input is returned untouched (same
    object). Otherwise ``agg`` is deep-copied and the copy's
    ``data.Count.data`` array is modified in place, so the caller's object
    is never mutated. Extra keyword arguments are ignored.
    """
    if not min_count:
        return agg
    thresholded = deepcopy(agg)
    counts = thresholded.data.Count.data
    counts[counts < min_count] = 0
    return thresholded
32 | 
def hline_fn(min_count, **kwargs):
    """Return an ``hv.VLine`` placed at ``min_count``.

    Despite the function's name, the element created is a vertical line.
    Extra keyword arguments are ignored.
    """
    threshold_marker = hv.VLine(min_count)
    return threshold_marker
34 | 
def tiles_fn(alpha, **kwargs):
    """Return the module-level ``tiles`` element with its alpha style set.

    Extra keyword arguments are ignored.
    """
    tile_style = dict(alpha=alpha)
    return tiles.opts(style=tile_style)
36 | 
# The explorer instance is the stream shared by every dynamic piece below.
explorer = OSMExplorer(name="OpenStreetMap GPS Explorer")

# Map tiles, rebuilt by tiles_fn from the explorer's parameters.
tile = hv.DynamicMap(tiles_fn, streams=[explorer])
# Aggregate the points, apply the min_count filter, then shade the result.
agg = aggregate(hv.Points(df))
filtered = hv.util.Dynamic(agg, operation=filter_count, streams=[explorer])
shaded = shade(filtered, streams=[explorer])
# Vertical marker at min_count, overlaid on the histogram below.
hline = hv.DynamicMap(hline_fn, streams=[explorer])
# `*` binds tighter than `<<`, so this is (tile * shaded) << (histogram * hline).
# The histogram is computed from the unfiltered aggregate `agg`.
explorer.output = (tile * shaded) << histogram(agg, log=True) * hline

# Build the parambokeh widgets in 'server' mode; widget changes are forwarded
# to explorer.event through the callback argument.
doc = parambokeh.Widgets(explorer, view_position='right', callback=explorer.event, mode='server')
47 | 


--------------------------------------------------------------------------------
/download_sample_data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | from __future__ import print_function, absolute_import, division
  4 | 
  5 | # -*- coding: utf-8 -*-
  6 | """
  7 | Copyright (c) 2011, Kenneth Reitz <me@kennethreitz.com>
  8 | 
  9 | Permission to use, copy, modify, and/or distribute this software for any
 10 | purpose with or without fee is hereby granted, provided that the above
 11 | copyright notice and this permission notice appear in all copies.
 12 | 
 13 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 14 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 15 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 16 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 17 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 18 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 19 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 20 | 
 21 | clint.textui.progress
 22 | ~~~~~~~~~~~~~~~~~
 23 | 
 24 | This module provides the progressbar functionality.
 25 | 
 26 | """
 27 | from collections import OrderedDict
 28 | from os import path
 29 | import glob
 30 | import os
 31 | import subprocess
 32 | import sys
 33 | import tarfile
 34 | import time
 35 | import zipfile
 36 | 
 37 | import yaml
 38 | try:
 39 |     import requests
 40 | except ImportError:
 41 |     print('this download script requires the requests module: conda install requests')
 42 |     sys.exit(1)
 43 | 
# Stream the progress bar is written to.
STREAM = sys.stderr

# Bar line layout: label, filled segment, empty segment, progress, expected
# size, time string. The trailing '\r' returns to the start of the line so the
# next write overwrites the previous one.
BAR_TEMPLATE = '%s[%s%s] %i/%i - %s\r'
# Not referenced elsewhere in this file.
MILL_TEMPLATE = '%s %s %i/%i\r'

# Not referenced elsewhere in this file.
DOTS_CHAR = '.'
# Default characters for the completed and remaining parts of the bar.
BAR_FILLED_CHAR = '#'
BAR_EMPTY_CHAR = ' '
# Not referenced elsewhere in this file.
MILL_CHARS = ['|', '/', '-', '\\']

# How long to wait (in seconds, compared against time.time() differences)
# before recalculating the ETA
ETA_INTERVAL = 1
# How many intervals (excluding the current one) to calculate the simple moving
# average
ETA_SMA_WINDOW = 9
 59 | 
 60 | 
 61 | class Bar(object):
 62 |     def __enter__(self):
 63 |         return self
 64 | 
 65 |     def __exit__(self, exc_type, exc_val, exc_tb):
 66 |         self.done()
 67 |         return False  # we're not suppressing exceptions
 68 | 
 69 |     def __init__(self, label='', width=32, hide=None, empty_char=BAR_EMPTY_CHAR,
 70 |                  filled_char=BAR_FILLED_CHAR, expected_size=None, every=1):
 71 |         '''Bar is a class for printing the status of downloads'''
 72 |         self.label = label
 73 |         self.width = width
 74 |         self.hide = hide
 75 |         # Only show bar in terminals by default (better for piping, logging etc.)
 76 |         if hide is None:
 77 |             try:
 78 |                 self.hide = not STREAM.isatty()
 79 |             except AttributeError:  # output does not support isatty()
 80 |                 self.hide = True
 81 |         self.empty_char =    empty_char
 82 |         self.filled_char =   filled_char
 83 |         self.expected_size = expected_size
 84 |         self.every =         every
 85 |         self.start =         time.time()
 86 |         self.ittimes =       []
 87 |         self.eta =           0
 88 |         self.etadelta =      time.time()
 89 |         self.etadisp =       self.format_time(self.eta)
 90 |         self.last_progress = 0
 91 |         if (self.expected_size):
 92 |             self.show(0)
 93 | 
 94 |     def show(self, progress, count=None):
 95 |         if count is not None:
 96 |             self.expected_size = count
 97 |         if self.expected_size is None:
 98 |             raise Exception("expected_size not initialized")
 99 |         self.last_progress = progress
100 |         if (time.time() - self.etadelta) > ETA_INTERVAL:
101 |             self.etadelta = time.time()
102 |             self.ittimes = \
103 |                 self.ittimes[-ETA_SMA_WINDOW:] + \
104 |                     [-(self.start - time.time()) / (progress+1)]
105 |             self.eta = \
106 |                 sum(self.ittimes) / float(len(self.ittimes)) * \
107 |                 (self.expected_size - progress)
108 |             self.etadisp = self.format_time(self.eta)
109 |         x = int(self.width * progress / self.expected_size)
110 |         if not self.hide:
111 |             if ((progress % self.every) == 0 or      # True every "every" updates
112 |                 (progress == self.expected_size)):   # And when we're done
113 |                 STREAM.write(BAR_TEMPLATE % (
114 |                     self.label, self.filled_char * x,
115 |                     self.empty_char * (self.width - x), progress,
116 |                     self.expected_size, self.etadisp))
117 |                 STREAM.flush()
118 | 
119 |     def done(self):
120 |         self.elapsed = time.time() - self.start
121 |         elapsed_disp = self.format_time(self.elapsed)
122 |         if not self.hide:
123 |             # Print completed bar with elapsed time
124 |             STREAM.write(BAR_TEMPLATE % (
125 |                 self.label, self.filled_char * self.width,
126 |                 self.empty_char * 0, self.last_progress,
127 |                 self.expected_size, elapsed_disp))
128 |             STREAM.write('\n')
129 |             STREAM.flush()
130 | 
131 |     def format_time(self, seconds):
132 |         return time.strftime('%H:%M:%S', time.gmtime(seconds))
133 | 
134 | 
def bar(it, label='', width=32, hide=None, empty_char=BAR_EMPTY_CHAR,
        filled_char=BAR_FILLED_CHAR, expected_size=None, every=1):
    """Progress iterator. Wrap your iterables with it.

    Yields every item of ``it`` and updates a ``Bar`` after each one. When
    ``expected_size`` is None the total is taken from ``len(it)``, so an
    iterable without a length needs ``expected_size``. The remaining
    arguments are forwarded to ``Bar``.
    """

    count = len(it) if expected_size is None else expected_size

    # Forward the caller's characters; previously the module defaults were
    # always passed, so empty_char/filled_char had no effect.
    with Bar(label=label, width=width, hide=hide, empty_char=empty_char,
             filled_char=filled_char, expected_size=count, every=every) \
            as progress_bar:
        for done_count, item in enumerate(it, 1):
            yield item
            progress_bar.show(done_count)
147 | 
148 | 
def ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict):
    '''Load YAML from ``stream``, building mappings with ``object_pairs_hook``.

    A throwaway subclass of ``Loader`` is created so that the mapping
    constructor is registered on the subclass rather than on ``Loader``.

    NOTE(review): the default ``Loader`` is ``yaml.Loader``, which is not the
    safe loader; only feed this trusted YAML, or pass a safe loader class.
    '''
    class OrderedLoader(Loader):
        pass

    def _build_mapping(loader, node):
        loader.flatten_mapping(node)
        pairs = loader.construct_pairs(node)
        return object_pairs_hook(pairs)

    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    OrderedLoader.add_constructor(mapping_tag, _build_mapping)
    return yaml.load(stream, OrderedLoader)
159 | 
160 | 
class DirectoryContext(object):
    """
    Context Manager for changing directories

    On entry the process changes into ``path``; on exit (normal or via an
    exception) it changes back to the directory that was current when the
    block was entered. ``with DirectoryContext(p) as ctx`` binds the
    context object itself.
    """
    def __init__(self, path):
        self.old_dir = os.getcwd()
        self.new_dir = path

    def __enter__(self):
        # Re-read the working directory here: it may have changed between
        # construction and entry, and exit must undo this chdir only.
        self.old_dir = os.getcwd()
        os.chdir(self.new_dir)
        return self

    def __exit__(self, *args):
        os.chdir(self.old_dir)
174 | 
175 | 
def _url_to_binary_write(url, output_path, title):
    '''Download ``url`` to ``output_path`` in binary mode.

    Prints ``title`` first, then streams the response body to disk in
    1 KiB chunks. A progress bar is shown when the server reports a
    content length; without one the download proceeds with no bar.

    Raises ``requests.HTTPError`` for a 4xx/5xx response (nothing is written
    in that case). If anything fails while writing, the partial file is
    removed and the error is re-raised.
    '''
    print('Downloading {0}'.format(title))
    resp = requests.get(url, stream=True)
    try:
        # Fail before touching the disk rather than saving an error page as
        # if it were the archive.
        resp.raise_for_status()
        chunks = resp.iter_content(chunk_size=1024)
        content_length = resp.headers.get('content-length')
        if content_length is not None:
            # The header is optional; without it there is no total to show.
            chunks = bar(chunks,
                         expected_size=(int(content_length) / 1024) + 1,
                         every=1000)
        try:
            with open(output_path, 'wb') as f:
                for chunk in chunks:
                    if chunk:
                        f.write(chunk)
        except BaseException:
            # Don't leave a half-written zip file, even on Ctrl-C.
            if path.exists(output_path):
                os.remove(output_path)
            raise
    finally:
        resp.close()
194 | 
195 | 
196 | def _extract_downloaded_archive(output_path):
197 |     '''Extract a local archive, e.g. zip or tar, then
198 |     delete the archive'''
199 |     if output_path.endswith("tar.gz"):
200 |         with tarfile.open(output_path, "r:gz") as tar:
201 |             tar.extractall()
202 |         os.remove(output_path)
203 |     elif output_path.endswith("tar"):
204 |         with tarfile.open(output_path, "r:") as tar:
205 |             tar.extractall()
206 |         os.remove(output_path)
207 |     elif output_path.endswith("tar.bz2"):
208 |         with tarfile.open(output_path, "r:bz2") as tar:
209 |             tar.extractall()
210 |         os.remove(output_path)
211 |     elif output_path.endswith("zip"):
212 |         with zipfile.ZipFile(output_path, 'r') as zipf:
213 |             zipf.extractall()
214 |         os.remove(output_path)
215 | 
216 | 
def _is_unpacked(pattern):
    '''Return True if `pattern` (a path, optionally ending in '*') exists on disk.'''
    return bool(glob.glob(pattern)) or os.path.exists(pattern.replace('*', ''))


def _process_dataset(dataset, output_dir, here):
    '''Process each download spec in datasets.yml

    Typically each dataset list entry in the yml has
    "files" and "url" and "title" keys/values to show
    local files that must be present / extracted from
    a decompression of contents downloaded from the url.

    If a url endswith '/', then all files given
    are assumed to be added to the url pattern at the
    end

    Work happens inside ``output_dir``, which is created if missing.
    Nothing is downloaded when every listed file already exists there.
    ``here`` is accepted for compatibility with existing callers; downloads
    are placed in ``output_dir``.
    '''
    files = dataset.get('files', [])
    # A single file may be given as a bare string; without this it would be
    # iterated character by character below.
    if not isinstance(files, (tuple, list)):
        files = [files]

    if not path.exists(output_dir):
        os.makedirs(output_dir)

    with DirectoryContext(output_dir):
        if all(path.exists(f) for f in files):
            print('Skipping {0}'.format(dataset['title']))
            return

        url = dataset['url']
        title_fmt = dataset['title'] + ' {} of {}'
        # Each job is (url, local download path, patterns expected afterwards).
        if url.endswith('/'):
            jobs = []
            for fname in files:
                output_path = os.path.join(output_dir, fname)
                # Drop the extension ('.x.gz' counts as one) so a file with
                # the same name but a different extension also matches.
                n_ext = 2 if output_path.endswith('gz') else 1
                stem = '.'.join(output_path.split('.')[:-n_ext])
                jobs.append((url + fname, output_path, [stem + '*']))
        else:
            # One archive provides all listed files, so all of them must be
            # present before the download can be skipped.
            jobs = [(url, path.split(url)[1], list(files))]

        for idx, (job_url, output_path, expected) in enumerate(jobs):
            running_title = title_fmt.format(idx + 1, len(jobs))
            if all(_is_unpacked(pattern) for pattern in expected):
                # Skip a file if a similar one is downloaded:
                # i.e. one that has same name but dif't extension
                print('Skipping {0}'.format(running_title))
                continue
            _url_to_binary_write(job_url, output_path, running_title)
            _extract_downloaded_archive(output_path)
267 | 
268 | 
def main():
    '''Download each dataset specified by datasets.yml in this directory

    Every top-level key of datasets.yml names a directory (relative to this
    script) and maps to a list of dataset specs that are downloaded into it.
    '''
    here = path.dirname(path.abspath(__file__))
    info_file = path.join(here, 'datasets.yml')
    # Read the spec up front so the file is not held open during downloads.
    with open(info_file) as f:
        info = ordered_load(f.read()) or {}  # an empty file loads as None
    for topic, downloads in info.items():
        output_dir = path.join(here, topic)
        for d in downloads:
            _process_dataset(d, output_dir, here)

if __name__ == '__main__':
    main()
282 | 


--------------------------------------------------------------------------------
/SciPy2017-Slides.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "extensions": {
  7 |      "jupyter_dashboards": {
  8 |       "version": 1,
  9 |       "views": {
 10 |        "grid_default": {
 11 |         "col": 0,
 12 |         "height": 4,
 13 |         "hidden": false,
 14 |         "row": 0,
 15 |         "width": 12
 16 |        },
 17 |        "report_default": {}
 18 |       }
 19 |      }
 20 |     },
 21 |     "slideshow": {
 22 |      "slide_type": "slide"
 23 |     }
 24 |    },
 25 |    "source": [
 26 |     "<H1 align=\"center\">Dashboards Visualizing<br><br>Hundreds of Millions of Datapoints<br><br>in 30 Lines of Python</H1>\n",
 27 |     "<br>\n",
 28 |     "<H3 align=\"center\">Philipp Rudiger, James A. Bednar, and Jean-Luc Stevens<br><br>Continuum Analytics</H3>\n",
 29 |     "\n",
 30 |     "<center>\n",
 31 |     "<img src=\"./combined.png\" width='550px'></img>\n",
 32 |     "</center>"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {
 38 |     "extensions": {
 39 |      "jupyter_dashboards": {
 40 |       "version": 1,
 41 |       "views": {
 42 |        "grid_default": {
 43 |         "col": 0,
 44 |         "height": 5,
 45 |         "hidden": false,
 46 |         "row": 4,
 47 |         "width": 12
 48 |        },
 49 |        "report_default": {}
 50 |       }
 51 |      }
 52 |     },
 53 |     "slideshow": {
 54 |      "slide_type": "slide"
 55 |     }
 56 |    },
 57 |    "source": [
 58 |     "**Let's say you want to:** \n",
 59 |     "\n",
 60 |     "* Make it easy to explore some dataset with custom widgets.\n",
 61 |     "\n",
 62 |     "**But then you have to:**\n",
 63 |     "* Spend days of effort to get something working in a notebook\n",
 64 |     "* Build an opaque mishmash of domain-specific, widget, and plotting code\n",
 65 |     "* Start over from scratch whenever you need to:\n",
 66 |     "    - Deploy in a standalone server\n",
 67 |     "    - Visualize different aspects of your data\n",
 68 |     "    - Scale up to larger (>100K) datasets"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "metadata": {
 74 |     "slideshow": {
 75 |      "slide_type": "slide"
 76 |     }
 77 |    },
 78 |    "source": [
 79 |     "Here we'll show a simple, flexible, powerful, step-by-step way to solve problems like this, by combining open-source libraries:\n",
 80 |     "\n",
 81 |     "* [**Dask**](http://dask.pydata.org): Efficient out-of-core/distributed computation on massive datasets\n",
 82 |     "* [**Fastparquet**](https://fastparquet.readthedocs.io): Efficient storage for columnar data\n",
 83 |     "* [**HoloViews**](http://holoviews.org): Declarative objects for instantly visualizable data\n",
 84 |     "* [**GeoViews**](http://geo.holoviews.org): Easy mix-and-matching of geographic data with custom plots\n",
 85 |     "* [**Bokeh**](http://bokeh.pydata.org): Interactive plotting in web browsers, controlled by Python\n",
 86 |     "* [**Numba**](http://numba.pydata.org): Accelerated machine code for inner loops\n",
 87 |     "* [**Datashader**](https://github.com/bokeh/datashader): Rasterizing huge datasets quickly using Dask and Numba\n",
 88 |     "* [**Param**](https://github.com/ioam/param): Declaring user-relevant parameters in domain-specific code"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {
 94 |     "slideshow": {
 95 |      "slide_type": "slide"
 96 |     }
 97 |    },
 98 |    "source": [
 99 |     "We'll be working through this process:\n",
100 |     "-  Step 1: Get some data\n",
101 |     "-  Step 2: Prototype a plot in a notebook\n",
102 |     "-  Step 3: Declare your Parameters\n",
103 |     "-  Step 4: Get a widget-based UI for free\n",
104 |     "-  Step 5: Link your Parameters to your data\n",
105 |     "-  Step 6: Widgets now control your interactive plots\n",
106 |     "-  Step 7: Deploy your dashboard"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {
113 |     "slideshow": {
114 |      "slide_type": "skip"
115 |     }
116 |    },
117 |    "outputs": [],
118 |    "source": [
119 |     "import holoviews as hv\n",
120 |     "import geoviews as gv\n",
121 |     "import param, paramnb, parambokeh\n",
122 |     "import pandas as pd\n",
123 |     "import dask.dataframe as dd\n",
124 |     "\n",
125 |     "from colorcet import cm\n",
126 |     "from bokeh.models import WMTSTileSource\n",
127 |     "from holoviews.operation.datashader import datashade\n",
128 |     "from holoviews.streams import RangeXY, PlotSize"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {
134 |     "slideshow": {
135 |      "slide_type": "slide"
136 |     }
137 |    },
138 |    "source": [
139 |     "## Step 1: Get some data\n",
140 |     "\n",
141 |     "* Here we'll use a subset of the often-studied NYC Taxi dataset\n",
142 |     "* About 12 million points of GPS locations from taxis\n",
143 |     "* Stored in the efficient Parquet format for easy access\n",
144 |     "* Loaded into a Dask dataframe for multi-core<br>(and if needed, out of core or distributed) computation"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "metadata": {
151 |     "slideshow": {
152 |      "slide_type": "fragment"
153 |     }
154 |    },
155 |    "outputs": [],
156 |    "source": [
157 |     "df = dd.read_parquet('./data/nyc_taxi.parq/').persist()\n",
158 |     "print(len(df))\n",
159 |     "df.head(2)"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {
165 |     "slideshow": {
166 |      "slide_type": "slide"
167 |     }
168 |    },
169 |    "source": [
170 |     "## Step 2: Prototype a plot in a notebook\n",
171 |     "\n",
172 |     "* A text-based representation isn't very useful for big datasets like this, so we need to build a plot\n",
173 |     "* But we don't want to start a software project, so we use HoloViews:\n",
174 |     "    - Simple, declarative way to annotate your data for visualization\n",
175 |     "    - Large library of Elements with associated visual representation\n",
176 |     "    - Elements combine (lay out or overlay) easily\n",
177 |     "* And we'll want live interactivity, so we'll use a Bokeh backend\n",
178 |     "* But our data is much too big for Bokeh directly, so we'll use Datashader to rasterize it first"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {
185 |     "slideshow": {
186 |      "slide_type": "slide"
187 |     }
188 |    },
189 |    "outputs": [],
190 |    "source": [
191 |     "hv.extension('bokeh')\n",
192 |     "points = hv.Points(df, kdims=['pickup_x', 'pickup_y'], vdims=['passenger_count'])\n",
193 |     "options = dict(width=800,height=475,xaxis=None,yaxis=None,bgcolor='black',show_grid=False)\n",
194 |     "taxi_trips = datashade(points, x_sampling=1, y_sampling=1, cmap=cm['fire']).opts(plot=options)\n",
195 |     "taxi_trips"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "markdown",
200 |    "metadata": {
201 |     "slideshow": {
202 |      "slide_type": "slide"
203 |     }
204 |    },
205 |    "source": [
206 |     "Let's put the data in context, overlaying it on a map:"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": null,
212 |    "metadata": {
213 |     "slideshow": {
214 |      "slide_type": "fragment"
215 |     }
216 |    },
217 |    "outputs": [],
218 |    "source": [
219 |     "tiles = gv.WMTS(WMTSTileSource(url='https://server.arcgisonline.com/ArcGIS/rest/services/'\n",
220 |     "                                   'World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg'))\n",
221 |     "tiles * taxi_trips"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "markdown",
226 |    "metadata": {
227 |     "slideshow": {
228 |      "slide_type": "slide"
229 |     }
230 |    },
231 |    "source": [
232 |     "## Step 3: Declare your Parameters\n",
233 |     "\n",
234 |     "Now that we've prototyped a nice plot, we want it to be widely sharable, with controls for safe and easy exploration. \n",
235 |     "\n",
236 |     "So the next step: declare what the intended user can change, with:\n",
237 |     "\n",
238 |     "  - type and range checking\n",
239 |     "  - documentation strings\n",
240 |     "  - default values\n",
241 |     "  \n",
242 |     "The Param library allows declaring Python attributes having these features<br>(and more, such as dynamic values and inheritance)."
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "markdown",
247 |    "metadata": {
248 |     "slideshow": {
249 |      "slide_type": "slide"
250 |     }
251 |    },
252 |    "source": [
253 |     "## NYC Taxi Parameters"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": null,
259 |    "metadata": {
260 |     "slideshow": {
261 |      "slide_type": "fragment"
262 |     }
263 |    },
264 |    "outputs": [],
265 |    "source": [
266 |     "class NYCTaxiExplorer(hv.streams.Stream):\n",
267 |     "    alpha       = param.Magnitude(default=0.75, doc=\"Alpha value for the map opacity\")\n",
268 |     "    plot        = param.ObjectSelector(default=\"pickup\", objects=[\"pickup\",\"dropoff\"])\n",
269 |     "    colormap    = param.ObjectSelector(default=cm[\"fire\"], objects=cm.values())\n",
270 |     "    passengers  = param.Range(default=(0, 10), bounds=(0, 10), doc=\"\"\"\n",
271 |     "        Filter for taxi trips by number of passengers\"\"\")"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "markdown",
276 |    "metadata": {
277 |     "slideshow": {
278 |      "slide_type": "fragment"
279 |     }
280 |    },
281 |    "source": [
282 |     "Each Parameter is a normal Python attribute, but with special checks and functions run automatically when getting or setting.\n",
283 |     "\n",
284 |     "Parameters capture your goals and your knowledge about your domain, declaratively."
285 |    ]
286 |   },
287 |   {
288 |    "cell_type": "markdown",
289 |    "metadata": {
290 |     "slideshow": {
291 |      "slide_type": "slide"
292 |     }
293 |    },
294 |    "source": [
295 |     "### Class level parameters"
296 |    ]
297 |   },
298 |   {
299 |    "cell_type": "code",
300 |    "execution_count": null,
301 |    "metadata": {
302 |     "slideshow": {
303 |      "slide_type": "fragment"
304 |     }
305 |    },
306 |    "outputs": [],
307 |    "source": [
308 |     "NYCTaxiExplorer.alpha"
309 |    ]
310 |   },
311 |   {
312 |    "cell_type": "code",
313 |    "execution_count": null,
314 |    "metadata": {
315 |     "slideshow": {
316 |      "slide_type": "fragment"
317 |     }
318 |    },
319 |    "outputs": [],
320 |    "source": [
321 |     "NYCTaxiExplorer.alpha = 0.5\n",
322 |     "NYCTaxiExplorer.alpha"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "markdown",
327 |    "metadata": {
328 |     "slideshow": {
329 |      "slide_type": "slide"
330 |     }
331 |    },
332 |    "source": [
333 |     "### Validation"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "code",
338 |    "execution_count": null,
339 |    "metadata": {
340 |     "slideshow": {
341 |      "slide_type": "fragment"
342 |     }
343 |    },
344 |    "outputs": [],
345 |    "source": [
346 |     "try:\n",
347 |     "   NYCTaxiExplorer.alpha = '0'\n",
348 |     "except Exception as e:\n",
349 |     "    print(e)"
350 |    ]
351 |   },
352 |   {
353 |    "cell_type": "markdown",
354 |    "metadata": {
355 |     "slideshow": {
356 |      "slide_type": "slide"
357 |     }
358 |    },
359 |    "source": [
360 |     "### Instance parameters"
361 |    ]
362 |   },
363 |   {
364 |    "cell_type": "code",
365 |    "execution_count": null,
366 |    "metadata": {
367 |     "slideshow": {
368 |      "slide_type": "fragment"
369 |     }
370 |    },
371 |    "outputs": [],
372 |    "source": [
373 |     "explorer = NYCTaxiExplorer(alpha=0.6)\n",
374 |     "explorer.alpha"
375 |    ]
376 |   },
377 |   {
378 |    "cell_type": "code",
379 |    "execution_count": null,
380 |    "metadata": {
381 |     "slideshow": {
382 |      "slide_type": "fragment"
383 |     }
384 |    },
385 |    "outputs": [],
386 |    "source": [
387 |     "NYCTaxiExplorer.alpha"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "markdown",
392 |    "metadata": {
393 |     "slideshow": {
394 |      "slide_type": "slide"
395 |     }
396 |    },
397 |    "source": [
398 |     "## Step 4: Get a widget-based UI for free\n",
399 |     "\n",
400 |     "* Parameters are purely declarative, but contain all the information needed to build interactive widgets\n",
401 |     "* ParamNB generates UIs from Parameters, using ipywidgets"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "code",
406 |    "execution_count": null,
407 |    "metadata": {
408 |     "slideshow": {
409 |      "slide_type": "fragment"
410 |     }
411 |    },
412 |    "outputs": [],
413 |    "source": [
414 |     "paramnb.Widgets(NYCTaxiExplorer)"
415 |    ]
416 |   },
417 |   {
418 |    "cell_type": "code",
419 |    "execution_count": null,
420 |    "metadata": {
421 |     "slideshow": {
422 |      "slide_type": "fragment"
423 |     }
424 |    },
425 |    "outputs": [],
426 |    "source": [
427 |     "NYCTaxiExplorer.passengers"
428 |    ]
429 |   },
430 |   {
431 |    "cell_type": "markdown",
432 |    "metadata": {
433 |     "slideshow": {
434 |      "slide_type": "fragment"
435 |     }
436 |    },
437 |    "source": [
438 |     "* ipywidgets work with Jupyter Dashboards Server for deployment"
439 |    ]
440 |   },
441 |   {
442 |    "cell_type": "markdown",
443 |    "metadata": {
444 |     "slideshow": {
445 |      "slide_type": "slide"
446 |     }
447 |    },
448 |    "source": [
449 |     "* Declaration of parameters is independent of the UI library used\n",
450 |     "* ParamBokeh generates UIs from Parameters, using Bokeh widgets"
451 |    ]
452 |   },
453 |   {
454 |    "cell_type": "code",
455 |    "execution_count": null,
456 |    "metadata": {
457 |     "slideshow": {
458 |      "slide_type": "fragment"
459 |     }
460 |    },
461 |    "outputs": [],
462 |    "source": [
463 |     "parambokeh.Widgets(NYCTaxiExplorer)"
464 |    ]
465 |   },
466 |   {
467 |    "cell_type": "markdown",
468 |    "metadata": {
469 |     "slideshow": {
470 |      "slide_type": "fragment"
471 |     }
472 |    },
473 |    "source": [
474 |     "* Bokeh widgets work with Bokeh Server for deployment"
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "markdown",
479 |    "metadata": {
480 |     "slideshow": {
481 |      "slide_type": "slide"
482 |     }
483 |    },
484 |    "source": [
485 |     "## Step 5: Link your Parameters to your data\n",
486 |     "\n",
487 |     "Because the Parameters defined earlier are *about* a plot, it makes sense to combine the parameter and plotting declarations into a single object:"
488 |    ]
489 |   },
490 |   {
491 |    "cell_type": "code",
492 |    "execution_count": null,
493 |    "metadata": {
494 |     "collapsed": true,
495 |     "extensions": {
496 |      "jupyter_dashboards": {
497 |       "version": 1,
498 |       "views": {
499 |        "grid_default": {
500 |         "hidden": true
501 |        },
502 |        "report_default": {}
503 |       }
504 |      }
505 |     },
506 |     "slideshow": {
507 |      "slide_type": "fragment"
508 |     }
509 |    },
510 |    "outputs": [],
511 |    "source": [
512 |     "class NYCTaxiExplorer(hv.streams.Stream):\n",
513 |     "    alpha       = param.Magnitude(default=0.75, doc=\"Alpha value for the map opacity\")\n",
514 |     "    colormap    = param.ObjectSelector(default=cm[\"fire\"], objects=cm.values())\n",
515 |     "    plot        = param.ObjectSelector(default=\"pickup\",   objects=[\"pickup\",\"dropoff\"])\n",
516 |     "    passengers  = param.Range(default=(1, 9), bounds=(1, 9))\n",
517 |     "\n",
518 |     "    def make_view(self, x_range=None, y_range=None, **kwargs):\n",
519 |     "        map_tiles = tiles.opts(style=dict(alpha=self.alpha), plot=options) \n",
520 |     "\n",
521 |     "        points = hv.Points(df, kdims=[self.plot+'_x', self.plot+'_y'], vdims=['passenger_count'])\n",
522 |     "        selected = points.select(passenger_count=self.passengers)\n",
523 |     "        taxi_trips = datashade(selected, x_sampling=1, y_sampling=1, cmap=self.colormap,\n",
524 |     "                               dynamic=False, x_range=x_range, y_range=y_range,\n",
525 |     "                               width=800, height=475)\n",
526 |     "        return map_tiles * taxi_trips"
527 |    ]
528 |   },
529 |   {
530 |    "cell_type": "markdown",
531 |    "metadata": {
532 |     "slideshow": {
533 |      "slide_type": "slide"
534 |     }
535 |    },
536 |    "source": [
537 |     "Note that the `NYCTaxiExplorer` class is entirely declarative (no widgets), and can be used \"by hand\" to provide range-checked and type-checked plotting:"
538 |    ]
539 |   },
540 |   {
541 |    "cell_type": "code",
542 |    "execution_count": null,
543 |    "metadata": {
544 |     "extensions": {
545 |      "jupyter_dashboards": {
546 |       "version": 1,
547 |       "views": {
548 |        "grid_default": {
549 |         "hidden": true
550 |        },
551 |        "report_default": {
552 |         "hidden": true
553 |        }
554 |       }
555 |      }
556 |     },
557 |     "slideshow": {
558 |      "slide_type": "fragment"
559 |     }
560 |    },
561 |    "outputs": [],
562 |    "source": [
563 |     "explorer = NYCTaxiExplorer(alpha=0.2, plot=\"dropoff\")\n",
564 |     "explorer.make_view()"
565 |    ]
566 |   },
567 |   {
568 |    "cell_type": "markdown",
569 |    "metadata": {
570 |     "slideshow": {
571 |      "slide_type": "slide"
572 |     }
573 |    },
574 |    "source": [
575 |     "## Step 6: Widgets now control your interactive plots"
576 |    ]
577 |   },
578 |   {
579 |    "cell_type": "code",
580 |    "execution_count": null,
581 |    "metadata": {
582 |     "slideshow": {
583 |      "slide_type": "slide"
584 |     }
585 |    },
586 |    "outputs": [],
587 |    "source": [
588 |     "explorer = NYCTaxiExplorer()\n",
589 |     "paramnb.Widgets(explorer, callback=explorer.event)\n",
590 |     "hv.DynamicMap(explorer.make_view, streams=[explorer, RangeXY()])"
591 |    ]
592 |   },
593 |   {
594 |    "cell_type": "markdown",
595 |    "metadata": {
596 |     "slideshow": {
597 |      "slide_type": "slide"
598 |     }
599 |    },
600 |    "source": [
601 |     "## Step 7: Deploy your dashboard\n",
602 |     "\n",
603 |     "If you want to share your work with people who don't use Python, you'll now want to run a server with this same code.\n",
604 |     "\n",
605 |     "* If you used **ParamBokeh**, deploy with **Bokeh Server**:\n",
606 |     "    - Write the above code to a file ``nyc_parambokeh.py``,<br> saving the `Widgets` object as the attribute `doc`\n",
607 |     "    - ``bokeh serve nyc_parambokeh.py``"
608 |    ]
609 |   },
610 |   {
611 |    "cell_type": "code",
612 |    "execution_count": null,
613 |    "metadata": {
614 |     "collapsed": true,
615 |     "slideshow": {
616 |      "slide_type": "subslide"
617 |     }
618 |    },
619 |    "outputs": [],
620 |    "source": [
621 |     "explorer = NYCTaxiExplorer()\n",
622 |     "parambokeh.Widgets(explorer, callback=explorer.event)\n",
623 |     "hv.DynamicMap(explorer.make_view, streams=[explorer, RangeXY()])"
624 |    ]
625 |   },
626 |   {
627 |    "cell_type": "markdown",
628 |    "metadata": {
629 |     "slideshow": {
630 |      "slide_type": "slide"
631 |     }
632 |    },
633 |    "source": [
634 |     "* If you used **ParamNB**, deploy with **Jupyter Dashboards Server**:\n",
635 |     "    - Use Jupyter Dashboards Extension to select cells from the notebook to display\n",
636 |     "    - Use preview mode to see layout\n",
637 |     "    - Use Jupyter Dashboards Server to deploy\n",
638 |     "    - Note various caveats below"
639 |    ]
640 |   },
641 |   {
642 |    "cell_type": "markdown",
643 |    "metadata": {
644 |     "slideshow": {
645 |      "slide_type": "slide"
646 |     }
647 |    },
648 |    "source": [
649 |     "# Branching out\n",
650 |     "\n",
651 |     "Much more ambitious apps are possible with very little additional code or effort:\n",
652 |     "\n",
653 |     "* Adding additional linked or separate subplots of any type; see [holoviews.org](http://holoviews.org)\n",
654 |     "* Declaring code that runs when clicking or selecting *within* the Bokeh plot; see \"streams\" at [holoviews.org](http://holoviews.org)\n",
655 |     "* Using multiple sets of widgets of many different types; see [ParamNB](https://github.com/ioam/paramnb) and [ParamBokeh](https://github.com/ioam/parambokeh)\n",
656 |     "* Using datasets too big for any one machine, with [Dask.Distributed](https://distributed.readthedocs.io)"
657 |    ]
658 |   },
659 |   {
660 |    "cell_type": "markdown",
661 |    "metadata": {
662 |     "slideshow": {
663 |      "slide_type": "slide"
664 |     }
665 |    },
666 |    "source": [
667 |     "# Future work\n",
668 |     "\n",
669 |     "* Jupyter Dashboards Server is not currently maintained and requires an older ipywidgets version\n",
670 |     "* Bokeh Server is mature and well supported, but does not currently support drag-and-drop layout like Jupyter Dashboards does\n",
671 |     "* ParamBokeh still needs some polishing and work to make it ready for widespread use; ParamNB is more mature so far\n",
672 |     "* Both ParamNB and ParamBokeh should provide more flexible widget layouts\n",
673 |     "* Let us know what you would like to see out of these tools!"
674 |    ]
675 |   }
676 |  ],
677 |  "metadata": {
678 |   "celltoolbar": "Slideshow",
679 |   "extensions": {
680 |    "jupyter_dashboards": {
681 |     "activeView": "grid_default",
682 |     "version": 1,
683 |     "views": {
684 |      "grid_default": {
685 |       "cellMargin": 10,
686 |       "defaultCellHeight": 20,
687 |       "maxColumns": 12,
688 |       "name": "grid",
689 |       "type": "grid"
690 |      },
691 |      "report_default": {
692 |       "name": "report",
693 |       "type": "report"
694 |      }
695 |     }
696 |    }
697 |   },
698 |   "kernelspec": {
699 |    "display_name": "Python 3",
700 |    "language": "python",
701 |    "name": "python3"
702 |   },
703 |   "language_info": {
704 |    "codemirror_mode": {
705 |     "name": "ipython",
706 |     "version": 3
707 |    },
708 |    "file_extension": ".py",
709 |    "mimetype": "text/x-python",
710 |    "name": "python",
711 |    "nbconvert_exporter": "python",
712 |    "pygments_lexer": "ipython3",
713 |    "version": "3.6.1"
714 |   }
715 |  },
716 |  "nbformat": 4,
717 |  "nbformat_minor": 2
718 | }
719 | 


--------------------------------------------------------------------------------