├── combined.png ├── datasets.yml ├── README.md ├── environment.yml ├── nyc_parambokeh.py ├── osm_parambokeh.py ├── download_sample_data.py └── SciPy2017-Slides.ipynb /combined.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philippjfr/scipy-talk-2017/HEAD/combined.png -------------------------------------------------------------------------------- /datasets.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | data: 4 | 5 | - url: https://s3-eu-west-1.amazonaws.com/assets.holoviews.org/nyc_taxi.parq.zip 6 | title: 'NYC Taxi Data' 7 | files: 8 | - nyc_taxi.parq 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | To get started: 2 | 3 | ``` 4 | git clone https://github.com/philippjfr/scipy-talk-2017.git 5 | cd scipy-talk-2017 6 | conda env create -f environment.yml 7 | source activate scipy2017 8 | python download_sample_data.py 9 | ``` 10 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: scipy2017 2 | channels: 3 | - ioam 4 | - bokeh 5 | - conda-forge 6 | - damianavila82 7 | - defaults 8 | dependencies: 9 | - python=3 10 | - notebook 11 | - holoviews 12 | - geoviews 13 | - pandas 14 | - xarray 15 | - datashader 16 | - paramnb 17 | - rise 18 | - jupyter_dashboards 19 | - fastparquet 20 | - python-snappy 21 | - parambokeh 22 | -------------------------------------------------------------------------------- /nyc_parambokeh.py: -------------------------------------------------------------------------------- 1 | import holoviews as hv, geoviews as gv, param, parambokeh, dask.dataframe as dd 2 | 3 | from colorcet import cm 4 | from bokeh.models import WMTSTileSource 5 | from 
class NYCTaxiExplorer(hv.streams.Stream):
    """Stream whose parameters drive the NYC taxi dashboard.

    The parameters declared here are passed to ``make_view`` as keyword
    arguments, together with the values supplied by the other streams
    attached to the DynamicMap that wraps ``make_view``.
    """

    alpha = param.Magnitude(default=0.75, doc="Alpha value for the map opacity")
    # Only colormaps whose name contains no underscore are offered.
    colormap = param.ObjectSelector(
        default=cm["fire"],
        objects=[cm[name] for name in cm if '_' not in name])
    plot = param.ObjectSelector(default="pickup", objects=["pickup", "dropoff"])
    passengers = param.Range(default=passenger_counts, bounds=passenger_counts)
    # Placeholder; the script assigns the DynamicMap here after instantiation.
    output = parambokeh.view.Plot()

    def make_view(self, x_range, y_range, alpha, colormap, plot, passengers, **kwargs):
        """Return the map tiles overlaid with the datashaded taxi trips.

        ``plot`` selects the ``<plot>_x`` / ``<plot>_y`` columns of ``df``;
        ``passengers`` filters on ``passenger_count`` unless it equals the
        full ``passenger_counts`` range.
        """
        key_dims = ['{0}_x'.format(plot), '{0}_y'.format(plot)]
        trips = hv.Points(df, kdims=key_dims, vdims=['passenger_count'])
        if passengers != passenger_counts:
            trips = trips.select(passenger_count=passengers)
        shaded = datashade(trips, x_sampling=1, y_sampling=1, cmap=colormap,
                           dynamic=False, x_range=x_range, y_range=y_range)
        background = tiles(style=dict(alpha=alpha), plot=tile_options)
        return background * shaded
dask.dataframe as dd 2 | 3 | from copy import deepcopy 4 | import numpy as np 5 | from colorcet import cm 6 | from bokeh.models import WMTSTileSource 7 | from holoviews.operation.datashader import aggregate, shade 8 | from holoviews.operation import histogram 9 | from holoviews.streams import RangeXY, PlotSize 10 | 11 | hv.extension('bokeh') 12 | 13 | df = dd.read_parquet('./data/osm-1billion.snappy.parq/').persist() 14 | 15 | url='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg' 16 | tiles = gv.WMTS(WMTSTileSource(url=url)) 17 | 18 | hv.opts("WMTS [width=800 height=475 xaxis=None yaxis=None bgcolor='black']" 19 | "Histogram [logy=True] (fill_color='white') {+framewise} VLine (color='black')") 20 | 21 | class OSMExplorer(hv.streams.Stream): 22 | alpha = param.Magnitude(default=0.75, doc="Alpha value for the map opacity") 23 | cmap = param.ObjectSelector(default=cm["fire"], objects=cm.values()) 24 | min_count = param.Number(default=0, bounds=(0, 100)) 25 | output = parambokeh.view.Plot() 26 | 27 | def filter_count(agg, min_count, **kwargs): 28 | if min_count: 29 | agg = deepcopy(agg) 30 | agg.data.Count.data[agg.data.Count.data 8 | 9 | Permission to use, copy, modify, and/or distribute this software for any 10 | purpose with or without fee is hereby granted, provided that the above 11 | copyright notice and this permission notice appear in all copies. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 14 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 15 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 16 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 17 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 18 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 19 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
# Stream the progress bar is written to.
STREAM = sys.stderr

BAR_TEMPLATE = '%s[%s%s] %i/%i - %s\r'
MILL_TEMPLATE = '%s %s %i/%i\r'

DOTS_CHAR = '.'
BAR_FILLED_CHAR = '#'
BAR_EMPTY_CHAR = ' '
MILL_CHARS = ['|', '/', '-', '\\']

# How long to wait before recalculating the ETA
ETA_INTERVAL = 1
# How many intervals (excluding the current one) to calculate the simple moving
# average
ETA_SMA_WINDOW = 9


class Bar(object):
    """Text progress bar for printing the status of downloads to ``STREAM``.

    Parameters
    ----------
    label : str
        Text printed in front of the bar.
    width : int
        Number of characters between the brackets.
    hide : bool or None
        Suppress all output when true. ``None`` hides the bar unless
        ``STREAM`` reports that it is a terminal.
    empty_char, filled_char : str
        Characters used for the remaining / completed part of the bar.
    expected_size : int or None
        Total number of steps. May also be supplied later through the
        ``count`` argument of ``show``.
    every : int
        Redraw only when ``progress`` is a multiple of this value (and on
        the final step).

    Can be used as a context manager; ``done`` is called on exit.
    """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.done()
        return False  # we're not suppressing exceptions

    def __init__(self, label='', width=32, hide=None, empty_char=BAR_EMPTY_CHAR,
                 filled_char=BAR_FILLED_CHAR, expected_size=None, every=1):
        self.label = label
        self.width = width
        self.hide = hide
        # Only show bar in terminals by default (better for piping, logging etc.)
        if hide is None:
            try:
                self.hide = not STREAM.isatty()
            except AttributeError:  # output does not support isatty()
                self.hide = True
        self.empty_char = empty_char
        self.filled_char = filled_char
        self.expected_size = expected_size
        self.every = every
        self.start = time.time()
        self.ittimes = []
        self.eta = 0
        self.etadelta = time.time()
        self.etadisp = self.format_time(self.eta)
        self.last_progress = 0
        if self.expected_size:
            self.show(0)

    def show(self, progress, count=None):
        """Record ``progress`` and redraw the bar.

        ``count`` replaces ``expected_size`` when given. Raises ``Exception``
        when no expected size is known.
        """
        if count is not None:
            self.expected_size = count
        if self.expected_size is None:
            raise Exception("expected_size not initialized")
        self.last_progress = progress

        now = time.time()
        if (now - self.etadelta) > ETA_INTERVAL:
            self.etadelta = now
            # Moving average of the mean time per step seen so far.
            self.ittimes = (self.ittimes[-ETA_SMA_WINDOW:] +
                            [(now - self.start) / (progress + 1)])
            per_step = sum(self.ittimes) / float(len(self.ittimes))
            # expected_size can be an estimate; never report a negative ETA
            # once progress runs past it.
            self.eta = max(0, per_step * (self.expected_size - progress))
            self.etadisp = self.format_time(self.eta)

        if self.expected_size:
            filled = int(self.width * progress / self.expected_size)
            # Keep the bar inside its brackets when progress overshoots.
            filled = max(0, min(self.width, filled))
        else:
            # Zero steps expected: nothing to wait for, draw a full bar
            # instead of dividing by zero.
            filled = self.width

        if not self.hide:
            if ((progress % self.every) == 0 or      # True every "every" updates
                    (progress == self.expected_size)):   # And when we're done
                STREAM.write(BAR_TEMPLATE % (
                    self.label, self.filled_char * filled,
                    self.empty_char * (self.width - filled), progress,
                    self.expected_size, self.etadisp))
                STREAM.flush()

    def done(self):
        """Store the elapsed time and print the completed bar."""
        self.elapsed = time.time() - self.start
        elapsed_disp = self.format_time(self.elapsed)
        if not self.hide:
            # The size may never have been set (e.g. the bar was closed
            # before the first update); fall back to the last progress value
            # so the %i conversion does not fail on None.
            total = (self.last_progress if self.expected_size is None
                     else self.expected_size)
            # Print completed bar with elapsed time
            STREAM.write(BAR_TEMPLATE % (
                self.label, self.filled_char * self.width,
                self.empty_char * 0, self.last_progress,
                total, elapsed_disp))
            STREAM.write('\n')
            STREAM.flush()

    def format_time(self, seconds):
        """Format a duration in seconds as ``HH:MM:SS``."""
        return time.strftime('%H:%M:%S', time.gmtime(seconds))
def bar(it, label='', width=32, hide=None, empty_char=BAR_EMPTY_CHAR,
        filled_char=BAR_FILLED_CHAR, expected_size=None, every=1):
    """Progress iterator. Wrap your iterables with it.

    Yields every item of ``it`` and updates a ``Bar`` after each one. When
    ``expected_size`` is None the total is taken from ``len(it)``, so
    iterables without a length must pass ``expected_size`` explicitly.
    """
    count = len(it) if expected_size is None else expected_size

    # Forward the caller's characters rather than the module defaults.
    with Bar(label=label, width=width, hide=hide, empty_char=empty_char,
             filled_char=filled_char, expected_size=count, every=every) \
            as progress:
        for step, item in enumerate(it, 1):
            yield item
            progress.show(step)


def ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict):
    """Load YAML from ``stream``, building mappings with ``object_pairs_hook``.

    A private subclass of ``Loader`` is created so that the mapping
    constructor is not registered on the caller's loader class.
    """
    # NOTE(review): the default yaml.Loader can construct arbitrary Python
    # objects from tagged input; pass Loader=yaml.SafeLoader when the stream
    # is not fully trusted.
    class OrderedLoader(Loader):
        pass

    def construct_mapping(loader, node):
        loader.flatten_mapping(node)
        return object_pairs_hook(loader.construct_pairs(node))

    OrderedLoader.add_constructor(
        yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
        construct_mapping)
    return yaml.load(stream, OrderedLoader)


class DirectoryContext(object):
    """
    Context Manager for changing directories

    The directory to return to is the working directory at construction
    time, not at the time the ``with`` block is entered.
    """
    def __init__(self, path):
        self.old_dir = os.getcwd()
        self.new_dir = path

    def __enter__(self):
        os.chdir(self.new_dir)
        # Returned so that ``with DirectoryContext(p) as ctx`` binds the
        # context object instead of None.
        return self

    def __exit__(self, *args):
        os.chdir(self.old_dir)


def _url_to_binary_write(url, output_path, title):
    '''Given a url, output_path and title,
    write the contents of a requests get operation to
    the url in binary mode and print the title of operation

    Raises the ``requests`` HTTP error for a 4xx/5xx response before
    anything is written. If the download fails part-way, ``output_path``
    is removed and the error is re-raised.
    '''
    print('Downloading {0}'.format(title))
    resp = requests.get(url, stream=True)
    try:
        # Fail early instead of saving an error page under the archive name.
        resp.raise_for_status()

        chunks = resp.iter_content(chunk_size=1024)
        content_length = resp.headers.get('content-length')
        if content_length is not None:
            # Integer number of 1 KiB chunks; a float here would leak into
            # the bar's size arithmetic.
            expected_chunks = int(content_length) // 1024 + 1
            chunks = bar(chunks, expected_size=expected_chunks, every=1000)
        # Without a Content-Length header the total is unknown, so the
        # download proceeds without a progress bar.

        try:
            with open(output_path, 'wb') as f:
                for chunk in chunks:
                    if chunk:
                        f.write(chunk)
        except BaseException:
            # Don't leave a half-written zip file (also on Ctrl-C, hence
            # BaseException).
            if path.exists(output_path):
                os.remove(output_path)
            raise
    finally:
        # Release the streamed connection whatever happened.
        resp.close()
half-written zip file 191 | if path.exists(output_path): 192 | os.remove(output_path) 193 | raise 194 | 195 | 196 | def _extract_downloaded_archive(output_path): 197 | '''Extract a local archive, e.g. zip or tar, then 198 | delete the archive''' 199 | if output_path.endswith("tar.gz"): 200 | with tarfile.open(output_path, "r:gz") as tar: 201 | tar.extractall() 202 | os.remove(output_path) 203 | elif output_path.endswith("tar"): 204 | with tarfile.open(output_path, "r:") as tar: 205 | tar.extractall() 206 | os.remove(output_path) 207 | elif output_path.endswith("tar.bz2"): 208 | with tarfile.open(output_path, "r:bz2") as tar: 209 | tar.extractall() 210 | os.remove(output_path) 211 | elif output_path.endswith("zip"): 212 | with zipfile.ZipFile(output_path, 'r') as zipf: 213 | zipf.extractall() 214 | os.remove(output_path) 215 | 216 | 217 | def _process_dataset(dataset, output_dir, here): 218 | '''Process each download spec in datasets.yml 219 | 220 | Typically each dataset list entry in the yml has 221 | "files" and "url" and "title" keys/values to show 222 | local files that must be present / extracted from 223 | a decompression of contents downloaded from the url. 
def _process_dataset(dataset, output_dir, here):
    '''Process each download spec in datasets.yml

    Typically each dataset list entry in the yml has
    "files" and "url" and "title" keys/values to show
    local files that must be present / extracted from
    a decompression of contents downloaded from the url.

    If a url endswith '/', then all files given
    are assumed to be added to the url pattern at the
    end

    ``output_dir`` is created if needed and all downloads and extractions
    happen inside it. ``here`` is accepted for backward compatibility and is
    no longer used: download paths are derived from ``output_dir`` rather
    than from a hard-coded ``here/data``.
    '''
    if not path.exists(output_dir):
        os.makedirs(output_dir)

    with DirectoryContext(output_dir):
        # Nothing to do when every listed file is already present.
        if all(path.exists(f) for f in dataset.get('files', [])):
            print('Skipping {0}'.format(dataset['title']))
            return

        url = dataset['url']
        title = dataset['title']
        if url.endswith('/'):
            # One download per listed file, saved inside output_dir (the
            # current directory inside this context).
            download_dir = os.getcwd()
            urls = [url + f for f in dataset['files']]
            output_paths = [os.path.join(download_dir, fname)
                            for fname in dataset['files']]
            # Glob for "same name, any extension": drop the final
            # extension (two for *.gz) and append a wildcard.
            unpacked = ['.'.join(output_path.split('.')[:(-2 if output_path.endswith('gz') else -1)]) + '*'
                        for output_path in output_paths]
        else:
            # A single archive that unpacks to the listed files.
            urls = [url]
            output_paths = [path.split(url)[1]]
            unpacked = dataset['files']
            if not isinstance(unpacked, (tuple, list)):
                unpacked = [unpacked]

        zipped = zip(urls, output_paths, unpacked)
        for idx, (url, output_path, unpack) in enumerate(zipped):
            # Format with the title as an argument so that braces in a
            # title cannot break str.format.
            running_title = '{0} {1} of {2}'.format(title, idx + 1, len(urls))
            if glob.glob(unpack) or os.path.exists(unpack.replace('*', '')):
                # Skip a file if a similar one is downloaded:
                # i.e. one that has same name but dif't extension
                print('Skipping {0}'.format(running_title))
                continue
            _url_to_binary_write(url, output_path, running_title)
            _extract_downloaded_archive(output_path)


def main():
    '''Download each dataset specified by datasets.yml in this directory'''
    here = path.dirname(path.abspath(__file__))
    info_file = path.join(here, 'datasets.yml')
    with open(info_file) as f:
        info = ordered_load(f.read())
    # Each top-level key names the directory its downloads are stored in.
    for topic, downloads in info.items():
        output_dir = path.join(here, topic)
        for spec in downloads:
            _process_dataset(spec, output_dir, here)


if __name__ == '__main__':
    main()

Dashboards Visualizing

Hundreds of Millions of Datapoints

in 30 Lines of Python

\n", 27 | "
\n", 28 | "

Philipp Rudiger, James A. Bednar, and Jean-Luc Stevens

Continuum Analytics

\n", 29 | "\n", 30 | "
\n", 31 | "\n", 32 | "
" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "extensions": { 39 | "jupyter_dashboards": { 40 | "version": 1, 41 | "views": { 42 | "grid_default": { 43 | "col": 0, 44 | "height": 5, 45 | "hidden": false, 46 | "row": 4, 47 | "width": 12 48 | }, 49 | "report_default": {} 50 | } 51 | } 52 | }, 53 | "slideshow": { 54 | "slide_type": "slide" 55 | } 56 | }, 57 | "source": [ 58 | "**Let's say you want to:** \n", 59 | "\n", 60 | "* Make it easy to explore some dataset with custom widgets.\n", 61 | "\n", 62 | "**But then you have to:**\n", 63 | "* Spend days of effort to get something working in a notebook\n", 64 | "* Build an opaque mishmash of domain-specific, widget, and plotting code\n", 65 | "* Start over from scratch whenever you need to:\n", 66 | " - Deploy in a standalone server\n", 67 | " - Visualize different aspects of your data\n", 68 | " - Scale up to larger (>100K) datasets" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": { 74 | "slideshow": { 75 | "slide_type": "slide" 76 | } 77 | }, 78 | "source": [ 79 | "Here we'll show a simple, flexible, powerful, step-by-step way to solve problems like this, by combining open-source libraries:\n", 80 | "\n", 81 | "* [**Dask**](http://dask.pydata.org): Efficient out-of-core/distributed computation on massive datasets\n", 82 | "* [**Fastparquet**](https://fastparquet.readthedocs.io): Efficient storage for columnar data\n", 83 | "* [**HoloViews**](http://holoviews.org): Declarative objects for instantly visualizable data\n", 84 | "* [**GeoViews**](http://geo.holoviews.org): Easy mix-and-matching of geographic data with custom plots\n", 85 | "* [**Bokeh**](http://bokeh.pydata.org): Interactive plotting in web browsers, controlled by Python\n", 86 | "* [**Numba**](http://numba.pydata.org): Accelerated machine code for inner loops\n", 87 | "* [**Datashader**](https://github.com/bokeh/datashader): Rasterizing huge datasets quickly using Dask and Numba\n", 88 | "* 
[**Param**](https://github.com/ioam/param): Declaring user-relevant parameters in domain-specific code" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "slideshow": { 95 | "slide_type": "slide" 96 | } 97 | }, 98 | "source": [ 99 | "We'll be working through this process:\n", 100 | "- Step 1: Get some data\n", 101 | "- Step 2: Prototype a plot in a notebook\n", 102 | "- Step 3: Declare your Parameters\n", 103 | "- Step 4: Get a widget-based UI for free\n", 104 | "- Step 5: Link your Parameters to your data\n", 105 | "- Step 6: Widgets now control your interactive plots\n", 106 | "- Step 7: Deploy your dashboard" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "slideshow": { 114 | "slide_type": "skip" 115 | } 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "import holoviews as hv\n", 120 | "import geoviews as gv\n", 121 | "import param, paramnb, parambokeh\n", 122 | "import pandas as pd\n", 123 | "import dask.dataframe as dd\n", 124 | "\n", 125 | "from colorcet import cm\n", 126 | "from bokeh.models import WMTSTileSource\n", 127 | "from holoviews.operation.datashader import datashade\n", 128 | "from holoviews.streams import RangeXY, PlotSize" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": { 134 | "slideshow": { 135 | "slide_type": "slide" 136 | } 137 | }, 138 | "source": [ 139 | "## Step 1: Get some data\n", 140 | "\n", 141 | "* Here we'll use a subset of the often-studied NYC Taxi dataset\n", 142 | "* About 12 million points of GPS locations from taxis\n", 143 | "* Stored in the efficient Parquet format for easy access\n", 144 | "* Loaded into a Dask dataframe for multi-core
(and if needed, out of core or distributed) computation" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "slideshow": { 152 | "slide_type": "fragment" 153 | } 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "df = dd.read_parquet('./data/nyc_taxi.parq/').persist()\n", 158 | "print(len(df))\n", 159 | "df.head(2)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": { 165 | "slideshow": { 166 | "slide_type": "slide" 167 | } 168 | }, 169 | "source": [ 170 | "## Step 2: Prototype a plot in a notebook\n", 171 | "\n", 172 | "* A text-based representation isn't very useful for big datasets like this, so we need to build a plot\n", 173 | "* But we don't want to start a software project, so we use HoloViews:\n", 174 | " - Simple, declarative way to annotate your data for visualization\n", 175 | " - Large library of Elements with associated visual representation\n", 176 | " - Elements combine (lay out or overlay) easily\n", 177 | "* And we'll want live interactivity, so we'll use a Bokeh backend\n", 178 | "* But our data is much too big for Bokeh directly, so we'll use Datashader to rasterize it first" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "slideshow": { 186 | "slide_type": "slide" 187 | } 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "hv.extension('bokeh')\n", 192 | "points = hv.Points(df, kdims=['pickup_x', 'pickup_y'], vdims=['passenger_count'])\n", 193 | "options = dict(width=800,height=475,xaxis=None,yaxis=None,bgcolor='black',show_grid=False)\n", 194 | "taxi_trips = datashade(points, x_sampling=1, y_sampling=1, cmap=cm['fire']).opts(plot=options)\n", 195 | "taxi_trips" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": { 201 | "slideshow": { 202 | "slide_type": "slide" 203 | } 204 | }, 205 | "source": [ 206 | "Let's put the data in context, overlaying it on a map:" 207 | ] 208 | }, 
209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": { 213 | "slideshow": { 214 | "slide_type": "fragment" 215 | } 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "tiles = gv.WMTS(WMTSTileSource(url='https://server.arcgisonline.com/ArcGIS/rest/services/'\n", 220 | " 'World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg'))\n", 221 | "tiles * taxi_trips" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": { 227 | "slideshow": { 228 | "slide_type": "slide" 229 | } 230 | }, 231 | "source": [ 232 | "## Step 3: Declare your Parameters\n", 233 | "\n", 234 | "Now that we've prototyped a nice plot, we want it to be widely sharable, with controls for safe and easy exploration. \n", 235 | "\n", 236 | "So the next step: declare what the intended user can change, with:\n", 237 | "\n", 238 | " - type and range checking\n", 239 | " - documentation strings\n", 240 | " - default values\n", 241 | " \n", 242 | "The Param library allows declaring Python attributes having these features
(and more, such as dynamic values and inheritance)." 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": { 248 | "slideshow": { 249 | "slide_type": "slide" 250 | } 251 | }, 252 | "source": [ 253 | "## NYC Taxi Parameters" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "slideshow": { 261 | "slide_type": "fragment" 262 | } 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "class NYCTaxiExplorer(hv.streams.Stream):\n", 267 | " alpha = param.Magnitude(default=0.75, doc=\"Alpha value for the map opacity\")\n", 268 | " plot = param.ObjectSelector(default=\"pickup\", objects=[\"pickup\",\"dropoff\"])\n", 269 | " colormap = param.ObjectSelector(default=cm[\"fire\"], objects=cm.values())\n", 270 | " passengers = param.Range(default=(0, 10), bounds=(0, 10), doc=\"\"\"\n", 271 | " Filter for taxi trips by number of passengers\"\"\")" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": { 277 | "slideshow": { 278 | "slide_type": "fragment" 279 | } 280 | }, 281 | "source": [ 282 | "Each Parameter is a normal Python attribute, but with special checks and functions run automatically when getting or setting.\n", 283 | "\n", 284 | "Parameters capture your goals and your knowledge about your domain, declaratively." 
285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": { 290 | "slideshow": { 291 | "slide_type": "slide" 292 | } 293 | }, 294 | "source": [ 295 | "### Class level parameters" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "slideshow": { 303 | "slide_type": "fragment" 304 | } 305 | }, 306 | "outputs": [], 307 | "source": [ 308 | "NYCTaxiExplorer.alpha" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": { 315 | "slideshow": { 316 | "slide_type": "fragment" 317 | } 318 | }, 319 | "outputs": [], 320 | "source": [ 321 | "NYCTaxiExplorer.alpha = 0.5\n", 322 | "NYCTaxiExplorer.alpha" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": { 328 | "slideshow": { 329 | "slide_type": "slide" 330 | } 331 | }, 332 | "source": [ 333 | "### Validation" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": { 340 | "slideshow": { 341 | "slide_type": "fragment" 342 | } 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "try:\n", 347 | " NYCTaxiExplorer.alpha = '0'\n", 348 | "except Exception as e:\n", 349 | " print(e)" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": { 355 | "slideshow": { 356 | "slide_type": "slide" 357 | } 358 | }, 359 | "source": [ 360 | "### Instance parameters" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": { 367 | "slideshow": { 368 | "slide_type": "fragment" 369 | } 370 | }, 371 | "outputs": [], 372 | "source": [ 373 | "explorer = NYCTaxiExplorer(alpha=0.6)\n", 374 | "explorer.alpha" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": { 381 | "slideshow": { 382 | "slide_type": "fragment" 383 | } 384 | }, 385 | "outputs": [], 386 | "source": [ 387 | "NYCTaxiExplorer.alpha" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | 
"metadata": { 393 | "slideshow": { 394 | "slide_type": "slide" 395 | } 396 | }, 397 | "source": [ 398 | "## Step 4: Get a widget-based UI for free\n", 399 | "\n", 400 | "* Parameters are purely declarative, but contain all the information needed to build interactive widgets\n", 401 | "* ParamNB generates UIs from Parameters, using ipywidgets" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": { 408 | "slideshow": { 409 | "slide_type": "fragment" 410 | } 411 | }, 412 | "outputs": [], 413 | "source": [ 414 | "paramnb.Widgets(NYCTaxiExplorer)" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": { 421 | "slideshow": { 422 | "slide_type": "fragment" 423 | } 424 | }, 425 | "outputs": [], 426 | "source": [ 427 | "NYCTaxiExplorer.passengers" 428 | ] 429 | }, 430 | { 431 | "cell_type": "markdown", 432 | "metadata": { 433 | "slideshow": { 434 | "slide_type": "fragment" 435 | } 436 | }, 437 | "source": [ 438 | "* ipywidgets work with Jupyter Dashboards Server for deployment" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": { 444 | "slideshow": { 445 | "slide_type": "slide" 446 | } 447 | }, 448 | "source": [ 449 | "* Declaration of parameters is independent of the UI library used\n", 450 | "* ParamBokeh generates UIs from Parameters, using Bokeh widgets" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": { 457 | "slideshow": { 458 | "slide_type": "fragment" 459 | } 460 | }, 461 | "outputs": [], 462 | "source": [ 463 | "parambokeh.Widgets(NYCTaxiExplorer)" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "metadata": { 469 | "slideshow": { 470 | "slide_type": "fragment" 471 | } 472 | }, 473 | "source": [ 474 | "* Bokeh widgets work with Bokeh Server for deployment" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": { 480 | "slideshow": { 481 | "slide_type": "slide" 482 | } 
483 | }, 484 | "source": [ 485 | "## Step 5: Link your Parameters to your data\n", 486 | "\n", 487 | "Because the Parameters defined earlier are *about* a plot, it makes sense to combine the parameter and plotting declarations into a single object:" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "metadata": { 494 | "collapsed": true, 495 | "extensions": { 496 | "jupyter_dashboards": { 497 | "version": 1, 498 | "views": { 499 | "grid_default": { 500 | "hidden": true 501 | }, 502 | "report_default": {} 503 | } 504 | } 505 | }, 506 | "slideshow": { 507 | "slide_type": "fragment" 508 | } 509 | }, 510 | "outputs": [], 511 | "source": [ 512 | "class NYCTaxiExplorer(hv.streams.Stream):\n", 513 | " alpha = param.Magnitude(default=0.75, doc=\"Alpha value for the map opacity\")\n", 514 | " colormap = param.ObjectSelector(default=cm[\"fire\"], objects=cm.values())\n", 515 | " plot = param.ObjectSelector(default=\"pickup\", objects=[\"pickup\",\"dropoff\"])\n", 516 | " passengers = param.Range(default=(1, 9), bounds=(1, 9))\n", 517 | "\n", 518 | " def make_view(self, x_range=None, y_range=None, **kwargs):\n", 519 | " map_tiles = tiles.opts(style=dict(alpha=self.alpha), plot=options) \n", 520 | "\n", 521 | " points = hv.Points(df, kdims=[self.plot+'_x', self.plot+'_y'], vdims=['passenger_count'])\n", 522 | " selected = points.select(passenger_count=self.passengers)\n", 523 | " taxi_trips = datashade(selected, x_sampling=1, y_sampling=1, cmap=self.colormap,\n", 524 | " dynamic=False, x_range=x_range, y_range=y_range,\n", 525 | " width=800, height=475)\n", 526 | " return map_tiles * taxi_trips" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": { 532 | "slideshow": { 533 | "slide_type": "slide" 534 | } 535 | }, 536 | "source": [ 537 | "Note that the `NYCTaxiExplorer` class is entirely declarative (no widgets), and can be used \"by hand\" to provide range-checked and type-checked plotting:" 538 | ] 539 | }, 540 | { 
541 | "cell_type": "code", 542 | "execution_count": null, 543 | "metadata": { 544 | "extensions": { 545 | "jupyter_dashboards": { 546 | "version": 1, 547 | "views": { 548 | "grid_default": { 549 | "hidden": true 550 | }, 551 | "report_default": { 552 | "hidden": true 553 | } 554 | } 555 | } 556 | }, 557 | "slideshow": { 558 | "slide_type": "fragment" 559 | } 560 | }, 561 | "outputs": [], 562 | "source": [ 563 | "explorer = NYCTaxiExplorer(alpha=0.2, plot=\"dropoff\")\n", 564 | "explorer.make_view()" 565 | ] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "metadata": { 570 | "slideshow": { 571 | "slide_type": "slide" 572 | } 573 | }, 574 | "source": [ 575 | "## Step 6: Widgets now control your interactive plots" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "metadata": { 582 | "slideshow": { 583 | "slide_type": "slide" 584 | } 585 | }, 586 | "outputs": [], 587 | "source": [ 588 | "explorer = NYCTaxiExplorer()\n", 589 | "paramnb.Widgets(explorer, callback=explorer.event)\n", 590 | "hv.DynamicMap(explorer.make_view, streams=[explorer, RangeXY()])" 591 | ] 592 | }, 593 | { 594 | "cell_type": "markdown", 595 | "metadata": { 596 | "slideshow": { 597 | "slide_type": "slide" 598 | } 599 | }, 600 | "source": [ 601 | "## Step 7: Deploy your dashboard\n", 602 | "\n", 603 | "If you want to share your work with people who don't use Python, you'll now want to run a server with this same code.\n", 604 | "\n", 605 | "* If you used **ParamBokeh**, deploy with **Bokeh Server**:\n", 606 | " - Write the above code to a file ``nyc_parambokeh.py``,
saving the `Widgets` object as the attribute `doc`\n", 607 | " - ``bokeh serve nyc_parambokeh.py``" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": null, 613 | "metadata": { 614 | "collapsed": true, 615 | "slideshow": { 616 | "slide_type": "subslide" 617 | } 618 | }, 619 | "outputs": [], 620 | "source": [ 621 | "explorer = NYCTaxiExplorer()\n", 622 | "parambokeh.Widgets(explorer, callback=explorer.event)\n", 623 | "hv.DynamicMap(explorer.make_view, streams=[explorer, RangeXY()])" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": { 629 | "slideshow": { 630 | "slide_type": "slide" 631 | } 632 | }, 633 | "source": [ 634 | "* If you used **ParamNB**, deploy with **Jupyter Dashboard Server**:\n", 635 | " - Use Jupyter Dashboards Extension to select cells from the notebook to display\n", 636 | " - Use preview mode to see layout\n", 637 | " - Use Jupyter Dashboards Server to deploy\n", 638 | " - Note various caveats below" 639 | ] 640 | }, 641 | { 642 | "cell_type": "markdown", 643 | "metadata": { 644 | "slideshow": { 645 | "slide_type": "slide" 646 | } 647 | }, 648 | "source": [ 649 | "# Branching out\n", 650 | "\n", 651 | "Much more ambitious apps are possible with very little additional code or effort:\n", 652 | "\n", 653 | "* Adding additional linked or separate subplots of any type; see [holoviews.org](http://holoviews.org)\n", 654 | "* Declaring code that runs for clicking or selecting *within* the Bokeh plot; see \"streams\" at [holoviews.org](http://holoviews.org)\n", 655 | "* Using multiple sets of widgets of many different types; see [ParamNB](https://github.com/ioam/paramnb) and [ParamBokeh](https://github.com/ioam/parambokeh)\n", 656 | "* Using datasets too big for any one machine, with [Dask.Distributed](https://distributed.readthedocs.io)" 657 | ] 658 | }, 659 | { 660 | "cell_type": "markdown", 661 | "metadata": { 662 | "slideshow": { 663 | "slide_type": "slide" 664 | } 665 | }, 666 | "source": [ 667 | "# 
Future work\n", 668 | "\n", 669 | "* Jupyter Dashboards Server not currently maintained; requires older ipywidgets version\n", 670 | "* Bokeh Server is mature and well supported, but does not currently support drag-and-drop layout like Jupyter Dashboards does\n", 671 | "* ParamBokeh still needs some polishing and work to make it ready for widespread use; ParamNB is more mature so far\n", 672 | "* Both ParamNB and ParamBokeh should provide more flexible widget layouts\n", 673 | "* Let us know what you would like to see out of these tools!" 674 | ] 675 | } 676 | ], 677 | "metadata": { 678 | "celltoolbar": "Slideshow", 679 | "extensions": { 680 | "jupyter_dashboards": { 681 | "activeView": "grid_default", 682 | "version": 1, 683 | "views": { 684 | "grid_default": { 685 | "cellMargin": 10, 686 | "defaultCellHeight": 20, 687 | "maxColumns": 12, 688 | "name": "grid", 689 | "type": "grid" 690 | }, 691 | "report_default": { 692 | "name": "report", 693 | "type": "report" 694 | } 695 | } 696 | } 697 | }, 698 | "kernelspec": { 699 | "display_name": "Python 3", 700 | "language": "python", 701 | "name": "python3" 702 | }, 703 | "language_info": { 704 | "codemirror_mode": { 705 | "name": "ipython", 706 | "version": 3 707 | }, 708 | "file_extension": ".py", 709 | "mimetype": "text/x-python", 710 | "name": "python", 711 | "nbconvert_exporter": "python", 712 | "pygments_lexer": "ipython3", 713 | "version": "3.6.1" 714 | } 715 | }, 716 | "nbformat": 4, 717 | "nbformat_minor": 2 718 | } 719 | --------------------------------------------------------------------------------