├── .gitignore ├── figs ├── darts.png ├── map.png ├── latency.png ├── latency2.png ├── wideView.png ├── throughput1.png ├── throughput2.png └── allconnections.png ├── tutorial ├── myscript.py ├── All Together.ipynb ├── Parallel Magics.ipynb ├── Multiplexing.ipynb └── Load-Balancing.ipynb ├── README.md ├── soln ├── matmul.py ├── remoteiterhint.py ├── remote_iter_hint.py ├── ngrams.py ├── mcpi.py ├── remote_iter.py ├── remote_iter_slightly_better.py ├── remoteiter.py └── nestedloop.py ├── hints.py ├── check_env.py ├── images_common.py ├── Summary.ipynb ├── examples ├── wikipedia │ ├── widget_forcedirectedgraph.py │ ├── eventful_graph.py │ ├── eventful_dict.py │ ├── Wikipedia.ipynb │ └── widget_forcedirectedgraph.js ├── MPI Broadcast.ipynb ├── memmap.ipynb ├── Parallel image processing.ipynb ├── Parallel face detection.ipynb ├── MC Options.ipynb └── Monitoring MPI.ipynb ├── Index.ipynb ├── exercises ├── Monte Carlo π.ipynb └── Remote Iteration.ipynb ├── Overview.ipynb ├── Background.ipynb └── Performance.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | images 2 | .ipynb_checkpoints 3 | *.py[co] 4 | 5 | -------------------------------------------------------------------------------- /figs/darts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/IPython-parallel-tutorial/master/figs/darts.png -------------------------------------------------------------------------------- /figs/map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/IPython-parallel-tutorial/master/figs/map.png -------------------------------------------------------------------------------- /figs/latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/IPython-parallel-tutorial/master/figs/latency.png 
-------------------------------------------------------------------------------- /figs/latency2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/IPython-parallel-tutorial/master/figs/latency2.png -------------------------------------------------------------------------------- /figs/wideView.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/IPython-parallel-tutorial/master/figs/wideView.png -------------------------------------------------------------------------------- /figs/throughput1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/IPython-parallel-tutorial/master/figs/throughput1.png -------------------------------------------------------------------------------- /figs/throughput2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/IPython-parallel-tutorial/master/figs/throughput2.png -------------------------------------------------------------------------------- /figs/allconnections.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leriomaggio/IPython-parallel-tutorial/master/figs/allconnections.png -------------------------------------------------------------------------------- /tutorial/myscript.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy 3 | import sys 4 | 5 | a=5 6 | 7 | def mysquare(x): 8 | return x*x -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | IPython.parallel tutorial 2 | 3 | See the [index 
notebook](http://nbviewer.ipython.org/github/minrk/IPython-parallel-tutorial/blob/master/Index.ipynb). 4 | -------------------------------------------------------------------------------- /soln/matmul.py: -------------------------------------------------------------------------------- 1 | def pdot(v, A, B): 2 | v['B'] = B # push B everywhere 3 | v.scatter('A', A) # scatter A 4 | v.execute('C=A.dot(B)') # compute the dot-product 5 | return v.gather('C', block=True) # gather the resulting sub-arrays 6 | -------------------------------------------------------------------------------- /soln/remoteiterhint.py: -------------------------------------------------------------------------------- 1 | lis = range(5) 2 | 3 | # you can create an iterator with `iter(iterable)` 4 | it = iter(lis) 5 | 6 | # it.next() returns the next value 7 | # and raises StopIteration when you get to the end 8 | while True: 9 | try: 10 | print it.next() 11 | except StopIteration: 12 | print "done" 13 | break 14 | -------------------------------------------------------------------------------- /soln/remote_iter_hint.py: -------------------------------------------------------------------------------- 1 | from IPython.display import display 2 | 3 | t_minus = range(10,0,-1) 4 | 5 | def lazy_iterator(name): 6 | seq = eval(name) 7 | it = iter(seq) 8 | while True: 9 | try: 10 | yield it.next() 11 | # this looks silly locally, but it will be useful for the remote version: 12 | except StopIteration: 13 | raise StopIteration 14 | 15 | lzit = lazy_iterator('t_minus') 16 | display(lzit) 17 | list(lzit) -------------------------------------------------------------------------------- /soln/ngrams.py: -------------------------------------------------------------------------------- 1 | def ngrams_parallel(view, fnames, n=1): 2 | """Compute ngrams in parallel 3 | 4 | view - An IPython DirectView 5 | fnames - The filenames containing the split data. 
6 | """ 7 | 8 | ar = view.map_async(ngrams, fnames, [n] * len(fnames)) 9 | counts = {} 10 | for engine_count in ar: 11 | for gram, count in engine_count.items(): 12 | if gram not in counts: 13 | counts[gram] = 0 14 | counts[gram] += count 15 | return counts 16 | -------------------------------------------------------------------------------- /soln/mcpi.py: -------------------------------------------------------------------------------- 1 | def mcpi(nsamples): 2 | from random import random 3 | s = 0 4 | for i in xrange(nsamples): 5 | x = random() 6 | y = random() 7 | if x*x + y*y <= 1: 8 | s+=1 9 | return 4.*s/nsamples 10 | 11 | def multi_mcpi(view, nsamples): 12 | p = len(view.targets) 13 | if nsamples % p: 14 | # ensure even divisibility 15 | nsamples += p - (nsamples%p) 16 | 17 | subsamples = nsamples/p 18 | 19 | ar = view.apply_async(mcpi, subsamples) 20 | return sum(ar)/p -------------------------------------------------------------------------------- /soln/remote_iter.py: -------------------------------------------------------------------------------- 1 | from IPython import parallel 2 | 3 | def remote_iterator(view, name): 4 | """Return an iterator on an object living on a remote engine.""" 5 | it_name = '_%s_iter' % name 6 | view.execute('%s = iter(%s)' % (it_name,name), block=True) 7 | ref = parallel.Reference(it_name) 8 | while True: 9 | try: 10 | yield view.apply_sync(lambda x: x.next(), ref) 11 | # This causes the StopIteration exception to be raised. 
12 | except parallel.RemoteError as e: 13 | if e.ename == 'StopIteration': 14 | raise StopIteration 15 | else: 16 | raise e 17 | -------------------------------------------------------------------------------- /soln/remote_iter_slightly_better.py: -------------------------------------------------------------------------------- 1 | from IPython import parallel 2 | 3 | def remote_iterator(view, name): 4 | """Return an iterator on an object living on a remote engine.""" 5 | it_name = '_%s_iter' % name 6 | view.execute('%s = iter(%s)' % (it_name,name), block=True) 7 | next_ref = parallel.Reference(it_name + '.next') 8 | while True: 9 | try: 10 | yield view.apply_sync(next_ref) 11 | # This causes the StopIteration exception to be raised. 12 | except parallel.RemoteError as e: 13 | if e.ename == 'StopIteration': 14 | raise StopIteration 15 | else: 16 | raise e 17 | -------------------------------------------------------------------------------- /soln/remoteiter.py: -------------------------------------------------------------------------------- 1 | from IPython.parallel.error import RemoteError 2 | from IPython.parallel import Reference 3 | 4 | def remote_iterator(view,name): 5 | """Return an iterator on an object living on a remote engine. 6 | """ 7 | view.execute('it%s=iter(%s)' % (name,name), block=True) 8 | while True: 9 | try: 10 | result = view.apply_sync(lambda x: x.next(), Reference('it'+name)) 11 | # This causes the StopIteration exception to be raised. 12 | except RemoteError as e: 13 | if e.ename == 'StopIteration': 14 | raise StopIteration 15 | else: 16 | raise e 17 | else: 18 | yield result 19 | 20 | -------------------------------------------------------------------------------- /soln/nestedloop.py: -------------------------------------------------------------------------------- 1 | # To parallelize every call with map, you just need to get a list for each argument. 
2 | # You can use `itertools.product` + `zip` to get this: 3 | 4 | 5 | import itertools 6 | 7 | product = list(itertools.product(widths, heights)) 8 | # [(1, 6), (1, 7), (2, 6), (2, 7), (3, 6), (3, 7)] 9 | 10 | # So we have a "list of pairs", 11 | # but what we really want is a single list for each argument, i.e. a "pair of lists". 12 | # This is exactly what the slightly weird `zip(*product)` syntax gets us: 13 | 14 | allwidths, allheights = zip(*itertools.product(widths, heights)) 15 | 16 | print " widths", allwidths 17 | print "heights", allheights 18 | 19 | # Now we just map our function onto those two lists, to parallelize nested for loops: 20 | 21 | ar = lview.map_async(area, allwidths, allheights) 22 | -------------------------------------------------------------------------------- /hints.py: -------------------------------------------------------------------------------- 1 | from IPython.display import display, HTML 2 | 3 | def mmhint(): 4 | display(HTML(""" 5 | 6 |
7 | Remember - multiply rows of one by the columns of the other. 8 |
9 | 10 |
11 | 12 |
13 | easiest implementation involves one each of: push, scatter, execute, gather 14 |
15 | 19 | """)) 20 | 21 | def nesthint(): 22 | display(HTML(""" 23 | 26 | 27 |
28 | `itertools.product` and `zip` will be helpful. 29 |
30 | 33 | """)) -------------------------------------------------------------------------------- /check_env.py: -------------------------------------------------------------------------------- 1 | """check_env.py for IPython.parallel tutorial at SciPy 2014""" 2 | 3 | import sys 4 | 5 | import numpy 6 | import scipy 7 | 8 | import requests 9 | 10 | import matplotlib.pyplot 11 | import skimage 12 | 13 | import matplotlib 14 | 15 | try: 16 | from bs4 import BeautifulSoup 17 | except ImportError: 18 | print("BeautifulSoup will be used for an example.") 19 | 20 | try: 21 | import networkx 22 | except ImportError: 23 | print("networkx will be used for an example.") 24 | 25 | try: 26 | import cv 27 | except ImportError: 28 | print("opencv will be used for an example.") 29 | 30 | from distutils.version import LooseVersion as V 31 | import IPython 32 | 33 | if V(IPython.__version__) < V('2.0'): 34 | print("Need IPython >= 2.0, have %s" % IPython.__version__) 35 | sys.exit(1) 36 | 37 | from IPython import parallel 38 | 39 | print("OK") 40 | -------------------------------------------------------------------------------- /images_common.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib.pyplot as plt 3 | 4 | from skimage.io import imread 5 | from skimage import measure 6 | 7 | def plot_contours(img, dark, light, show=True): 8 | """Display the image and plot all contours found""" 9 | plt.imshow(img, cmap='gray') 10 | 11 | for n, contour in enumerate(dark): 12 | plt.plot(contour[:, 1], contour[:, 0], c='r', linewidth=1) 13 | 14 | for n, contour in enumerate(light): 15 | plt.plot(contour[:, 1], contour[:, 0], c='b', linewidth=1) 16 | 17 | plt.axis('image') 18 | plt.xticks([]) 19 | plt.yticks([]) 20 | if show: 21 | plt.show() 22 | 23 | def find_contours(path, low=0.1, high=0.8): 24 | """Find contours in an image at path 25 | """ 26 | img = imread(path, flatten=True) 27 | 28 | # Find contours at a constant value 
of 0.1 and 0.8 29 | dark = measure.find_contours(img, low) 30 | light = measure.find_contours(img, high) 31 | return img, dark, light 32 | 33 | def get_contours_image(path): 34 | from IPython.core.pylabtools import print_figure 35 | 36 | img, dark, light = find_contours(path) 37 | plot_contours(img, dark, light, show=False) 38 | fig = plt.gcf() 39 | pngdata = print_figure(fig) 40 | plt.close(fig) 41 | return pngdata 42 | 43 | def get_pictures(pictures_dir): 44 | """Return a list of picture files found in pictures_dir""" 45 | 46 | pictures = [] 47 | for directory, subdirs, files in os.walk(pictures_dir): 48 | for fname in files: 49 | if fname.lower().endswith(('.jpg', '.png')): 50 | pictures.append(os.path.join(directory, fname)) 51 | 52 | return pictures 53 | -------------------------------------------------------------------------------- /Summary.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:0982a89ff15ca0c0aa4006f9962fcb046083fa857ff818e9b30f2858d5706f60" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Summary of what we have covered\n", 16 | "\n", 17 | "* Simple remote execution of functions, or statements\n", 18 | "* Distribution of data\n", 19 | "* Parallel map\n", 20 | "* Multiplexed execution\n", 21 | "* Load-Balanced task farming\n", 22 | "* Asynchronous task submission and result retrieval\n", 23 | "* More efficient data movememnt with memmap and MPI" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "# And further things we have *not* covered\n", 31 | "\n", 32 | "* Functional Task Dependencies\n", 33 | "* Configuring the IPython cluster (working with profiles, SGE, MongoDB, etc.)\n", 34 | "* Tuning the IPython Cluster (TaskScheduler.hwm, custom serialization, etc.)\n", 35 | "* 
Inter-engine communication with PyZMQ or MPI\n", 36 | "* decorators for parallel and remote functions\n", 37 | "* Result caching, and task metadata\n", 38 | "* The TaskResult Database\n", 39 | "* Task resubmission\n", 40 | "* Queue monitoring and management\n", 41 | "\n", 42 | "See the [IPython.parallel docs](http://ipython.org/ipython-doc/dev/parallel) for more detailed coverage of these topics, and check out our [examples](https://www.github.com/ipython/ipython/tree/master/docs/examples/parallel)." 43 | ] 44 | } 45 | ], 46 | "metadata": {} 47 | } 48 | ] 49 | } -------------------------------------------------------------------------------- /examples/wikipedia/widget_forcedirectedgraph.py: -------------------------------------------------------------------------------- 1 | from IPython.html import widgets # Widget definitions 2 | from IPython.utils.traitlets import Unicode, CInt, CFloat # Import the base Widget class and the traitlets Unicode class. 3 | from IPython.display import display, Javascript 4 | 5 | def publish_js(): 6 | with open('./widget_forcedirectedgraph.js', 'r') as f: 7 | display(Javascript(data=f.read())) 8 | 9 | 10 | # Define our ForceDirectedGraphWidget and its target model and default view. 
11 | class ForceDirectedGraphWidget(widgets.DOMWidget): 12 | _view_name = Unicode('D3ForceDirectedGraphView', sync=True) 13 | 14 | width = CInt(400, sync=True) 15 | height = CInt(300, sync=True) 16 | charge = CFloat(270., sync=True) 17 | distance = CInt(30., sync=True) 18 | strength = CInt(0.3, sync=True) 19 | 20 | def __init__(self, eventful_graph, *pargs, **kwargs): 21 | widgets.DOMWidget.__init__(self, *pargs, **kwargs) 22 | 23 | self._eventful_graph = eventful_graph 24 | self._send_dict_changes(eventful_graph.graph, 'graph') 25 | self._send_dict_changes(eventful_graph.node, 'node') 26 | self._send_dict_changes(eventful_graph.adj, 'adj') 27 | 28 | def _ipython_display_(self, *pargs, **kwargs): 29 | 30 | # Show the widget, then send the current state 31 | widgets.DOMWidget._ipython_display_(self, *pargs, **kwargs) 32 | for (key, value) in self._eventful_graph.graph.items(): 33 | self.send({'dict': 'graph', 'action': 'add', 'key': key, 'value': value}) 34 | for (key, value) in self._eventful_graph.node.items(): 35 | self.send({'dict': 'node', 'action': 'add', 'key': key, 'value': value}) 36 | for (key, value) in self._eventful_graph.adj.items(): 37 | self.send({'dict': 'adj', 'action': 'add', 'key': key, 'value': value}) 38 | 39 | def _send_dict_changes(self, eventful_dict, dict_name): 40 | def key_add(key, value): 41 | self.send({'dict': dict_name, 'action': 'add', 'key': key, 'value': value}) 42 | def key_set(key, value): 43 | self.send({'dict': dict_name, 'action': 'set', 'key': key, 'value': value}) 44 | def key_del(key): 45 | self.send({'dict': dict_name, 'action': 'del', 'key': key}) 46 | eventful_dict.on_add(key_add) 47 | eventful_dict.on_set(key_set) 48 | eventful_dict.on_del(key_del) 49 | -------------------------------------------------------------------------------- /examples/wikipedia/eventful_graph.py: -------------------------------------------------------------------------------- 1 | """NetworkX graphs do not have events that can be listened to. 
In order to 2 | watch the NetworkX graph object for changes a custom eventful graph object must 3 | be created. The custom eventful graph object will inherit from the base graph 4 | object and use special eventful dictionaries instead of standard Python dict 5 | instances. Because NetworkX nests dictionaries inside dictionaries, it's 6 | important that the eventful dictionary is capable of recognizing when a 7 | dictionary value is set to another dictionary instance. When this happens, the 8 | eventful dictionary needs to also make the new dictionary an eventful 9 | dictionary. This allows the eventful dictionary to listen to changes made to 10 | dictionaries within dictionaries.""" 11 | import networkx 12 | from networkx.generators.classic import empty_graph 13 | 14 | from eventful_dict import EventfulDict 15 | 16 | class EventfulGraph(networkx.Graph): 17 | 18 | _constructed_callback = None 19 | 20 | @staticmethod 21 | def on_constructed(callback): 22 | """Register a callback to be called when a graph is constructed.""" 23 | if callback is None or callable(callback): 24 | EventfulGraph._constructed_callback = callback 25 | 26 | def __init__(self, *pargs, **kwargs): 27 | """Initialize a graph with edges, name, graph attributes. 28 | 29 | Parameters 30 | sleep: float 31 | optional float that allows you to tell the 32 | dictionary to hang for the given amount of seconds on each 33 | event. This is usefull for animations.""" 34 | super(EventfulGraph, self).__init__(*pargs, **kwargs) 35 | 36 | # Override internal dictionaries with custom eventful ones. 37 | sleep = kwargs.get('sleep', 0.0) 38 | self.graph = EventfulDict(self.graph, sleep=sleep) 39 | self.node = EventfulDict(self.node, sleep=sleep) 40 | self.adj = EventfulDict(self.adj, sleep=sleep) 41 | 42 | # Notify callback of construction event. 
43 | if EventfulGraph._constructed_callback: 44 | EventfulGraph._constructed_callback(self) 45 | 46 | 47 | def empty_eventfulgraph_hook(*pargs, **kwargs): 48 | def wrapped(*wpargs, **wkwargs): 49 | """Wrapper for networkx.generators.classic.empty_graph(...)""" 50 | wkwargs['create_using'] = EventfulGraph(*pargs, **kwargs) 51 | return empty_graph(*wpargs, **wkwargs) 52 | return wrapped 53 | -------------------------------------------------------------------------------- /Index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:6636398377d5d9edcc77f93c1122410c921eef520d3dc2aa2b8504b1d71531b3" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# IPython.parallel\n", 16 | "\n", 17 | "## Interactive (parallel) Python\n", 18 | "\n", 19 | "https://github.com/minrk/IPython-parallel-tutorial\n", 20 | "\n", 21 | "**Min Ragan-Kelley, Fernando Perez**, *Helen Wills Neuroscience Institute, UC Berkeley*\n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# Installation and dependencies\n", 29 | "\n", 30 | "You will need IPython 2.1, and pyzmq \u2265 13. To use the demo notebooks, you will also need tornado \u2265 2.1. I will also make use of numpy and matplotlib. 
If you have Canopy or Anaconda, you already have all of these.\n", 31 | "\n", 32 | "Quick one-line install for IPython and its dependencies:\n", 33 | " \n", 34 | " pip install ipython[all]\n", 35 | " \n", 36 | "Or get everything for the tutorial with conda:\n", 37 | "\n", 38 | " conda install anaconda mpi4py\n", 39 | "\n", 40 | "For those who prefer pip or otherwise manual package installation, the following packages will be used:\n", 41 | "\n", 42 | "ipython[all]\n", 43 | "numpy\n", 44 | "matplotlib\n", 45 | "networkx\n", 46 | "scikit-image\n", 47 | "requests\n", 48 | "beautifulsoup\n", 49 | "mpi4py\n", 50 | "\n", 51 | "\n", 52 | "Get the [tutorial notebooks](https://github.com/minrk/IPython-parallel-tutorial) from https://github.com/minrk/IPython-parallel-tutorial\n", 53 | "\n", 54 | "Optional dependencies: I will use [NetworkX](http://networkx.lanl.gov/)\n", 55 | "for one demo, and `scikit-image` for another, but they are not critical. Both packages are in in Anaconda and Canopy.\n", 56 | "\n", 57 | "For the image-related demos, all you need are some images on your computer. The notebooks will try to fetch images from Wikimedia Commons, but since the conference networks are untrustworty, I have [bundled some images here](http://s3.amazonaws.com/ipython-parallel-data/images.zip)." 
58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Outline\n", 65 | "- [Some Background](Background.ipynb)\n", 66 | "- [Motivating Example](examples/Parallel%20image%20processing.ipynb)\n", 67 | "- [Overview](Overview.ipynb)\n", 68 | "- [Tutorial](tutorial)\n", 69 | " - [Remote Execution](tutorial/Remote%20Execution.ipynb)\n", 70 | " - [Multiplexing](tutorial/Multiplexing.ipynb)\n", 71 | " - [Load-Balancing](tutorial/Load-Balancing.ipynb)\n", 72 | " - [Both!](tutorial/All%20Together.ipynb)\n", 73 | " - [Parallel Magics](tutorial/Parallel%20Magics.ipynb)\n", 74 | "- [Examples](examples)\n", 75 | "- [Exercises](exercises)\n" 76 | ] 77 | } 78 | ], 79 | "metadata": {} 80 | } 81 | ] 82 | } -------------------------------------------------------------------------------- /examples/wikipedia/eventful_dict.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | class EventfulDict(dict): 4 | """Eventful dictionary""" 5 | 6 | def __init__(self, *args, **kwargs): 7 | """Sleep is an optional float that allows you to tell the 8 | dictionary to hang for the given amount of seconds on each 9 | event. 
This is usefull for animations.""" 10 | self._sleep = kwargs.pop('sleep', 0.0) 11 | self._add_callbacks = [] 12 | self._del_callbacks = [] 13 | self._set_callbacks = [] 14 | dict.__init__(self, *args, **kwargs) 15 | 16 | def on_add(self, callback, remove=False): 17 | self._register_callback(self._add_callbacks, callback, remove) 18 | def on_del(self, callback, remove=False): 19 | self._register_callback(self._del_callbacks, callback, remove) 20 | def on_set(self, callback, remove=False): 21 | self._register_callback(self._set_callbacks, callback, remove) 22 | def _register_callback(self, callback_list, callback, remove=False): 23 | if callable(callback): 24 | if remove and callback in callback_list: 25 | callback_list.remove(callback) 26 | elif not remove and not callback in callback_list: 27 | callback_list.append(callback) 28 | else: 29 | raise Exception('Callback must be callable.') 30 | 31 | def _handle_add(self, key, value): 32 | self._try_callbacks(self._add_callbacks, key, value) 33 | self._try_sleep() 34 | def _handle_del(self, key): 35 | self._try_callbacks(self._del_callbacks, key) 36 | self._try_sleep() 37 | def _handle_set(self, key, value): 38 | self._try_callbacks(self._set_callbacks, key, value) 39 | self._try_sleep() 40 | def _try_callbacks(self, callback_list, *pargs, **kwargs): 41 | for callback in callback_list: 42 | callback(*pargs, **kwargs) 43 | 44 | def _try_sleep(self): 45 | if self._sleep > 0.0: 46 | time.sleep(self._sleep) 47 | 48 | def __setitem__(self, key, value): 49 | return_val = None 50 | exists = False 51 | if key in self: 52 | exists = True 53 | 54 | # If the user sets the property to a new dict, make the dict 55 | # eventful and listen to the changes of it ONLY if it is not 56 | # already eventful. Any modification to this new dict will 57 | # fire a set event of the parent dict. 
58 | if isinstance(value, dict) and not isinstance(value, EventfulDict): 59 | new_dict = EventfulDict(value) 60 | 61 | def handle_change(*pargs, **kwargs): 62 | self._try_callbacks(self._set_callbacks, key, dict.__getitem__(self, key)) 63 | 64 | new_dict.on_add(handle_change) 65 | new_dict.on_del(handle_change) 66 | new_dict.on_set(handle_change) 67 | return_val = dict.__setitem__(self, key, new_dict) 68 | else: 69 | return_val = dict.__setitem__(self, key, value) 70 | 71 | if exists: 72 | self._handle_set(key, value) 73 | else: 74 | self._handle_add(key, value) 75 | return return_val 76 | 77 | def __delitem__(self, key): 78 | return_val = dict.__delitem__(self, key) 79 | self._handle_del(key) 80 | return return_val 81 | 82 | def pop(self, key): 83 | return_val = dict.pop(self, key) 84 | if key in self: 85 | self._handle_del(key) 86 | return return_val 87 | 88 | def popitem(self): 89 | popped = dict.popitem(self) 90 | if popped is not None and popped[0] is not None: 91 | self._handle_del(popped[0]) 92 | return popped 93 | 94 | def update(self, other_dict): 95 | for (key, value) in other_dict.items(): 96 | self[key] = value 97 | 98 | def clear(self): 99 | for key in list(self.keys()): 100 | del self[key] -------------------------------------------------------------------------------- /examples/MPI Broadcast.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:da03414f2ab592310b23f6a557936f90bea89e0e61ca0e32b137abdb2154348a" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "More efficient data movement with MPI" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "Just like [we did](memmap.ipynb) manually with memmap,\n", 24 | "you can move data more efficiently with MPI by sending it to 
just one engine,\n", 25 | "and using MPI to broadcast it to the rest of the engines.\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "collapsed": false, 31 | "input": [ 32 | "import socket\n", 33 | "import os, sys, re\n", 34 | "\n", 35 | "import numpy as np\n", 36 | "\n", 37 | "from IPython import parallel" 38 | ], 39 | "language": "python", 40 | "metadata": {}, 41 | "outputs": [] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "For this demo, I will connect to a cluster with engines started with MPI.\n", 48 | "If you have MPI and mpi4py on your machine, you can start a local cluster with MPI with:\n", 49 | "\n", 50 | " ipcluster start -n 8 --engines=MPI --profile mpi" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "collapsed": false, 56 | "input": [ 57 | "mpi_profile = 'dirac'\n", 58 | "rc = parallel.Client(profile=mpi_profile)\n", 59 | "eall = rc[:]\n", 60 | "root = rc[-1]" 61 | ], 62 | "language": "python", 63 | "metadata": {}, 64 | "outputs": [] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "collapsed": false, 69 | "input": [ 70 | "%px from mpi4py.MPI import COMM_WORLD as MPI" 71 | ], 72 | "language": "python", 73 | "metadata": {}, 74 | "outputs": [] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "collapsed": false, 79 | "input": [ 80 | "mpi_ranks = eall.apply_async(lambda : MPI.Get_rank()).get_dict()\n", 81 | "root_rank = root.apply_sync(lambda : MPI.Get_rank())\n", 82 | "mpi_ranks" 83 | ], 84 | "language": "python", 85 | "metadata": {}, 86 | "outputs": [] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "collapsed": false, 91 | "input": [ 92 | "sz = 256\n", 93 | "data = np.random.random((sz, sz))\n", 94 | "data = data.dot(data.T)" 95 | ], 96 | "language": "python", 97 | "metadata": {}, 98 | "outputs": [] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "collapsed": false, 103 | "input": [ 104 | "%%time \n", 105 | "ar = eall.push({'data': data}, block=False)\n", 106 | "ar.wait_interactive()" 107 | ], 108 | 
"language": "python", 109 | "metadata": {}, 110 | "outputs": [] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "collapsed": false, 115 | "input": [ 116 | "@parallel.interactive\n", 117 | "def _bcast(key, root_rank):\n", 118 | " \"\"\"function to run on engines as part of broadcast\"\"\"\n", 119 | " g = globals()\n", 120 | " obj = g.get(key, None)\n", 121 | " obj = MPI.bcast(obj, root_rank)\n", 122 | " g[key] = obj\n", 123 | "\n", 124 | "def broadcast(key, obj, dv, root, root_rank):\n", 125 | " \"\"\"More efficient broadcast by doing push to root,\n", 126 | " and MPI broadcast to other engines.\n", 127 | " \n", 128 | " Still O(N) messages, but all but one message is always small.\n", 129 | " \"\"\"\n", 130 | " root.push({key : obj}, block=False)\n", 131 | " return dv.apply_async(_bcast, key, root_rank)" 132 | ], 133 | "language": "python", 134 | "metadata": {}, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "collapsed": false, 140 | "input": [ 141 | "%%time\n", 142 | "ar = broadcast('data', data, eall, root, root_rank)\n", 143 | "ar.wait_interactive()" 144 | ], 145 | "language": "python", 146 | "metadata": {}, 147 | "outputs": [] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "collapsed": false, 152 | "input": [ 153 | "eall.apply_sync(np.linalg.norm, parallel.Reference('data'), 2)" 154 | ], 155 | "language": "python", 156 | "metadata": {}, 157 | "outputs": [] 158 | } 159 | ], 160 | "metadata": {} 161 | } 162 | ] 163 | } -------------------------------------------------------------------------------- /exercises/Monte Carlo π.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:d2f72deed8c75c78e56cd49ca438da24eaef281bf80b3a3b92de09745a57c0ac" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Multiplexing Exercise - 
Monte Carlo \u03c0\n", 16 | "\n", 17 | "A simple toy problem to get a handle on multiple engines is a Monte\n", 18 | "Carlo approximation of \u03c0.\n", 19 | "\n", 20 | "Let's say we have a dartboard with a round target inscribed on a square\n", 21 | "board. If you threw darts randomly, and they land evenly distributed on\n", 22 | "the square board, how many darts would you expect to hit the target?\n", 23 | "\n", 24 | "" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "$$\n", 32 | "\\frac{A_c}{A_{sq}} = \\frac{\\pi r^2}{(2r)^2} = \\frac{\\pi}{4}\n", 33 | "$$" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "collapsed": false, 39 | "input": [ 40 | "from __future__ import print_function\n", 41 | "\n", 42 | "from random import random\n", 43 | "from math import pi\n", 44 | "\n", 45 | "def mcpi(nsamples):\n", 46 | " s = 0\n", 47 | " for i in range(nsamples):\n", 48 | " x = random()\n", 49 | " y = random()\n", 50 | " if x*x + y*y <= 1:\n", 51 | " s+=1\n", 52 | " return 4.*s/nsamples" 53 | ], 54 | "language": "python", 55 | "metadata": {}, 56 | "outputs": [], 57 | "prompt_number": 1 58 | }, 59 | { 60 | "cell_type": "code", 61 | "collapsed": false, 62 | "input": [ 63 | "for n in [10, 100, 1000, 10000, 100000, 1000000]:\n", 64 | " print(\"%8i\" % n, end=' ')\n", 65 | " for i in range(3):\n", 66 | " print(\"%.5f\" % mcpi(n), end=' ')\n", 67 | " print()" 68 | ], 69 | "language": "python", 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "output_type": "stream", 74 | "stream": "stdout", 75 | "text": [ 76 | " 10 3.60000 2.40000 2.80000 \n", 77 | " 100 2.72000 3.20000 3.20000 \n", 78 | " 1000 3.14800 3.10400 3.16000 \n", 79 | " 10000 3.15720 3.13480 3.16320 \n", 80 | " 100000 3.13800" 81 | ] 82 | }, 83 | { 84 | "output_type": "stream", 85 | "stream": "stdout", 86 | "text": [ 87 | " 3.14412 3.13400" 88 | ] 89 | }, 90 | { 91 | "output_type": "stream", 92 | "stream": "stdout", 93 | "text": [ 94 | " \n", 95 | " 1000000 3.14120" 
96 | ] 97 | }, 98 | { 99 | "output_type": "stream", 100 | "stream": "stdout", 101 | "text": [ 102 | " 3.14144" 103 | ] 104 | }, 105 | { 106 | "output_type": "stream", 107 | "stream": "stdout", 108 | "text": [ 109 | " 3.14244" 110 | ] 111 | }, 112 | { 113 | "output_type": "stream", 114 | "stream": "stdout", 115 | "text": [ 116 | " \n" 117 | ] 118 | } 119 | ], 120 | "prompt_number": 2 121 | }, 122 | { 123 | "cell_type": "code", 124 | "collapsed": false, 125 | "input": [ 126 | "%timeit mcpi(1000000)" 127 | ], 128 | "language": "python", 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "output_type": "stream", 133 | "stream": "stdout", 134 | "text": [ 135 | "1 loops, best of 3: 426 ms per loop\n" 136 | ] 137 | } 138 | ], 139 | "prompt_number": 3 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "It takes a lot of samples to get a good approximation. Can you write a\n", 146 | "function that will use your engines to break up the work?\n", 147 | "\n", 148 | "```python\n", 149 | "def multi_mcpi(dview, nsamples):\n", 150 | " raise NotImplementedError(\"you write this\")\n", 151 | "```" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "collapsed": false, 157 | "input": [ 158 | "from IPython import parallel\n", 159 | "rc = parallel.Client()\n", 160 | "\n", 161 | "view = rc[:]" 162 | ], 163 | "language": "python", 164 | "metadata": {}, 165 | "outputs": [], 166 | "prompt_number": 6 167 | }, 168 | { 169 | "cell_type": "code", 170 | "collapsed": true, 171 | "input": [ 172 | "%load ../soln/mcpi.py" 173 | ], 174 | "language": "python", 175 | "metadata": {}, 176 | "outputs": [], 177 | "prompt_number": 4 178 | }, 179 | { 180 | "cell_type": "code", 181 | "collapsed": false, 182 | "input": [ 183 | "multi_mcpi(view, 10000000)" 184 | ], 185 | "language": "python", 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "metadata": {}, 190 | "output_type": "pyout", 191 | "prompt_number": 10, 192 | "text": [ 193 | 
"3.1411995999999998" 194 | ] 195 | } 196 | ], 197 | "prompt_number": 10 198 | } 199 | ], 200 | "metadata": {} 201 | } 202 | ] 203 | } -------------------------------------------------------------------------------- /examples/memmap.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:688993a555a5450c12af8b8843dad50f2b78cac0f389adedc2f133aa556908cf" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "More efficient broadcast of arrays with memmap" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "Data movement is where IPython's na\u00efve model suffers the most.\n", 24 | "But knowing about your cluster lets you make smarter decisions about data movement than a simple `rc[:].push`." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "collapsed": false, 30 | "input": [ 31 | "import socket\n", 32 | "import os, sys, re\n", 33 | "\n", 34 | "import numpy as np\n", 35 | "\n", 36 | "from IPython import parallel" 37 | ], 38 | "language": "python", 39 | "metadata": {}, 40 | "outputs": [] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "collapsed": false, 45 | "input": [ 46 | "rc = parallel.Client(profile='dirac')\n", 47 | "eall = rc[:]" 48 | ], 49 | "language": "python", 50 | "metadata": {}, 51 | "outputs": [] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "collapsed": false, 56 | "input": [ 57 | "engine_hosts = eall.apply_async(socket.gethostname).get_dict()\n", 58 | "engine_hosts" 59 | ], 60 | "language": "python", 61 | "metadata": {}, 62 | "outputs": [] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "collapsed": false, 67 | "input": [ 68 | "host_engines = {}\n", 69 | "\n", 70 | "for eid, host in engine_hosts.items():\n", 71 | " if host not in host_engines:\n", 72 | " host_engines[host] = []\n", 
73 | " host_engines[host].append(eid)\n", 74 | "\n", 75 | "host_engines" 76 | ], 77 | "language": "python", 78 | "metadata": {}, 79 | "outputs": [] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "collapsed": false, 84 | "input": [ 85 | "sz = 256\n", 86 | "data = np.random.random((sz,sz))\n", 87 | "data = data.dot(data.T)" 88 | ], 89 | "language": "python", 90 | "metadata": {}, 91 | "outputs": [] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "collapsed": false, 96 | "input": [ 97 | "%time _ = rc[:].apply_sync(lambda : None)" 98 | ], 99 | "language": "python", 100 | "metadata": {}, 101 | "outputs": [] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "collapsed": false, 106 | "input": [ 107 | "ar = rc[:].push({'data': data}, block=False)\n", 108 | "ar.wait_interactive()" 109 | ], 110 | "language": "python", 111 | "metadata": {}, 112 | "outputs": [] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "collapsed": false, 117 | "input": [ 118 | "%px import numpy as np" 119 | ], 120 | "language": "python", 121 | "metadata": {}, 122 | "outputs": [] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "collapsed": false, 127 | "input": [ 128 | "def array_to_file(A):\n", 129 | " \"\"\"write an array to a temporary file, return its filename\"\"\"\n", 130 | " import tempfile\n", 131 | " with tempfile.NamedTemporaryFile(suffix='.np', delete=False) as tf:\n", 132 | " np.save(tf, data)\n", 133 | " data_path = tf.name\n", 134 | " return data_path" 135 | ], 136 | "language": "python", 137 | "metadata": {}, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "collapsed": false, 143 | "input": [ 144 | "@parallel.interactive\n", 145 | "def load_memmap(name, path, mode='r+'):\n", 146 | " \"\"\"load a file on disk into the interactive namespace as a memmapped array\"\"\"\n", 147 | " globals()[name] = np.memmap(path, mode=mode)" 148 | ], 149 | "language": "python", 150 | "metadata": {}, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "collapsed": false, 
156 | "input": [ 157 | "def bcast_memmap(data, name, client, host_engines):\n", 158 | " \"\"\"broadcast a numpy array efficiently\n", 159 | " \n", 160 | " - sends data to each remote host only once\n", 161 | " - loads with memmap everywhere\n", 162 | " \"\"\"\n", 163 | "\n", 164 | " # actually push the data, just once to each machine\n", 165 | "\n", 166 | " local_filename = None\n", 167 | " filenames_ars = {}\n", 168 | " for host, engines in host_engines.items():\n", 169 | " h0 = engines[0]\n", 170 | " if host == socket.gethostname():\n", 171 | " # Don't push at all to local engines\n", 172 | " local_filename = array_to_file(data)\n", 173 | " else:\n", 174 | " filenames_ars[host] = rc[h0].apply_async(array_to_file, data)\n", 175 | "\n", 176 | " # load the data on all engines into a memmapped array\n", 177 | " msg_ids = []\n", 178 | " for host, engines in host_engines.items():\n", 179 | " if host == socket.gethostname():\n", 180 | " filename = local_filename\n", 181 | " else:\n", 182 | " filename = filenames_ars[host].get()\n", 183 | " ar = rc[engines].apply_async(load_memmap, name, filename)\n", 184 | " msg_ids.extend(ar.msg_ids)\n", 185 | " \n", 186 | " return parallel.AsyncResult(client, msg_ids=msg_ids)" 187 | ], 188 | "language": "python", 189 | "metadata": {}, 190 | "outputs": [] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "collapsed": false, 195 | "input": [ 196 | "%%time\n", 197 | "ar = bcast_memmap(data, 'data', rc, host_engines)\n", 198 | "ar.wait_interactive()" 199 | ], 200 | "language": "python", 201 | "metadata": {}, 202 | "outputs": [] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "collapsed": false, 207 | "input": [ 208 | "%px np.linalg.norm(data, 2)" 209 | ], 210 | "language": "python", 211 | "metadata": {}, 212 | "outputs": [] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "You can also do the same thing [with MPI](MPI Broadcast.ipynb)." 
219 | ] 220 | } 221 | ], 222 | "metadata": {} 223 | } 224 | ] 225 | } -------------------------------------------------------------------------------- /examples/wikipedia/Wikipedia.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:07b06af47856760f4b8fd7d0ee04bd3d6d2038a9a1be22dcd94ee62d8df15fe3" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "Crawling Wikipedia" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "This notebook crawls links on Wikipedia\n", 24 | "and visualizes the graph with NetworkX and d3." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "collapsed": false, 30 | "input": [ 31 | "%matplotlib inline\n", 32 | "import matplotlib.pyplot as plt" 33 | ], 34 | "language": "python", 35 | "metadata": {}, 36 | "outputs": [] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "collapsed": false, 41 | "input": [ 42 | "from IPython.html import widgets\n", 43 | "from IPython.display import display\n", 44 | "from eventful_graph import EventfulGraph\n", 45 | "from widget_forcedirectedgraph import ForceDirectedGraphWidget, publish_js\n", 46 | "publish_js()" 47 | ], 48 | "language": "python", 49 | "metadata": {}, 50 | "outputs": [] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "collapsed": false, 55 | "input": [ 56 | "import networkx as nx" 57 | ], 58 | "language": "python", 59 | "metadata": {}, 60 | "outputs": [] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "collapsed": false, 65 | "input": [ 66 | "from IPython import parallel\n", 67 | "rc = parallel.Client()\n", 68 | "lbv = rc.load_balanced_view()" 69 | ], 70 | "language": "python", 71 | "metadata": {}, 72 | "outputs": [] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "collapsed": false, 77 | "input": [ 78 | "%%px --local\n", 79 | 
"\n", 80 | "import requests\n", 81 | "from bs4 import BeautifulSoup\n", 82 | "\n", 83 | "import re\n", 84 | "wiki_pat = re.compile(r'^/wiki/([^:]*)$')\n" 85 | ], 86 | "language": "python", 87 | "metadata": {}, 88 | "outputs": [] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "collapsed": false, 93 | "input": [ 94 | "def links_for_page(title):\n", 95 | " page = BeautifulSoup(requests.get('http://en.wikipedia.org/wiki/%s' % title).text)\n", 96 | " links = page.find(\"div\", id=\"content\").findAll(\"a\", href=wiki_pat)\n", 97 | " \n", 98 | " titles = []\n", 99 | " for link in links:\n", 100 | " title = wiki_pat.match(link['href']).group(1)\n", 101 | " titles.append(title)\n", 102 | " \n", 103 | " return titles" 104 | ], 105 | "language": "python", 106 | "metadata": {}, 107 | "outputs": [] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "collapsed": false, 112 | "input": [ 113 | "def add_node(g, label, **kwargs):\n", 114 | " \"\"\"add a node to a graph, with some default fill and color\"\"\"\n", 115 | " kwargs.setdefault('fill', '#ccc')\n", 116 | " kwargs.setdefault('color', 'black')\n", 117 | " g.add_node(label, label=label, **kwargs)" 118 | ], 119 | "language": "python", 120 | "metadata": {}, 121 | "outputs": [] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "collapsed": false, 126 | "input": [ 127 | "def add_links(graph, src, links):\n", 128 | " \"\"\"Add links from src to links in graph\"\"\"\n", 129 | " new_nodes = []\n", 130 | " add_node(graph, src)\n", 131 | " n = len(links)\n", 132 | " for i,link in enumerate(links):\n", 133 | " if link not in graph:\n", 134 | " new_nodes.append(link)\n", 135 | " add_node(graph, link)\n", 136 | " \n", 137 | " graph.add_edge(src, link)#, distance=(i+0.2))\n", 138 | " return new_nodes" 139 | ], 140 | "language": "python", 141 | "metadata": {}, 142 | "outputs": [] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "collapsed": false, 147 | "input": [ 148 | "def wikipedia_graph(lbview, root, limit=32, in_degree_limit=3):\n", 
149 | " \"\"\"build a graph by crawling Wikipedia from a root page\n", 150 | " \n", 151 | " The visualized graph will be limited to pages linked from several other pages\n", 152 | " \"\"\"\n", 153 | " graph = nx.DiGraph()\n", 154 | " egraph = EventfulGraph()\n", 155 | "\n", 156 | " graph_widget = ForceDirectedGraphWidget(egraph, width=800, height=600)\n", 157 | " display(graph_widget)\n", 158 | " \n", 159 | " add_node(graph, root)\n", 160 | " add_node(egraph, root, r=16, fill='#aef')\n", 161 | " surface = [root]\n", 162 | " while len(egraph) < limit:\n", 163 | " surface = [ node for node in graph if graph.out_degree(node) == 0 ]\n", 164 | " amr = lbview.map_async(links_for_page, surface)\n", 165 | " for i, links in enumerate(amr):\n", 166 | " src = surface[i]\n", 167 | " links = links[:20]\n", 168 | " add_links(graph, src, links)\n", 169 | " for node in links:\n", 170 | " if graph.in_degree(node) >= in_degree_limit:\n", 171 | " path = nx.shortest_path(graph, root, node)\n", 172 | " prv = root\n", 173 | " for nxt in path[1:]:\n", 174 | " if nxt not in egraph:\n", 175 | " add_node(egraph, nxt)\n", 176 | " egraph.add_edge(prv, nxt)\n", 177 | " egraph.node[nxt]['r'] = min(3 * graph.in_degree(nxt), 24)\n", 178 | " prv = nxt\n", 179 | " for parent in graph.predecessors(node):\n", 180 | " if parent in egraph:\n", 181 | " egraph.add_edge(parent, node)\n", 182 | " egraph.node[node]['r'] = min(3 * graph.in_degree(node), 24)\n", 183 | " for child in graph.successors(node):\n", 184 | " if child in egraph:\n", 185 | " egraph.add_edge(node, child)\n", 186 | " egraph.node[child]['r'] = min(3 * graph.in_degree(child), 24)\n", 187 | " time.sleep(0.3)\n", 188 | " if len(egraph) > limit:\n", 189 | " return graph, egraph\n", 190 | " print('%s: %i' % (src, len(graph)))\n", 191 | " sys.stdout.flush()\n", 192 | " return graph, egraph\n", 193 | " " 194 | ], 195 | "language": "python", 196 | "metadata": {}, 197 | "outputs": [], 198 | "prompt_number": 73 199 | }, 200 | { 201 | "cell_type": 
"code", 202 | "collapsed": false, 203 | "input": [ 204 | "g, eg = wikipedia_graph(lbv, 'SciPy', limit=20)" 205 | ], 206 | "language": "python", 207 | "metadata": {}, 208 | "outputs": [] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "collapsed": false, 213 | "input": [ 214 | "g, eg = wikipedia_graph(lbv, 'Austin, TX', limit=12)" 215 | ], 216 | "language": "python", 217 | "metadata": {}, 218 | "outputs": [] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "collapsed": false, 223 | "input": [], 224 | "language": "python", 225 | "metadata": {}, 226 | "outputs": [] 227 | } 228 | ], 229 | "metadata": {} 230 | } 231 | ] 232 | } -------------------------------------------------------------------------------- /examples/Parallel image processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:78138dc17f10550cd721240f4580b77dd56bdba366ab9062c72f97239c746c50" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "Motivating example: Parallel image processing with scikit-image" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "To get a sense of what IPython.parallel might be used for,\n", 24 | "we start with an example of some batch processing of image files with [scikit-image](http://scikit-image.org/).\n", 25 | "We will revisit pieces of this example as we learn about the different components of IPython.\n", 26 | "\n", 27 | "You can download images with [this notebook](../images.ipynb), or get a zip [here](https://s3.amazonaws.com/ipython-parallel-data/images.zip), or find any images on your computer." 
28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "collapsed": false, 33 | "input": [ 34 | "%matplotlib inline\n", 35 | "import matplotlib.pyplot as plt" 36 | ], 37 | "language": "python", 38 | "metadata": {}, 39 | "outputs": [] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "collapsed": false, 44 | "input": [ 45 | "import sys,os,re,time\n", 46 | "import urllib\n", 47 | "\n", 48 | "import numpy as np\n", 49 | "\n", 50 | "from IPython import parallel" 51 | ], 52 | "language": "python", 53 | "metadata": {}, 54 | "outputs": [] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "collapsed": false, 59 | "input": [ 60 | "from skimage.io import imread\n", 61 | "from skimage import measure" 62 | ], 63 | "language": "python", 64 | "metadata": {}, 65 | "outputs": [] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "Define a function to find the contours in an image with scikit-image." 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "collapsed": false, 77 | "input": [ 78 | "def find_contours(path, low=0.1, high=0.8):\n", 79 | " \"\"\"Find contours in an image at path\n", 80 | " \n", 81 | " Returns the image and the contour lists.\n", 82 | " \"\"\"\n", 83 | " img = imread(path, flatten=True)\n", 84 | " \n", 85 | " # Find contours at a constant value of 0.1 and 0.8\n", 86 | " dark = measure.find_contours(img, low)\n", 87 | " light = measure.find_contours(img, high)\n", 88 | " return img, dark, light\n" 89 | ], 90 | "language": "python", 91 | "metadata": {}, 92 | "outputs": [] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "collapsed": false, 97 | "input": [ 98 | "def plot_contours(img, dark, light, show=True):\n", 99 | " \"\"\"Display the image and plot all contours found\"\"\"\n", 100 | " plt.imshow(img, cmap='gray')\n", 101 | " \n", 102 | " for n, contour in enumerate(dark):\n", 103 | " plt.plot(contour[:, 1], contour[:, 0], c='r', linewidth=1)\n", 104 | " \n", 105 | " for n, contour in enumerate(light):\n", 106 | " plt.plot(contour[:, 1], contour[:, 0], c='b', linewidth=1)\n", 107 |
"\n", 108 | " plt.axis('image')\n", 109 | " plt.xticks([])\n", 110 | " plt.yticks([])\n", 111 | " if show:\n", 112 | " plt.show()\n" 113 | ], 114 | "language": "python", 115 | "metadata": {}, 116 | "outputs": [] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "collapsed": false, 121 | "input": [ 122 | "def get_contours_image(path):\n", 123 | " \"\"\"Given a path, return a PNG of the image with contour lines\n", 124 | " \n", 125 | " Calls both find_contours and plot_contours\n", 126 | " \"\"\"\n", 127 | " from IPython.core.pylabtools import print_figure\n", 128 | " \n", 129 | " img, dark, light = find_contours(path)\n", 130 | " plot_contours(img, dark, light, show=False)\n", 131 | " fig = plt.gcf()\n", 132 | " pngdata = print_figure(fig)\n", 133 | " plt.close(fig)\n", 134 | " return pngdata\n" 135 | ], 136 | "language": "python", 137 | "metadata": {}, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "collapsed": false, 143 | "input": [ 144 | "import os\n", 145 | "pictures_dir = os.path.join('..', 'images', 'castle')\n", 146 | "\n", 147 | "pictures = []\n", 148 | "for directory, subdirs, files in os.walk(pictures_dir):\n", 149 | " for fname in files:\n", 150 | " if fname.lower().endswith(('.jpg', '.png')):\n", 151 | " pictures.append(os.path.join(directory, fname))\n" 152 | ], 153 | "language": "python", 154 | "metadata": {}, 155 | "outputs": [] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Let's test our function locally, to see what it does." 
162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "collapsed": false, 167 | "input": [ 168 | "for p in pictures[:3]:\n", 169 | " img, dark, light = find_contours(p)\n", 170 | " plot_contours(img, dark, light)\n" 171 | ], 172 | "language": "python", 173 | "metadata": {}, 174 | "outputs": [] 175 | }, 176 | { 177 | "cell_type": "heading", 178 | "level": 1, 179 | "metadata": {}, 180 | "source": [ 181 | "Now in parallel" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "First, we connect our parallel Client" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "collapsed": false, 194 | "input": [ 195 | "rc = parallel.Client()\n", 196 | "all_engines = rc[:]\n", 197 | "view = rc.load_balanced_view()" 198 | ], 199 | "language": "python", 200 | "metadata": {}, 201 | "outputs": [] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "Then we initialize the namespace on all of the engines with imports" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "collapsed": false, 213 | "input": [ 214 | "%px import os; os.chdir(\"{os.getcwd()}\")" 215 | ], 216 | "language": "python", 217 | "metadata": {}, 218 | "outputs": [] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "collapsed": false, 223 | "input": [ 224 | "%%px\n", 225 | "import matplotlib\n", 226 | "matplotlib.use('Agg')\n", 227 | "import matplotlib.pyplot as plt\n", 228 | "\n", 229 | "from skimage.io import imread\n", 230 | "from skimage import measure" 231 | ], 232 | "language": "python", 233 | "metadata": {}, 234 | "outputs": [] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "and make sure some functions are defined everywhere (this is only necessary for the `contours_in_url` case)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "collapsed": false, 246 | "input": [ 247 | "all_engines.push(dict(\n", 248 | " plot_contours=plot_contours,\n", 249 | " 
find_contours=find_contours,\n", 250 | "))" 251 | ], 252 | "language": "python", 253 | "metadata": {}, 254 | "outputs": [] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "Now we can iterate through all of our pictures, and detect and display any contours we find" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "collapsed": false, 266 | "input": [ 267 | "from IPython.display import display, Image\n", 268 | "\n", 269 | "amr = view.map_async(get_contours_image, pictures[:20], ordered=False)\n", 270 | "for pngdata in amr:\n", 271 | " display(Image(data=pngdata))\n" 272 | ], 273 | "language": "python", 274 | "metadata": {}, 275 | "outputs": [] 276 | } 277 | ], 278 | "metadata": {} 279 | } 280 | ] 281 | } -------------------------------------------------------------------------------- /tutorial/All Together.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:2367377a6fce3b59a11f38f4dd2b55cd70b0035b3fbff17a56eaba6ee57e897d" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "Using DirectView and LoadBalancedView together" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "We mentioned that you can use any number of views of any sort simultaneously.\n", 24 | "\n", 25 | "The most common model for doing this is to use a DirectView to initialise the engine namespaces\n", 26 | "(either defining functions and variables, or loading datasets),\n", 27 | "and then submitting work as tasks via the LoadBalancedView.\n", 28 | "\n", 29 | "For this example, we are going to define a few functions for fetching data and getting a simple summary on some stocks,\n", 30 | "then we are going to run those functions on a series of stocks in a
load-balanced way." 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "The usual boilerplate:" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "collapsed": false, 43 | "input": [ 44 | "%matplotlib inline\n", 45 | "import matplotlib.pyplot as plt" 46 | ], 47 | "language": "python", 48 | "metadata": {}, 49 | "outputs": [] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "collapsed": false, 54 | "input": [ 55 | "from IPython import parallel\n", 56 | "\n", 57 | "rc = parallel.Client()\n", 58 | "all_engines = rc[:]\n", 59 | "lbv = rc.load_balanced_view()" 60 | ], 61 | "language": "python", 62 | "metadata": {}, 63 | "outputs": [] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Use matplotlib to fetch data from Yahoo\n", 70 | "\n", 71 | "Note that we are defining these functions on the engines" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "collapsed": false, 77 | "input": [ 78 | "%%px\n", 79 | "\n", 80 | "import urllib2\n", 81 | "from io import BytesIO\n", 82 | "\n", 83 | "from matplotlib import finance\n", 84 | "\n", 85 | "def get_yahoo_data(name=None, start=None, end=None):\n", 86 | " \"\"\"return historical stock data from yahoo as a recarray.\"\"\"\n", 87 | " fp = finance.fetch_historical_yahoo(name, start, end)\n", 88 | " return finance.parse_yahoo_historical(fp, asobject=True)\n", 89 | "\n", 90 | "def relative_close(records):\n", 91 | " \"\"\"return the reative change in closing price over the interval\"\"\"\n", 92 | " \n", 93 | " close = records['aclose']\n", 94 | " return (close[-1] - close[0]) / close[0]\n", 95 | "\n" 96 | ], 97 | "language": "python", 98 | "metadata": {}, 99 | "outputs": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "Setup the timestamps. 
We want the past year" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "collapsed": false, 111 | "input": [ 112 | "%%px\n", 113 | "import datetime\n", 114 | "end = datetime.datetime.now()\n", 115 | "start = end - datetime.timedelta(days=365)" 116 | ], 117 | "language": "python", 118 | "metadata": {}, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Our ticker of stocks to check" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "collapsed": false, 131 | "input": [ 132 | "ticker = ['AAPL', 'GOOG', 'MSFT', 'ORCL', 'AMZN', 'INTC', 'ATVI', 'EA', 'NFLX']" 133 | ], 134 | "language": "python", 135 | "metadata": {}, 136 | "outputs": [] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "And map a simple function (that depends on the code we have defined on the engines)\n", 143 | "onto our ticker." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "collapsed": false, 149 | "input": [ 150 | "def year_over_year(name):\n", 151 | " \"\"\"fetch data, and return year-over-year change (relative to first close)\n", 152 | " \n", 153 | " in percent\n", 154 | " \"\"\"\n", 155 | " stock = get_yahoo_data(name, start, end)\n", 156 | " return relative_close(stock) * 100\n", 157 | "\n", 158 | "amr = lbv.map(year_over_year, ticker)\n", 159 | "amr" 160 | ], 161 | "language": "python", 162 | "metadata": {}, 163 | "outputs": [] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "collapsed": false, 168 | "input": [ 169 | "fig, ax = plt.subplots()\n", 170 | "fig.set_figwidth(10)\n", 171 | "ax.bar(range(len(amr)), amr)\n", 172 | "ax.set_xticks(np.arange(len(amr))+0.4)\n", 173 | "ax.set_xticklabels(ticker)\n", 174 | "ax.axhline(0, c='k');" 175 | ], 176 | "language": "python", 177 | "metadata": {}, 178 | "outputs": [] 179 | }, 180 | { 181 | "cell_type": "heading", 182 | "level": 1, 183 | "metadata": {}, 184 | "source": [ 185 | "Plotting stock data in parallel with 
Pandas" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "collapsed": false, 191 | "input": [ 192 | "%%px\n", 193 | "%matplotlib inline\n", 194 | "import matplotlib.pyplot as plt" 195 | ], 196 | "language": "python", 197 | "metadata": {}, 198 | "outputs": [] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "collapsed": false, 203 | "input": [ 204 | "from pandas.io.data import DataReader\n", 205 | "from datetime import datetime" 206 | ], 207 | "language": "python", 208 | "metadata": {}, 209 | "outputs": [] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "collapsed": false, 214 | "input": [ 215 | "def plot_stock_since(name, since_when, key=\"Adj Close\"):\n", 216 | " data = DataReader(name, \"yahoo\", since_when)\n", 217 | " data[key].plot()\n", 218 | " plt.title(name)\n", 219 | " plt.show()\n", 220 | " " 221 | ], 222 | "language": "python", 223 | "metadata": {}, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "collapsed": false, 229 | "input": [ 230 | "start = datetime(2011,1,1)\n", 231 | "plot_stock_since('GOOG', start)" 232 | ], 233 | "language": "python", 234 | "metadata": {}, 235 | "outputs": [] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "collapsed": false, 240 | "input": [ 241 | "%px from pandas.io.data import DataReader" 242 | ], 243 | "language": "python", 244 | "metadata": {}, 245 | "outputs": [] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "collapsed": false, 250 | "input": [ 251 | "all_engines.scatter('ticker', ticker)\n", 252 | "%px print ticker" 253 | ], 254 | "language": "python", 255 | "metadata": {}, 256 | "outputs": [] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "collapsed": false, 261 | "input": [ 262 | "all_engines['plot_stock_since'] = plot_stock_since" 263 | ], 264 | "language": "python", 265 | "metadata": {}, 266 | "outputs": [] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "collapsed": false, 271 | "input": [ 272 | "%%px\n", 273 | "from datetime import datetime\n", 274 | "start = 
datetime(2011,1,1)\n", 275 | "from pandas.io.data import DataReader\n", 276 | "\n", 277 | "for stock in ticker:\n", 278 | " plot_stock_since(stock, start)" 279 | ], 280 | "language": "python", 281 | "metadata": {}, 282 | "outputs": [] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "collapsed": false, 287 | "input": [ 288 | "amr = lbv.map(plot_stock_since, ticker, [start]*len(ticker))\n", 289 | "amr\n" 290 | ], 291 | "language": "python", 292 | "metadata": {}, 293 | "outputs": [] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "collapsed": false, 298 | "input": [ 299 | "%time amr.wait()\n", 300 | "amr.display_outputs()" 301 | ], 302 | "language": "python", 303 | "metadata": {}, 304 | "outputs": [] 305 | } 306 | ], 307 | "metadata": {} 308 | } 309 | ] 310 | } -------------------------------------------------------------------------------- /examples/Parallel face detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:bc7020c47aa9ce4f8bde7861883d0b3f8b2a20568102b3b09f0d2ebe3ef7f1db" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "Motivating example: Interactive parallel face detection with OpenCV" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "To get a sense of what IPython.parallel might be used for,\n", 24 | "we start with an example.\n", 25 | "We will revisit pieces of this example as we learn about the different components of IPython." 
26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "collapsed": false, 31 | "input": [ 32 | "%matplotlib inline\n", 33 | "import matplotlib.pyplot as plt" 34 | ], 35 | "language": "python", 36 | "metadata": {}, 37 | "outputs": [] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "collapsed": false, 42 | "input": [ 43 | "import sys, os, re, time\n", 44 | "\n", 45 | "import numpy as np\n", 46 | "\n", 47 | "from IPython import parallel" 48 | ], 49 | "language": "python", 50 | "metadata": {}, 51 | "outputs": [] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "First, initialize OpenCV for simple facial detection" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "collapsed": false, 63 | "input": [ 64 | "HAAR_CASCADE_PATH = \"haarcascade_frontalface_default.xml\"\n", 65 | "# if you have opencv installed via homebrew, this would be in\n", 66 | "# /usr/local/share/OpenCV/haarcascades/\n", 67 | "# If via Conda, it will be in:\n", 68 | "# os.path.join(sys.prefix, 'share', 'OpenCV', 'haarcascades')\n", 69 | "\n", 70 | "import cv\n", 71 | "storage = cv.CreateMemStorage()\n", 72 | "cascade = cv.Load(HAAR_CASCADE_PATH)\n" 73 | ], 74 | "language": "python", 75 | "metadata": {}, 76 | "outputs": [] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "Then define a few functions for extracting faces from images" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "collapsed": false, 88 | "input": [ 89 | "def extract_faces(image, faces):\n", 90 | " \"\"\"Returns any faces in an image in a list of numpy arrays\"\"\"\n", 91 | " import numpy as np\n", 92 | " A = np.frombuffer(image.tostring(), dtype=np.uint8).reshape((image.height, image.width, image.nChannels))\n", 93 | " A = A[:,:,::-1]\n", 94 | " face_arrays = []\n", 95 | " for face in faces:\n", 96 | " Aface = A[face[1]:face[1]+face[3],face[0]:face[0]+face[2]]\n", 97 | " face_arrays.append(Aface)\n", 98 | " return face_arrays\n", 99 | "\n", 100 | "\n", 101 
| "def detect_faces(filename):\n", 102 | " \"\"\"Loads an image into OpenCV, and detects faces\n", 103 | "\n", 104 | " returns None if no image is found,\n", 105 | " (filename, [list of numpy arrays]) if there are faces\n", 106 | " \"\"\"\n", 107 | " \n", 108 | " image = cv.LoadImage(filename)\n", 109 | " faces = []\n", 110 | " detected = cv.HaarDetectObjects(image, cascade, storage, 1.2, 2, cv.CV_HAAR_DO_CANNY_PRUNING, (100,100))\n", 111 | " if detected:\n", 112 | " for (x,y,w,h),n in detected:\n", 113 | " faces.append((x,y,w,h))\n", 114 | " if faces:\n", 115 | " return filename, extract_faces(image, faces)\n" 116 | ], 117 | "language": "python", 118 | "metadata": {}, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Since we don't trust the network, we can just build a list of images from anywhere on our filesystem.\n", 126 | "Any list of images will do.\n", 127 | "For instance, you can use the path to the 'Thumbnails' directory in your iPhoto library,\n", 128 | "which vary from ~320x240 - 1024x768." 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "collapsed": false, 134 | "input": [ 135 | "pictures_dir = os.path.join('images', 'portrait')" 136 | ], 137 | "language": "python", 138 | "metadata": {}, 139 | "outputs": [] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "This will search `pictures_dir` for any JPEGs or PNGs.\n", 146 | "\n", 147 | "See the [download images](download images.ipynb) notebook for a quick way to populate a folder with images from Wikimedia Commons with a certain tag."
148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "collapsed": false, 153 | "input": [ 154 | "import glob\n", 155 | "pictures = []\n", 156 | "for directory, subdirs, files in os.walk(pictures_dir):\n", 157 | " for fname in files:\n", 158 | " if fname.lower().endswith(('.jpg', '.png')):\n", 159 | " pictures.append(os.path.join(directory, fname))\n" 160 | ], 161 | "language": "python", 162 | "metadata": {}, 163 | "outputs": [] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "Let's test our output" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "collapsed": false, 175 | "input": [ 176 | "for p in pictures:\n", 177 | " found = detect_faces(p)\n", 178 | " if found:\n", 179 | " break\n", 180 | "\n", 181 | "filename, faces = found\n", 182 | "for face in faces:\n", 183 | " plt.figure()\n", 184 | " plt.imshow(face)" 185 | ], 186 | "language": "python", 187 | "metadata": {}, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "Hey, that looks like a face!" 
195 | ] 196 | }, 197 | { 198 | "cell_type": "heading", 199 | "level": 1, 200 | "metadata": {}, 201 | "source": [ 202 | "Now in parallel" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "First, we connect our parallel Client" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "collapsed": false, 215 | "input": [ 216 | "rc = parallel.Client()\n", 217 | "all_engines = rc[:]\n", 218 | "view = rc.load_balanced_view()" 219 | ], 220 | "language": "python", 221 | "metadata": {}, 222 | "outputs": [] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "Then we initialize OpenCV on all of the engines (identical to what we did above)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "collapsed": false, 234 | "input": [ 235 | "here = os.getcwd()\n", 236 | "%px %cd $here" 237 | ], 238 | "language": "python", 239 | "metadata": {}, 240 | "outputs": [] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "collapsed": false, 245 | "input": [ 246 | "%%px\n", 247 | "HAAR_CASCADE_PATH = \"haarcascade_frontalface_default.xml\"\n", 248 | "\n", 249 | "import cv\n", 250 | "storage = cv.CreateMemStorage()\n", 251 | "cascade = cv.Load(HAAR_CASCADE_PATH)\n" 252 | ], 253 | "language": "python", 254 | "metadata": {}, 255 | "outputs": [] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "and make sure `extract_faces` is defined everywhere" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "collapsed": false, 267 | "input": [ 268 | "all_engines.push(dict(\n", 269 | " extract_faces=extract_faces,\n", 270 | "))" 271 | ], 272 | "language": "python", 273 | "metadata": {}, 274 | "outputs": [] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "Now we can iterate through all of our pictures, and detect and display any faces we find" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "collapsed": 
false, 286 | "input": [ 287 | "tic = time.time()\n", 288 | "\n", 289 | "amr = view.map_async(detect_faces, pictures[:1000], ordered=False)\n", 290 | "nfound = 0\n", 291 | "for r in amr:\n", 292 | " if not r:\n", 293 | " continue\n", 294 | " filename, faces = r\n", 295 | " nfound += len(faces)\n", 296 | " print \"%i faces found in %s\" % (len(faces), filename)\n", 297 | " for face in faces:\n", 298 | " plt.imshow(face)\n", 299 | " plt.show()\n", 300 | "\n", 301 | "toc = time.time()\n", 302 | "\n", 303 | "print \"found %i faces in %i images in %f s\" % (nfound, len(amr), toc-tic)\n" 304 | ], 305 | "language": "python", 306 | "metadata": {}, 307 | "outputs": [] 308 | } 309 | ], 310 | "metadata": {} 311 | } 312 | ] 313 | } -------------------------------------------------------------------------------- /tutorial/Parallel Magics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:dd660ce669809bc71efd233ff4f0e4bf3058643050e31480a60e8801d1758fe7" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "Using Parallel Magics" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "IPython has a few magics for working with your engines.\n", 24 | "\n", 25 | "This assumes you have started an IPython cluster, either with the notebook interface,\n", 26 | "or the `ipcluster/controller/engine` commands." 
27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "collapsed": false, 32 | "input": [ 33 | "from IPython import parallel\n", 34 | "rc = parallel.Client()\n", 35 | "dv = rc[:]\n", 36 | "rc.ids" 37 | ], 38 | "language": "python", 39 | "metadata": {}, 40 | "outputs": [] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "Creating a Client registers the parallel magics `%px`, `%%px`, `%pxresult`, `pxconfig`, and `%autopx`. \n", 47 | "These magics are initially associated with a DirectView always associated with all currently registered engines." 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "Now we can execute code remotely with `%px`:" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "collapsed": false, 60 | "input": [ 61 | "%px a=5" 62 | ], 63 | "language": "python", 64 | "metadata": {}, 65 | "outputs": [] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "collapsed": false, 70 | "input": [ 71 | "%px print a" 72 | ], 73 | "language": "python", 74 | "metadata": {}, 75 | "outputs": [] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "collapsed": false, 80 | "input": [ 81 | "%px a" 82 | ], 83 | "language": "python", 84 | "metadata": {}, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "collapsed": false, 90 | "input": [ 91 | "with dv.sync_imports():\n", 92 | " import sys" 93 | ], 94 | "language": "python", 95 | "metadata": {}, 96 | "outputs": [] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "collapsed": false, 101 | "input": [ 102 | "%px from __future__ import print_function\n", 103 | "%px print(\"ERROR\", file=sys.stderr)" 104 | ], 105 | "language": "python", 106 | "metadata": {}, 107 | "outputs": [] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "You don't have to wait for results. 
The `%pxconfig` magic lets you change the default blocking/targets for the `%px` magics:" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "collapsed": false, 119 | "input": [ 120 | "%pxconfig --noblock" 121 | ], 122 | "language": "python", 123 | "metadata": {}, 124 | "outputs": [] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "collapsed": false, 129 | "input": [ 130 | "%px import time\n", 131 | "%px time.sleep(5)\n", 132 | "%px time.time()" 133 | ], 134 | "language": "python", 135 | "metadata": {}, 136 | "outputs": [] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "But you will notice that this didn't output the result of the last command.\n", 143 | "For this, we have `%pxresult`, which displays the output of the latest request:" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "collapsed": false, 149 | "input": [ 150 | "%pxresult" 151 | ], 152 | "language": "python", 153 | "metadata": {}, 154 | "outputs": [] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "Remember, an IPython engine is IPython, so you can do magics remotely as well!" 
161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "collapsed": false, 166 | "input": [ 167 | "%%px\n", 168 | "%matplotlib inline\n", 169 | "import numpy as np\n", 170 | "import matplotlib.pyplot as plt" 171 | ], 172 | "language": "python", 173 | "metadata": {}, 174 | "outputs": [] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "`%%px` can also be used as a cell magic, for submitting whole blocks.\n", 181 | "This one accepts `--block` and `--noblock` flags to specify\n", 182 | "the blocking behavior, though the default is unchanged.\n" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "collapsed": false, 188 | "input": [ 189 | "dv.scatter('id', dv.targets, flatten=True)\n", 190 | "dv['stride'] = len(dv)" 191 | ], 192 | "language": "python", 193 | "metadata": {}, 194 | "outputs": [] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "collapsed": false, 199 | "input": [ 200 | "%%px --block\n", 201 | "print(\" id=%s\" % id)\n", 202 | "print(\"stride=%s\" % stride)" 203 | ], 204 | "language": "python", 205 | "metadata": {}, 206 | "outputs": [] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "collapsed": false, 211 | "input": [ 212 | "%%px --noblock\n", 213 | "x = np.linspace(0,np.pi,1000)\n", 214 | "for n in range(id, 12, stride):\n", 215 | "    print(n)\n", 216 | "    plt.plot(x,np.sin(n*x))\n", 217 | "plt.title(\"Plot %i\" % id);" 218 | ], 219 | "language": "python", 220 | "metadata": {}, 221 | "outputs": [] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "collapsed": false, 226 | "input": [ 227 | "%pxresult" 228 | ], 229 | "language": "python", 230 | "metadata": {}, 231 | "outputs": [] 232 | }, 233 | { 234 | "cell_type": "heading", 235 | "level": 2, 236 | "metadata": {}, 237 | "source": [ 238 | "Parallel Exceptions" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "When you raise exceptions with the parallel exception,\n", 246 | "the CompositeError raised
locally will display your remote traceback." 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "collapsed": false, 252 | "input": [ 253 | "%%px\n", 254 | "from numpy.random import random\n", 255 | "A = random((100,100,'invalid shape'))" 256 | ], 257 | "language": "python", 258 | "metadata": {}, 259 | "outputs": [] 260 | }, 261 | { 262 | "cell_type": "heading", 263 | "level": 2, 264 | "metadata": {}, 265 | "source": [ 266 | "Remote Cell Magics" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "Remember, Engines are IPython too, so the cell that is run remotely by %%px can in turn use a cell magic." 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "collapsed": false, 279 | "input": [ 280 | "%%px\n", 281 | "%%bash\n", 282 | "echo \"remote parallel interactive bash!\"\n", 283 | "hostname\n", 284 | "date\n", 285 | "echo $$" 286 | ], 287 | "language": "python", 288 | "metadata": {}, 289 | "outputs": [] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "collapsed": false, 294 | "input": [ 295 | "%%px\n", 296 | "%%ruby\n", 297 | "puts 'hello from ruby'" 298 | ], 299 | "language": "python", 300 | "metadata": {}, 301 | "outputs": [] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "collapsed": false, 306 | "input": [ 307 | "dv.scatter('rank', dv.targets, flatten=True)" 308 | ], 309 | "language": "python", 310 | "metadata": {}, 311 | "outputs": [] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "collapsed": false, 316 | "input": [ 317 | "%%px\n", 318 | "%%timeit\n", 319 | "from numpy.random import random\n", 320 | "from numpy.linalg import norm\n", 321 | "N = 100 * (rank + 1)\n", 322 | "A = random((N,N))\n", 323 | "norm(A, 2) " 324 | ], 325 | "language": "python", 326 | "metadata": {}, 327 | "outputs": [] 328 | }, 329 | { 330 | "cell_type": "heading", 331 | "level": 1, 332 | "metadata": {}, 333 | "source": [ 334 | "Debugging Engines" 335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | 
"metadata": {}, 340 | "source": [ 341 | "Since the IPython engine is precisely the same object used for the notebook and qtconsole,\n", 342 | "we can connect other frontends directly to the engine.\n", 343 | "\n", 344 | "The first step is to bind the engine's sockets, so its connection pattern looks like a regular kernel" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "collapsed": false, 350 | "input": [ 351 | "%%px\n", 352 | "from IPython.parallel import bind_kernel\n", 353 | "bind_kernel()" 354 | ], 355 | "language": "python", 356 | "metadata": {}, 357 | "outputs": [] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "Now we can raise an exception on the engines" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "collapsed": false, 369 | "input": [ 370 | "%%px\n", 371 | "\n", 372 | "def foo(a, b):\n", 373 | "    return a/(1-b)\n", 374 | "\n", 375 | "def bar(b):\n", 376 | "    return foo(2, b)\n", 377 | "\n", 378 | "bar(1)" 379 | ], 380 | "language": "python", 381 | "metadata": {}, 382 | "outputs": [] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "Now we can connect a qtconsole to the engine(s)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "collapsed": false, 394 | "input": [ 395 | "%px %qtconsole" 396 | ], 397 | "language": "python", 398 | "metadata": {}, 399 | "outputs": [] 400 | } 401 | ], 402 | "metadata": {} 403 | } 404 | ] 405 | } -------------------------------------------------------------------------------- /examples/MC Options.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:209151f66e872710532565f2f42c8ee65c33b71fe9eff13118f5791667d90cf8" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Parallel Monte-Carlo
options pricing" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Problem setup" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "collapsed": false, 28 | "input": [ 29 | "%matplotlib inline" 30 | ], 31 | "language": "python", 32 | "metadata": {}, 33 | "outputs": [] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "collapsed": true, 38 | "input": [ 39 | "import sys\n", 40 | "import time\n", 41 | "import numpy as np\n", 42 | "\n", 43 | "from matplotlib import pyplot as plt\n", 44 | "try:\n", 45 | " import seaborn\n", 46 | "except ImportError:\n", 47 | " pass\n", 48 | "\n", 49 | "from IPython import parallel\n" 50 | ], 51 | "language": "python", 52 | "metadata": {}, 53 | "outputs": [] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "collapsed": true, 58 | "input": [ 59 | "def price_options(S=100.0, K=100.0, sigma=0.25, r=0.05, days=260, paths=10000):\n", 60 | " \"\"\"\n", 61 | " Price European and Asian options using a Monte Carlo method.\n", 62 | "\n", 63 | " Parameters\n", 64 | " ----------\n", 65 | " S : float\n", 66 | " The initial price of the stock.\n", 67 | " K : float\n", 68 | " The strike price of the option.\n", 69 | " sigma : float\n", 70 | " The volatility of the stock.\n", 71 | " r : float\n", 72 | " The risk free interest rate.\n", 73 | " days : int\n", 74 | " The number of days until the option expires.\n", 75 | " paths : int\n", 76 | " The number of Monte Carlo paths used to price the option.\n", 77 | "\n", 78 | " Returns\n", 79 | " -------\n", 80 | " A tuple of (E. call, E. put, A. call, A. 
put) option prices.\n", 81 | " \"\"\"\n", 82 | " import numpy as np\n", 83 | " from math import exp,sqrt\n", 84 | " \n", 85 | " h = 1.0/days\n", 86 | " const1 = exp((r-0.5*sigma**2)*h)\n", 87 | " const2 = sigma*sqrt(h)\n", 88 | " stock_price = S*np.ones(paths, dtype='float64')\n", 89 | " stock_price_sum = np.zeros(paths, dtype='float64')\n", 90 | " for j in range(days):\n", 91 | " growth_factor = const1*np.exp(const2*np.random.standard_normal(paths))\n", 92 | " stock_price = stock_price*growth_factor\n", 93 | " stock_price_sum = stock_price_sum + stock_price\n", 94 | " stock_price_avg = stock_price_sum/days\n", 95 | " zeros = np.zeros(paths, dtype='float64')\n", 96 | " r_factor = exp(-r*h*days)\n", 97 | " euro_put = r_factor*np.mean(np.maximum(zeros, K-stock_price))\n", 98 | " asian_put = r_factor*np.mean(np.maximum(zeros, K-stock_price_avg))\n", 99 | " euro_call = r_factor*np.mean(np.maximum(zeros, stock_price-K))\n", 100 | " asian_call = r_factor*np.mean(np.maximum(zeros, stock_price_avg-K))\n", 101 | " return (euro_call, euro_put, asian_call, asian_put)" 102 | ], 103 | "language": "python", 104 | "metadata": {}, 105 | "outputs": [] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "collapsed": true, 110 | "input": [ 111 | "price = 100.0 # Initial price\n", 112 | "rate = 0.05 # Interest rate\n", 113 | "days = 260 # Days to expiration\n", 114 | "paths = 10000 # Number of MC paths\n", 115 | "n_strikes = 6 # Number of strike values\n", 116 | "min_strike = 90.0 # Min strike price\n", 117 | "max_strike = 110.0 # Max strike price\n", 118 | "n_sigmas = 5 # Number of volatility values\n", 119 | "min_sigma = 0.1 # Min volatility\n", 120 | "max_sigma = 0.4 # Max volatility" 121 | ], 122 | "language": "python", 123 | "metadata": {}, 124 | "outputs": [] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "collapsed": true, 129 | "input": [ 130 | "strike_vals = np.linspace(min_strike, max_strike, n_strikes)\n", 131 | "sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)" 
132 | ], 133 | "language": "python", 134 | "metadata": {}, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## Parallel computation across strike prices and volatilities" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "The Client is used to setup the calculation and works with all engines." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "collapsed": false, 154 | "input": [ 155 | "rc = parallel.Client()" 156 | ], 157 | "language": "python", 158 | "metadata": {}, 159 | "outputs": [] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "A LoadBalancedView is an interface to the engines that provides dynamic load\n", 166 | "balancing at the expense of not knowing which engine will execute the code." 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "collapsed": true, 172 | "input": [ 173 | "view = rc.load_balanced_view()" 174 | ], 175 | "language": "python", 176 | "metadata": {}, 177 | "outputs": [] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "collapsed": false, 182 | "input": [ 183 | "print \"Strike prices: \", strike_vals\n", 184 | "print \"Volatilities: \", sigma_vals" 185 | ], 186 | "language": "python", 187 | "metadata": {}, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "Submit tasks for each (strike, sigma) pair." 
195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "collapsed": true, 200 | "input": [ 201 | "t1 = time.time()\n", 202 | "async_results = []\n", 203 | "for strike in strike_vals:\n", 204 | " for sigma in sigma_vals:\n", 205 | " ar = view.apply_async(price_options, price, strike, sigma, rate, days, paths)\n", 206 | " async_results.append(ar)" 207 | ], 208 | "language": "python", 209 | "metadata": {}, 210 | "outputs": [] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "collapsed": false, 215 | "input": [ 216 | "print \"Submitted tasks: \", len(async_results)" 217 | ], 218 | "language": "python", 219 | "metadata": {}, 220 | "outputs": [] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "Block until all tasks are completed." 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "collapsed": false, 232 | "input": [ 233 | "rc.wait(async_results)\n", 234 | "t2 = time.time()\n", 235 | "t = t2-t1\n", 236 | "\n", 237 | "print \"Parallel calculation completed, time = %s s\" % t" 238 | ], 239 | "language": "python", 240 | "metadata": {}, 241 | "outputs": [] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "## Process and visualize results" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "Get the results using the `get` method:" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "collapsed": true, 260 | "input": [ 261 | "results = [ar.get() for ar in async_results]" 262 | ], 263 | "language": "python", 264 | "metadata": {}, 265 | "outputs": [] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "Assemble the result into a structured NumPy array." 
272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "collapsed": true, 277 | "input": [ 278 | "prices = np.empty(n_strikes*n_sigmas,\n", 279 | " dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]\n", 280 | ")\n", 281 | "\n", 282 | "for i, price in enumerate(results):\n", 283 | " prices[i] = tuple(price)\n", 284 | "\n", 285 | "prices.shape = (n_strikes, n_sigmas)" 286 | ], 287 | "language": "python", 288 | "metadata": {}, 289 | "outputs": [] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "Plot the value of the European call in (volatility, strike) space." 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "collapsed": false, 301 | "input": [ 302 | "plt.figure()\n", 303 | "plt.contourf(sigma_vals, strike_vals, prices['ecall'])\n", 304 | "plt.axis('tight')\n", 305 | "plt.colorbar()\n", 306 | "plt.title('European Call')\n", 307 | "plt.xlabel(\"Volatility\")\n", 308 | "plt.ylabel(\"Strike Price\")" 309 | ], 310 | "language": "python", 311 | "metadata": {}, 312 | "outputs": [] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "Plot the value of the Asian call in (volatility, strike) space." 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "collapsed": false, 324 | "input": [ 325 | "plt.figure()\n", 326 | "plt.contourf(sigma_vals, strike_vals, prices['acall'])\n", 327 | "plt.axis('tight')\n", 328 | "plt.colorbar()\n", 329 | "plt.title(\"Asian Call\")\n", 330 | "plt.xlabel(\"Volatility\")\n", 331 | "plt.ylabel(\"Strike Price\")" 332 | ], 333 | "language": "python", 334 | "metadata": {}, 335 | "outputs": [] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "Plot the value of the European put in (volatility, strike) space." 
342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "collapsed": false, 347 | "input": [ 348 | "plt.figure()\n", 349 | "plt.contourf(sigma_vals, strike_vals, prices['eput'])\n", 350 | "plt.axis('tight')\n", 351 | "plt.colorbar()\n", 352 | "plt.title(\"European Put\")\n", 353 | "plt.xlabel(\"Volatility\")\n", 354 | "plt.ylabel(\"Strike Price\")" 355 | ], 356 | "language": "python", 357 | "metadata": {}, 358 | "outputs": [] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "Plot the value of the Asian put in (volatility, strike) space." 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "collapsed": false, 370 | "input": [ 371 | "plt.figure()\n", 372 | "plt.contourf(sigma_vals, strike_vals, prices['aput'])\n", 373 | "plt.axis('tight')\n", 374 | "plt.colorbar()\n", 375 | "plt.title(\"Asian Put\")\n", 376 | "plt.xlabel(\"Volatility\")\n", 377 | "plt.ylabel(\"Strike Price\")" 378 | ], 379 | "language": "python", 380 | "metadata": {}, 381 | "outputs": [] 382 | } 383 | ], 384 | "metadata": {} 385 | } 386 | ] 387 | } -------------------------------------------------------------------------------- /tutorial/Multiplexing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:68d985ab383f6a47901c566e128b2baa5c65d48f434457de01e2e766b3d2a101" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# DirectView as multiplexer" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "collapsed": false, 21 | "input": [ 22 | "import os,sys,time\n", 23 | "import numpy as np\n", 24 | "\n", 25 | "from IPython.core.display import display\n", 26 | "from IPython import parallel\n", 27 | "rc = parallel.Client()" 28 | ], 29 | "language": "python", 30 | "metadata": {}, 31 | "outputs": [] 32 | }, 33 | { 34 | "cell_type": 
"markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "The DirectView can be readily understood as an Engine Multiplexer -\n", 38 | "it does the same thing on all of its engines.\n", 39 | "\n", 40 | "The only difference between running code on a single remote engine\n", 41 | "and running code in parallel is how many engines the DirectView is\n", 42 | "instructed to use.\n", 43 | "\n", 44 | "You can create DirectViews by index-access to the Client. This creates\n", 45 | "a DirectView using the engines after passing the same index (or slice)\n", 46 | "to the `ids` list." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "collapsed": true, 52 | "input": [ 53 | "e0 = rc[0]\n", 54 | "engines = rc[:]\n", 55 | "even = rc[::2]\n", 56 | "odd = rc[1::2]\n", 57 | "\n", 58 | "# this is the one we are going to use:\n", 59 | "dview = engines\n", 60 | "dview.block = True" 61 | ], 62 | "language": "python", 63 | "metadata": {}, 64 | "outputs": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Now, the only difference from single-engine remote execution is that the code we run happens on all of the engines of a given view:" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "collapsed": false, 76 | "input": [ 77 | "for view in (e0, engines, even, odd):\n", 78 | " print view, view.apply_sync(os.getpid)" 79 | ], 80 | "language": "python", 81 | "metadata": {}, 82 | "outputs": [] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "The results of multiplexed execution is always a list of the length of the number of engines." 
89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "collapsed": false, 94 | "input": [ 95 | "engines['a'] = 5\n", 96 | "engines['a']" 97 | ], 98 | "language": "python", 99 | "metadata": {}, 100 | "outputs": [] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# Scatter and Gather\n", 107 | "\n", 108 | "Lots of parallel computations involve partitioning data onto processes. \n", 109 | "DirectViews have `scatter()` and `gather()` methods, to help with this.\n", 110 | "Pass any container or numpy array, and IPython will partition the object onto the engines with `scatter`,\n", 111 | "or reconstruct the full object in the Client with `gather()`." 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "collapsed": false, 117 | "input": [ 118 | "dview.scatter('a',range(16))\n", 119 | "dview['a']" 120 | ], 121 | "language": "python", 122 | "metadata": {}, 123 | "outputs": [] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "collapsed": false, 128 | "input": [ 129 | "dview.gather('a')" 130 | ], 131 | "language": "python", 132 | "metadata": {}, 133 | "outputs": [] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "collapsed": false, 138 | "input": [ 139 | "dview.execute(\"asum = sum(a)\")\n", 140 | "dview.gather('asum')" 141 | ], 142 | "language": "python", 143 | "metadata": {}, 144 | "outputs": [] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "We can pass a 'flatten' keyword,\n", 151 | "to instruct engines that will only get one item of the list to\n", 152 | "get the actual item, rather than a one-element sublist:" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "collapsed": false, 158 | "input": [ 159 | "dview.scatter('id',rc.ids)\n", 160 | "dview['id']" 161 | ], 162 | "language": "python", 163 | "metadata": {}, 164 | "outputs": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "collapsed": false, 169 | "input": [ 170 | "dview.scatter('id',rc.ids,
flatten=True)\n", 171 | "dview['id']" 172 | ], 173 | "language": "python", 174 | "metadata": {}, 175 | "outputs": [] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "Scatter and gather also work with numpy arrays" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "collapsed": false, 187 | "input": [ 188 | "A = np.random.randint(1,10,(16,4))\n", 189 | "B = np.random.randint(1,10,(4,16))\n", 190 | "display(A)\n", 191 | "display(B)" 192 | ], 193 | "language": "python", 194 | "metadata": {}, 195 | "outputs": [] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "collapsed": false, 200 | "input": [ 201 | "dview.scatter('A', A)\n", 202 | "dview.scatter('B', B)\n", 203 | "display(e0['A'])\n", 204 | "display(e0['B'])" 205 | ], 206 | "language": "python", 207 | "metadata": {}, 208 | "outputs": [] 209 | }, 210 | { 211 | "cell_type": "heading", 212 | "level": 2, 213 | "metadata": {}, 214 | "source": [ 215 | "Exercise: Parallel Matrix Multiply" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "Can you compute the Matrix product `C=A.dot(B)` in parallel? (not looking for brilliant, just correct).\n" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "collapsed": false, 228 | "input": [ 229 | "%run ../hints\n", 230 | "mmhint()" 231 | ], 232 | "language": "python", 233 | "metadata": {}, 234 | "outputs": [] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "collapsed": true, 239 | "input": [ 240 | "%load soln/matmul.py" 241 | ], 242 | "language": "python", 243 | "metadata": {}, 244 | "outputs": [] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "Let's run this, and validate the result against a local computation."
251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "collapsed": false, 256 | "input": [ 257 | "C_ref = A.dot(B)\n", 258 | "C1 = pdot(dview, A, B)\n", 259 | "# validation:\n", 260 | "(C1==C_ref).all()" 261 | ], 262 | "language": "python", 263 | "metadata": {}, 264 | "outputs": [] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "# Map\n", 271 | "\n", 272 | "DirectViews have a map method, which behaves just like the builtin map,\n", 273 | "but computed in parallel." 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "collapsed": false, 279 | "input": [ 280 | "dview.block = True\n", 281 | "\n", 282 | "serial_result = map(lambda x:x**10, range(32))\n", 283 | "parallel_result = dview.map(lambda x:x**10, range(32))\n", 284 | "\n", 285 | "serial_result==parallel_result" 286 | ], 287 | "language": "python", 288 | "metadata": {}, 289 | "outputs": [] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "`DirectView.map` partitions the sequences onto each engine,\n", 296 | "and then calls `map` remotely. The result is always a single\n", 297 | "IPython task per engine." 
298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "collapsed": false, 303 | "input": [ 304 | "amr = dview.map_async(lambda x:x**10, range(32))\n", 305 | "amr.msg_ids" 306 | ], 307 | "language": "python", 308 | "metadata": {}, 309 | "outputs": [] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "collapsed": false, 314 | "input": [ 315 | "amr = dview.map_async(lambda x:x**10, range(3200))\n", 316 | "amr.msg_ids" 317 | ], 318 | "language": "python", 319 | "metadata": {}, 320 | "outputs": [] 321 | }, 322 | { 323 | "cell_type": "heading", 324 | "level": 3, 325 | "metadata": {}, 326 | "source": [ 327 | "The motivating example" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "collapsed": false, 333 | "input": [ 334 | "from IPython.display import display, Image\n", 335 | "%run ../images_common" 336 | ], 337 | "language": "python", 338 | "metadata": {}, 339 | "outputs": [] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "collapsed": false, 344 | "input": [ 345 | "pictures = get_pictures(os.path.join('..', 'images', 'castle'))" 346 | ], 347 | "language": "python", 348 | "metadata": {}, 349 | "outputs": [] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "collapsed": false, 354 | "input": [ 355 | "%px cd {os.getcwd()}" 356 | ], 357 | "language": "python", 358 | "metadata": {}, 359 | "outputs": [] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "collapsed": false, 364 | "input": [ 365 | "%%px\n", 366 | "import matplotlib\n", 367 | "matplotlib.use('Agg')\n", 368 | "import matplotlib.pyplot as plt\n", 369 | "\n", 370 | "from skimage.io import imread\n", 371 | "from skimage import measure" 372 | ], 373 | "language": "python", 374 | "metadata": {}, 375 | "outputs": [] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "collapsed": false, 380 | "input": [ 381 | "engines.push(dict(\n", 382 | " plot_contours=plot_contours,\n", 383 | " find_contours=find_contours,\n", 384 | "))" 385 | ], 386 | "language": "python", 387 | "metadata": {}, 388 | "outputs": [] 389 | 
}, 390 | { 391 | "cell_type": "code", 392 | "collapsed": false, 393 | "input": [ 394 | "ar = e0.apply_async(get_contours_image, pictures[0])\n", 395 | "ar.wait_interactive()\n", 396 | "Image(data=ar.get())" 397 | ], 398 | "language": "python", 399 | "metadata": {}, 400 | "outputs": [] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "collapsed": false, 405 | "input": [ 406 | "amr = engines.map_async(get_contours_image, pictures[:len(engines)])\n", 407 | "amr.wait_interactive()\n", 408 | "for pngdata in amr:\n", 409 | " display(Image(data=pngdata))" 410 | ], 411 | "language": "python", 412 | "metadata": {}, 413 | "outputs": [] 414 | }, 415 | { 416 | "cell_type": "heading", 417 | "level": 2, 418 | "metadata": {}, 419 | "source": [ 420 | "Exercises and Examples" 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "metadata": {}, 426 | "source": [ 427 | "- [Remote Iteration](exercises/Remote%20Iteration.ipynb)\n", 428 | "- [Monte Carlo \u03c0](../exercises/Monte%20Carlo%20\u03c0.ipynb)" 429 | ] 430 | }, 431 | { 432 | "cell_type": "heading", 433 | "level": 1, 434 | "metadata": {}, 435 | "source": [ 436 | "Moving on" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "IPython.parallel can also be used for [load-balanced execution](Load-Balancing.ipynb),\n", 444 | "when you just want code to run, but don't care where." 
445 | ] 446 | } 447 | ], 448 | "metadata": {} 449 | } 450 | ] 451 | } -------------------------------------------------------------------------------- /exercises/Remote Iteration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:85889b09d56f7825cd4efd30c738e47a0a6f54269869f51aabe5a567611363b7" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "N-Way merge with remote data" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "How to merge multiple *sorted* remote data streams using the `heapq.merge` function that ships with Python." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "collapsed": false, 29 | "input": [ 30 | "from __future__ import print_function\n", 31 | "import heapq\n", 32 | "\n", 33 | "from IPython.display import display\n", 34 | "from IPython import parallel\n", 35 | "\n", 36 | "rc = parallel.Client()" 37 | ], 38 | "language": "python", 39 | "metadata": {}, 40 | "outputs": [], 41 | "prompt_number": 1 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "Imagine we have some routine that is capable of loading/creating a sorted subset of our data in an engine, based on a parameter (such as the indes of which part of the data to read):" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "collapsed": false, 53 | "input": [ 54 | "def load_data(arg):\n", 55 | " \"\"\"Load a dataset in the global namespace. 
The dataset *must* be sorted.\n", 56 | "\n", 57 | " Return the *name* of the variable in which the dataset was loaded.\"\"\"\n", 58 | " global data\n", 59 | " # Here, real data loading would occur\n", 60 | " s = 4-arg\n", 61 | " step = arg+1\n", 62 | " data = range(s, s+4*step**2, step)\n", 63 | " return 'data'" 64 | ], 65 | "language": "python", 66 | "metadata": {}, 67 | "outputs": [], 68 | "prompt_number": 2 69 | }, 70 | { 71 | "cell_type": "heading", 72 | "level": 2, 73 | "metadata": {}, 74 | "source": [ 75 | "Exercise" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "We want a function that takes a given single-engine View and a variable name,\n", 83 | "and returns a local iterator on the remote object.\n", 84 | "It should look something like this skeleton function:" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "collapsed": false, 90 | "input": [ 91 | "def remote_iterator(view, name):\n", 92 | " \"\"\"Return an iterator on an object living on a remote engine.\"\"\"\n", 93 | " # TODO: create an iterator remotely\n", 94 | " while True:\n", 95 | " pass\n", 96 | " # TODO: yield the next item\n", 97 | " # TODO: turn remote StopIteration into local StopIteration" 98 | ], 99 | "language": "python", 100 | "metadata": {}, 101 | "outputs": [], 102 | "prompt_number": 3 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "#### Relevant Aside:\n", 109 | "\n", 110 | "Errors raised on engines will show up in the Client as a RemoteError.\n", 111 | "This means you have to be a little careful when trying to catch remote errors:" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "collapsed": false, 117 | "input": [ 118 | "try:\n", 119 | " rc[-1].execute(\"foo = barbarbar\", block=True)\n", 120 | "except NameError:\n", 121 | " print(\"caught NameError\")\n", 122 | "except Exception as e:\n", 123 | " print(\"Oops! 
Didn't catch %r\" % e)\n", 124 | " raise e\n", 125 | "print(\"safe and sound\")" 126 | ], 127 | "language": "python", 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "output_type": "stream", 132 | "stream": "stdout", 133 | "text": [ 134 | "Oops! Didn't catch \n" 135 | ] 136 | }, 137 | { 138 | "ename": "RemoteError", 139 | "evalue": "NameError(name 'barbarbar' is not defined)", 140 | "output_type": "pyerr", 141 | "traceback": [ 142 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 143 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m", 144 | "\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfoo\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbarbarbar\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m", 145 | "\u001b[0m\u001b[1;31mNameError\u001b[0m: name 'barbarbar' is not defined" 146 | ] 147 | } 148 | ], 149 | "prompt_number": 4 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "A RemoteError has three attributes:\n", 156 | "\n", 157 | "* `err.ename` - the class name of the remote error (e.g. 
`NameError`, `ValueError`)\n", 158 | "* `err.evalue` - the string value of the error message\n", 159 | "* `err.traceback` - the remote traceback as a list of strings" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "For simple builtin exceptions,\n", 167 | "you can re-raise remote errors as the original exception class with a case like the following:" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "collapsed": false, 173 | "input": [ 174 | "def assign_foo():\n", 175 | " try:\n", 176 | " rc[-1].execute(\"foo = barbarbar\", block=True)\n", 177 | " except parallel.RemoteError as e:\n", 178 | " if e.ename == 'NameError':\n", 179 | " raise NameError(e.evalue)\n", 180 | " else:\n", 181 | " raise e" 182 | ], 183 | "language": "python", 184 | "metadata": {}, 185 | "outputs": [], 186 | "prompt_number": 5 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "By doing this re-cast, any exception handling outside will handle remote exceptions as if they were local." 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "collapsed": false, 198 | "input": [ 199 | "try:\n", 200 | " assign_foo()\n", 201 | "except NameError:\n", 202 | " print(\"caught NameError\")\n", 203 | "except Exception as e:\n", 204 | " print(\"Oops! 
Didn't catch %r\" % e)\n", 205 | " raise e\n", 206 | "print(\"safe and sound\")" 207 | ], 208 | "language": "python", 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "output_type": "stream", 213 | "stream": "stdout", 214 | "text": [ 215 | "caught NameError\n", 216 | "safe and sound\n" 217 | ] 218 | } 219 | ], 220 | "prompt_number": 7 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "Can you fill out this remote_iterator function?\n", 227 | "\n", 228 | "Potentially useful:\n", 229 | "\n", 230 | "* catching RemoteErrors\n", 231 | "* parallel.Reference\n", 232 | "* yield" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "collapsed": false, 238 | "input": [ 239 | "def remote_iterator(view, name):\n", 240 | " \"\"\"Return an iterator on an object living on a remote engine.\"\"\"\n", 241 | " # TODO: create an iterator remotely\n", 242 | " while True:\n", 243 | " pass\n", 244 | " # TODO: yield the next item\n", 245 | " # TODO: turn remote StopIteration into local StopIteration" 246 | ], 247 | "language": "python", 248 | "metadata": {}, 249 | "outputs": [] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "A local example that should be a good guideline for the remote version:" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "collapsed": false, 261 | "input": [ 262 | "%load ../soln/remote_iter_hint.py" 263 | ], 264 | "language": "python", 265 | "metadata": {}, 266 | "outputs": [], 267 | "prompt_number": 8 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "And the solution:" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "collapsed": false, 279 | "input": [ 280 | "%load ../soln/remote_iter.py" 281 | ], 282 | "language": "python", 283 | "metadata": {}, 284 | "outputs": [], 285 | "prompt_number": 9 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "And an 
ever-so-slightly fancier solution:" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "collapsed": false, 297 | "input": [ 298 | "%load ../soln/remote_iter_slightly_better.py" 299 | ], 300 | "language": "python", 301 | "metadata": {}, 302 | "outputs": [], 303 | "prompt_number": 10 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "Now, we bring `IPython.parallel` into action:" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "collapsed": false, 315 | "input": [ 316 | "dview = rc.direct_view()\n", 317 | "print('Engine IDs:', rc.ids)" 318 | ], 319 | "language": "python", 320 | "metadata": {}, 321 | "outputs": [] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "collapsed": false, 326 | "input": [ 327 | "# Load the data on the engines\n", 328 | "data_refs = dview.map(load_data, rc.ids)" 329 | ], 330 | "language": "python", 331 | "metadata": {}, 332 | "outputs": [] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "collapsed": false, 337 | "input": [ 338 | "data_refs" 339 | ], 340 | "language": "python", 341 | "metadata": {}, 342 | "outputs": [] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "collapsed": false, 347 | "input": [ 348 | "list(data_refs)" 349 | ], 350 | "language": "python", 351 | "metadata": {}, 352 | "outputs": [] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "collapsed": false, 357 | "input": [ 358 | "# And we now make a local object which represents the remote iterator\n", 359 | "iterators = [remote_iterator(rc[e], ref) for e,ref in zip(rc.ids, data_refs)]\n", 360 | "for it in iterators:\n", 361 | " print(list(it))" 362 | ], 363 | "language": "python", 364 | "metadata": {}, 365 | "outputs": [] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "Now, let's merge those datasets into a single sorted one:" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "collapsed": false, 377 | "input": [ 378 | "print('Locally merge the remote 
sets:')\n", 379 | "iterators = [remote_iterator(rc[e], ref) for e,ref in zip(rc.ids, data_refs)] \n", 380 | "remote = list(heapq.merge(*iterators))\n", 381 | "print(remote)" 382 | ], 383 | "language": "python", 384 | "metadata": {}, 385 | "outputs": [] 386 | }, 387 | { 388 | "cell_type": "heading", 389 | "level": 4, 390 | "metadata": {}, 391 | "source": [ 392 | "Validation" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "repeat the operation by copying the data from the engines to our local namespace and doing a regular merge here:" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "collapsed": false, 405 | "input": [ 406 | "# Key step here: pull data from each engine:\n", 407 | "local_data = [rc[e][ref] for e,ref in zip(rc.ids, data_refs)]\n", 408 | "print('Local data:')\n", 409 | "for subset in local_data:\n", 410 | " print(subset)\n", 411 | "print('Sorted:')\n", 412 | "local = list(heapq.merge(*local_data))\n", 413 | "print(local)\n", 414 | "print(\"local == remote: %s\" % (local==remote))" 415 | ], 416 | "language": "python", 417 | "metadata": {}, 418 | "outputs": [] 419 | } 420 | ], 421 | "metadata": {} 422 | } 423 | ] 424 | } -------------------------------------------------------------------------------- /examples/wikipedia/widget_forcedirectedgraph.js: -------------------------------------------------------------------------------- 1 | require(["//cdnjs.cloudflare.com/ajax/libs/d3/3.4.1/d3.min.js", "widgets/js/manager"], function(d3, WidgetManager){ 2 | 3 | // Define the D3ForceDirectedGraphView 4 | var D3ForceDirectedGraphView = IPython.DOMWidgetView.extend({ 5 | 6 | render: function(){ 7 | this.guid = 'd3force' + IPython.utils.uuid(); 8 | this.setElement($('
', {id: this.guid})); 9 | 10 | this.model.on('msg:custom', this.on_msg, this); 11 | this.has_drawn = false; 12 | 13 | // Wait for element to be added to the DOM 14 | var that = this; 15 | setTimeout(function() { 16 | that.update(); 17 | }, 0); 18 | }, 19 | 20 | try_add_node: function(id){ 21 | var index = this.find_node(id); 22 | if (index == -1) { 23 | var node = {id: id}; 24 | this.nodes.push(node); 25 | return node; 26 | } else { 27 | return this.nodes[index]; 28 | } 29 | }, 30 | 31 | update_node: function(node, attributes) { 32 | if (node !== null) { 33 | for (var key in attributes) { 34 | node[key] = attributes[key]; 35 | } 36 | this._update_circle(d3.select('#' + this.guid + node.id)); 37 | this._update_text(d3.select('#' + this.guid + node.id + '-text')); 38 | } 39 | }, 40 | 41 | remove_node: function(id){ 42 | this.remove_links_to(id); 43 | 44 | var found_index = this.find_node(id); 45 | if (found_index>=0) { 46 | this.nodes.splice(found_index, 1); 47 | } 48 | }, 49 | 50 | find_node: function(id){ 51 | var found_index = -1; 52 | for (var index in this.nodes) { 53 | if (this.nodes[index].id == id) { 54 | found_index = index; 55 | break; 56 | } 57 | } 58 | return found_index; 59 | }, 60 | 61 | find_link: function(source_id, target_id){ 62 | for (var index in this.links) { 63 | if (this.links[index].source.id == source_id && this.links[index].target.id == target_id) { 64 | return index; 65 | } 66 | } 67 | return -1; 68 | }, 69 | 70 | try_add_link: function(source_id, target_id){ 71 | var index = this.find_link(source_id, target_id); 72 | if (index == -1) { 73 | var source_node = this.try_add_node(source_id); 74 | var target_node = this.try_add_node(target_id); 75 | var new_link = {source: source_node, target: target_node}; 76 | this.links.push(new_link); 77 | return new_link; 78 | } else { 79 | return this.links[index] 80 | } 81 | }, 82 | 83 | update_link: function(link, attributes){ 84 | if (link != null) { 85 | for (var key in attributes) { 86 | link[key] = 
attributes[key]; 87 | } 88 | this._update_edge(d3.select('#' + this.guid + link.source.id + "-" + link.target.id)); 89 | } 90 | }, 91 | 92 | remove_links: function(source_id){ 93 | var found_indicies = []; 94 | for (var index in this.links) { 95 | if (this.links[index].source.id == source_id) { 96 | found_indicies.push(index); 97 | } 98 | } 99 | found_indicies.reverse(); 100 | 101 | for (var index in found_indicies) { 102 | this.links.splice(index, 1); 103 | }; 104 | }, 105 | 106 | remove_links_to: function(id){ 107 | var found_indicies = []; 108 | for (var index in this.links) { 109 | if (this.links[index].source.id == id || this.links[index].target.id == id) { 110 | found_indicies.push(index); 111 | } 112 | } 113 | found_indicies.reverse(); 114 | 115 | for (var index in found_indicies) { 116 | this.links.splice(index, 1); 117 | }; 118 | }, 119 | 120 | normalize_key: function (key) { 121 | return key.replace(/[\/\(\)\%]/g, '-') 122 | }, 123 | 124 | on_msg: function(content){ 125 | this.update(); 126 | 127 | var dict = content.dict; 128 | var action = content.action; 129 | var key = this.normalize_key(content.key); 130 | 131 | if (dict=='node') { 132 | if (action=='add' || action=='set') { 133 | this.update_node(this.try_add_node(key), content.value) 134 | } else if (action=='del') { 135 | this.remove_node(key); 136 | } 137 | 138 | } else if (dict=='adj') { 139 | if (action=='add' || action=='set') { 140 | var links = content.value; 141 | for (var target_id in links) { 142 | target_id = this.normalize_key(target_id); 143 | this.update_link(this.try_add_link(key, target_id), links[target_id]); 144 | } 145 | } else if (action=='del') { 146 | this.remove_links(key); 147 | } 148 | } 149 | this.start(); 150 | }, 151 | 152 | start: function() { 153 | var node = this.svg.selectAll(".gnode"), 154 | link = this.svg.selectAll(".link"); 155 | 156 | var link = link.data(this.force.links(), function(d) { return d.source.id + "-" + d.target.id; }); 157 | 
this._update_edge(link.enter().insert("line", ".gnode")) 158 | link.exit().remove(); 159 | 160 | var node = node.data(this.force.nodes(), function(d) { return d.id;}); 161 | var that = this; 162 | 163 | var gnode = node.enter() 164 | .append("g") 165 | .attr('class', 'gnode') 166 | .on('mouseover', function(d){ 167 | d3.select(this).select("text").style({opacity:'1.0'}); 168 | }) 169 | .on('mouseout', function(d){ 170 | d3.select(this).select("text").style({opacity:'0.0'}); 171 | }) 172 | .call(this.force.drag); 173 | this._update_circle(gnode.append("circle")); 174 | this._update_text(gnode.append("text").style({opacity:'0.0'})); 175 | node.exit().remove(); 176 | 177 | this.force.start(); 178 | }, 179 | 180 | _update_circle: function(circle) { 181 | var that = this; 182 | 183 | circle 184 | .attr("id", function(d) { return that.guid + d.id; }) 185 | .attr("class", function(d) { return "node " + d.id; }) 186 | .attr("r", function(d) { 187 | if (d.r == undefined) { 188 | return 8; 189 | } else { 190 | return d.r; 191 | } 192 | 193 | }) 194 | .style("fill", function(d) { 195 | if (d.fill == undefined) { 196 | return that.color(d.group); 197 | } else { 198 | return d.fill; 199 | } 200 | 201 | }) 202 | .style("stroke", function(d) { 203 | if (d.stroke == undefined) { 204 | return "#FFF"; 205 | } else { 206 | return d.stroke; 207 | } 208 | 209 | }) 210 | .style("stroke-width", function(d) { 211 | if (d.strokewidth == undefined) { 212 | return "#FFF"; 213 | } else { 214 | return d.strokewidth; 215 | } 216 | 217 | }) 218 | .attr('dx', 0) 219 | .attr('dy', 0); 220 | }, 221 | 222 | _update_text: function(text) { 223 | var that = this; 224 | 225 | text 226 | .attr("id", function(d) { return that.guid + d.id + '-text'; }) 227 | .text(function(d) { 228 | if (d.label) { 229 | return d.label; 230 | } else { 231 | return ''; 232 | } 233 | }) 234 | .style("font-size",function(d) { 235 | if (d.font_size) { 236 | return d.font_size; 237 | } else { 238 | return '11pt'; 239 | } 240 | 
}) 241 | .attr("text-anchor", "middle") 242 | .style("fill", function(d) { 243 | if (d.color) { 244 | return d.color; 245 | } else { 246 | return 'white'; 247 | } 248 | }) 249 | .attr('dx', function(d) { 250 | if (d.dx) { 251 | return d.dx; 252 | } else { 253 | return 0; 254 | } 255 | }) 256 | .attr('dy', function(d) { 257 | if (d.dy) { 258 | return d.dy; 259 | } else { 260 | return 5; 261 | } 262 | }) 263 | .style("pointer-events", 'none'); 264 | }, 265 | 266 | _update_edge: function(edge) { 267 | var that = this; 268 | edge 269 | .attr("id", function(d) { return that.guid + d.source.id + "-" + d.target.id; }) 270 | .attr("class", "link") 271 | .style("stroke-width", function(d) { 272 | if (d.strokewidth == undefined) { 273 | return "1.5px"; 274 | } else { 275 | return d.strokewidth; 276 | } 277 | 278 | }) 279 | .style('stroke', function(d) { 280 | if (d.stroke == undefined) { 281 | return "#999"; 282 | } else { 283 | return d.stroke; 284 | } 285 | 286 | }); 287 | }, 288 | 289 | tick: function() { 290 | var gnode = this.svg.selectAll(".gnode"), 291 | link = this.svg.selectAll(".link"); 292 | 293 | link.attr("x1", function(d) { return d.source.x; }) 294 | .attr("y1", function(d) { return d.source.y; }) 295 | .attr("x2", function(d) { return d.target.x; }) 296 | .attr("y2", function(d) { return d.target.y; }); 297 | 298 | // Translate the groups 299 | gnode.attr("transform", function(d) { return "translate(" + d.x + "," + d.y + ")"; }); 300 | }, 301 | 302 | update: function(){ 303 | if (!this.has_drawn) { 304 | this.has_drawn = true; 305 | var width = this.model.get('width'), 306 | height = this.model.get('height'); 307 | 308 | this.color = d3.scale.category20(); 309 | 310 | this.nodes = []; 311 | this.links = []; 312 | 313 | var that = this; 314 | this.force = d3.layout.force() 315 | .nodes(this.nodes) 316 | .links(this.links) 317 | .charge(function (d) { 318 | if (d.charge === undefined) { 319 | return -800; 320 | } else { 321 | return d.charge; 322 | } 323 | }) 
324 | .linkDistance(function (d) { 325 | if (d.distance === undefined) { 326 | return 20; 327 | } else { 328 | return d.distance; 329 | } 330 | }) 331 | .linkStrength(function (d) { 332 | if (d.strength === undefined) { 333 | return 0.3; 334 | } else { 335 | return d.strength; 336 | } 337 | }) 338 | .size([width, height]) 339 | .on("tick", $.proxy(this.tick, this)); 340 | 341 | this.svg = d3.select("#" + this.guid).append("svg") 342 | .attr("width", width) 343 | .attr("height", height); 344 | } 345 | 346 | var that = this; 347 | setTimeout(function() { 348 | that.start(); 349 | }, 0); 350 | return D3ForceDirectedGraphView.__super__.update.apply(this); 351 | }, 352 | 353 | }); 354 | 355 | // Register the D3ForceDirectedGraphView with the widget manager. 356 | WidgetManager.register_widget_view('D3ForceDirectedGraphView', D3ForceDirectedGraphView); 357 | }); -------------------------------------------------------------------------------- /tutorial/Load-Balancing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:4e6942c31bdc43fb673826bcb3816ccaa9ecac04e2be02d84f691bc81e566ff9" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Load-balancing with IPython.parallel" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "collapsed": false, 21 | "input": [ 22 | "import os,sys,time\n", 23 | "import numpy as np\n", 24 | "\n", 25 | "from IPython.core.display import display\n", 26 | "from IPython import parallel\n", 27 | "rc = parallel.Client()\n", 28 | "dview = rc[:]" 29 | ], 30 | "language": "python", 31 | "metadata": {}, 32 | "outputs": [] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "Create a LoadBalancedView" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "collapsed": false, 44 | "input": [ 45 | 
"lview = rc.load_balanced_view()\n", 46 | "lview" 47 | ], 48 | "language": "python", 49 | "metadata": {}, 50 | "outputs": [] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "LoadBalancedViews behave very much like a DirectView on a single engine:\n", 57 | "\n", 58 | "Each call to `apply()` results in a single remote computation,\n", 59 | "and the result (or AsyncResult) of that call is returned directly,\n", 60 | "rather than in a list, as in the multi-engine DirectView." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "collapsed": true, 66 | "input": [ 67 | "e0 = rc[0]" 68 | ], 69 | "language": "python", 70 | "metadata": {}, 71 | "outputs": [] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "collapsed": false, 76 | "input": [ 77 | "from numpy.linalg import norm\n", 78 | "A = np.random.random(1024)\n", 79 | "\n", 80 | "e0.apply_sync(norm, A, 2)" 81 | ], 82 | "language": "python", 83 | "metadata": {}, 84 | "outputs": [] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "collapsed": false, 89 | "input": [ 90 | "lview.apply_sync(norm, A, 2)" 91 | ], 92 | "language": "python", 93 | "metadata": {}, 94 | "outputs": [] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "However, unlike the DirectView of a single engine, you are letting the IPython Scheduler decide which engine should do the work:" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "collapsed": false, 106 | "input": [ 107 | "e0.apply_sync(os.getpid)" 108 | ], 109 | "language": "python", 110 | "metadata": {}, 111 | "outputs": [] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "collapsed": false, 116 | "input": [ 117 | "for i in range(2*len(rc.ids)):\n", 118 | " pid = lview.apply_sync(os.getpid)\n", 119 | " print \"task %i ran on: %i\" % (i, pid)" 120 | ], 121 | "language": "python", 122 | "metadata": {}, 123 | "outputs": [] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "# 
Map\n", 130 | "\n", 131 | "The LoadBalancedView also has a load-balanced version of the builtin `map()`" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "collapsed": false, 137 | "input": [ 138 | "lview.block = True\n", 139 | "\n", 140 | "serial_result = map(lambda x:x**10, range(32))\n", 141 | "parallel_result = lview.map(lambda x:x**10, range(32))\n", 142 | "\n", 143 | "serial_result==parallel_result" 144 | ], 145 | "language": "python", 146 | "metadata": {}, 147 | "outputs": [] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "Just like `apply()`, you can use non-blocking map with `block=False` or `map_async`" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "collapsed": true, 159 | "input": [ 160 | "amr = lview.map_async(lambda x:x**10, range(32))\n", 161 | "amr.msg_ids" 162 | ], 163 | "language": "python", 164 | "metadata": {}, 165 | "outputs": [] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "collapsed": false, 170 | "input": [ 171 | "lview.map??" 
172 | ], 173 | "language": "python", 174 | "metadata": {}, 175 | "outputs": [] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "collapsed": false, 180 | "input": [ 181 | "amr = lview.map_async(lambda x:x**10, range(32), chunksize=4)\n", 182 | "amr.msg_ids" 183 | ], 184 | "language": "python", 185 | "metadata": {}, 186 | "outputs": [] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "## Map results are iterable!\n", 193 | "\n", 194 | "AsyncResults with multiple results are actually iterable before their\n", 195 | "results arrive.\n", 196 | "\n", 197 | "This means that you can perform map/reduce operations on elements as\n", 198 | "they come in:" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "collapsed": false, 204 | "input": [ 205 | "lview.block = False" 206 | ], 207 | "language": "python", 208 | "metadata": {}, 209 | "outputs": [] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "collapsed": false, 214 | "input": [ 215 | "# scatter 'id', so id=0,1,2 on engines 0,1,2\n", 216 | "dv = rc[:]\n", 217 | "dv.scatter('id', rc.ids, flatten=True)\n", 218 | "print dv['id']\n", 219 | "\n", 220 | "# create a Reference to `id`. 
This will be a different value on each engine\n", 221 | "ref = parallel.Reference('id')\n", 222 | "\n", 223 | "tic = time.time()\n", 224 | "ar = dv.apply(time.sleep, ref)\n", 225 | "for i,r in enumerate(ar):\n", 226 | " print \"%i: %.3f\"%(i, time.time()-tic)" 227 | ], 228 | "language": "python", 229 | "metadata": {}, 230 | "outputs": [] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "collapsed": false, 235 | "input": [ 236 | "amr = lview.map_async(time.sleep, [1] * 12)" 237 | ], 238 | "language": "python", 239 | "metadata": {}, 240 | "outputs": [] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "collapsed": false, 245 | "input": [ 246 | "amr.wait_interactive()" 247 | ], 248 | "language": "python", 249 | "metadata": {}, 250 | "outputs": [] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "collapsed": false, 255 | "input": [ 256 | "amr.wall_time, amr.elapsed" 257 | ], 258 | "language": "python", 259 | "metadata": {}, 260 | "outputs": [] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "collapsed": false, 265 | "input": [ 266 | "amr.serial_time" 267 | ], 268 | "language": "python", 269 | "metadata": {}, 270 | "outputs": [] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "collapsed": false, 275 | "input": [ 276 | "amr.wall_time" 277 | ], 278 | "language": "python", 279 | "metadata": {}, 280 | "outputs": [] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "collapsed": false, 285 | "input": [ 286 | "amr.elapsed" 287 | ], 288 | "language": "python", 289 | "metadata": {}, 290 | "outputs": [] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "Now we submit a bunch of tasks of increasing magnitude, and\n", 297 | "watch where they happen, iterating through the results as they come." 
298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "collapsed": false, 303 | "input": [ 304 | "def sleep_here(t):\n", 305 | " \"\"\"sleep here for a time, return where it happened\"\"\"\n", 306 | " import time\n", 307 | " time.sleep(t)\n", 308 | " return id\n", 309 | "\n", 310 | "amr = lview.map(sleep_here, [.01*t for t in range(100)])\n", 311 | "tic = time.time()\n", 312 | "for i,r in enumerate(amr):\n", 313 | " print i,r\n", 314 | " print \"task %i on engine %i: %.3f\" % (i, r, time.time()-tic)\n" 315 | ], 316 | "language": "python", 317 | "metadata": {}, 318 | "outputs": [] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "collapsed": false, 323 | "input": [ 324 | "amr.wall_time" 325 | ], 326 | "language": "python", 327 | "metadata": {}, 328 | "outputs": [] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "collapsed": false, 333 | "input": [ 334 | "amr.serial_time" 335 | ], 336 | "language": "python", 337 | "metadata": {}, 338 | "outputs": [] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "collapsed": false, 343 | "input": [ 344 | "amr.serial_time / amr.wall_time" 345 | ], 346 | "language": "python", 347 | "metadata": {}, 348 | "outputs": [] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "Unlike `DirectView.map()`, which always results in one task per engine,\n", 355 | "LoadBalance map defaults to one task per *item* in the sequence. This\n", 356 | "can be changed by specifying the `chunksize` keyword arg." 
357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "collapsed": false, 362 | "input": [ 363 | "amr = lview.map(sleep_here, [.01*t for t in range(100)], chunksize=4)\n", 364 | "tic = time.time()\n", 365 | "for i,r in enumerate(amr):\n", 366 | " print \"task %i on engine %i: %.3f\"%(i, r, time.time()-tic)" 367 | ], 368 | "language": "python", 369 | "metadata": {}, 370 | "outputs": [] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "# Exercise\n", 377 | "\n", 378 | "## Parallelize nested loops\n", 379 | "\n", 380 | "Often we want to run a function with a variety of combinations of arguments.\n", 381 | "A useful skill is the ability to express a nested loop in terms of a map." 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "collapsed": false, 387 | "input": [ 388 | "def area(w,h):\n", 389 | " return w*h\n", 390 | "\n", 391 | "\n", 392 | "widths = range(1,4)\n", 393 | "heights = range(6,10)\n", 394 | "\n", 395 | "areas = []\n", 396 | "for w in widths:\n", 397 | " for h in heights:\n", 398 | " areas.append(area(w,h))\n", 399 | "areas" 400 | ], 401 | "language": "python", 402 | "metadata": {}, 403 | "outputs": [] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "collapsed": false, 408 | "input": [ 409 | "%run ../hints\n", 410 | "nesthint()" 411 | ], 412 | "language": "python", 413 | "metadata": {}, 414 | "outputs": [] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "collapsed": true, 419 | "input": [ 420 | "%load ../soln/nestedloop.py" 421 | ], 422 | "language": "python", 423 | "metadata": {}, 424 | "outputs": [] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "collapsed": false, 429 | "input": [ 430 | "# To parallelize every call with map, you just need to get a list for each argument.\n", 431 | "# You can use `itertools.product` + `zip` to get this:\n", 432 | "\n", 433 | "\n", 434 | "import itertools\n", 435 | "\n", 436 | "product = list(itertools.product(widths, heights))\n", 437 | "# [(1, 6), (1, 
7), (1, 8), (1, 9), (2, 6), (2, 7), (2, 8), (2, 9), (3, 6), (3, 7), (3, 8), (3, 9)]\n", 438 | "\n", 439 | "# So we have a \"list of pairs\", \n", 440 | "# but what we really want is a single list for each argument, i.e. a \"pair of lists\".\n", 441 | "# This is exactly what the slightly weird `zip(*product)` syntax gets us:\n", 442 | "\n", 443 | "allwidths, allheights = zip(*itertools.product(widths, heights))\n", 444 | "\n", 445 | "print \" widths\", allwidths\n", 446 | "print \"heights\", allheights\n", 447 | "\n", 448 | "# Now we just map our function onto those two lists, to parallelize nested for loops:\n", 449 | "\n", 450 | "ar = lview.map_async(area, allwidths, allheights)\n" 451 | ], 452 | "language": "python", 453 | "metadata": {}, 454 | "outputs": [] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "Validate the result:" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "collapsed": false, 466 | "input": [ 467 | "p_areas = ar.get()\n", 468 | "p_areas" 469 | ], 470 | "language": "python", 471 | "metadata": {}, 472 | "outputs": [] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "collapsed": false, 477 | "input": [ 478 | "areas == p_areas" 479 | ], 480 | "language": "python", 481 | "metadata": {}, 482 | "outputs": [] 483 | }, 484 | { 485 | "cell_type": "heading", 486 | "level": 2, 487 | "metadata": {}, 488 | "source": [ 489 | "Examples and Exercises" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "metadata": {}, 495 | "source": [ 496 | "- [Counting Words!](../examples/Counting%20Words.ipynb)\n", 497 | "- [Monte Carlo Options Pricing](../examples/MC%20Options.ipynb)" 498 | ] 499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "metadata": {}, 503 | "source": [ 504 | "Now that we've seen multiplexing and load-balancing, let's see how they are [used together](All%20Together.ipynb)." 
505 | ] 506 | } 507 | ], 508 | "metadata": {} 509 | } 510 | ] 511 | } -------------------------------------------------------------------------------- /Overview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:aab834d6d66727b51b1bd5e3d9d09345d9a219de070e95d5de1ae88e0cad69c7" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "code", 13 | "collapsed": false, 14 | "input": [ 15 | "%%html\n", 16 | "" 23 | ], 24 | "language": "python", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "html": [ 29 | "" 36 | ], 37 | "metadata": {}, 38 | "output_type": "display_data", 39 | "text": [ 40 | "" 41 | ] 42 | } 43 | ], 44 | "prompt_number": 22 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "# Overview and getting started\n", 51 | "\n", 52 | "\n", 53 | "## Introduction\n", 54 | "\n", 55 | "Let's start with an overview of IPython's architecture for parallel\n", 56 | "and distributed computing. This architecture abstracts out parallelism\n", 57 | "in a very general way, which enables IPython to support many different\n", 58 | "styles of parallelism including:\n", 59 | "\n", 60 | "- Single program, multiple data (SPMD) parallelism\n", 61 | "- Multiple program, multiple data (MPMD) parallelism\n", 62 | "- Message passing using MPI or \u00d8MQ\n", 63 | "- Task farming\n", 64 | "- Data parallel\n", 65 | "- Coordination of distributed processes\n", 66 | "- Combinations of these approaches\n", 67 | "- Custom user defined approaches\n", 68 | "\n", 69 | "Most importantly, IPython enables all types of parallel applications to\n", 70 | "be developed, executed, debugged and monitored *interactively*. Hence,\n", 71 | "the `I` in `IPython`. 
Some example use cases for\n", 72 | "`IPython.parallel`:\n", 73 | "\n", 74 | "- Quickly parallelize algorithms that are embarrassingly parallel\n", 75 | " using a number of simple approaches. Many simple things can be\n", 76 | " parallelized interactively in one or two lines of code.\n", 77 | "\n", 78 | "- Steer traditional MPI applications on a supercomputer from an\n", 79 | " IPython session on your laptop.\n", 80 | "\n", 81 | "- Analyze and visualize large datasets (that could be remote and/or\n", 82 | " distributed) interactively using IPython and tools like\n", 83 | " matplotlib/TVTK.\n", 84 | "\n", 85 | "- Develop, test and debug new parallel algorithms (that may use MPI or PyZMQ)\n", 86 | " interactively.\n", 87 | "\n", 88 | "- Tie together multiple MPI jobs running on different systems into one\n", 89 | " giant distributed and parallel system.\n", 90 | "\n", 91 | "- Start a parallel job on your cluster and then have a remote\n", 92 | " collaborator connect to it and pull back data into their local\n", 93 | " IPython session for plotting and analysis.\n", 94 | "\n", 95 | "- Run a set of tasks on a set of CPUs using dynamic load balancing." 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "## Architecture overview\n", 103 | "\n", 104 | "\n", 105 | "\n", 106 | "The IPython architecture consists of four components:\n", 107 | "\n", 108 | "- The IPython engine\n", 109 | "- The IPython hub\n", 110 | "- The IPython schedulers\n", 111 | "- The cluster client\n", 112 | "\n", 113 | "These components live in the `IPython.parallel` package and are\n", 114 | "installed with IPython." 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### IPython engine\n", 122 | "\n", 123 | "The IPython engine is a Python instance that accepts Python commands over\n", 124 | "a network connection. 
When multiple engines are started, parallel\n", 125 | "and distributed computing becomes possible. An important property of an\n", 126 | "IPython engine is that it blocks while user code is being executed. Read\n", 127 | "on for how the IPython controller solves this problem to expose a clean\n", 128 | "asynchronous API to the user." 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### IPython controller\n", 136 | "\n", 137 | "\n", 138 | "The IPython controller processes provide an interface for working with a\n", 139 | "set of engines. At a general level, the controller is a collection of\n", 140 | "processes to which IPython engines and clients can connect. The\n", 141 | "controller is composed of a `Hub` and a collection of\n", 142 | "`Schedulers`, which may be in processes or threads.\n", 143 | "\n", 144 | "The controller provides a single point of contact for users who\n", 145 | "wish to utilize the engines in the cluster. There is a variety of\n", 146 | "different ways of working with a controller, but all of these\n", 147 | "models are implemented via the `View.apply` method, after\n", 148 | "constructing `View` objects to represent different collections engines.\n", 149 | "The two primary models for interacting with engines are:\n", 150 | "\n", 151 | "- A **Direct** interface, where engines are addressed explicitly.\n", 152 | "- A **LoadBalanced** interface, where the Scheduler is trusted with\n", 153 | " assigning work to appropriate engines.\n", 154 | "\n", 155 | "Advanced users can readily extend the View models to enable other styles\n", 156 | "of parallelism." 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "#### The Hub\n", 164 | "\n", 165 | "The center of an IPython cluster is the Hub. 
The Hub can be viewed as an \u00fcber-logger, which keeps track of engine connections, schedulers, clients, as well as persist all\n", 166 | "task requests and results in a database for later use." 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "#### Schedulers\n", 174 | "\n", 175 | "All actions that can be performed on the engine go through a Scheduler.\n", 176 | "While the engines themselves block when user code is run, the schedulers\n", 177 | "hide that from the user to provide a fully asynchronous interface to a\n", 178 | "set of engines. Each Scheduler is a small GIL-less function in C provided\n", 179 | "by pyzmq (the Python load-balanced scheduler being an exception). " 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "## \u00d8MQ and PyZMQ\n", 187 | "\n", 188 | "All of this is implemented with the lovely \u00d8MQ messaging library,\n", 189 | "and pyzmq, the lightweight Python bindings, which allows very fast\n", 190 | "zero-copy communication of objects like numpy arrays." 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "## IPython client and views\n", 198 | "\n", 199 | "There is one primary object, the `Client`, for\n", 200 | "connecting to a cluster. For each execution model, there is a\n", 201 | "corresponding `View`. These views allow users to\n", 202 | "interact with a set of engines through the interface. Here are the two\n", 203 | "default views:\n", 204 | "\n", 205 | "- The `DirectView` class for explicit addressing.\n", 206 | "- The `LoadBalancedView` class for destination-agnostic\n", 207 | " scheduling." 
208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "## Getting Started\n", 215 | "\n", 216 | "## Starting the IPython controller and engines\n", 217 | "\n", 218 | "To follow along with this tutorial, you will need to start the IPython\n", 219 | "controller and four IPython engines. The simplest way of doing this is\n", 220 | "with the [clusters tab](/#clusters),\n", 221 | "or you can use the `ipcluster` command in a terminal:\n", 222 | "\n", 223 | " $ ipcluster start -n 4\n", 224 | "\n", 225 | "There isn't time to go into it here, but ipcluster can be used to start engines\n", 226 | "and the controller with various batch systems including:\n", 227 | "\n", 228 | "* SGE\n", 229 | "* PBS\n", 230 | "* LSF\n", 231 | "* MPI\n", 232 | "* SSH\n", 233 | "* WinHPC\n", 234 | "\n", 235 | "More information on starting and configuring the IPython cluster in \n", 236 | "[the IPython.parallel docs](http://ipython.org/ipython-doc/stable/parallel/parallel_process.html).\n", 237 | "\n", 238 | "Once you have started the IPython controller and one or more engines,\n", 239 | "you are ready to use the engines to do something useful. 
\n", 240 | "\n", 241 | "To make sure everything is working correctly, let's do a very simple demo:" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "collapsed": false, 247 | "input": [ 248 | "from IPython import parallel\n", 249 | "rc = parallel.Client()\n", 250 | "rc.block = True" 251 | ], 252 | "language": "python", 253 | "metadata": {}, 254 | "outputs": [] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "collapsed": false, 259 | "input": [ 260 | "rc.ids" 261 | ], 262 | "language": "python", 263 | "metadata": {}, 264 | "outputs": [] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "collapsed": false, 269 | "input": [ 270 | "def mul(a,b):\n", 271 | " return a*b" 272 | ], 273 | "language": "python", 274 | "metadata": {}, 275 | "outputs": [] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "collapsed": false, 280 | "input": [ 281 | "def summary():\n", 282 | " \"\"\"summarize some info about this process\"\"\"\n", 283 | " import os\n", 284 | " import socket\n", 285 | " import sys\n", 286 | " return {\n", 287 | " 'cwd': os.getcwd(),\n", 288 | " 'Python': sys.version,\n", 289 | " 'hostname': socket.gethostname(),\n", 290 | " 'pid': os.getpid(),\n", 291 | " }" 292 | ], 293 | "language": "python", 294 | "metadata": {}, 295 | "outputs": [] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "collapsed": false, 300 | "input": [ 301 | "mul(5,6)" 302 | ], 303 | "language": "python", 304 | "metadata": {}, 305 | "outputs": [] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "collapsed": false, 310 | "input": [ 311 | "summary()" 312 | ], 313 | "language": "python", 314 | "metadata": {}, 315 | "outputs": [] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "What does it look like to call this function remotely?\n", 322 | "\n", 323 | "Just turn `f(*args, **kwargs)` into `view.apply(f, *args, **kwargs)`!" 
324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "collapsed": false, 329 | "input": [ 330 | "rc[0].apply(mul, 5, 6)" 331 | ], 332 | "language": "python", 333 | "metadata": {}, 334 | "outputs": [] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "collapsed": false, 339 | "input": [ 340 | "rc[0].apply(summary)" 341 | ], 342 | "language": "python", 343 | "metadata": {}, 344 | "outputs": [] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "And the same thing in parallel?" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "collapsed": false, 356 | "input": [ 357 | "rc[:].apply(mul, 5, 6)" 358 | ], 359 | "language": "python", 360 | "metadata": {}, 361 | "outputs": [] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "collapsed": false, 366 | "input": [ 367 | "rc[:].apply(summary)" 368 | ], 369 | "language": "python", 370 | "metadata": {}, 371 | "outputs": [] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "Python has a builtin map for calling a function with a variety of arguments" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "collapsed": false, 383 | "input": [ 384 | "map(mul, range(1,10), range(2,11))" 385 | ], 386 | "language": "python", 387 | "metadata": {}, 388 | "outputs": [] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "So how do we do this in parallel?" 
395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "collapsed": false, 400 | "input": [ 401 | "view = rc.load_balanced_view()\n", 402 | "view.map(mul, range(1,10), range(2,11))" 403 | ], 404 | "language": "python", 405 | "metadata": {}, 406 | "outputs": [] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "And a preview of parallel magics:" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "collapsed": false, 418 | "input": [ 419 | "%%px\n", 420 | "import os, socket\n", 421 | "print os.getpid()\n", 422 | "print socket.gethostname()" 423 | ], 424 | "language": "python", 425 | "metadata": {}, 426 | "outputs": [] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "Now let's get into some more detail about how to use IPython for [remote execution](tutorial/Remote Execution.ipynb)." 433 | ] 434 | } 435 | ], 436 | "metadata": {} 437 | } 438 | ] 439 | } -------------------------------------------------------------------------------- /Background.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:bef23ef8fbd80f76061a9b67928003d87464fb34501e3a11f37b4be4ccf6d51f" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": { 15 | "slideshow": { 16 | "slide_type": "skip" 17 | } 18 | }, 19 | "source": [ 20 | "Some Background on IPython" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "slideshow": { 27 | "slide_type": "skip" 28 | } 29 | }, 30 | "source": [ 31 | "Let's start with some background on what IPython is,\n", 32 | "then we can talk about what parallel IPython is,\n", 33 | "and why it makes sense." 
34 | ] 35 | }, 36 | { 37 | "cell_type": "heading", 38 | "level": 1, 39 | "metadata": { 40 | "slideshow": { 41 | "slide_type": "slide" 42 | } 43 | }, 44 | "source": [ 45 | "What is IPython?" 46 | ] 47 | }, 48 | { 49 | "cell_type": "heading", 50 | "level": 1, 51 | "metadata": { 52 | "slideshow": { 53 | "slide_type": "slide" 54 | } 55 | }, 56 | "source": [ 57 | "Level 0: Interactive Python" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "collapsed": false, 63 | "input": [ 64 | "def foo():\n", 65 | " \"\"\"docstring\"\"\"\n", 66 | " return 'hi'\n", 67 | "foo()" 68 | ], 69 | "language": "python", 70 | "metadata": {}, 71 | "outputs": [] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "collapsed": false, 76 | "input": [ 77 | "def bar():\n", 78 | " print 'hi'" 79 | ], 80 | "language": "python", 81 | "metadata": {}, 82 | "outputs": [] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "collapsed": false, 87 | "input": [ 88 | "from __future__ import print_function\n", 89 | "\n", 90 | "import time\n", 91 | "\n", 92 | "for i in range(10):\n", 93 | " time.sleep(0.25)\n", 94 | " print(i, end=' ')\n" 95 | ], 96 | "language": "python", 97 | "metadata": {}, 98 | "outputs": [] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "collapsed": false, 103 | "input": [ 104 | "import numpy as np" 105 | ], 106 | "language": "python", 107 | "metadata": {}, 108 | "outputs": [] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "collapsed": false, 113 | "input": [ 114 | "import ctypes\n", 115 | "libc = ctypes.CDLL('libc.dylib')\n", 116 | "libc.time(-1)" 117 | ], 118 | "language": "python", 119 | "metadata": {}, 120 | "outputs": [] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "collapsed": false, 125 | "input": [ 126 | "print(\"I'm okay!\")" 127 | ], 128 | "language": "python", 129 | "metadata": {}, 130 | "outputs": [] 131 | }, 132 | { 133 | "cell_type": "heading", 134 | "level": 1, 135 | "metadata": { 136 | "slideshow": { 137 | "slide_type": "slide" 138 | } 139 | }, 140 | "source": [ 141 | 
"Level 1: A bit more than Python" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "collapsed": false, 147 | "input": [ 148 | "import numpy as np" 149 | ], 150 | "language": "python", 151 | "metadata": {}, 152 | "outputs": [] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "collapsed": false, 157 | "input": [ 158 | "np." 159 | ], 160 | "language": "python", 161 | "metadata": {}, 162 | "outputs": [] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "collapsed": false, 167 | "input": [ 168 | "np.linspace?" 169 | ], 170 | "language": "python", 171 | "metadata": {}, 172 | "outputs": [] 173 | }, 174 | { 175 | "cell_type": "heading", 176 | "level": 2, 177 | "metadata": { 178 | "slideshow": { 179 | "slide_type": "slide" 180 | } 181 | }, 182 | "source": [ 183 | "Shell interaction" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "collapsed": false, 189 | "input": [ 190 | "!date" 191 | ], 192 | "language": "python", 193 | "metadata": {}, 194 | "outputs": [] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "collapsed": false, 199 | "input": [ 200 | "ls" 201 | ], 202 | "language": "python", 203 | "metadata": {}, 204 | "outputs": [] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "collapsed": false, 209 | "input": [ 210 | "files = !ls" 211 | ], 212 | "language": "python", 213 | "metadata": {}, 214 | "outputs": [] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "collapsed": false, 219 | "input": [ 220 | "for f in files:\n", 221 | " print(repr(f))" 222 | ], 223 | "language": "python", 224 | "metadata": {}, 225 | "outputs": [] 226 | }, 227 | { 228 | "cell_type": "heading", 229 | "level": 2, 230 | "metadata": { 231 | "slideshow": { 232 | "slide_type": "slide" 233 | } 234 | }, 235 | "source": [ 236 | "Extending Python with `%magics`" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "collapsed": false, 242 | "input": [ 243 | "biglist = range(1000000)\n", 244 | "bigset = set(biglist)\n", 245 | "\n", 246 | "item = biglist[0]\n", 247 | "%timeit item in 
biglist\n", 248 | "%timeit item in bigset" 249 | ], 250 | "language": "python", 251 | "metadata": {}, 252 | "outputs": [] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "collapsed": false, 257 | "input": [ 258 | "def inner(x):\n", 259 | " return 1/x\n", 260 | "\n", 261 | "def outer(y):\n", 262 | " return inner(1-y)\n", 263 | "\n", 264 | "z = 2\n", 265 | "outer(-z + 3)" 266 | ], 267 | "language": "python", 268 | "metadata": {}, 269 | "outputs": [] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "collapsed": false, 274 | "input": [ 275 | "%debug" 276 | ], 277 | "language": "python", 278 | "metadata": {}, 279 | "outputs": [] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "collapsed": false, 284 | "input": [ 285 | "%lsmagic" 286 | ], 287 | "language": "python", 288 | "metadata": {}, 289 | "outputs": [] 290 | }, 291 | { 292 | "cell_type": "heading", 293 | "level": 1, 294 | "metadata": { 295 | "slideshow": { 296 | "slide_type": "slide" 297 | } 298 | }, 299 | "source": [ 300 | "Level 2: A bit more than the Terminal" 301 | ] 302 | }, 303 | { 304 | "cell_type": "heading", 305 | "level": 2, 306 | "metadata": { 307 | "slideshow": { 308 | "slide_type": "slide" 309 | } 310 | }, 311 | "source": [ 312 | "Rich Display" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "collapsed": false, 318 | "input": [ 319 | "%matplotlib inline\n", 320 | "import numpy as np\n", 321 | "import matplotlib.pyplot as plt\n", 322 | "from scipy.special import jn" 323 | ], 324 | "language": "python", 325 | "metadata": {}, 326 | "outputs": [] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "collapsed": false, 331 | "input": [ 332 | "x = np.linspace(0,10)\n", 333 | "for n in range(5):\n", 334 | " plt.plot(x, jn(n,x))" 335 | ], 336 | "language": "python", 337 | "metadata": {}, 338 | "outputs": [] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "collapsed": false, 343 | "input": [ 344 | "%%latex\n", 345 | "\\begin{eqnarray}\n", 346 | "\\nabla \\times \\vec{\\mathbf{B}} &\n", 347 | " - 
\\frac{1}{c} \\frac{\\partial\\vec{\\mathbf{E}}}{\\partial t} \n", 348 | " & = & \\frac{4 \\pi}{c} \\vec{\\mathbf{j}} \\\\\n", 349 | "\\nabla \\cdot \\vec{\\mathbf{E}} && = & 4 \\pi \\rho \\\\\n", 350 | "\\nabla \\times \\vec{\\mathbf{E}} &\n", 351 | " + \\frac{1}{c} \\frac{\\partial\\vec{\\mathbf{B}}}{\\partial t}\n", 352 | " & = & \\vec{\\mathbf{0}} \\\\\n", 353 | "\\nabla \\cdot \\vec{\\mathbf{B}} && = & 0 \n", 354 | "\\end{eqnarray}\n" 355 | ], 356 | "language": "python", 357 | "metadata": {}, 358 | "outputs": [] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "collapsed": false, 363 | "input": [ 364 | "import sympy\n", 365 | "sympy.init_printing(use_latex='mathjax')\n", 366 | "x = sympy.symbols('x')\n", 367 | "expr = x**10-1\n", 368 | "expr" 369 | ], 370 | "language": "python", 371 | "metadata": {}, 372 | "outputs": [] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "collapsed": false, 377 | "input": [ 378 | "sympy.factor(expr)" 379 | ], 380 | "language": "python", 381 | "metadata": {}, 382 | "outputs": [] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "collapsed": false, 387 | "input": [ 388 | "from IPython.display import display\n", 389 | "from IPython.html.widgets import interact\n", 390 | "\n", 391 | "@interact\n", 392 | "def factorit(n=10):\n", 393 | " display(sympy.factor(x**n-1))\n", 394 | " " 395 | ], 396 | "language": "python", 397 | "metadata": {}, 398 | "outputs": [] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "collapsed": false, 403 | "input": [ 404 | "from IPython.display import Image, YouTubeVideo\n", 405 | "Image(\"figs/darts.png\")" 406 | ], 407 | "language": "python", 408 | "metadata": {}, 409 | "outputs": [] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "collapsed": false, 414 | "input": [ 415 | "YouTubeVideo('BROWqjuTM0g ')" 416 | ], 417 | "language": "python", 418 | "metadata": {}, 419 | "outputs": [] 420 | }, 421 | { 422 | "cell_type": "heading", 423 | "level": 2, 424 | "metadata": {}, 425 | "source": [ 426 | "Level 3: 
Documents" 427 | ] 428 | }, 429 | { 430 | "cell_type": "heading", 431 | "level": 2, 432 | "metadata": {}, 433 | "source": [ 434 | "In the Notebook: not just code" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": {}, 440 | "source": [ 441 | "We also have\n", 442 | "\n", 443 | "- *markdown* and\n", 444 | "- $\\LaTeX$\n", 445 | "\n", 446 | "```javascript\n", 447 | "var foo = $(\".class\");\n", 448 | "```" 449 | ] 450 | }, 451 | { 452 | "cell_type": "heading", 453 | "level": 2, 454 | "metadata": { 455 | "slideshow": { 456 | "slide_type": "slide" 457 | } 458 | }, 459 | "source": [ 460 | "What is a notebook?" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "collapsed": false, 466 | "input": [ 467 | "name = \"Background\"\n", 468 | "!head -n 100 {name}.ipynb | pygmentize -l json" 469 | ], 470 | "language": "python", 471 | "metadata": {}, 472 | "outputs": [] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "collapsed": false, 477 | "input": [ 478 | "!ipython nbconvert {name}.ipynb --to html\n", 479 | "\n", 480 | "from IPython.display import display, HTML\n", 481 | "display(HTML(\"{name}.html\".format(name=name)))" 482 | ], 483 | "language": "python", 484 | "metadata": {}, 485 | "outputs": [] 486 | }, 487 | { 488 | "cell_type": "heading", 489 | "level": 1, 490 | "metadata": { 491 | "slideshow": { 492 | "slide_type": "slide" 493 | } 494 | }, 495 | "source": [ 496 | "But what is IPython, *really*?" 
497 | ] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": {}, 502 | "source": [ 503 | "[*A Protocol*](http://ipython.org/ipython-doc/dev/development/messaging.html)\n", 504 | "\n", 505 | "Message type: `execute_request`\n", 506 | "\n", 507 | "```python\n", 508 | "content = {\n", 509 | " # Source code to be executed by the kernel, one or more lines.\n", 510 | " 'code' : str,\n", 511 | "\n", 512 | " # A boolean flag which, if True, signals the kernel to execute\n", 513 | " # this code as quietly as possible.\n", 514 | " 'silent' : bool,\n", 515 | "\n", 516 | " # A boolean flag which, if True, signals the kernel to populate history\n", 517 | " # The default is True if silent is False. If silent is True, store_history\n", 518 | " # is forced to be False.\n", 519 | " 'store_history' : bool,\n", 520 | " ...\n", 521 | "}\n", 522 | "```" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "Message type: `execute_reply`\n", 530 | "\n", 531 | "```python\n", 532 | "content = {\n", 533 | " # One of: 'ok' OR 'error'\n", 534 | " 'status' : str,\n", 535 | "\n", 536 | " # The global kernel counter that increases by one with each request that\n", 537 | " # stores history. This will typically be used by clients to display\n", 538 | " # prompt numbers to the user. 
If the request did not store history, this will\n", 539 | " # be the current value of the counter in the kernel.\n", 540 | " 'execution_count' : int,\n", 541 | "}\n", 542 | "```" 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "metadata": {}, 548 | "source": [ 549 | "Message type: `display_data`\n", 550 | "\n", 551 | "```python\n", 552 | "content = {\n", 553 | "\n", 554 | " # The data dict contains key/value pairs, where the keys are MIME\n", 555 | " # types and the values are the raw data of the representation in that\n", 556 | " # format.\n", 557 | " 'data' : dict,\n", 558 | "\n", 559 | " # Any metadata that describes the data\n", 560 | " 'metadata' : dict,\n", 561 | "}\n", 562 | "```" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "collapsed": false, 568 | "input": [ 569 | "%%javascript\n", 570 | "IPython.notebook.kernel.execute(\"a=1\");" 571 | ], 572 | "language": "python", 573 | "metadata": {}, 574 | "outputs": [] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "collapsed": false, 579 | "input": [ 580 | "print(a)" 581 | ], 582 | "language": "python", 583 | "metadata": {}, 584 | "outputs": [] 585 | }, 586 | { 587 | "cell_type": "markdown", 588 | "metadata": {}, 589 | "source": [ 590 | "The protocol is [publicly documented](http://ipython.org/ipython-doc/dev/development/messaging.html), and [language agnostic](https://github.com/ipython/ipython/wiki/Projects-using-IPython#list-of-some-ipython-compatible-kernels).\n" 591 | ] 592 | }, 593 | { 594 | "cell_type": "heading", 595 | "level": 2, 596 | "metadata": {}, 597 | "source": [ 598 | "So what is IPython, really?" 
599 | ] 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "metadata": {}, 604 | "source": [ 605 | "- Tools for interactive computing\n", 606 | " - documents\n", 607 | " - interfaces\n", 608 | " - terminal\n", 609 | " - qtconsole\n", 610 | " - notebook\n", 611 | " - introspection\n", 612 | " - debugging\n", 613 | "- A message protocol for remote execution\n", 614 | " - language-agnostic\n", 615 | " - rich remote repl" 616 | ] 617 | }, 618 | { 619 | "cell_type": "heading", 620 | "level": 2, 621 | "metadata": {}, 622 | "source": [ 623 | "How does this become IPython.parallel?" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": {}, 629 | "source": [ 630 | "Let's see a [quick demo](examples/Parallel%20image%20processing.ipynb), and then [find out](Overview.ipynb) in more detail." 631 | ] 632 | } 633 | ], 634 | "metadata": {} 635 | } 636 | ] 637 | } -------------------------------------------------------------------------------- /examples/Monitoring MPI.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:00bb635430db4d7407002e894c19a7bb520d700f45ca69c85d7462fcc36781ae" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": { 15 | "slideshow": { 16 | "slide_start": false 17 | } 18 | }, 19 | "source": [ 20 | "Interactive monitoring of a parallel MPI simulation with the IPython Notebook" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "collapsed": false, 26 | "input": [ 27 | "%matplotlib inline\n", 28 | "import numpy as np\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "\n", 31 | "from IPython.display import display\n", 32 | "from IPython.parallel import Client, error\n", 33 | "\n", 34 | "cluster = Client(profile=\"mpi\")\n", 35 | "view = cluster[:]\n", 36 | "view.block = True\n", 37 | "e0 = cluster[0]\n", 38 | "e0.activate('0')" 39 
| ], 40 | "language": "python", 41 | "metadata": { 42 | "slideshow": { 43 | "slide_start": false 44 | } 45 | }, 46 | "outputs": [] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "collapsed": false, 51 | "input": [ 52 | "cluster.ids" 53 | ], 54 | "language": "python", 55 | "metadata": {}, 56 | "outputs": [] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "slideshow": { 62 | "slide_start": false 63 | } 64 | }, 65 | "source": [ 66 | "Now, we load the MPI libraries into the engine namespaces, and do a simple printing of their MPI rank information to verify that all nodes are operational and they match our cluster's real capacity. \n", 67 | "\n", 68 | "Here, we are making use of IPython's special `%%px` cell magic, which marks the entire cell for parallel execution. This means that the code below will not run in this notebook's kernel, but instead will be sent to *all* engines for execution there. In this way, IPython makes it very natural to control your entire cluster from within the notebook environment:" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "collapsed": false, 74 | "input": [ 75 | "%%px\n", 76 | "# MPI initialization, library imports and sanity checks on all engines\n", 77 | "from mpi4py import MPI\n", 78 | "import numpy as np\n", 79 | "import time\n", 80 | "\n", 81 | "mpi = MPI.COMM_WORLD\n", 82 | "bcast = mpi.bcast\n", 83 | "barrier = mpi.barrier\n", 84 | "rank = mpi.rank\n", 85 | "print(\"MPI rank: %i/%i\" % (mpi.rank,mpi.size))" 86 | ], 87 | "language": "python", 88 | "metadata": { 89 | "slideshow": { 90 | "slide_start": false 91 | } 92 | }, 93 | "outputs": [] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": { 98 | "slideshow": { 99 | "slide_start": false 100 | } 101 | }, 102 | "source": [ 103 | "We write a utility that reorders a list according to the mpi ranks of the engines, since all gather operations will return data in engine id order, not in MPI rank order. 
We'll need this later on when we want to reassemble in IPython data structures coming from all the engines: IPython will collect the data ordered by engine ID, but our code creates data structures based on MPI rank, so we need to map from one indexing scheme to the other. This simple function does the job:" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "collapsed": false, 109 | "input": [ 110 | "ranks = view['rank']\n", 111 | "rank_indices = np.argsort(ranks)\n", 112 | "\n", 113 | "def mpi_order(seq):\n", 114 | " \"\"\"Return elements of a sequence ordered by MPI rank.\n", 115 | "\n", 116 | " The input sequence is assumed to be ordered by engine ID.\"\"\"\n", 117 | " return [seq[x] for x in rank_indices]" 118 | ], 119 | "language": "python", 120 | "metadata": { 121 | "slideshow": { 122 | "slide_start": false 123 | } 124 | }, 125 | "outputs": [] 126 | }, 127 | { 128 | "cell_type": "heading", 129 | "level": 2, 130 | "metadata": { 131 | "slideshow": { 132 | "slide_start": false 133 | } 134 | }, 135 | "source": [ 136 | "MPI simulation example" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": { 142 | "slideshow": { 143 | "slide_start": false 144 | } 145 | }, 146 | "source": [ 147 | "This is our 'simulation', a toy example that computes $\\cos(f(x^2+y^2))$ for a slowly increasing frequency $f$ over a gradually refined mesh. In a real-world example, there typically is a 'simulate' method that, after setting up initial parameters, runs the entire computation. But having this simple example will be sufficient to see something that changes visually as the computation evolves and that is quick enough for us to test.\n", 148 | "\n", 149 | "And while simple, this example has a realistic decomposition of the spatial domain in one array per MPI node that requires care in reordering the data for visualization, as would be needed in a real-world application (unless your code accumulates data in the rank 0 node that you can grab directly)."
150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "collapsed": false, 155 | "input": [ 156 | "%%px\n", 157 | "\n", 158 | "stop = False\n", 159 | "nsteps = 100\n", 160 | "delay = 0.1\n", 161 | "\n", 162 | "xmin, xmax = 0, np.pi\n", 163 | "ymin, ymax = 0, 2*np.pi\n", 164 | "dy = (ymax-ymin)/mpi.size\n", 165 | "\n", 166 | "def simulation():\n", 167 | " \"\"\"Toy simulation code, computes sin(f*(x**2+y**2)) for a slowly increasing f\n", 168 | " over an increasingly fine mesh.\n", 169 | "\n", 170 | " The purpose of this code is simply to illustrate the basic features of a typical\n", 171 | " MPI code: spatial domain decomposition, a solution which is evolving in some \n", 172 | " sense, and local per-node computation. In this case the nodes don't really\n", 173 | " communicate at all.\n", 174 | " \"\"\"\n", 175 | " # By making these few variables global, we allow the IPython client to access them\n", 176 | " # remotely for interactive introspection\n", 177 | " global j, Z, nx, nyt\n", 178 | " freqs = np.linspace(0.6, 1, nsteps)\n", 179 | " for j in range(nsteps):\n", 180 | " nx, ny = 2+j/4, 2+j/2/mpi.size\n", 181 | " nyt = mpi.size*ny\n", 182 | " Xax = np.linspace(xmin, xmax, nx)\n", 183 | " Yax = np.linspace(ymin+rank*dy, ymin+(rank+1)*dy, ny, endpoint=rank==mpi.size)\n", 184 | " X, Y = np.meshgrid(Xax, Yax)\n", 185 | " f = freqs[j]\n", 186 | " Z = np.cos(f*(X**2 + Y**2))\n", 187 | " # We add a small delay to simulate that a real-world computation\n", 188 | " # would take much longer, and we ensure all nodes are synchronized\n", 189 | " time.sleep(delay)\n", 190 | " # The stop flag can be set remotely via IPython, allowing the simulation to be\n", 191 | " # cleanly stopped from the outside\n", 192 | " if stop:\n", 193 | " break" 194 | ], 195 | "language": "python", 196 | "metadata": { 197 | "slideshow": { 198 | "slide_start": false 199 | } 200 | }, 201 | "outputs": [] 202 | }, 203 | { 204 | "cell_type": "heading", 205 | "level": 2, 206 | "metadata": { 207 | 
"slideshow": { 208 | "slide_start": false 209 | } 210 | }, 211 | "source": [ 212 | "IPython tools to interactively monitor and plot the MPI results" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": { 218 | "slideshow": { 219 | "slide_start": false 220 | } 221 | }, 222 | "source": [ 223 | "We now define a local (to this notebook) plotting function that fetches data from the engines' global namespace. Once it has retrieved the current state of the relevant variables, it produces and returns a figure:" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "collapsed": false, 229 | "input": [ 230 | "from IPython.display import clear_output\n", 231 | "\n", 232 | "def plot_current_results(in_place=True):\n", 233 | " \"\"\"Makes a blocking call to retrieve remote data and displays the solution mesh\n", 234 | " as a contour plot.\n", 235 | " \n", 236 | " Parameters\n", 237 | " ----------\n", 238 | " in_place : bool\n", 239 | " By default it calls clear_output so that new plots replace old ones. Set\n", 240 | " to False to allow keeping of all previous outputs.\n", 241 | " \"\"\"\n", 242 | " \n", 243 | " # We make a blocking call to load the remote data from the simulation into simple named \n", 244 | " # variables we can read from the engine namespaces\n", 245 | " #view.apply_sync(load_simulation_globals)\n", 246 | " # And now we can use the view to read these variables from all the engines. 
Then we\n", 247 | " # concatenate all of them into single arrays for local plotting\n", 248 | " try:\n", 249 | " Z = np.concatenate(mpi_order(view['Z']))\n", 250 | " except ValueError:\n", 251 | " print(\"dimension mismatch in Z, not plotting\")\n", 252 | " ax = plt.gca()\n", 253 | " return ax.figure\n", 254 | " \n", 255 | " nx, nyt, j, nsteps = view.pull(['nx', 'nyt', 'j', 'nsteps'], targets=0)\n", 256 | " fig, ax = plt.subplots()\n", 257 | " ax.contourf(Z)\n", 258 | " ax.set_title('Mesh: %i x %i, step %i/%i' % (nx, nyt, j+1, nsteps))\n", 259 | " plt.axis('off')\n", 260 | " # We clear the notebook output before plotting this if in-place plot updating is requested\n", 261 | " if in_place:\n", 262 | " clear_output(wait=True)\n", 263 | " display(fig)\n", 264 | " return fig" 265 | ], 266 | "language": "python", 267 | "metadata": { 268 | "slideshow": { 269 | "slide_start": false 270 | } 271 | }, 272 | "outputs": [] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": { 277 | "slideshow": { 278 | "slide_start": false 279 | } 280 | }, 281 | "source": [ 282 | "It will also be useful to be able to check whether the simulation is still alive or not. Below we will wrap the main simulation function into a thread to allow IPython to pull data from the engines, and we will call this object `simulation_thread`. 
So to check whether the code is still running, all we have to do is call the `is_alive` method on all of our engines and see whether any of them returns True:" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "collapsed": false, 288 | "input": [ 289 | "def simulation_alive():\n", 290 | " \"\"\"Return True if the simulation thread is still running on any engine.\n", 291 | " \"\"\"\n", 292 | " return any(view.apply_sync(lambda : simulation_thread.is_alive()))" 293 | ], 294 | "language": "python", 295 | "metadata": { 296 | "slideshow": { 297 | "slide_start": false 298 | } 299 | }, 300 | "outputs": [] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": { 305 | "slideshow": { 306 | "slide_start": false 307 | } 308 | }, 309 | "source": [ 310 | "Finally, this is a convenience wrapper around the plotting code so that we can interrupt monitoring at any point, and that will provide basic timing information:" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "collapsed": false, 316 | "input": [ 317 | "def monitor_simulation(refresh=5.0, plots_in_place=True):\n", 318 | " \"\"\"Monitor the simulation progress and call plotting routine.\n", 319 | "\n", 320 | " Supress KeyboardInterrupt exception if interrupted, ensure that the last \n", 321 | " figure is always displayed and provide basic timing and simulation status.\n", 322 | "\n", 323 | " Parameters\n", 324 | " ----------\n", 325 | " refresh : float\n", 326 | " Refresh interval between calls to retrieve and plot data. The default\n", 327 | " is 5s, adjust depending on the desired refresh rate, but be aware that \n", 328 | " very short intervals will start having a significant impact.\n", 329 | "\n", 330 | " plots_in_place : bool\n", 331 | " If true, every new figure replaces the last one, producing a (slow)\n", 332 | " animation effect in the notebook. 
If false, all frames are plotted\n", 333 | " in sequence and appended in the output area.\n", 334 | " \"\"\"\n", 335 | " import datetime as dt, time\n", 336 | " \n", 337 | " if not simulation_alive():\n", 338 | " plot_current_results(in_place=plots_in_place)\n", 339 | " plt.close('all')\n", 340 | " print('Simulation has already finished, no monitoring to do.')\n", 341 | " return\n", 342 | " \n", 343 | " t0 = dt.datetime.now()\n", 344 | " fig = None\n", 345 | " try:\n", 346 | " while simulation_alive():\n", 347 | " fig = plot_current_results(in_place=plots_in_place)\n", 348 | " plt.close('all') # prevent re-plot of old figures\n", 349 | " time.sleep(refresh) # so we don't hammer the server too fast\n", 350 | " except (KeyboardInterrupt, error.TimeoutError):\n", 351 | " msg = 'Monitoring interrupted, simulation is ongoing!'\n", 352 | " else:\n", 353 | " msg = 'Simulation completed!'\n", 354 | " tmon = dt.datetime.now() - t0\n", 355 | " if plots_in_place and fig is not None:\n", 356 | " clear_output(wait=True)\n", 357 | " plt.close('all')\n", 358 | " display(fig)\n", 359 | " print(msg)\n", 360 | " print('Monitored for: %s.' % tmon)" 361 | ], 362 | "language": "python", 363 | "metadata": { 364 | "slideshow": { 365 | "slide_start": false 366 | } 367 | }, 368 | "outputs": [] 369 | }, 370 | { 371 | "cell_type": "heading", 372 | "level": 2, 373 | "metadata": { 374 | "slideshow": { 375 | "slide_start": false 376 | } 377 | }, 378 | "source": [ 379 | "Making a simulation object that can be monitored interactively" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "collapsed": false, 385 | "input": [ 386 | "%%px\n", 387 | "from threading import Thread\n", 388 | "stop = False\n", 389 | "nsteps = 100\n", 390 | "delay=0.5\n", 391 | "# Create a thread wrapper for the simulation. 
The target must be an argument-less\n", 392 | "# function so we wrap the call to 'simulation' in a simple lambda:\n", 393 | "simulation_thread = Thread(target = lambda : simulation())\n", 394 | "# Now we actually start the simulation\n", 395 | "simulation_thread.start()" 396 | ], 397 | "language": "python", 398 | "metadata": { 399 | "slideshow": { 400 | "slide_start": false 401 | } 402 | }, 403 | "outputs": [] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "collapsed": false, 408 | "input": [ 409 | "monitor_simulation(refresh=1);" 410 | ], 411 | "language": "python", 412 | "metadata": { 413 | "slideshow": { 414 | "slide_start": false 415 | } 416 | }, 417 | "outputs": [] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": { 422 | "slideshow": { 423 | "slide_start": false 424 | } 425 | }, 426 | "source": [ 427 | "If you execute the following cell before the MPI code is finished running, it will stop the simulation at that point, which you can verify by calling the monitoring again:" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "collapsed": false, 433 | "input": [ 434 | "view['stop'] = True" 435 | ], 436 | "language": "python", 437 | "metadata": { 438 | "slideshow": { 439 | "slide_start": false 440 | } 441 | }, 442 | "outputs": [] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "collapsed": false, 447 | "input": [ 448 | "%px0 from IPython.parallel import bind_kernel; bind_kernel()" 449 | ], 450 | "language": "python", 451 | "metadata": {}, 452 | "outputs": [] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "collapsed": false, 457 | "input": [ 458 | "%px0 %qtconsole" 459 | ], 460 | "language": "python", 461 | "metadata": {}, 462 | "outputs": [] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "collapsed": false, 467 | "input": [], 468 | "language": "python", 469 | "metadata": {}, 470 | "outputs": [] 471 | } 472 | ], 473 | "metadata": {} 474 | } 475 | ] 476 | } 
-------------------------------------------------------------------------------- /Performance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:2ef7f69dc3a748f5ef6e65a754eb56be463b690a83224a1b48ce16fdf8adfc21" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Performance\n", 16 | "\n", 17 | "How small do my tasks need to be (aka how fast is IPython)?\n", 18 | "\n", 19 | "In parallel computing, an important relationship to keep in mind is the\n", 20 | "ratio of computation to communication. In order for your simulation to\n", 21 | "perform reasonably, you must keep this ratio high. When testing out a\n", 22 | "new tool like IPython, it is important to examine the limit of\n", 23 | "granularity that is appropriate. If it takes half a second of overhead\n", 24 | "to run each task, then breaking your work up into millisecond chunks\n", 25 | "isn't going to make sense.\n", 26 | "\n", 27 | "Basic imports to use later, create a Client, and a LoadBalancedView of all the engines." 
28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "collapsed": false, 33 | "input": [ 34 | "%matplotlib inline" 35 | ], 36 | "language": "python", 37 | "metadata": {}, 38 | "outputs": [], 39 | "prompt_number": 1 40 | }, 41 | { 42 | "cell_type": "code", 43 | "collapsed": false, 44 | "input": [ 45 | "import time\n", 46 | "import numpy as np\n", 47 | "\n", 48 | "from IPython.parallel import Client\n", 49 | "\n", 50 | "rc = Client()\n", 51 | "view = rc.load_balanced_view()" 52 | ], 53 | "language": "python", 54 | "metadata": {}, 55 | "outputs": [], 56 | "prompt_number": 2 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## Latency\n", 63 | "\n", 64 | "Sending and receiving tiny messages gives us a sense of the minimum time\n", 65 | "IPython must spend building and sending messages around. This should\n", 66 | "give us a sense of the *minimum* overhead of the communication system.\n", 67 | "\n", 68 | "This should give us a sense of the lower limit on available granularity." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "collapsed": true, 74 | "input": [ 75 | "def test_latency(v, n):\n", 76 | " tic = time.time()\n", 77 | " echo = lambda x: x\n", 78 | " tic = time.time()\n", 79 | " for i in xrange(n):\n", 80 | " v.apply_async(echo, '')\n", 81 | " toc = time.time()\n", 82 | " v.wait()\n", 83 | " tac = time.time()\n", 84 | " sent = toc-tic\n", 85 | " roundtrip = tac-tic\n", 86 | " return sent, roundtrip" 87 | ], 88 | "language": "python", 89 | "metadata": {}, 90 | "outputs": [], 91 | "prompt_number": 3 92 | }, 93 | { 94 | "cell_type": "code", 95 | "collapsed": false, 96 | "input": [ 97 | "for n in [8,16,32,64,128,256,512,1024]:\n", 98 | " # short rest between tests\n", 99 | " time.sleep(0.5)\n", 100 | " s,rt = test_latency(view, n)\n", 101 | " print \"%4i %6.1f %6.1f\" % (n,n/s,n/rt)" 102 | ], 103 | "language": "python", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "output_type": "stream", 108 | "stream": "stdout", 109 | "text": [ 110 | " 8 1336.0 233.0\n", 111 | " 16 1182.2 202.0" 112 | ] 113 | }, 114 | { 115 | "output_type": "stream", 116 | "stream": "stdout", 117 | "text": [ 118 | "\n", 119 | " 32 1378.5 271.1" 120 | ] 121 | }, 122 | { 123 | "output_type": "stream", 124 | "stream": "stdout", 125 | "text": [ 126 | "\n", 127 | " 64 1322.6 277.6" 128 | ] 129 | }, 130 | { 131 | "output_type": "stream", 132 | "stream": "stdout", 133 | "text": [ 134 | "\n", 135 | " 128 661.4 220.1" 136 | ] 137 | }, 138 | { 139 | "output_type": "stream", 140 | "stream": "stdout", 141 | "text": [ 142 | "\n", 143 | " 256 1218.5 278.8" 144 | ] 145 | }, 146 | { 147 | "output_type": "stream", 148 | "stream": "stdout", 149 | "text": [ 150 | "\n", 151 | " 512 1248.1 277.6" 152 | ] 153 | }, 154 | { 155 | "output_type": "stream", 156 | "stream": "stdout", 157 | "text": [ 158 | "\n", 159 | "1024 1249.6 288.3" 160 | ] 161 | }, 162 | { 163 | "output_type": "stream", 164 | "stream": "stdout", 165 | "text": [ 166 | "\n" 167 | ] 168 | } 169 | ], 170 | 
"prompt_number": 4 171 | }, 172 | { 173 | "cell_type": "code", 174 | "collapsed": false, 175 | "input": [ 176 | "for n in [8,16,32,64,128,256,512,1024]:\n", 177 | " # short rest between tests\n", 178 | " time.sleep(0.5)\n", 179 | " s,rt = test_latency(view, n)\n", 180 | " print \"%4i %6.1f %6.1f\" % (n,n/s,n/rt)" 181 | ], 182 | "language": "python", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "output_type": "stream", 187 | "stream": "stdout", 188 | "text": [ 189 | " 8 969.3 97.8\n", 190 | " 16 1084.1 198.7" 191 | ] 192 | }, 193 | { 194 | "output_type": "stream", 195 | "stream": "stdout", 196 | "text": [ 197 | "\n", 198 | " 32 1251.3 261.6" 199 | ] 200 | }, 201 | { 202 | "output_type": "stream", 203 | "stream": "stdout", 204 | "text": [ 205 | "\n", 206 | " 64 795.2 208.2" 207 | ] 208 | }, 209 | { 210 | "output_type": "stream", 211 | "stream": "stdout", 212 | "text": [ 213 | "\n", 214 | " 128 831.3 282.1" 215 | ] 216 | }, 217 | { 218 | "output_type": "stream", 219 | "stream": "stdout", 220 | "text": [ 221 | "\n", 222 | " 256 667.4 172.6" 223 | ] 224 | }, 225 | { 226 | "output_type": "stream", 227 | "stream": "stdout", 228 | "text": [ 229 | "\n", 230 | " 512 438.0 165.9" 231 | ] 232 | }, 233 | { 234 | "output_type": "stream", 235 | "stream": "stdout", 236 | "text": [ 237 | "\n", 238 | "1024 645.9 175.9" 239 | ] 240 | }, 241 | { 242 | "output_type": "stream", 243 | "stream": "stdout", 244 | "text": [ 245 | "\n" 246 | ] 247 | } 248 | ], 249 | "prompt_number": 5 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "\n", 256 | "\n", 257 | "These tests were run on the loopback interface on a fast 8-core machine\n", 258 | "with 4 engines and slightly tuned non-default config (msgpack for serialization, TaskScheduler.hwm=0).\n", 259 | "\n", 260 | "The hwm optimization is the most important for performance of these benchmarks.\n", 261 | "\n", 262 | "\n", 263 | "The tests were done with the Python scheduler and pure-zmq 
scheduler,\n", 264 | "and with/without an SSH tunnel. We can see that the Python scheduler can\n", 265 | "do about 800 tasks/sec, while the pure-zmq scheduler gets an extra\n", 266 | "factor of two, at around 1.5k tasks/sec roundtrip. Purely outgoing - the\n", 267 | "time before the Client code can go on working, is closer to 4k msgs/sec\n", 268 | "sent. Using an SSH tunnel does not significantly impact performance, as\n", 269 | "long as you have a few tasks to line up.\n", 270 | "\n", 271 | "Running the same test on a dedicated cluster with up to 128 CPUs shows\n", 272 | "that IPython does scale reasonably well.\n", 273 | "\n", 274 | "" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "## Throughput\n", 282 | "\n", 283 | "Echoing numpy arrays is similar to the latency test, but scaling the\n", 284 | "array size instead of the number of messages tests the limits when there\n", 285 | "is data to be transferred." 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "collapsed": true, 291 | "input": [ 292 | "def test_throughput(v, n, s):\n", 293 | " A = np.random.random(s/8) # doubles are 8B\n", 294 | " tic = time.time()\n", 295 | " echo = lambda x: x\n", 296 | " tic = time.time()\n", 297 | " for i in xrange(n):\n", 298 | " v.apply_async(echo, A)\n", 299 | " toc = time.time()\n", 300 | " v.wait()\n", 301 | " tac = time.time()\n", 302 | " sent = toc-tic\n", 303 | " roundtrip = tac-tic\n", 304 | " return sent, roundtrip" 305 | ], 306 | "language": "python", 307 | "metadata": {}, 308 | "outputs": [], 309 | "prompt_number": 6 310 | }, 311 | { 312 | "cell_type": "code", 313 | "collapsed": false, 314 | "input": [ 315 | "n = 128\n", 316 | "for sz in [1e1,1e2,1e3,1e4,1e5,5e5,1e6,2e6]:\n", 317 | " # short rest between tests\n", 318 | " time.sleep(1)\n", 319 | " s,rt = test_throughput(view, n, int(sz))\n", 320 | " print \"%8i %6.1f t/s %6.1f t/s %9.3f Mbps\" % (sz,n/s,n/rt, 1e-6*sz*n/rt)" 321 | ], 322 | "language": 
"python", 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "output_type": "stream", 327 | "stream": "stdout", 328 | "text": [ 329 | " 10 1125.6 t/s 285.6 t/s 0.003 Mbps\n", 330 | " 100 967.2 t/s 281.8 t/s 0.028 Mbps" 331 | ] 332 | }, 333 | { 334 | "output_type": "stream", 335 | "stream": "stdout", 336 | "text": [ 337 | "\n", 338 | " 1000 1246.3 t/s 281.6 t/s 0.282 Mbps" 339 | ] 340 | }, 341 | { 342 | "output_type": "stream", 343 | "stream": "stdout", 344 | "text": [ 345 | "\n", 346 | " 10000 1285.6 t/s 265.3 t/s 2.653 Mbps" 347 | ] 348 | }, 349 | { 350 | "output_type": "stream", 351 | "stream": "stdout", 352 | "text": [ 353 | "\n", 354 | " 100000 404.8 t/s 206.1 t/s 20.606 Mbps" 355 | ] 356 | }, 357 | { 358 | "output_type": "stream", 359 | "stream": "stdout", 360 | "text": [ 361 | "\n", 362 | " 500000 294.2 t/s 159.6 t/s 79.822 Mbps" 363 | ] 364 | }, 365 | { 366 | "output_type": "stream", 367 | "stream": "stdout", 368 | "text": [ 369 | "\n", 370 | " 1000000 328.4 t/s 108.8 t/s 108.802 Mbps" 371 | ] 372 | }, 373 | { 374 | "output_type": "stream", 375 | "stream": "stdout", 376 | "text": [ 377 | "\n", 378 | " 2000000 425.9 t/s 83.4 t/s 166.755 Mbps" 379 | ] 380 | }, 381 | { 382 | "output_type": "stream", 383 | "stream": "stdout", 384 | "text": [ 385 | "\n" 386 | ] 387 | } 388 | ], 389 | "prompt_number": 7 390 | }, 391 | { 392 | "cell_type": "code", 393 | "collapsed": false, 394 | "input": [ 395 | "n = 128\n", 396 | "for sz in [1e1,1e2,1e3,1e4,1e5,5e5,1e6,2e6]:\n", 397 | " # short rest between tests\n", 398 | " time.sleep(1)\n", 399 | " s,rt = test_throughput(view, n, int(sz))\n", 400 | " print \"%8i %6.1f t/s %6.1f t/s %9.3f Mbps\" % (sz,n/s,n/rt, 1e-6*sz*n/rt)" 401 | ], 402 | "language": "python", 403 | "metadata": {}, 404 | "outputs": [ 405 | { 406 | "output_type": "stream", 407 | "stream": "stdout", 408 | "text": [ 409 | " 10 1278.9 t/s 303.4 t/s 0.003 Mbps\n", 410 | " 100 1339.5 t/s 301.8 t/s 0.030 Mbps" 411 | ] 412 | }, 413 | { 414 | "output_type": 
"stream", 415 | "stream": "stdout", 416 | "text": [ 417 | "\n", 418 | " 1000 784.0 t/s 265.0 t/s 0.265 Mbps" 419 | ] 420 | }, 421 | { 422 | "output_type": "stream", 423 | "stream": "stdout", 424 | "text": [ 425 | "\n", 426 | " 10000 1083.7 t/s 274.0 t/s 2.740 Mbps" 427 | ] 428 | }, 429 | { 430 | "output_type": "stream", 431 | "stream": "stdout", 432 | "text": [ 433 | "\n", 434 | " 100000 336.1 t/s 143.6 t/s 14.357 Mbps" 435 | ] 436 | }, 437 | { 438 | "output_type": "stream", 439 | "stream": "stdout", 440 | "text": [ 441 | "\n", 442 | " 500000 289.4 t/s 150.9 t/s 75.453 Mbps" 443 | ] 444 | }, 445 | { 446 | "output_type": "stream", 447 | "stream": "stdout", 448 | "text": [ 449 | "\n", 450 | " 1000000 226.7 t/s 102.6 t/s 102.639 Mbps" 451 | ] 452 | }, 453 | { 454 | "output_type": "stream", 455 | "stream": "stdout", 456 | "text": [ 457 | "\n", 458 | " 2000000 368.9 t/s 70.2 t/s 140.495 Mbps" 459 | ] 460 | }, 461 | { 462 | "output_type": "stream", 463 | "stream": "stdout", 464 | "text": [ 465 | "\n" 466 | ] 467 | } 468 | ], 469 | "prompt_number": 8 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": {}, 474 | "source": [ 475 | "\n", 476 | "\n", 477 | "Note that the dotted lines, which measure the time it took to *send* the\n", 478 | "arrays is *not* a function of the message size. This is again thanks to\n", 479 | "pyzmq's non-copying sends. 
Locally, we can send 100 4MB arrays in ~50\n", 480 | "ms, and libzmq will take care of actually transmitting the data while we\n", 481 | "can go on working.\n", 482 | "\n", 483 | "Plotting the same data, scaled by message size shows that we are\n", 484 | "saturating the connection at ~1Gbps with ~10kB messages when using\n", 485 | "SSH, and ~10Gbps with ~50kB messages when not using SSH.\n", 486 | "\n", 487 | "" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "## Map\n", 495 | "\n", 496 | "Another useful test is seeing how fast \\`view.map\\` is, for various\n", 497 | "numbers of tasks and for tasks of varying size.\n", 498 | "\n", 499 | "These tests were done on [AWS](http://aws.amazon.com/) extra-large\n", 500 | "instances with the help of\n", 501 | "[StarCluster](http://web.mit.edu/stardev/cluster/), so the IO and CPU\n", 502 | "performance are quite low compared to a physical cluster." 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "collapsed": true, 508 | "input": [ 509 | "def test_map(v,dt,n):\n", 510 | " ts = [dt]*n\n", 511 | " tic = time.time()\n", 512 | " amr = v.map_async(time.sleep, ts)\n", 513 | " toc = time.time()\n", 514 | " amr.get()\n", 515 | " tac = time.time()\n", 516 | " sent = toc-tic\n", 517 | " roundtrip = tac-tic\n", 518 | " return sent, roundtrip" 519 | ], 520 | "language": "python", 521 | "metadata": {}, 522 | "outputs": [], 523 | "prompt_number": 9 524 | }, 525 | { 526 | "cell_type": "code", 527 | "collapsed": false, 528 | "input": [ 529 | "n = len(rc.ids) * 16\n", 530 | "for dt in np.logspace(-3,0,7):\n", 531 | " time.sleep(0.5)\n", 532 | " s,rt = test_map(view, dt, n)\n", 533 | " print \"%4ims %5.1f%%\" % (1000*dt, 1600*dt / rt)" 534 | ], 535 | "language": "python", 536 | "metadata": {}, 537 | "outputs": [ 538 | { 539 | "output_type": "stream", 540 | "stream": "stdout", 541 | "text": [ 542 | " 1ms 7.6%\n", 543 | " 3ms 17.5%" 544 | ] 545 | }, 546 | { 547 | "output_type": 
"stream", 548 | "stream": "stdout", 549 | "text": [ 550 | "\n", 551 | " 10ms 63.1%" 552 | ] 553 | }, 554 | { 555 | "output_type": "stream", 556 | "stream": "stdout", 557 | "text": [ 558 | "\n", 559 | " 31ms 85.5%" 560 | ] 561 | }, 562 | { 563 | "output_type": "stream", 564 | "stream": "stdout", 565 | "text": [ 566 | "\n", 567 | " 100ms 95.8%" 568 | ] 569 | }, 570 | { 571 | "output_type": "stream", 572 | "stream": "stdout", 573 | "text": [ 574 | "\n", 575 | " 316ms 98.7%" 576 | ] 577 | }, 578 | { 579 | "output_type": "stream", 580 | "stream": "stdout", 581 | "text": [ 582 | "\n", 583 | "1000ms 99.6%" 584 | ] 585 | }, 586 | { 587 | "output_type": "stream", 588 | "stream": "stdout", 589 | "text": [ 590 | "\n" 591 | ] 592 | } 593 | ], 594 | "prompt_number": 10 595 | }, 596 | { 597 | "cell_type": "markdown", 598 | "metadata": {}, 599 | "source": [ 600 | "\n", 601 | "\n", 602 | "This shows runs for jobs ranging from 1 to 128 ms, on 4,31,and 63\n", 603 | "engines. On this system, millisecond jobs are clearly too small, but by\n", 604 | "the time individual tasks are \\> 100 ms, IPython overhead is negligible.\n", 605 | "\n", 606 | "Now let's see how we use it for remote execution." 607 | ] 608 | } 609 | ], 610 | "metadata": {} 611 | } 612 | ] 613 | } --------------------------------------------------------------------------------