├── notebooks ├── exercises │ ├── __init__.py │ ├── snippets │ ├── 07.Vectorize.Exercises.ipynb │ ├── 03.Numba.Types.Demo.ipynb │ ├── 02.Intro.to.JIT.exercises.ipynb │ ├── 05.Cavity.Flow.Exercises.ipynb │ ├── 04.Direct.Summation.Exercises.ipynb │ └── 08.GUVectorize.Exercises.ipynb ├── snippets │ ├── __init__.py │ ├── logit.py │ ├── nbody │ │ ├── distance.py │ │ ├── direct_sum.py │ │ └── create_n.py │ ├── clip.py │ ├── guvectorize │ │ ├── ht_signature.py │ │ └── ht_timeloop.py │ ├── ppe_numba.py │ └── ns_helper.py ├── IC.pickle ├── bad_script.py ├── 06.2.optional.Test.Compiled.Module.ipynb ├── 06.1.optional.Compile.Module.ipynb ├── 02.Intro.to.jit.ipynb ├── 10.optional.Numba.and.ipyparallel.ipynb ├── 08.Make.generalized.ufuncs.ipynb ├── 05.0.A.Breakneck.Introduction.to.CFD.ipynb ├── 03.How.Numba.Works.ipynb ├── 05.2.optional.Numba.v.Cython.v.Fortran.ipynb ├── 09.Tips.and.FAQ.ipynb ├── 01.When.where.to.use.Numba.ipynb ├── 04.Direct.Summation.ipynb ├── 05.1.Cavity_Flow.ipynb ├── 07.Make.your.own.ufuncs.ipynb └── figures │ └── 2d_full_weighting_detail.svg ├── ppe_compile_module ├── __init__.py ├── setup.py └── main.py ├── .gitignore ├── slides ├── tutorial-vid.png ├── images │ └── SciPy2016-Logo-450x120-01.25.png └── Intro.ipynb ├── environment.yml ├── requirements.txt ├── LICENSE.md ├── check_install.py └── README.md /notebooks/exercises/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/snippets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ppe_compile_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/exercises/snippets: -------------------------------------------------------------------------------- 1 | ../snippets -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.f90 3 | *.lprof 4 | .ipynb_checkpoints 5 | __pycache__ 6 | 7 | notebooks/fail.html 8 | -------------------------------------------------------------------------------- /notebooks/IC.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barbagroup/numba_tutorial_scipy2016/master/notebooks/IC.pickle -------------------------------------------------------------------------------- /slides/tutorial-vid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barbagroup/numba_tutorial_scipy2016/master/slides/tutorial-vid.png -------------------------------------------------------------------------------- /notebooks/snippets/logit.py: -------------------------------------------------------------------------------- 1 | @vectorize([float64(float64)]) 2 | def logit(a): 3 | return math.log(a / (1 - a)) 4 | 5 | a = numpy.linspace(.1, .9, 9) 6 | -------------------------------------------------------------------------------- /slides/images/SciPy2016-Logo-450x120-01.25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barbagroup/numba_tutorial_scipy2016/master/slides/images/SciPy2016-Logo-450x120-01.25.png -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: numbatutorial 2 | dependencies: 3 | - python=3.5 4 | - numpy 5 | - matplotlib 6 | - numba=0.26.0 7 | - jupyter 8 | - ipython 9 | - pip: 10 | - line-profiler==2.0 11 | -------------------------------------------------------------------------------- /notebooks/snippets/nbody/distance.py: -------------------------------------------------------------------------------- 1 | @njit 2 | def distance(part1, part2): 3 | '''calculate the distance between two particles''' 4 | return ((part1.x - part2.x)**2 + 5 | (part1.y - part2.y)**2 + 6 | (part1.z - part2.z)**2)**.5 7 | -------------------------------------------------------------------------------- /notebooks/snippets/nbody/direct_sum.py: -------------------------------------------------------------------------------- 1 | @njit 2 | def direct_sum(particles): 3 | for i, target in enumerate(particles): 4 | for j, source in enumerate(particles): 5 | if i != j: 6 | r = distance(target, source) 7 | target.phi += source.m / r 8 | 9 | return particles 10 | -------------------------------------------------------------------------------- /notebooks/snippets/clip.py: -------------------------------------------------------------------------------- 1 | def truncate(a, amin, amax): 2 | if a < amin: 3 | a = amin 4 | elif a > amax: 5 | a = amax 6 | return a 7 | 8 | vec_truncate_serial = vectorize(['float64(float64, float64, float64)'])(truncate) 9 | vec_truncate_par = vectorize(['float64(float64, float64, float64)'], target='parallel')(truncate) 10 | -------------------------------------------------------------------------------- /ppe_compile_module/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | from main import cc 5 | 6 | def main(): 7 | """The main entry point.""" 8 | skw = dict( 9 | name='ppe', 10 | license='MIT', 11 | ext_modules=[cc.distutils_extension()], 12 | ) 13 | setup(**skw) 14 | 15 | 16 | if __name__ == '__main__': 17 | main() 18 | -------------------------------------------------------------------------------- /notebooks/bad_script.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from time import sleep 3 | 4 | def bad_call(dude): 5 | sleep(.5) 6 | 7 | def worse_call(dude): 8 | sleep(1) 9 | 10 | def sumulate(foo): 11 | if not isinstance(foo, int): 12 | return 13 | 14 | a = numpy.random.random((1000, 1000)) 15 | a @ a 16 | ans = 0 17 | for i in range(foo): 18 | ans += i 19 | 20 | bad_call(ans) 21 | worse_call(ans) 22 | 23 | return ans 24 | 25 | if __name__ == '__main__': 26 | sumulate(15) 27 | -------------------------------------------------------------------------------- /notebooks/snippets/nbody/create_n.py: -------------------------------------------------------------------------------- 1 | @njit 2 | def create_n_random_particles(n, m, domain=1): 3 | ''' 4 | Creates `n` particles with mass `m` with random coordinates 5 | between 0 and `domain` 6 | ''' 7 | parts = numpy.zeros((n), dtype=particle_dtype) 8 | #attribute access only in @jitted function 9 | for p in parts: 10 | p.x = numpy.random.random() * domain 11 | p.y = numpy.random.random() * domain 12 | p.z = numpy.random.random() * domain 13 | p.m = m 14 | p.phi = 0 15 | return parts 16 | -------------------------------------------------------------------------------- /notebooks/snippets/guvectorize/ht_signature.py: -------------------------------------------------------------------------------- 1 | @guvectorize(['float64[:,:], float64, float64, float64, float64[:,:]'], 2 | '(m,m),(),(),()->(m,m)', nopython=True) 3 | def ftcs(T, alpha, dt, dx, Tn): 4 | I, J = T.shape 5 | for i in range(1, I - 1): 6 | for j in range(1, J - 1): 7 | Tn[i, j] = (T[i, j] + 8 | alpha * 9 | (dt / dx**2 * (T[i + 1, j] - 2 * T[i, j] + T[i - 1, j]) + 10 | dt / dx**2 * (T[i, j + 1] - 2 * T[i, j] + T[i, j - 1]))) 11 | 12 | for i in range(I): 13 | Tn[i, 0] = T[i, 0] 14 | Tn[i, J - 1] = Tn[i, J - 2] 15 | 16 | for j in range(J): 17 | Tn[0, j] = T[0, j] 18 | Tn[I - 1, j] = Tn[I - 2, j] -------------------------------------------------------------------------------- /notebooks/snippets/guvectorize/ht_timeloop.py: -------------------------------------------------------------------------------- 1 | @guvectorize(['float64[:,:], float64, float64, float64, int64, float64[:,:]'], 2 | '(m,m),(),(),(),()->(m,m)', nopython=True) 3 | def ftcs_loop(T, alpha, dt, dx, nt, Tn): 4 | I, J = T.shape 5 | for n in range(nt): 6 | for i in range(1, I - 1): 7 | for j in range(1, J - 1): 8 | Tn[i,j] = (T[i, j] + 9 | alpha * 10 | (dt/dx**2 * (T[i + 1, j] - 2*T[i, j] + T[i - 1, j]) + 11 | dt/dx**2 * (T[i, j + 1] - 2*T[i, j] + T[i, j - 1]))) 12 | 13 | for i in range(I): 14 | Tn[i, 0] = T[i, 0] 15 | Tn[i, J - 1] = Tn[i, J - 2] 16 | 17 | for j in range(J): 18 | Tn[0, j] = T[0, j] 19 | Tn[I - 1, j] = Tn[I - 2, j] 20 | 21 | T = Tn.copy() -------------------------------------------------------------------------------- /notebooks/snippets/ppe_numba.py: -------------------------------------------------------------------------------- 1 | @jit(nopython=True) 2 | def pressure_poisson(p, b, l2_target): 3 | I, J = b.shape 4 | 5 | iter_diff = l2_target + 1 6 | 7 | n = 0 8 | while iter_diff > l2_target and n <= 500: 9 | pn = p.copy() 10 | for i in range(1, I - 1): 11 | for j in range(1, J - 1): 12 | p[i, j] = (.25 * (pn[i, j + 1] + 13 | pn[i, j - 1] + 14 | pn[i + 1, j] + 15 | pn[i - 1, j]) - 16 | b[i, j]) 17 | 18 | for i in range(I): 19 | p[i, 0] = p[i, 1] 20 | p[i, -1] = 0 21 | 22 | for j in range(J): 23 | p[0, j] = p[1, j] 24 | p[-1, j] = p[-2, j] 25 | 26 | if n % 10 == 0: 27 | iter_diff = numpy.sqrt(numpy.sum((p - pn)**2)/numpy.sum(pn**2)) 28 | 29 | n += 1 30 | 31 | return p 32 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | backports.shutil-get-terminal-size==1.0.0 2 | bokeh==0.11.1 3 | chest==0.2.3 4 | cloudpickle==0.2.1 5 | cycler==0.10.0 6 | Cython==0.24 7 | dask==0.10.0 8 | decorator==4.0.10 9 | HeapDict==1.0.0 10 | ipykernel==4.3.1 11 | ipython==4.2.0 12 | ipython-genutils==0.1.0 13 | ipywidgets==4.1.1 14 | Jinja2==2.8 15 | jsonschema==2.5.1 16 | jupyter==1.0.0 17 | jupyter-client==4.3.0 18 | jupyter-console==4.1.1 19 | jupyter-core==4.1.0 20 | line-profiler==1.0 21 | llvmlite==0.11.0 22 | locket==0.2.0 23 | MarkupSafe==0.23 24 | matplotlib==1.5.1 25 | mistune==0.7.2 26 | nbconvert==4.1.0 27 | nbformat==4.0.1 28 | notebook==4.2.1 29 | numba==0.26.0 30 | numpy==1.11.0 31 | pandas==0.18.1 32 | partd==0.3.4 33 | path.py==0.0.0 34 | pexpect==4.0.1 35 | pickleshare==0.7.2 36 | ptyprocess==0.5.1 37 | Pygments==2.1.3 38 | pyparsing==2.1.4 39 | python-dateutil==2.5.3 40 | pytz==2016.4 41 | PyYAML==3.11 42 | pyzmq==15.2.0 43 | qtconsole==4.2.1 44 | requests==2.10.0 45 | simplegeneric==0.8.1 46 | six==1.10.0 47 | terminado==0.6 48 | toolz==0.8.0 49 | tornado==4.3 50 | traitlets==4.2.1 51 | -------------------------------------------------------------------------------- /ppe_compile_module/main.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from math import sqrt 3 | from numba.pycc import CC 4 | 5 | cc = CC('ppe') 6 | 7 | @cc.export('pressure_poisson', 8 | 'f8[:,:](f8[:,:], f8[:,:], f8)') 9 | def pressure_poisson(p, b, l2_target): 10 | pn = p.copy() 11 | I, J = b.shape 12 | 13 | iter_diff = l2_target + 1 14 | 15 | n = 0 16 | while iter_diff > l2_target and n <= 500: 17 | pn = p.copy() 18 | for i in range(1, I - 1): 19 | for j in range(1, J - 1): 20 | p[i, j] = (.25 * (pn[i, j + 1] + 21 | pn[i, j - 1] + 22 | pn[i + 1, j] + 23 | pn[i - 1, j]) - 24 | b[i, j]) 25 | 26 | for i in range(I): 27 | p[i, 0] = p[i, 1] 28 | p[i, -1] = 0 29 | 30 | for j in range(J): 31 | p[0, j] = p[1, j] 32 | p[-1, j] = p[-2, j] 33 | 34 | if n % 10 == 0: 35 | iter_diff = sqrt(numpy.sum((p - pn)**2)/numpy.sum(pn**2)) 36 | 37 | n += 1 38 | 39 | return p 40 | 41 | cc.compile() 42 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | All code samples released under 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2016 Gilbert Forsyth, Lorena Barba 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | 25 | All content is licensed under a [http://creativecommons.org/licenses/by/4.0/](Creative Commons Attribution 4.0 International License) (CC-BY) 26 | 27 | Copyright (c) 2016 Gilbert Forsyth, Lorena Barba 28 | -------------------------------------------------------------------------------- /notebooks/exercises/07.Vectorize.Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Exercise: Clipping an array" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Yes, NumPy has a `clip` ufunc already, but let's pretend it doesn't. \n", 15 | "\n", 16 | "Create a Numba vectorized ufunc that takes a vector `a`, a lower limit `amin` and an upper limit `amax`. It should return the vector `a` with all values clipped such that $a_{min} < a < a_{max}$\n" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "from numba import vectorize" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Exercise: Create `logit` ufunc" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "Recall from above that this is a ufunc which performs this operation:\n", 51 | "\n", 52 | "$$f(a) = \\log \\left(\\frac{a}{1-a}\\right)$$" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [] 63 | } 64 | ], 65 | "metadata": { 66 | "kernelspec": { 67 | "display_name": "Python 3", 68 | "language": "python", 69 | "name": "python3" 70 | }, 71 | "language_info": { 72 | "codemirror_mode": { 73 | "name": "ipython", 74 | "version": 3 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python", 80 | "pygments_lexer": "ipython3", 81 | "version": "3.5.1" 82 | } 83 | }, 84 | "nbformat": 4, 85 | "nbformat_minor": 0 86 | } 87 | -------------------------------------------------------------------------------- /notebooks/exercises/03.Numba.Types.Demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Numba type inference" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "from numba import jit" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "@jit\n", 30 | "def add(a, b):\n", 31 | " return a + b" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "add(1., 1.)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "add.inspect_types()" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "add(1, 1)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "add.inspect_types()" 76 | ] 77 | } 78 | ], 79 | "metadata": { 80 | "kernelspec": { 81 | "display_name": "Python 3", 82 | "language": "python", 83 | "name": "python3" 84 | }, 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython", 88 | "version": 3 89 | }, 90 | "file_extension": ".py", 91 | "mimetype": "text/x-python", 92 | "name": "python", 93 | "nbconvert_exporter": "python", 94 | "pygments_lexer": "ipython3", 95 | "version": "3.5.1" 96 | } 97 | }, 98 | "nbformat": 4, 99 | "nbformat_minor": 0 100 | } 101 | -------------------------------------------------------------------------------- /check_install.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import sys 3 | from warnings import warn 4 | 5 | onpy2 = False 6 | 7 | try: 8 | assert sys.version_info >= (3,0) 9 | import importlib.util 10 | except AssertionError: 11 | warn('This tutorial is written for Python 3. Legacy Python is not explicitly supported.') 12 | onpy2 = True 13 | 14 | def tuple_version(version): 15 | return tuple(int(x) for x in version.strip('<>+-=.').split('.')) 16 | 17 | def check_versions(): 18 | version_trouble=False 19 | numba = importlib.import_module('numba') 20 | numba_version = tuple_version(numba.__version__) 21 | if numba_version < (0, 26, 0): 22 | print('Please update Numba to version 0.26.0') 23 | version_trouble=True 24 | 25 | mpl = importlib.import_module('matplotlib') 26 | mpl_version = tuple_version(mpl.__version__) 27 | if mpl_version < (1, 5, 0): 28 | print('Please update matplotlib to version 1.5.0 or higher') 29 | version_trouble=True 30 | 31 | return version_trouble 32 | 33 | def main(): 34 | required_modules = ['numpy', 'matplotlib', 'jupyter', 35 | 'numba', 'llvmlite', 'line_profiler', 'IPython',] 36 | missing_modules = [] 37 | for mod in required_modules: 38 | if not onpy2: 39 | spec = importlib.util.find_spec(mod) 40 | if spec is None: 41 | missing_modules.append(mod) 42 | else: 43 | try: 44 | importlib.import_module(mod) 45 | except ImportError: 46 | missing_modules.append(mod) 47 | 48 | if missing_modules: 49 | print('The following modules are required but not installed:') 50 | print(' {}'.format(', '.join(missing_modules))) 51 | print('\nYou can install them using conda by running:') 52 | print('\n conda install {}'.format(' '.join(missing_modules))) 53 | print('\nOr you can install them using pip by running:') 54 | print('\n pip install {}'.format(' '.join(missing_modules))) 55 | else: 56 | if check_versions(): 57 | print('All packages are installed but at least one needs updating') 58 | else: 59 | print('Everything looks good!') 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /notebooks/06.2.optional.Test.Compiled.Module.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy\n", 12 | "from ppe import pressure_poisson" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "from snippets.ns_helper import cavity_flow, quiver_plot, velocity_term" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "%matplotlib inline" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "import pickle" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "def run_cavity():\n", 57 | " with open('IC.pickle', 'rb') as f:\n", 58 | " u, v, p, b = pickle.load(f)\n", 59 | "\n", 60 | " nx = 41\n", 61 | " dx = 2 / (nx - 1)\n", 62 | " dt = .005\n", 63 | "\n", 64 | " nt = 1000\n", 65 | " u, v, p = cavity_flow(u, v, p, nt, dt, dx, \n", 66 | " velocity_term, \n", 67 | " pressure_poisson, \n", 68 | " rtol=1e-4)\n", 69 | " \n", 70 | " return u, v, p" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "u, v, p = run_cavity()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "quiver_plot(u, v, p)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "'numba' in dir()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "%timeit run_cavity()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "collapsed": true 121 | }, 122 | "source": [ 123 | "## Installing AoT compiled modules with `setup.py`" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": true 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "%load ../ppe_compile_module/main.py" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": true 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "%load ../ppe_compile_module/setup.py" 146 | ] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 3 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython3", 165 | "version": "3.5.1" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 0 170 | } 171 | -------------------------------------------------------------------------------- /notebooks/snippets/ns_helper.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from matplotlib import pyplot, cm 3 | 4 | def cavity_flow(u, v, p, nt, dt, dx, velocity_term, pressure_poisson, rho=1, nu=0.1, rtol=1e-3): 5 | ''' 6 | Solves the Navier-Stokes equations for a lid-driven cavity using 7 | finite-differences on a collocated grid in 2D 8 | 9 | Parameters 10 | ---------- 11 | u : floats; discretized velocity in x-direction 12 | v : floats; discretized velocity in y-direction 13 | p : floats; discretized pressure field 14 | nt : int; number of timesteps to run 15 | dt : float; size of individual timestep 16 | dx : float; spacing of individual grid points 17 | rho : float; density 18 | nu : float; kinematic viscosity 19 | rtol : float; relative tolerance to solve between successive iterations 20 | in pressure_poisson solver 21 | ''' 22 | 23 | un = numpy.empty_like(u) 24 | vn = numpy.empty_like(v) 25 | nx, ny = u.shape 26 | b = numpy.zeros((nx, ny)) 27 | 28 | for n in range(nt): 29 | un = u.copy() 30 | vn = v.copy() 31 | 32 | b = velocity_term(b, rho, dt, u, v, dx) 33 | p = pressure_poisson(p, b, rtol) 34 | 35 | u[1:-1,1:-1] = (un[1:-1, 1:-1] - dt / dx * 36 | (un[1:-1,1:-1] * (un[1:-1, 1:-1] - 37 | un[:-2, 1:-1]) + 38 | vn[1:-1, 1:-1] * (un[1:-1, 1:-1] - 39 | un[1:-1, :-2]) + 40 | 1 / (2 * rho) * (p[2:, 1:-1] - 41 | p[:-2, 1:-1])) + 42 | nu * dt / dx**2 * 43 | (un[2:, 1:-1] + 44 | un[:-2, 1:-1] + 45 | un[1:-1, 2:] + 46 | un[1:-1, :-2] - 47 | 4 * un[1:-1, 1:-1])) 48 | 49 | v[1:-1,1:-1] = (vn[1:-1, 1:-1] - dt / dx * 50 | (un[1:-1, 1:-1] * (vn[1:-1, 1:-1] - 51 | vn[:-2, 1:-1]) + 52 | vn[1:-1, 1:-1 ] * (vn[1:-1, 1:-1] - 53 | vn[1:-1, :-2]) + 54 | 1 / (2 * rho) * (p[1:-1, 2:] - 55 | p[1:-1, :-2])) + 56 | nu * dt / dx**2 * 57 | (vn[2:, 1:-1] + 58 | vn[:-2, 1:-1] + 59 | vn[1:-1, 2:] + 60 | vn[1:-1, :-2] - 61 | 4 * vn[1:-1, 1:-1])) 62 | 63 | u[0, :] = 0 64 | u[-1, :] = 0 65 | v[0, :] = 0 66 | v[-1, :] = 0 67 | u[:, 0] = 0 68 | u[:, -1] = 1 #set velocity on cavity lid equal to 1 69 | v[:, 0] = 0 70 | v[:, -1] = 0 71 | 72 | return u, v, p 73 | 74 | 75 | def quiver_plot(u, v, p, nx=41): 76 | nx = 41 77 | ny = nx 78 | x = numpy.linspace(0, 2, nx) 79 | y = numpy.linspace(0, 2, ny) 80 | X, Y = numpy.meshgrid(x, y, indexing='ij') 81 | 82 | quiver_skip = qs = 4 83 | pyplot.figure(figsize=(11, 7), dpi=100) 84 | pyplot.contourf(X, Y, p, alpha=0.5, cmap=cm.viridis) 85 | pyplot.colorbar() 86 | pyplot.contour(X, Y, p) 87 | pyplot.quiver(X[::qs, ::qs], Y[::qs, ::qs], u[::qs, ::qs], v[::qs, ::qs]) 88 | pyplot.xlabel('$x$', fontsize=18) 89 | pyplot.ylabel('$y$', fontsize=18) 90 | 91 | 92 | def velocity_term(b, rho, dt, u, v, dx): 93 | b[1:-1, 1:-1] = ( 94 | rho * dx / 16 * 95 | (2 / dt * (u[2:, 1:-1] - 96 | u[:-2, 1:-1] + 97 | v[1:-1, 2:] - 98 | v[1:-1, :-2]) - 99 | 2 / dx * (u[1:-1, 2:] - u[1:-1, :-2]) * 100 | (v[2:, 1:-1] - v[:-2, 1:-1]) - 101 | (u[2:, 1:-1] - u[:-2, 1:-1])**2 / dx - 102 | (v[1:-1, 2:] - v[1:-1, :-2])**2 / dx) 103 | ) 104 | 105 | return b 106 | -------------------------------------------------------------------------------- /notebooks/exercises/02.Intro.to.JIT.exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# JIT Exercise" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Everyone likes fractals! (right...?)\n", 15 | "\n", 16 | "Use `jit` (either in function or decorator form) to speed up the Mandelbrot code below.\n", 17 | "\n", 18 | "**Note**: the call to run the `create_fractal` function is commented out because it takes around ~15s to run on a new-ish i7 (which means that a `%timeit` run takes ~45s. You have been warned). " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "from numba import jit\n", 30 | "import numpy\n", 31 | "\n", 32 | "from matplotlib import pyplot, cm\n", 33 | "%matplotlib inline" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "def mandel(x, y, max_iters):\n", 45 | " \"\"\"\n", 46 | " Given the real and imaginary parts of a complex number,\n", 47 | " determine if it is a candidate for membership in the Mandelbrot\n", 48 | " set given a fixed number of iterations.\n", 49 | " \"\"\"\n", 50 | " i = 0\n", 51 | " c = complex(x, y)\n", 52 | " z = 0.0j\n", 53 | " for i in range(max_iters):\n", 54 | " z = z * z + c\n", 55 | " if (z.real * z.real + z.imag * z.imag) >= 4:\n", 56 | " return i\n", 57 | "\n", 58 | " return 255\n", 59 | "\n", 60 | "def create_fractal(min_x, max_x, min_y, max_y, image, iters):\n", 61 | " height = image.shape[0]\n", 62 | " width = image.shape[1]\n", 63 | "\n", 64 | " pixel_size_x = (max_x - min_x) / width\n", 65 | " pixel_size_y = (max_y - min_y) / height\n", 66 | " for x in range(width):\n", 67 | " real = min_x + x * pixel_size_x\n", 68 | " for y in range(height):\n", 69 | " imag = min_y + y * pixel_size_y\n", 70 | " color = mandel(real, imag, iters)\n", 71 | " image[y, x] = color\n", 72 | "\n", 73 | " return image" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "#Uncomment these to run\n", 85 | "# image = numpy.zeros((500 * 2, 750 * 2), dtype=numpy.uint8)\n", 86 | "# image = create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "pyplot.figure(figsize=(10,8))\n", 98 | "pyplot.imshow(image, cmap=cm.viridis)\n", 99 | "pyplot.colorbar();" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "%%timeit\n", 111 | "image = numpy.zeros((500 * 2, 750 * 2), dtype=numpy.uint8)\n", 112 | "image = create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Lovingly forked from https://github.com/numba/numba/blob/master/examples/mandel.py" 120 | ] 121 | } 122 | ], 123 | "metadata": { 124 | "kernelspec": { 125 | "display_name": "Python 3", 126 | "language": "python", 127 | "name": "python3" 128 | }, 129 | "language_info": { 130 | "codemirror_mode": { 131 | "name": "ipython", 132 | "version": 3 133 | }, 134 | "file_extension": ".py", 135 | "mimetype": "text/x-python", 136 | "name": "python", 137 | "nbconvert_exporter": "python", 138 | "pygments_lexer": "ipython3", 139 | "version": "3.5.1" 140 | } 141 | }, 142 | "nbformat": 4, 143 | "nbformat_minor": 0 144 | } 145 | -------------------------------------------------------------------------------- /slides/Intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "## Numba: Tell those C++ bullies to get lost\n", 12 | "\n", 13 | "
\n", 14 | "### Gil Forsyth, Professor Lorena Barba\n", 15 | "#### The George Washington University\n", 16 | "\n", 17 | "\n", 18 | "

\n", 19 | "![scipylogo](./images/SciPy2016-Logo-450x120-01.25.png)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "slideshow": { 26 | "slide_type": "slide" 27 | } 28 | }, 29 | "source": [ 30 | "## Getting started\n", 31 | "\n", 32 | "If you haven't already done so, please clone the tutorial repository and fire up a Jupyter notebook server.\n", 33 | "\n", 34 | "```console\n", 35 | "git clone https://github.com/barbagroup/numba_tutorial_scipy2016.git\n", 36 | "\n", 37 | "cd numba_tutorial_scipy2016\n", 38 | "\n", 39 | "jupyter notebook\n", 40 | "```\n", 41 | "\n", 42 | "If you've already cloned the repository, do a quick `git pull` to make sure you grab a few changes we pushed a few days ago.\n", 43 | "\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": { 49 | "slideshow": { 50 | "slide_type": "slide" 51 | } 52 | }, 53 | "source": [ 54 | "## A quick note on style\n", 55 | "\n", 56 | "We use\n", 57 | "\n", 58 | "```python\n", 59 | "import numpy\n", 60 | "from matplotlib import pyplot\n", 61 | "```\n", 62 | "\n", 63 | "not\n", 64 | "\n", 65 | "```python\n", 66 | "import numpy as np\n", 67 | "import matplotlib.pyplot as plt\n", 68 | "```\n", 69 | "\n", 70 | "(sorry?) " 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": { 76 | "slideshow": { 77 | "slide_type": "slide" 78 | } 79 | }, 80 | "source": [ 81 | "## What is Numba?\n", 82 | "\n", 83 | "a JIT compiler for Python that:\n", 84 | "\n", 85 | "* generates optimized machine code using LLVM\n", 86 | "* integrates well with the Scientific Python stack\n", 87 | "* is totally awesome" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": { 93 | "slideshow": { 94 | "slide_type": "slide" 95 | } 96 | }, 97 | "source": [ 98 | "## Numba is _not_:\n", 99 | "\n", 100 | "* a 'full' JIT replacement of CPython a la `Pyston`, `PyPy`, `Pyjion`, etc...\n", 101 | "* magical\n", 102 | "\n", 103 | "but it is specifically designed for math-heavy Python code and it works (we think) very well." 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": { 109 | "slideshow": { 110 | "slide_type": "slide" 111 | } 112 | }, 113 | "source": [ 114 | "## Caveat\n", 115 | "\n", 116 | "YMMV. Numba's JIT compilation produces code optimized for your particular CPU model so you may see better or worse speedups compared to thsoe around you, depending on what kind of processor you have. " 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": { 122 | "slideshow": { 123 | "slide_type": "slide" 124 | } 125 | }, 126 | "source": [ 127 | "## Testing\n", 128 | "\n", 129 | "You should really,_really_ write tests when using Numba. " 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "slideshow": { 136 | "slide_type": "slide" 137 | } 138 | }, 139 | "source": [ 140 | "## One last note\n", 141 | "We will write a lot of loops. \n", 142 | "It will seem weird.\n", 143 | "\n", 144 | "(sorry)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [] 155 | } 156 | ], 157 | "metadata": { 158 | "celltoolbar": "Slideshow", 159 | "kernelspec": { 160 | "display_name": "Python 3", 161 | "language": "python", 162 | "name": "python3" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 3 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython3", 174 | "version": "3.5.1" 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 0 179 | } 180 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Numba: Tell those C++ bullies to get lost 2 | 3 | This is the repository for the Scipy 2016 tutorial. The tutorial will be presented as a set of Jupyter notebooks with exercises sprinkled throughout. 4 | 5 | 1. [Installation](#installation-instructions) 6 | 2. [Optional extras](#optional-extras) 7 | 3. [Check your installation](#check-installation) 8 | 9 | 10 | # Installation Instructions 11 | 12 | We _strongly_, *strongly*, __strongly__ encourage you to use `conda` to install the required packages for this tutorial. There are non-Python dependencies required that make manual installation or installing with `pip` very involved. 13 | 14 | Note also that this tutorial is written for Python 3.5. Most things will still work on Python 3.4. No guarantees of any kind are made that it will be compatible with Python 2. 15 | 16 | ## Regarding `matplotlib` 17 | 18 | This tutorial uses the Viridis colormap pretty much everywhere we can use a colormap. This colormap was first made available in matplotlib 1.5.0. Please upgrade if you have an earlier version installed. 19 | 20 | ## Installing with `conda` 21 | 22 | ### Option a) Create a new environment 23 | Download the `environment.yml` file in the root of this repository, e.g. 24 | 25 | ```console 26 | wget https://raw.githubusercontent.com/barbagroup/numba_tutorial_scipy2016/master/environment.yml 27 | ``` 28 | 29 | and then create the environment with 30 | 31 | ```console 32 | conda env create -f environment.yml 33 | ``` 34 | 35 | This will create a conda environment named `numbatutorial` with all of the required packages. 36 | 37 | You can activate the environment with 38 | 39 | ```console 40 | source activate numbatutorial 41 | ``` 42 | or on Windows: 43 | 44 | ```console 45 | activate numbatutorial 46 | ``` 47 | 48 | ### Option b) Install the required packages 49 | 50 | ```console 51 | conda install jupyter ipython numpy numba line_profiler matplotlib 52 | ``` 53 | 54 | ```console 55 | pip install line_profiler 56 | ``` 57 | 58 | **Note**: Do not use `conda` to install `line_profiler`; the version available in `conda` default channels is out of date. 59 | 60 | 61 | 62 | ## Installing with `pip` 63 | 64 | To install (specifically) Numba using `pip`, you need to have LLVM 3.7 installed on your machine with both libraries and header files. 65 | 66 | ### Prerequisites 67 | 68 | #### Ubuntu / Debian 69 | 70 | You should be able to do a 71 | 72 | ```console 73 | sudo apt-get install llvm-3.7-dev 74 | ``` 75 | 76 | You may also need to install `libedit-dev` 77 | 78 | #### Windows 79 | You can follow instructions here for getting LLVM installed on Windows. 80 | 81 | http://llvm.org/docs/GettingStartedVS.html 82 | 83 | #### OSX 84 | 85 | Install XCode which includes LLVM 86 | 87 | ### Install `llvmlite` 88 | 89 | If your `llvm-config` (or `llvm-config.exe`) file is in a non-standard location, set the `LLVM_CONFIG` environment variable to point at the `llvm-config` binary. 90 | 91 | Then 92 | 93 | ```console 94 | pip install llvmlite 95 | ``` 96 | 97 | If that installed successfully then you can continue to install the rest of the dependencies (which are must less fussy) 98 | 99 | ### Install everything else 100 | 101 | ```console 102 | pip install numpy matplotlib jupyter ipython numba line_profiler 103 | ``` 104 | 105 | or 106 | 107 | ```console 108 | pip install -r requirements.txt 109 | ``` 110 | 111 | # Optional extras 112 | 113 | No hands-on work requires these, but if you want to play with some of the examples. If you installed using either `environments.yml` or `requirements.txt` these are already installed. 114 | 115 | ```console 116 | conda install cython dask 117 | ``` 118 | 119 | ```console 120 | pip install cython dask 121 | ``` 122 | 123 | We recommend you also install the Jupyter notebook extensions. 124 | 125 | ```console 126 | pip install https://github.com/ipython-contrib/IPython-notebook-extensions/archive/master.zip --user 127 | ``` 128 | 129 | Once they are installed, start a notebook server 130 | 131 | ```console 132 | jupyter notebook 133 | ``` 134 | 135 | and (assuming port 8888) navigate to `http://localhost:8888/nbextensions` where you can choose which extensions to enable. One that is helpful (for us!) when using Numba in the notebook is the `Skip-Traceback` extension. You're welcome to enable whichever extensions you like (we're also fans of `Codefolding` and the `Comment/Uncomment Hotkey`). 136 | 137 | # Check Installation 138 | 139 | Once you have downloaded all of the requires libraries/packages, you can run the `check_install.py` script to confirm that everything is working as expected. Either download the file directly or clone this repository and then run 140 | 141 | ```console 142 | python check_install.py 143 | ``` 144 | 145 | # Video of the live tutorial 146 | 147 | Check out the video of the live tutorial at SciPy 2016 (filmed Monday 11 July). 148 | 149 | [![vid](./slides/tutorial-vid.png)](https://youtu.be/SzBi3xdEF2Y) 150 | -------------------------------------------------------------------------------- /notebooks/exercises/05.Cavity.Flow.Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise 1\n", 8 | "## JIT the pressure poisson equation\n", 9 | "The equation we need to unroll is given by \n", 10 | "\n", 11 | "\\begin{equation}\n", 12 | "p_{i,j}^{n} = \\frac{1}{4}\\left(p_{i+1,j}^{n}+p_{i-1,j}^{n}+p_{i,j+1}^{n}+p_{i,j-1}^{n}\\right) - b\n", 13 | "\\end{equation}\n", 14 | "\n", 15 | "and recall that `b` is already computed, so no need to worry about unrolling that. We've also filled in the boundary conditions, so don't worry about those. (don't forget to decorate your function!)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy\n", 27 | "from numba import jit" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "def pressure_poisson(p, b, l2_target=1e-4):\n", 39 | " I, J = b.shape\n", 40 | " \n", 41 | " iter_diff = l2_target + 1\n", 42 | " n = 0\n", 43 | " while iter_diff > l2_target and n <= 500\n", 44 | " pn = p.copy()\n", 45 | " \n", 46 | " #Your code here\n", 47 | " \n", 48 | " #boundary conditions\n", 49 | " for i in range(I):\n", 50 | " p[i, 0] = p[i, 1]\n", 51 | " p[i, -1] = 0\n", 52 | "\n", 53 | " for j in range(J):\n", 54 | " p[0, j] = p[1, j]\n", 55 | " p[-1, j] = p[-2, j]\n", 56 | "\n", 57 | " if n % 10 == 0:\n", 58 | " iter_diff = numpy.sqrt(numpy.sum((p - pn)**2)/numpy.sum(pn**2))\n", 59 | " \n", 60 | " n += 1\n", 61 | " \n", 62 | " return p" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "import pickle\n", 74 | "from snippets.ns_helper import cavity_flow, velocity_term, quiver_plot" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "def run_cavity():\n", 86 | " nx = 41\n", 87 | " with open('IC.pickle', 'rb') as f:\n", 88 | " u, v, p, b = pickle.load(f)\n", 89 | "\n", 90 | " dx = 2 / (nx - 1)\n", 91 | " dt = .005\n", 92 | " nt = 1000\n", 93 | " \n", 94 | " u, v, p = cavity_flow(u, v, p, nt, dt, dx,\n", 95 | " velocity_term,\n", 96 | " pressure_poisson,\n", 97 | " rtol=1e-4)\n", 98 | "\n", 99 | " return u, v, p" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "un, vn, pn = run_cavity()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": true 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "%timeit run_cavity()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "with open('numpy_ans.pickle', 'rb') as f:\n", 133 | " u, v, p = pickle.load(f)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "assert numpy.allclose(u, un)\n", 145 | "assert numpy.allclose(v, vn)\n", 146 | "assert numpy.allclose(p, pn)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "# Exercise 2 (optional)\n", 154 | "\n", 155 | "Finish early? Just want to try more stuff? \n", 156 | "\n", 157 | "This line is not super efficient: \n", 158 | "\n", 159 | "```python\n", 160 | "iter_diff = numpy.sqrt(numpy.sum((p - pn)**2)/numpy.sum(pn**2))\n", 161 | "```\n", 162 | "\n", 163 | "Try rewriting it using a jitted function and see what kind of performance gain you can get." 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.5.1" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 0 197 | } 198 | -------------------------------------------------------------------------------- /notebooks/06.1.optional.Compile.Module.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Precompiling Numba modules" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "One issue with Numba is that it can be hard to install. With `conda` it's incredibly easy, but not everybody uses `conda` and trying to explain to users/collaborators why they're doing everything wrong is hard. \n", 15 | "\n", 16 | "Tools like SWIG can compile C/C++ (or other) code at install time and make it available as a Python module if there's some serious numerical heavy-lifting required. \n", 17 | "\n", 18 | "But if you have ever tried to use SWIG together with NumPy... suffice to say it's a less than ideal arrangement (please don't hurt me, @dabeaz)." 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "source": [ 27 | "## Ahead-of-Time compilation\n", 28 | "\n", 29 | "While Numba's main use is in JIT compiling, they do provide tools for doing AOT compilation. This pre-compiled module does not rely on Numba, only on NumPy. (If you are working with collaborators who don't have NumPy installed, I can't help you)." 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "We need to import `numpy`, of course, and also `numba.pycc.CC`" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "import numpy\n", 48 | "from math import sqrt\n", 49 | "from numba.pycc import CC" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "Name the module `ppe` (I am not creative)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "cc = CC('ppe')\n", 68 | "cc.verbose = True" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "@cc.export('pressure_poisson', \n", 80 | " 'f8[:,:](f8[:,:], f8[:,:], f8)')\n", 81 | "def pressure_poisson(p, b, l2_target):\n", 82 | " I, J = b.shape\n", 83 | "\n", 84 | " iter_diff = l2_target + 1\n", 85 | "\n", 86 | " n = 0\n", 87 | " while iter_diff > l2_target and n <= 500:\n", 88 | " pn = p.copy()\n", 89 | " for i in range(1, I - 1):\n", 90 | " for j in range(1, J - 1):\n", 91 | " p[i, j] = (.25 * (pn[i, j + 1] +\n", 92 | " pn[i, j - 1] +\n", 93 | " pn[i + 1, j] +\n", 94 | " pn[i - 1, j]) -\n", 95 | " b[i, j])\n", 96 | "\n", 97 | " for i in range(I):\n", 98 | " p[i, 0] = p[i, 1]\n", 99 | " p[i, -1] = 0\n", 100 | "\n", 101 | " for j in range(J):\n", 102 | " p[0, j] = p[1, j]\n", 103 | " p[-1, j] = p[-2, j]\n", 104 | "\n", 105 | " if n % 10 == 0:\n", 106 | " iter_diff = sqrt(numpy.sum((p - pn)**2)/numpy.sum(pn**2))\n", 107 | "\n", 108 | " n += 1\n", 109 | "\n", 110 | " return p\n" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "**Note:** Each function in the module can be compiled with one type signature only. You can specify multiple types, each with its own function name, e.g.\n", 118 | "\n", 119 | "```python\n", 120 | "@cc.export('pressure_poisson_single', \n", 121 | " 'f4[:,:](f4[:,:], f4[:,:], f4)')\n", 122 | "@cc.export('pressure_poisson_double', \n", 123 | " 'f8[:,:](f8[:,:], f8[:,:], f8)')\n", 124 | "@cc.export('pressure_poisson_quad', \n", 125 | " 'f16[:,:](f16[:,:], f16[:,:], f16)')\n", 126 | "def pressure_poisson(p, b, l2_target=1e-4):\n", 127 | "```" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "cc.compile()" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "%ls" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "Python 3", 165 | "language": "python", 166 | "name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.5.1" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 0 183 | } 184 | -------------------------------------------------------------------------------- /notebooks/exercises/04.Direct.Summation.Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy\n", 12 | "from numba import njit" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "particle_dtype = numpy.dtype({'names':['x','y','z','m','phi'], \n", 24 | " 'formats':[numpy.double, \n", 25 | " numpy.double, \n", 26 | " numpy.double, \n", 27 | " numpy.double, \n", 28 | " numpy.double]})" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# Exercise 1\n", 36 | "\n", 37 | "Write a function `create_n_random_particles` that takes the arguments `n` (number of particles), `m` (mass of every particle) and a domain within to generate a random number (as in the class above).\n", 38 | "It should create an array with `n` elements and `dtype=particle_dtype` and then return that array.\n", 39 | "\n", 40 | "For each particle, the mass should be initialized to the value of `m` and the potential `phi` initialized to zero.\n", 41 | "\n", 42 | "For the `x` component of a given particle `p`, you might do something like\n", 43 | "\n", 44 | "```python\n", 45 | "p['x'] = domain * numpy.random.random()\n", 46 | "```" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "def create_n_random_particles(n, m, domain=1):\n", 58 | " '''\n", 59 | " Creates `n` particles with mass `m` with random coordinates\n", 60 | " between 0 and `domain`\n", 61 | " '''\n", 62 | " parts = numpy.zeros((n), dtype=particle_dtype)\n", 63 | " \n", 64 | " #your code here\n", 65 | "\n", 66 | " return parts" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "Test it out!" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "parts = create_n_random_particles(1000, .001, 1)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# Exercise 2" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "Write a JITted function `distance` to calculate the distance between two particles of dtype `particle_dtype`" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "Here's the `distance` method from the `Particle` class as a reference:\n", 106 | "\n", 107 | "```python\n", 108 | "def distance(self, other):\n", 109 | " return ((self.x - other.x)**2 + \n", 110 | " (self.y - other.y)**2 + \n", 111 | " (self.z - other.z)**2)**.5\n", 112 | "```" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": true 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "def distance(part1, part2):\n", 124 | " '''calculate the distance between two particles'''\n", 125 | " \n", 126 | " # your code here" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "Try it out!" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "distance(parts[0], parts[1])" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "# Exercise 3" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "Modify the original `direct_sum` function (copied below for reference) to instead work a NumPy array of particles. Loop over each element in the array and calculate its total potential.\n", 159 | "\n", 160 | "```python\n", 161 | "def direct_sum(particles):\n", 162 | " \"\"\"\n", 163 | " Calculate the potential at each particle\n", 164 | " using direct summation method.\n", 165 | "\n", 166 | " Arguments:\n", 167 | " particles: the list of particles\n", 168 | "\n", 169 | " \"\"\"\n", 170 | " for i, target in enumerate(particles):\n", 171 | " for source in (particles[:i] + particles[i+1:]):\n", 172 | " r = target.distance(source)\n", 173 | " target.phi += source.m / r\n", 174 | "```" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": true 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "def direct_sum(particles):\n", 186 | " # take it away" 187 | ] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3", 193 | "language": "python", 194 | "name": "python3" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.5.1" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 0 211 | } 212 | -------------------------------------------------------------------------------- /notebooks/02.Intro.to.jit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using `jit`\n", 8 | "\n", 9 | "We know how to find hotspots now, how do we improve their performance?\n", 10 | "\n", 11 | "We `jit` them!\n", 12 | "\n", 13 | "We'll start with a trivial example but get to some more realistic applications shortly." 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Array sum\n", 21 | "\n", 22 | "The function below is a naive `sum` function that sums all the elements of a given array." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def sum_array(inp):\n", 34 | " J, I = inp.shape\n", 35 | " \n", 36 | " #this is a bad idea\n", 37 | " mysum = 0\n", 38 | " for j in range(J):\n", 39 | " for i in range(I):\n", 40 | " mysum += inp[j, i]\n", 41 | " \n", 42 | " return mysum" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "import numpy" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "arr = numpy.random.random((300, 300))" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "sum_array(arr)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "plain = %timeit -o sum_array(arr)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "# Let's get started" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": true 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "from numba import jit" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## As a function call" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": true 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "sum_array_numba = jit()(sum_array)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "What's up with the weird double `()`s? We'll cover that in a little bit." 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "sum_array_numba(arr)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "jitted = %timeit -o sum_array_numba(arr)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "plain.best / jitted.best" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## (more commonly) As a decorator" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": true 177 | }, 178 | "outputs": [], 179 | "source": [ 180 | "@jit\n", 181 | "def sum_array(inp):\n", 182 | " I, J = inp.shape\n", 183 | " \n", 184 | " mysum = 0\n", 185 | " for i in range(I):\n", 186 | " for j in range(J):\n", 187 | " mysum += inp[i, j]\n", 188 | " \n", 189 | " return mysum" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "sum_array(arr)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "%timeit sum_array(arr)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## How does this compare to NumPy?" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": false 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "%timeit arr.sum()" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "## When does `numba` compile things?" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "The first time you call the function. " 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## [Your turn!](./exercises/02.Intro.to.JIT.exercises.ipynb#JIT-Exercise)" 251 | ] 252 | } 253 | ], 254 | "metadata": { 255 | "kernelspec": { 256 | "display_name": "Python 3", 257 | "language": "python", 258 | "name": "python3" 259 | }, 260 | "language_info": { 261 | "codemirror_mode": { 262 | "name": "ipython", 263 | "version": 3 264 | }, 265 | "file_extension": ".py", 266 | "mimetype": "text/x-python", 267 | "name": "python", 268 | "nbconvert_exporter": "python", 269 | "pygments_lexer": "ipython3", 270 | "version": "3.5.1" 271 | } 272 | }, 273 | "nbformat": 4, 274 | "nbformat_minor": 0 275 | } 276 | -------------------------------------------------------------------------------- /notebooks/10.optional.Numba.and.ipyparallel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using `numba` with `ipyparallel`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "First make sure `ipyparallel` is installed \n", 15 | "\n", 16 | "```console\n", 17 | "conda install ipyparallel\n", 18 | "```\n", 19 | "\n", 20 | "Then spin up a local 'cluster' with \n", 21 | "\n", 22 | "```console\n", 23 | "ipcluster start -n 4\n", 24 | "```\n", 25 | "\n", 26 | "(assuming you have 4 cores)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": true 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "from ipyparallel import Client" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "Check that you have 4 clients (or however many workers you have started with `ipcluster`)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "rc = Client()\n", 56 | "rc.ids" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "Create a 'direct view' of all workers" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "dv = rc[:]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Mandelbrot serial pure python" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "Define pure Python mandelbrot function" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "def mandel(x, y):\n", 100 | " max_iters = 20\n", 101 | " c = complex(x, y)\n", 102 | " z = 0.0j\n", 103 | " for i in range(max_iters):\n", 104 | " z = z * z + c\n", 105 | " if z.real * z.real + z.imag * z.imag >= 4:\n", 106 | " return i\n", 107 | " return 255" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Mandelbrot `ipyparallel` pure python" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Add the `dv.parallel` decorator to enable it in parallel. (We're also enabling blocking here, for simplicity)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "@dv.parallel(block=True)\n", 133 | "def mandel_par(x, y):\n", 134 | " max_iters = 20\n", 135 | " c = complex(x, y)\n", 136 | " z = 0.0j\n", 137 | " for i in range(max_iters):\n", 138 | " z = z * z + c\n", 139 | " if z.real * z.real + z.imag * z.imag >= 4:\n", 140 | " return i\n", 141 | " return 255" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "## Mandelbrot `ipyparallel` & `numba`" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "To use `numba` with the parallel version, simply stack the decorators" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "from numba import jit" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "collapsed": true 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "@dv.parallel(block=True)\n", 178 | "@jit(nopython=True)\n", 179 | "def mandel_par_numba(x, y):\n", 180 | " max_iters = 20\n", 181 | " c = complex(x, y)\n", 182 | " z = 0.0j\n", 183 | " for i in range(max_iters):\n", 184 | " z = z * z + c\n", 185 | " if z.real * z.real + z.imag * z.imag >= 4:\n", 186 | " return i\n", 187 | " return 255" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": true 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "import numpy" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": true 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "x = numpy.arange(-2, 1, 0.005)\n", 210 | "y = numpy.arange(-1, 1, 0.005)\n", 211 | "X, Y = numpy.meshgrid(x, y)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## 1 core" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": false 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "%%time\n", 230 | "im = numpy.reshape(list(map(mandel, X.ravel(), Y.ravel())), (len(y), len(x)))" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## 4 cores with `ipyparallel`" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "%%time\n", 249 | "im_par = numpy.reshape(mandel_par.map(X.ravel(), Y.ravel()), (len(y), len(x)))" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "## 4 cores with `ipyparallel` and `numba`" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": { 263 | "collapsed": false 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "%%time\n", 268 | "im_par_numba = numpy.reshape(mandel_par_numba.map(X.ravel(), Y.ravel()), (len(y), len(x)))" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": true 276 | }, 277 | "outputs": [], 278 | "source": [ 279 | "from matplotlib import pyplot, cm\n", 280 | "%matplotlib inline" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "fig, axes = pyplot.subplots(1, 3, figsize=(12, 4))\n", 292 | "axes[0].imshow(im, cmap=cm.viridis)\n", 293 | "axes[1].imshow(im_par, cmap=cm.viridis)\n", 294 | "axes[2].imshow(im_par_numba, cmap=cm.viridis)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "Original `ipyparallel` example taken from Duke's [Computational Statistics in Python](http://people.duke.edu/~ccc14/sta-663-2016/19C_IPyParallel.html#Example:-Use-the-@parallel-decorator-to-speed-up-Mandelbrot-calculations) course" 302 | ] 303 | } 304 | ], 305 | "metadata": { 306 | "kernelspec": { 307 | "display_name": "Python 3", 308 | "language": "python", 309 | "name": "python3" 310 | }, 311 | "language_info": { 312 | "codemirror_mode": { 313 | "name": "ipython", 314 | "version": 3 315 | }, 316 | "file_extension": ".py", 317 | "mimetype": "text/x-python", 318 | "name": "python", 319 | "nbconvert_exporter": "python", 320 | "pygments_lexer": "ipython3", 321 | "version": "3.5.1" 322 | } 323 | }, 324 | "nbformat": 4, 325 | "nbformat_minor": 0 326 | } 327 | -------------------------------------------------------------------------------- /notebooks/exercises/08.GUVectorize.Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exercise: 2D Heat Transfer signature" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Below is a function `ftcs` that uses second-order finite differences to solve a heat transfer problem. \n", 15 | "\n", 16 | "Add the `guvectorize` decorator (or function call!) with the appropriate signature and input/output layout so that the code block runs. \n", 17 | "The types of the arguments are:\n", 18 | "\n", 19 | "1. `T` - 2d array of floats\n", 20 | "2. `alpha` - float\n", 21 | "3. `dt` - float\n", 22 | "4. `dx` - float\n", 23 | "5. `nt` - int\n", 24 | "6. `Tn` - 2d array of floats\n", 25 | "\n", 26 | "`T` and `Tn` have the same dimensions and are square." 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": true 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "import numpy\n", 38 | "from numba import guvectorize, jit" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "def ftcs(T, alpha, dt, dx, Tn):\n", 50 | " I, J = T.shape\n", 51 | " for i in range(1, I - 1):\n", 52 | " for j in range(1, J - 1):\n", 53 | " Tn[i, j] = (T[i, j] + \n", 54 | " alpha * \n", 55 | " (dt / dx**2 * (T[i + 1, j] - 2 * T[i, j] + T[i - 1, j]) + \n", 56 | " dt / dx**2 * (T[i, j + 1] - 2 * T[i, j] + T[i, j - 1])))\n", 57 | "\n", 58 | " for i in range(I):\n", 59 | " Tn[i, 0] = T[i, 0]\n", 60 | " Tn[i, J - 1] = Tn[i, J - 2]\n", 61 | "\n", 62 | " for j in range(J):\n", 63 | " Tn[0, j] = T[0, j]\n", 64 | " Tn[I - 1, j] = Tn[I - 2, j]" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "def run_ftcs():\n", 76 | " L = 1.0e-2\n", 77 | " nx = 101\n", 78 | " nt = 1000\n", 79 | " dx = L / (nx - 1)\n", 80 | " x = numpy.linspace(0, L, nx)\n", 81 | " alpha = .0001\n", 82 | " sigma = 0.25\n", 83 | " dt = sigma * dx**2 / alpha\n", 84 | "\n", 85 | " Ti = numpy.ones((nx, nx), dtype=numpy.float64)\n", 86 | " Ti[0,:]= 100\n", 87 | " Ti[:,0] = 100\n", 88 | "\n", 89 | " for t in range(nt):\n", 90 | " Tn = ftcs(Ti, alpha, dt, dx)\n", 91 | " Ti = Tn.copy()\n", 92 | " \n", 93 | " return Tn, x" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": true 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "# %load snippets/guvectorize/ht_signature.py" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "Tn, x = run_ftcs()" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "%timeit run_ftcs()" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": true 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "from matplotlib import pyplot, cm\n", 138 | "%matplotlib inline" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "pyplot.figure(figsize=(8, 8))\n", 150 | "mx, my = numpy.meshgrid(x, x, indexing='ij')\n", 151 | "pyplot.contourf(mx, my, Tn, 20, cmap=cm.viridis)\n", 152 | "pyplot.axis('equal');" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "# Exercise: 2D Heat Transfer Time loop" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "The example above loops in time outside of the `vectorize`d function. That means it's looping in vanilla Python which is not the fastest thing in the world. \n", 167 | "\n", 168 | "What to do? \n", 169 | "\n", 170 | "Let's move the time loop inside the function.\n", 171 | "\n", 172 | "You need to modify the parameter list to include the number of timesteps, `nt`, add the `guvectorize` decorator, and also adjust the signature according to the new paramters. \n", 173 | "\n", 174 | "Make sure to add the `copy()` statement within the time loop." 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "def ftcs_loop(T, alpha, dt, dx, Tn):\n", 186 | " I, J = T.shape\n", 187 | " for i in range(1, I - 1):\n", 188 | " for j in range(1, J - 1):\n", 189 | " Tn[i, j] = (T[i, j] + \n", 190 | " alpha * \n", 191 | " (dt / dx**2 * (T[i + 1, j] - 2 * T[i, j] + T[i - 1, j]) + \n", 192 | " dt / dx**2 * (T[i, j + 1] - 2 * T[i, j] + T[i, j - 1])))\n", 193 | "\n", 194 | " for i in range(I):\n", 195 | " Tn[i, 0] = T[i, 0]\n", 196 | " Tn[i, J - 1] = Tn[i, J - 2]\n", 197 | "\n", 198 | " for j in range(J):\n", 199 | " Tn[0, j] = T[0, j]\n", 200 | " Tn[I - 1, j] = Tn[I - 2, j]" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "def run_ftcs_vecloop():\n", 212 | " L = 1.0e-2\n", 213 | " nx = 101\n", 214 | " nt = 1000\n", 215 | " dx = L / (nx - 1)\n", 216 | " x = numpy.linspace(0, L, nx)\n", 217 | " alpha = .0001\n", 218 | " sigma = 0.25\n", 219 | " dt = sigma * dx**2 / alpha\n", 220 | "\n", 221 | " Ti = numpy.ones((nx, nx), dtype=numpy.float64)\n", 222 | " Ti[0,:]= 100\n", 223 | " Ti[:,0] = 100\n", 224 | "\n", 225 | " Tn = ftcs_loop(Ti, alpha, dt, dx, nt)\n", 226 | " \n", 227 | " return Tn, x" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "collapsed": true 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "# %load snippets/guvectorize/ht_timeloop.py" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "collapsed": false 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "Tn, x = run_ftcs_vecloop()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "collapsed": false 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "%timeit run_ftcs_vecloop()" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "collapsed": false 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "pyplot.figure(figsize=(8, 8))\n", 272 | "pyplot.contourf(mx, my, Tn, 20, cmap=cm.viridis)\n", 273 | "pyplot.axis('equal');" 274 | ] 275 | } 276 | ], 277 | "metadata": { 278 | "kernelspec": { 279 | "display_name": "Python 3", 280 | "language": "python", 281 | "name": "python3" 282 | }, 283 | "language_info": { 284 | "codemirror_mode": { 285 | "name": "ipython", 286 | "version": 3 287 | }, 288 | "file_extension": ".py", 289 | "mimetype": "text/x-python", 290 | "name": "python", 291 | "nbconvert_exporter": "python", 292 | "pygments_lexer": "ipython3", 293 | "version": "3.5.1" 294 | } 295 | }, 296 | "nbformat": 4, 297 | "nbformat_minor": 0 298 | } 299 | -------------------------------------------------------------------------------- /notebooks/08.Make.generalized.ufuncs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Generalized ufuncs" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "We've just seen how to make our own ufuncs using `vectorize`, but what if we need something that can operate on an input array in any way that is not element-wise?\n", 15 | "\n", 16 | "Enter `guvectorize`. \n", 17 | "\n", 18 | "There are several important differences between `vectorize` and `guvectorize` that bear close examination. Let's take a look at a few simple examples." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy\n", 30 | "from numba import guvectorize" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "@guvectorize('int64[:], int64, int64[:]', '(n),()->(n)')\n", 42 | "def g(x, y, result):\n", 43 | " for i in range(x.shape[0]):\n", 44 | " result[i] = x[i] + y" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "* Declaration of input/output layouts\n", 52 | "* No return statements" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "x = numpy.arange(10)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "In the cell below we call the function `g` with a preallocated array for the result." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "result = numpy.zeros_like(x)\n", 82 | "result = g(x, 5, result)\n", 83 | "print(result)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "But wait! We can still call `g` as if it were defined as `def g(x, y)`\n", 91 | "\n", 92 | "```python\n", 93 | "res = g(x, 5)\n", 94 | "print(res)\n", 95 | "```\n", 96 | "\n", 97 | "We don't recommend this as it can have unintended consequences if some of the elements of the `results` array are not operated on by the function `g`. (The advantage is that you can preserve existing interfaces to previously written functions)." 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "@guvectorize('float64[:,:], float64[:,:], float64[:,:]', \n", 109 | " '(m,n),(n,p)->(m,p)')\n", 110 | "def matmul(A, B, C):\n", 111 | " m, n = A.shape\n", 112 | " n, p = B.shape\n", 113 | " for i in range(m):\n", 114 | " for j in range(p):\n", 115 | " C[i, j] = 0\n", 116 | " for k in range(n):\n", 117 | " C[i, j] += A[i, k] * B[k, j]" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "a = numpy.random.random((500, 500))" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "out = matmul(a, a, numpy.zeros_like(a))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "%timeit matmul(a, a, numpy.zeros_like(a))" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "%timeit a @ a" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "And it also supports the `target` keyword argument" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [], 178 | "source": [ 179 | "def g(x, y, res):\n", 180 | " for i in range(x.shape[0]):\n", 181 | " res[i] = x[i] + numpy.exp(y)\n", 182 | " \n", 183 | "g_serial = guvectorize('float64[:], float64, float64[:]', \n", 184 | " '(n),()->(n)')(g)\n", 185 | "g_par = guvectorize('float64[:], float64, float64[:]', \n", 186 | " '(n),()->(n)', target='parallel')(g)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "%timeit res = g_serial(numpy.arange(1000000).reshape(1000, 1000), 3)\n", 198 | "%timeit res = g_par(numpy.arange(1000000).reshape(1000, 1000), 3)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "## [Exercise: Writing signatures](./exercises/08.GUVectorize.Exercises.ipynb#Exercise:-2D-Heat-Transfer-signature)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "What's up with these boundary conditions?\n", 213 | "\n", 214 | "```python\n", 215 | "for i in range(I):\n", 216 | " Tn[i, 0] = T[i, 0]\n", 217 | " Tn[i, J - 1] = Tn[i, J - 2]\n", 218 | "\n", 219 | " for j in range(J):\n", 220 | " Tn[0, j] = T[0, j]\n", 221 | " Tn[I - 1, j] = Tn[I - 2, j]\n", 222 | "```\n", 223 | "\n", 224 | "We don't pass in `Tn` explicitly, which means Numba allocates it for us (thanks!) but it's allocated using `numpy.empty_like` so if we don't touch every value in `Tn` in the function, those empty values will stick around and cause trouble. \n", 225 | "\n", 226 | "Solutions? The one above, or pass it in explicitly after doing something like `Tn = Ti.copy()`" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": { 232 | "collapsed": true 233 | }, 234 | "source": [ 235 | "## [Exercise: Remove the vanilla loops](./exercises/08.GUVectorize.Exercises.ipynb#Exercise:-2D-Heat-Transfer-Time-loop)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "The example above loops in time outside of the `vectorize`d function. That means it's looping in vanilla Python which is not the fastest thing in the world. \n", 243 | "\n", 244 | "Move the time loop inside the function." 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "## Demo: Why not `jit` the `run_ftcs` function?" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "Because, at the moment, it won't work. (bummer)." 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": { 265 | "collapsed": true 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "@guvectorize('float64[:,:], float64[:,:]', '(n,n)->(n,n)')\n", 270 | "def gucopy(a, b):\n", 271 | " I, J = a.shape\n", 272 | " for i in range(I):\n", 273 | " for j in range(J):\n", 274 | " b[i, j] = a[i, j]" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": { 281 | "collapsed": true 282 | }, 283 | "outputs": [], 284 | "source": [ 285 | "from numba import jit" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "collapsed": true 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "@jit\n", 297 | "def make_a_copy():\n", 298 | " a = numpy.random.random((25,25))\n", 299 | " b = gucopy(a)\n", 300 | " \n", 301 | " return a, b" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": { 308 | "collapsed": false 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "a, b = make_a_copy()\n", 313 | "assert numpy.allclose(a, b)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": { 320 | "collapsed": false 321 | }, 322 | "outputs": [], 323 | "source": [ 324 | "make_a_copy.inspect_types()" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": { 331 | "collapsed": true 332 | }, 333 | "outputs": [], 334 | "source": [] 335 | } 336 | ], 337 | "metadata": { 338 | "kernelspec": { 339 | "display_name": "Python 3", 340 | "language": "python", 341 | "name": "python3" 342 | }, 343 | "language_info": { 344 | "codemirror_mode": { 345 | "name": "ipython", 346 | "version": 3 347 | }, 348 | "file_extension": ".py", 349 | "mimetype": "text/x-python", 350 | "name": "python", 351 | "nbconvert_exporter": "python", 352 | "pygments_lexer": "ipython3", 353 | "version": "3.5.1" 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 0 358 | } 359 | -------------------------------------------------------------------------------- /notebooks/05.0.A.Breakneck.Introduction.to.CFD.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Application - Pressure Poisson Equation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The momentum equation for the velocity field $\\vec{u}$ in a fluid is\n", 15 | "\n", 16 | "\\begin{equation}\n", 17 | "\\frac{\\partial \\vec{u}}{\\partial t}+(\\vec{u}\\cdot\\nabla)\\vec{u}=-\\frac{1}{\\rho}\\nabla p + \\nu \\nabla^2\\vec{u}\n", 18 | "\\end{equation}\n", 19 | "\n", 20 | "where $p$ is the pressure, $\\nu$ is the fluid viscosity and $\\rho$ is the fluid density. With three velocity components, plus the pressure, we have four unknowns but only three equations. For compressible fluids, we have an equation of state to complete the system. In the incompressible case, we don't have an equation of state and we need an additional constraint from somewhere else. \n", 21 | "\n", 22 | "This is what we do: take the divergence of the momentum equation, apply the incompressibility constraint to cancel some terms, and get an equation for the pressure. It's a pretty cool trick." 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "Conservation of mass for an incompressible fluid requires that the divergence of $\\vec{u}$ must be zero:\n", 30 | "\n", 31 | "$$\\nabla \\cdot \\vec{u} = 0$$" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "Writing out the momentum equation in $x$ and $y$ components (for two-dimensional flow), we get" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "$$\\frac{\\partial u}{\\partial t}+u\\frac{\\partial u}{\\partial x}+v\\frac{\\partial u}{\\partial y} = -\\frac{1}{\\rho}\\frac{\\partial p}{\\partial x}+\\nu \\left(\\frac{\\partial^2 u}{\\partial x^2}+\\frac{\\partial^2 u}{\\partial y^2} \\right) $$\n", 46 | "\n", 47 | "\n", 48 | "$$\\frac{\\partial v}{\\partial t}+u\\frac{\\partial v}{\\partial x}+v\\frac{\\partial v}{\\partial y} = -\\frac{1}{\\rho}\\frac{\\partial p}{\\partial y}+\\nu\\left(\\frac{\\partial^2 v}{\\partial x^2}+\\frac{\\partial^2 v}{\\partial y^2}\\right) $$" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "We take the divergence of the momentum equation and then apply the incompressibility constraint. After some wrangling and cancellations, this leaves us with the pressure Poisson equation:\n", 56 | "\n", 57 | "$$\\frac{\\partial^2 p}{\\partial x^2}+\\frac{\\partial^2 p}{\\partial y^2} = -\\rho\\left(\\frac{\\partial u}{\\partial x}\\frac{\\partial u}{\\partial x}+2\\frac{\\partial u}{\\partial y}\\frac{\\partial v}{\\partial x}+\\frac{\\partial v}{\\partial y}\\frac{\\partial v}{\\partial y} \\right)$$" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Which is an equation of the form" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "$$\\frac{\\partial ^2 p}{\\partial x^2} + \\frac{\\partial ^2 p}{\\partial y^2} = b$$" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Imagine we discretize a domain using a uniform mesh of points in each spatial direction, as in the figure below:" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Then the left-hand side of the Poisson equation, i.e., the Laplacian differential operator applied to $p$, is discretized using 2nd-order central differences as follows\n", 93 | "\n", 94 | "\n", 95 | "$$\\frac{p^n_{i+1, j} - 2p^n_{i,j} + p^n_{i - 1, j}}{\\Delta x ^2} + \\frac{p^n_{i, j+1} - 2p^n_{i,j} + p^n_{i, j-1}}{\\Delta y ^2}$$\n", 96 | "\n", 97 | "\n", 98 | "\n", 99 | "where subscripts $i,j$ denote the spatial location on a Cartesian coordinate system and superscripts $n$ denote a point in time." 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "We apply an appropriate finite-difference discretization to the momentum equation (forward-time, backward-space for the 1st-order terms) and also assume a uniform mesh, so $\\Delta x = \\Delta y$.\n", 107 | "\n", 108 | "\n", 109 | "Using this discretized form in the Poisson equation, we will leave only the $p_{i,j}$ terms in the left-hand side, and move the other terms to the right. Then we say that we can update all the values of $p_{i,j}$ using the values at the neighboring points for both $p$ and $u, v$. This update, repeated many times, happens to converge to the solution of Poisson's equation." 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "## Solution procedure" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "### Initial velocity field" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "We start with a velocity field in $u$ and $v$ at some timestep $n$. " 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Calculate pressure" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Then, we iteratively solve the Poisson equation for pressure, as described above. Starting with an initial guess, the values $p_{i,j}$ are updated using the neighboring values of $p$, $u$ and $v$ at $(i+1,j)$ and $(i,j+1)$. The updates can be written as follows, where the $k$ superscript denotes an iteration in 'pseudo-time':\n", 145 | "\n", 146 | "\n", 147 | "\\begin{align}\n", 148 | "p_{i,j}^{k+1} &= \\frac{1}{4}\\left(p_{i+1,j}^{k}+p_{i-1,j}^{k}+p_{i,j+1}^{k}+p_{i,j-1}^{k}\\right) \\\\\n", 149 | "&-\\frac{\\rho \\Delta x}{16} \\left( \\frac{2}{\\Delta t} \\left(u_{i+1,j} - u_{i-1,j} + v_{i,j+1} - v_{i,j-1}\\right) \\right . \\\\\n", 150 | "&-\\frac{2}{\\Delta x}\\left(u_{i,j+1} - u_{i,j-1} \\right) \\left(v_{i+1,j} - v_{i-1,j} \\right) \\\\\n", 151 | "&- \\left . \\frac{\\left(u_{i+1,j} - u_{i-1,j} \\right)^2}{\\Delta x} \n", 152 | "- \\frac{ \\left(v_{i,j+1} - v_{i,j-1} \\right)^2 }{\\Delta x} \\right) \\\\\n", 153 | "\\end{align}\n", 154 | "\n", 155 | "In other words, we repeatedly apply the Poisson equation until the pressure reaches a quasi-steady state." 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "### Update the velocity" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "Once the pressure field reaches its quasi-steady state via the Poisson equation, we use that field for the current time step, $p^n$, to solve for the velocity components $u$ and $v$ at the next timestep, $n+1$." 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "The momentum equation in the $u$ direction:\n", 177 | "\n", 178 | "\\begin{align}\n", 179 | "u_{i,j}^{n+1} = u_{i,j}^{n} &- \\frac{\\Delta t}{\\Delta x} \\left( u_{i,j}^{n}(u_{i,j}^{n}-u_{i-1,j}^{n})\n", 180 | "+ v_{i,j}^{n} (u_{i,j}^{n}-u_{i,j-1}^{n}) + \\frac{1}{2 \\rho}(p_{i+1,j}^{n}-p_{i-1,j}^{n}) \\right) \\\\\n", 181 | "&+\\frac{\\nu \\Delta t}{\\Delta x^2}\\left(u_{i+1,j}^{n} + u_{i-1,j}^{n} + u_{i,j+1}^{n} + u_{i,j-1}^{n} -4u_{i,j}^{n}\\right)\n", 182 | "\\end{align}\n", 183 | "\n", 184 | "The momentum equation in the $v$ direction:\n", 185 | "\n", 186 | "\\begin{align}\n", 187 | "v_{i,j}^{n+1} = v_{i,j}^{n} &- \\frac{\\Delta t}{\\Delta x} \\left( u_{i,j}^{n}(v_{i,j}^{n}-v_{i-1,j}^{n})\n", 188 | "+ v_{i,j}^{n} (v_{i,j}^{n}-v_{i,j-1}^{n}) + \\frac{1}{2 \\rho}(p_{i,j+1}^{n}-p_{i,j-1}^{n}) \\right) \\\\\n", 189 | "&+\\frac{\\nu \\Delta t}{\\Delta x^2}\\left(v_{i+1,j}^{n} + v_{i-1,j}^{n} + v_{i,j+1}^{n} + v_{i,j-1}^{n} -4v_{i,j}^{n}\\right)\n", 190 | "\\end{align}\n", 191 | "\n" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "Then, rinse and repeat." 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "## What we left out\n", 206 | "\n", 207 | "There are various subtleties that we left out here, to get quickly to the equations we need in the code. First, there are some variations on the form of the pressure Poisson equation, depending on what terms involving $\\nabla\\cdot\\vec{u}$ one chooses to cancel (this has caused long arguments in the literature!). Second, we say nothing about the boundary conditions, which can cause some trouble (and more arguments!). And third, we show only the simplest iterative method for solving the Poisson equation, which also happens to be the slowest to converge. This is just meant to be a pedagogical example and discussion of these subtleties would be part of a full-fledged CFD course." 208 | ] 209 | } 210 | ], 211 | "metadata": { 212 | "kernelspec": { 213 | "display_name": "Python 2", 214 | "language": "python", 215 | "name": "python2" 216 | }, 217 | "language_info": { 218 | "codemirror_mode": { 219 | "name": "ipython", 220 | "version": 2 221 | }, 222 | "file_extension": ".py", 223 | "mimetype": "text/x-python", 224 | "name": "python", 225 | "nbconvert_exporter": "python", 226 | "pygments_lexer": "ipython2", 227 | "version": "2.7.10" 228 | } 229 | }, 230 | "nbformat": 4, 231 | "nbformat_minor": 0 232 | } 233 | -------------------------------------------------------------------------------- /notebooks/03.How.Numba.Works.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "## Is this just magic? What is Numba doing to make code run quickly?" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "Let's define a trivial example function." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": true, 26 | "slideshow": { 27 | "slide_type": "slide" 28 | } 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "from numba import jit" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "collapsed": true, 40 | "slideshow": { 41 | "slide_type": "fragment" 42 | } 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "@jit\n", 47 | "def add(a, b):\n", 48 | " return a + b" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "add(1, 1)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "slide" 67 | } 68 | }, 69 | "source": [ 70 | "Numba examines Python bytecode and then translates this into an 'intermediate representation'. To view this IR, run (compile) `add` and you can access the `inspect_types` method." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": false, 78 | "scrolled": true, 79 | "slideshow": { 80 | "slide_type": "slide" 81 | } 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "add.inspect_types()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": { 91 | "slideshow": { 92 | "slide_type": "slide" 93 | } 94 | }, 95 | "source": [ 96 | "Ok. Numba is has correctly inferred the type of the arguments, defining things as `int64` and running smoothly. \n", 97 | "\n", 98 | "(What happens if you do `add(1., 1.)` and then `inspect_types`?)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "add(1., 1.)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "add.inspect_types()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "### What about the actual LLVM code?" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "You can see the actual LLVM code generated by Numba using the `inspect_llvm()` method. Since it's a `dict`, doing the following will be slightly more visually friendly." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "for k, v in add.inspect_llvm().items():\n", 146 | " print(k, v)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": { 152 | "slideshow": { 153 | "slide_type": "slide" 154 | } 155 | }, 156 | "source": [ 157 | "## But there's a caveat" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "Now, watch what happens when we try to do something that is natural in Python, but not particularly mathematically sound:" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "def add_strings(a, b):\n", 176 | " return a + b" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "add_strings_jit = jit()(add_strings)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": false 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "add_strings_jit('a', 'b')" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "It worked, but what does `inspect_types` tell us?" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": false 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "add_strings_jit.inspect_types()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": { 222 | "collapsed": true, 223 | "slideshow": { 224 | "slide_type": "slide" 225 | } 226 | }, 227 | "source": [ 228 | "## What's all this pyobject business? \n", 229 | "\n", 230 | "This means it has been compiled in `object` mode. This can be a faster than regular python if it can do loop lifting, but not that fast. \n", 231 | "We want those `pyobjects` to be `int64` or another type that can be inferred by Numba. Your best bet is forcing `nopython` mode: this will throw an error if Numba finds itself in object mode, so that you _know_ that it can't give you speed." 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": { 237 | "collapsed": true 238 | }, 239 | "source": [ 240 | "For the full list of supported Python and NumPy features in `nopython` mode, see the Numba documentation here: http://numba.pydata.org/numba-doc/latest/reference/pysupported.html" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "## Figuring out what isn't working" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "%%file nopython_failure.py\n", 259 | "from numba import jit\n", 260 | "\n", 261 | "@jit\n", 262 | "def add(a, b):\n", 263 | " for i in range(100):\n", 264 | " c = i\n", 265 | " f = i + 7\n", 266 | " l = c + f\n", 267 | " \n", 268 | " return a + b\n", 269 | "\n", 270 | "add('a', 'b')" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": { 277 | "collapsed": false 278 | }, 279 | "outputs": [], 280 | "source": [ 281 | "!numba --annotate-html fail.html nopython_failure.py" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "[fail.html](fail.html)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "## Forcing `nopython` mode" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "collapsed": false 303 | }, 304 | "outputs": [], 305 | "source": [ 306 | "add_strings_jit = jit(nopython=True)(add_strings)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": { 313 | "collapsed": false 314 | }, 315 | "outputs": [], 316 | "source": [ 317 | "add_strings_jit('a', 'b')" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": { 324 | "collapsed": true 325 | }, 326 | "outputs": [], 327 | "source": [ 328 | "from numba import njit" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": { 335 | "collapsed": false 336 | }, 337 | "outputs": [], 338 | "source": [ 339 | "add_strings_jit = njit(add_strings)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": { 346 | "collapsed": false 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "add_strings_jit('a', 'b')" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "## Other compilation flags" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "There are two other main compilation flags for `@jit`\n", 365 | "\n", 366 | "```python\n", 367 | "cache=True\n", 368 | "```\n", 369 | "\n", 370 | "if you don't want to always want to get dinged by the compilation time for every run. This will actually save the compiled function into something like a `pyc` file in your `__pycache__` directory, so even between sessions you should have nice fast performance." 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "```python\n", 378 | "nogil=True\n", 379 | "```\n", 380 | "\n", 381 | "This releases the GIL. Note, however, that it doesn't do anything else, like make your program threadsafe. You have to manage all of those things on your own (use `concurrent.futures`)." 382 | ] 383 | } 384 | ], 385 | "metadata": { 386 | "kernelspec": { 387 | "display_name": "Python 3", 388 | "language": "python", 389 | "name": "python3" 390 | }, 391 | "language_info": { 392 | "codemirror_mode": { 393 | "name": "ipython", 394 | "version": 3 395 | }, 396 | "file_extension": ".py", 397 | "mimetype": "text/x-python", 398 | "name": "python", 399 | "nbconvert_exporter": "python", 400 | "pygments_lexer": "ipython3", 401 | "version": "3.5.2" 402 | } 403 | }, 404 | "nbformat": 4, 405 | "nbformat_minor": 0 406 | } 407 | -------------------------------------------------------------------------------- /notebooks/05.2.optional.Numba.v.Cython.v.Fortran.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true, 8 | "deletable": true, 9 | "editable": true, 10 | "run_control": { 11 | "frozen": false, 12 | "read_only": false 13 | } 14 | }, 15 | "outputs": [], 16 | "source": [ 17 | "import numpy\n", 18 | "import pickle" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false, 26 | "deletable": true, 27 | "editable": true, 28 | "run_control": { 29 | "frozen": false, 30 | "read_only": false 31 | } 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "from snippets.ns_helper import cavity_flow, velocity_term" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": true, 43 | "deletable": true, 44 | "editable": true, 45 | "run_control": { 46 | "frozen": false, 47 | "read_only": false 48 | } 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "def run_cavity():\n", 53 | " nx = 41\n", 54 | " with open('IC.pickle', 'rb') as f:\n", 55 | " u, v, p, b = pickle.load(f)\n", 56 | "\n", 57 | " dx = 2 / (nx - 1)\n", 58 | " dt = .005\n", 59 | " nt = 1000\n", 60 | " \n", 61 | " u, v, p = cavity_flow(u, v, p, nt, dt, dx, \n", 62 | " velocity_term, \n", 63 | " pressure_poisson, \n", 64 | " rtol=1e-4)\n", 65 | " \n", 66 | " return u, v, p" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": true, 74 | "deletable": true, 75 | "editable": true, 76 | "run_control": { 77 | "frozen": false, 78 | "read_only": false 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "with open('numpy_ans.pickle', 'rb') as f:\n", 84 | " u, v, p = pickle.load(f)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": { 90 | "deletable": true, 91 | "editable": true 92 | }, 93 | "source": [ 94 | "## Other options for accelerating Python code" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": { 100 | "deletable": true, 101 | "editable": true 102 | }, 103 | "source": [ 104 | "## Cython" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "collapsed": false, 112 | "deletable": true, 113 | "editable": true, 114 | "run_control": { 115 | "frozen": false, 116 | "read_only": false 117 | } 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "%load_ext cython" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false, 129 | "deletable": true, 130 | "editable": true, 131 | "run_control": { 132 | "frozen": false, 133 | "read_only": false 134 | } 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "%%cython\n", 139 | "#%%cython -a\n", 140 | "\n", 141 | "cimport numpy\n", 142 | "cimport cython\n", 143 | "\n", 144 | "import numpy\n", 145 | "\n", 146 | "from libc.math cimport sqrt\n", 147 | "\n", 148 | "\n", 149 | "@cython.boundscheck(False)\n", 150 | "@cython.wraparound(False)\n", 151 | "@cython.cdivision(True)\n", 152 | "@cython.embedsignature(True)\n", 153 | "def pressure_poisson(numpy.ndarray[numpy.float_t, ndim=2] p,\n", 154 | " numpy.ndarray[numpy.float_t, ndim=2] b,\n", 155 | " double l2_target):\n", 156 | "\n", 157 | " cdef numpy.ndarray[numpy.float_t, ndim=2] pn = numpy.zeros_like(p)\n", 158 | " cdef int i, j, n\n", 159 | " cdef double s1, s2, iter_diff\n", 160 | " cdef int I = b.shape[0]\n", 161 | " cdef int J = b.shape[1]\n", 162 | "\n", 163 | " iter_diff = l2_target + 1\n", 164 | "\n", 165 | " n = 0\n", 166 | " while iter_diff > l2_target and n <= 500:\n", 167 | " pn = p.copy()\n", 168 | " for i in range(1, I - 1):\n", 169 | " for j in range(1, J - 1):\n", 170 | " p[i, j] = (.25 * (pn[i, j + 1] +\n", 171 | " pn[i, j - 1] +\n", 172 | " pn[i + 1, j] +\n", 173 | " pn[i - 1, j]) -\n", 174 | " b[i, j])\n", 175 | "\n", 176 | " for i in range(I):\n", 177 | " p[i, 0] = p[i, 1]\n", 178 | " p[i, J-1] = 0\n", 179 | "\n", 180 | " for j in range(J):\n", 181 | " p[0, j] = p[1, j]\n", 182 | " p[I-1, j] = p[I-2, j]\n", 183 | "\n", 184 | " if n % 10 == 0:\n", 185 | " s1 = 0.0\n", 186 | " s2 = 0.0\n", 187 | " for i in range(I):\n", 188 | " for j in range(J):\n", 189 | " s1 += (p[i, j] - pn[i, j])**2\n", 190 | " s2 += pn[i, j]**2\n", 191 | " iter_diff = sqrt(s1 / s2)\n", 192 | "\n", 193 | " n += 1\n", 194 | "\n", 195 | " return p" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": false, 203 | "deletable": true, 204 | "editable": true, 205 | "run_control": { 206 | "frozen": false, 207 | "read_only": false 208 | } 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "%timeit run_cavity()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": false, 220 | "deletable": true, 221 | "editable": true, 222 | "run_control": { 223 | "frozen": false, 224 | "read_only": false 225 | } 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "u_cy, v_cy, p_cy = run_cavity()\n", 230 | "assert numpy.allclose(u, u_cy)\n", 231 | "assert numpy.allclose(v, v_cy)\n", 232 | "assert numpy.allclose(p, p_cy)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": { 238 | "deletable": true, 239 | "editable": true 240 | }, 241 | "source": [ 242 | "## Fortran and `f2py`" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "collapsed": false, 250 | "deletable": true, 251 | "editable": true, 252 | "run_control": { 253 | "frozen": false, 254 | "read_only": false 255 | } 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "%load_ext fortranmagic" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": { 266 | "collapsed": false, 267 | "deletable": true, 268 | "editable": true, 269 | "run_control": { 270 | "frozen": false, 271 | "read_only": false 272 | } 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "%%fortran\n", 277 | "#%%fortran -vvv\n", 278 | "\n", 279 | "SUBROUTINE pressure_poisson(p, b, M, N, l2_target)\n", 280 | "IMPLICIT NONE\n", 281 | "\n", 282 | "INTEGER(4), INTENT(IN):: M, N\n", 283 | "REAL(8), INTENT(IN):: b(M, N), l2_target\n", 284 | "REAL(8), INTENT(INOUT):: p(M, N)\n", 285 | "REAL(8):: iter_diff, pn(M, N)\n", 286 | "INTEGER(4):: c\n", 287 | "\n", 288 | "!F2PY intent(inout):: b\n", 289 | "!F2PY intent(inplace, out):: p\n", 290 | "!F2PY real(8), optional, intent(in):: l2_target=1E-4\n", 291 | "!F2PY integer(4), intent(hide), depend(p):: m=shape(p, 0), n=shape(p, 1)\n", 292 | "\n", 293 | " c = 0\n", 294 | " iter_diff = l2_target + 1\n", 295 | " \n", 296 | " DO WHILE(iter_diff > l2_target)\n", 297 | " pn = p\n", 298 | " p(2:M-1, 2:N-1) = .25 * (pn(2:M-1, 3:N) + pn(2:M-1, 1:N-2) + &\n", 299 | " pn(3:M, 2:N-1) + pn(1:M-2, 2:N-1)) - b(2:M-1, 2:N-1)\n", 300 | " \n", 301 | " p(1:M, 1) = p(1:M, 2)\n", 302 | " p(1:M, N) = 0\n", 303 | " p(1, 1:N) = p(2, 1:N)\n", 304 | " p(M, 1:N) = p(M - 1, 1:N)\n", 305 | " \n", 306 | " \n", 307 | " IF (MOD(c, 10) .eq. 0) iter_diff = DSQRT(SUM((p - pn)**2)/SUM(pn**2))\n", 308 | " IF (c .eq. 500) EXIT\n", 309 | " \n", 310 | " c = c + 1\n", 311 | " ENDDO\n", 312 | " \n", 313 | "END SUBROUTINE pressure_poisson" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": { 320 | "collapsed": false, 321 | "deletable": true, 322 | "editable": true, 323 | "run_control": { 324 | "frozen": false, 325 | "read_only": false 326 | } 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "%timeit run_cavity()" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "collapsed": false, 338 | "deletable": true, 339 | "editable": true, 340 | "run_control": { 341 | "frozen": false, 342 | "read_only": false 343 | } 344 | }, 345 | "outputs": [], 346 | "source": [ 347 | "u_for, v_for, p_for = run_cavity()\n", 348 | "assert numpy.allclose(u, u_for)\n", 349 | "assert numpy.allclose(v, v_for)\n", 350 | "assert numpy.allclose(p, p_for)" 351 | ] 352 | } 353 | ], 354 | "metadata": { 355 | "hide_input": false, 356 | "kernelspec": { 357 | "display_name": "Python 3", 358 | "language": "python", 359 | "name": "python3" 360 | }, 361 | "language_info": { 362 | "codemirror_mode": { 363 | "name": "ipython", 364 | "version": 3 365 | }, 366 | "file_extension": ".py", 367 | "mimetype": "text/x-python", 368 | "name": "python", 369 | "nbconvert_exporter": "python", 370 | "pygments_lexer": "ipython3", 371 | "version": "3.6.1" 372 | } 373 | }, 374 | "nbformat": 4, 375 | "nbformat_minor": 0 376 | } 377 | -------------------------------------------------------------------------------- /notebooks/09.Tips.and.FAQ.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Can a `vectorize` function call a `jit` function?" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy\n", 19 | "from numba import vectorize, jit, njit" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "@jit\n", 31 | "def add(a, b):\n", 32 | " return a + b" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "@vectorize\n", 44 | "def add_vec(a, b):\n", 45 | " return add(a, b)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "to_add = numpy.arange(100).reshape(50, 2)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "add_vec(to_add[:, 0], to_add[:, 1])" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Can a `jit` function call a `vectorize` function?" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "@vectorize\n", 86 | "def add(a, b):\n", 87 | " return a + b\n", 88 | "\n", 89 | "@jit\n", 90 | "def add_vec(a, b):\n", 91 | " return add(a, b)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "add_vec(to_add[:, 0], to_add[:, 1])" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": { 108 | "collapsed": true 109 | }, 110 | "source": [ 111 | "## Can I use `isinstance` in `jit`ted functions? \n", 112 | "\n", 113 | "No, but there is a solution, `generated_jit`\n", 114 | "\n", 115 | "Imagine you have a function that takes an array but wants the scalar sum of that array to operate on. You can just use `.sum()`. But sometimes, instead of an array getting passed in, a scalar gets passed, which will throw an error since scalars don't have a `sum` method. " 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": true 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "from numba import types, generated_jit" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": true 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "@generated_jit\n", 138 | "def safesum(M):\n", 139 | " if isinstance(M, types.Array):\n", 140 | " return lambda M: M.sum()\n", 141 | " else:\n", 142 | " return lambda M: M" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": false 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "safesum(to_add)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "safesum(5)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "### Note:\n", 172 | "\n", 173 | "* `safesum` gets called with the Numba types of the argument, not their values\n", 174 | "* it returns a _function_, not a value. \n", 175 | "\n", 176 | "You could use this to have different functions called depending on the type of the inputs" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": false 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "def add(a, b):\n", 188 | " return a + b\n", 189 | "\n", 190 | "add_nums = jit(nopython=True)(add)\n", 191 | "add_str = jit()(add)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": false 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "@generated_jit\n", 203 | "def safeadd(a, b):\n", 204 | " if isinstance(a, types.Opaque):\n", 205 | " return add_str\n", 206 | " else:\n", 207 | " return add_nums" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": { 214 | "collapsed": false 215 | }, 216 | "outputs": [], 217 | "source": [ 218 | "safeadd('3', '4')" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": false 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "safeadd(3, 4)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "# Tips" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "Set envvar `NUMBA_DISABLE_JIT=1` to disable numba compilation (for debugging)\n", 244 | "\n", 245 | "Install the \"Hide Traceback\" extension if you're prototyping in a notebook." 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "# Errors" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "### Unification errors\n", 260 | "\n", 261 | "Thanks to Graham Markhall for the idea for these examples:\n", 262 | "http://gmarkall.github.io/tutorials/pycon-uk-2015\n", 263 | "\n", 264 | "When Numba compiles a function just-in-time, it needs to declare the type of the output(s). If it can't do that in a consistent way, it gets upset." 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": { 271 | "collapsed": false 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "@jit(nopython=True)\n", 276 | "def get_low(a, b, c):\n", 277 | " if c:\n", 278 | " return a\n", 279 | " else:\n", 280 | " return b" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "get_low(3., (3, 2), True)" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "collapsed": false 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "get_low(4, numpy.zeros(3), True)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "get_low(3, 4, True)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": { 320 | "collapsed": true 321 | }, 322 | "outputs": [], 323 | "source": [ 324 | "@jit(nopython=True)\n", 325 | "def dont_index_a_scalar(a):\n", 326 | " return a[0]" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": { 333 | "collapsed": false 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "dont_index_a_scalar(5.)" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": { 343 | "collapsed": true 344 | }, 345 | "source": [ 346 | "## Globals are treated as compile-time constants by Numba" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": { 353 | "collapsed": true 354 | }, 355 | "outputs": [], 356 | "source": [ 357 | "a = 5" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [ 368 | "@njit\n", 369 | "def add_to_a(b):\n", 370 | " return a + b" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": { 377 | "collapsed": false 378 | }, 379 | "outputs": [], 380 | "source": [ 381 | "add_to_a(7)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": { 388 | "collapsed": true 389 | }, 390 | "outputs": [], 391 | "source": [ 392 | "a = 12" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": { 399 | "collapsed": false 400 | }, 401 | "outputs": [], 402 | "source": [ 403 | "add_to_a(7)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "### Solution: Don't use globals(!)\n", 411 | "\n", 412 | "Seriously. Don't. But if you must, you can force a recompile of the jitted function." 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": { 419 | "collapsed": true 420 | }, 421 | "outputs": [], 422 | "source": [ 423 | "add_to_a.recompile()" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": false 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "add_to_a(7)" 435 | ] 436 | } 437 | ], 438 | "metadata": { 439 | "kernelspec": { 440 | "display_name": "Python 3", 441 | "language": "python", 442 | "name": "python3" 443 | }, 444 | "language_info": { 445 | "codemirror_mode": { 446 | "name": "ipython", 447 | "version": 3 448 | }, 449 | "file_extension": ".py", 450 | "mimetype": "text/x-python", 451 | "name": "python", 452 | "nbconvert_exporter": "python", 453 | "pygments_lexer": "ipython3", 454 | "version": "3.5.1" 455 | } 456 | }, 457 | "nbformat": 4, 458 | "nbformat_minor": 0 459 | } 460 | -------------------------------------------------------------------------------- /notebooks/01.When.where.to.use.Numba.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Intro to profiling" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Python's dirty little secret is that it can be made to run pretty fast. \n", 15 | "\n", 16 | "The bare-metal HPC people will be angrily tweeting at me now, or rather, they would be if they could get their wireless drivers working.\n", 17 | "\n", 18 | "Still, there are some things you *really* don't want to do in Python. Nested loops are usually a bad idea. But often you won't know where your code is slowing down just by looking at it and trying to accelerate everything can be a waste of time. (Developer time, that is, both now and in the future: you incur technical debt if you unintentionally obfuscate code to make it faster when it doesn't need to be).\n", 19 | "\n", 20 | "The first step is always to find the bottlenecks in your code, via _profiling_: analyzing your code by measuring the execution time of its parts." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Tools\n", 28 | "-----\n", 29 | "\n", 30 | "2. `cProfile`\n", 31 | "1. [`line_profiler`](https://github.com/rkern/line_profiler)\n", 32 | "3. `timeit`\n", 33 | "\n", 34 | "**Note**:\n", 35 | "If you haven't already installed it, you can do\n", 36 | "\n", 37 | "```console\n", 38 | "conda install line_profiler\n", 39 | "```\n", 40 | "\n", 41 | "or\n", 42 | "\n", 43 | "```console\n", 44 | "pip install line_profiler\n", 45 | "```" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "## Some bad code" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "\n", 60 | "Here's a bit of code guaranteed to perform poorly: it sleeps for 1.5 seconds after doing any work! We will profile it and see where we might be able to help." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "import numpy\n", 72 | "from time import sleep\n", 73 | "\n", 74 | "def bad_call(dude):\n", 75 | " sleep(.5)\n", 76 | " \n", 77 | "def worse_call(dude):\n", 78 | " sleep(1)\n", 79 | " \n", 80 | "def sumulate(foo):\n", 81 | " if not isinstance(foo, int):\n", 82 | " return\n", 83 | " \n", 84 | " a = numpy.random.random((1000, 1000))\n", 85 | " a @ a\n", 86 | " \n", 87 | " ans = 0\n", 88 | " for i in range(foo):\n", 89 | " ans += i\n", 90 | " \n", 91 | " bad_call(ans)\n", 92 | " worse_call(ans)\n", 93 | " \n", 94 | " return ans" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": { 101 | "collapsed": false 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "sumulate(150)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "## using `cProfile`" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "[`cProfile`](https://docs.python.org/3.4/library/profile.html#module-cProfile) is the built-in profiler in Python (available since Python 2.5). It provides a function-by-function report of execution time. First import the module, then usage is simply a call to `cProfile.run()` with your code as argument. It will print out a list of all the functions that were called, with the number of calls and the time spent in each." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "import cProfile" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "cProfile.run('sumulate(150)')" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "You can see here that when our code `sumulate()` executes, it spends almost all its time in the method `time.sleep` (a bit over 1.5 seconds).\n", 149 | "\n", 150 | "If your program is more complicated that this cute demo, you'll have a hard time parsing the long output of `cProfile`. In that case, you may want a profiling visualization tool, like [SnakeViz](https://jiffyclub.github.io/snakeviz/). But that is outside the scope of this tutorial." 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "## using `line_profiler`" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "`line_profiler` offers more granular information thatn `cProfile`: it will give timing information about each line of code in a profiled function." 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "Load the `line_profiler` extension" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "%load_ext line_profiler" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "### For a pop-up window with results in notebook:" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "IPython has an `%lprun` magic to profile specific functions within an executed statement. Usage:\n", 197 | "`%lprun -f func_to_profile ` (get more help by running `%lprun?` in IPython)." 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "### Profiling two functions" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "collapsed": true 212 | }, 213 | "outputs": [], 214 | "source": [ 215 | "%lprun -f bad_call -f worse_call sumulate(13)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "### Write results to a text file" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": false 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "%lprun -T timings.txt -f sumulate sumulate(12)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": true 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "%load timings.txt" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "## Profiling on the command line" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "Open file, add `@profile` decorator to any function you want to profile, then run\n", 259 | "\n", 260 | "```console\n", 261 | "kernprof -l script_to_profile.py\n", 262 | "```\n", 263 | "\n", 264 | "which will generate `script_to_profile.py.lprof` (pickled result). To view the results, run\n", 265 | "\n", 266 | "```console\n", 267 | "python -m line_profiler script_to_profile.py.lprof\n", 268 | "```" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": true 276 | }, 277 | "outputs": [], 278 | "source": [ 279 | "from IPython.display import IFrame" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": { 286 | "collapsed": false 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "IFrame('http://localhost:7000/terminals/1', width=800, height=700)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "## `timeit`" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "`timeit` is not perfect, but it is helpful. \n", 305 | "\n", 306 | "Potential concerns re: `timeit`\n", 307 | "\n", 308 | "* Returns minimum time of run\n", 309 | "* Only runs benchmark 3 times\n", 310 | "* It disables garbage collection\n", 311 | "\n", 312 | "```python\n", 313 | "python -m timeit -v \"print(42)\"\n", 314 | "```\n", 315 | "\n", 316 | "```python\n", 317 | "python -m timeit -r 25 \"print(42)\"\n", 318 | "```\n", 319 | "\n", 320 | "```python\n", 321 | "python -m timeit -s \"gc.enable()\" \"print(42)\"\n", 322 | "```" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "### Line magic" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "collapsed": false 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "%timeit x = 5" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "### Cell magic" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": { 354 | "collapsed": false 355 | }, 356 | "outputs": [], 357 | "source": [ 358 | "%%timeit\n", 359 | "x = 5\n", 360 | "y = 6\n", 361 | "x + y" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "The `-q` flag quiets output. The `-o` flag allows outputting results to a variable. The `-q` flag sometimes disagrees with OSX so please remove it if you're having issues." 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": { 375 | "collapsed": false 376 | }, 377 | "outputs": [], 378 | "source": [ 379 | "a = %timeit -qo x = 5" 380 | ] 381 | } 382 | ], 383 | "metadata": { 384 | "kernelspec": { 385 | "display_name": "Python 3", 386 | "language": "python", 387 | "name": "python3" 388 | }, 389 | "language_info": { 390 | "codemirror_mode": { 391 | "name": "ipython", 392 | "version": 3 393 | }, 394 | "file_extension": ".py", 395 | "mimetype": "text/x-python", 396 | "name": "python", 397 | "nbconvert_exporter": "python", 398 | "pygments_lexer": "ipython3", 399 | "version": "3.5.2" 400 | } 401 | }, 402 | "nbformat": 4, 403 | "nbformat_minor": 0 404 | } 405 | -------------------------------------------------------------------------------- /notebooks/04.Direct.Summation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## N-Body problems" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Many physical problems require the evaluation of all pairwise interactions of a large number of particles, so-called N-body problems. These problems arise in molecular dynamics, astrodynamics and electromagnetics among others. \n", 15 | "\n", 16 | "Their pairwise interactions can be expressed as:\n", 17 | "\n", 18 | "\\begin{equation}\n", 19 | "f_i = \\sum_{j=1}^n{P \\left(\\boldsymbol{x}_i, \\boldsymbol{x}_j \\right)w_j} \\ \\ \\ \\text{for } i=1,2,...,n \n", 20 | "\\end{equation}\n", 21 | "\n", 22 | "* where subscripts $i$, $j$ respectively denote *target* and *source*\n", 23 | "* $f_i$ can be a *potential* (or *force*) at target point $i$\n", 24 | "* $w_j$ is the *source weight* \n", 25 | "* $\\boldsymbol{x}_i, \\boldsymbol{x}_j$ are the *spatial positions* of particles \n", 26 | "* $P \\left(\\boldsymbol{x}_i, \\boldsymbol{x}_j \\right)$ is the *interaction kernel*. \n", 27 | "\n", 28 | "In order to evalute the potential $f_i$ at a target point $i$, we have to loop over each source particle $j$. Since there are $n$ target points $i$, this 'brute-force' approach costs $\\mathcal{O} \\left(n^2 \\right)$ operations. " 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "One possible approach in this kind of problem is to define a few classes, say `Point` and `Particle` and then loop over the objects and perform the necessary point-to-point calculations. " 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import numpy" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "class Point():\n", 58 | " \"\"\" \n", 59 | " Arguments:\n", 60 | " domain: the domain of random generated coordinates x,y,z, \n", 61 | " default=1.0\n", 62 | " \n", 63 | " Attributes:\n", 64 | " x, y, z: coordinates of the point\n", 65 | " \"\"\"\n", 66 | " def __init__(self, domain=1.0):\n", 67 | " self.x = domain * numpy.random.random()\n", 68 | " self.y = domain * numpy.random.random()\n", 69 | " self.z = domain * numpy.random.random()\n", 70 | " \n", 71 | " def distance(self, other):\n", 72 | " return ((self.x - other.x)**2 + \n", 73 | " (self.y - other.y)**2 + \n", 74 | " (self.z - other.z)**2)**.5" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "class Particle(Point):\n", 86 | " \"\"\" \n", 87 | " Attributes:\n", 88 | " m: mass of the particle\n", 89 | " phi: the potential of the particle\n", 90 | " \"\"\"\n", 91 | " \n", 92 | " def __init__(self, domain=1.0, m=1.0):\n", 93 | " Point.__init__(self, domain)\n", 94 | " self.m = m\n", 95 | " self.phi = 0." 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "Now we create a list of `n` random particles, define a function to calculate their interaction via direct summation and run!" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "n = 1000\n", 114 | "particles = [Particle(m = 1 / n) for i in range(n)]" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "def direct_sum(particles):\n", 126 | " \"\"\"\n", 127 | " Calculate the potential at each particle\n", 128 | " using direct summation method.\n", 129 | "\n", 130 | " Arguments:\n", 131 | " particles: the list of particles\n", 132 | "\n", 133 | " \"\"\"\n", 134 | " for i, target in enumerate(particles):\n", 135 | " for source in (particles[:i] + particles[i+1:]):\n", 136 | " r = target.distance(source)\n", 137 | " target.phi += source.m / r" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "direct_sum(particles)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "There was a noticeable lag there. How long does this thing take for 1000 particles?" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "orig_time = %timeit -o direct_sum(particles)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "collapsed": true 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "%load_ext line_profiler" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "collapsed": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "%lprun -f direct_sum direct_sum(particles)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "## How do we use Numba on this problem?" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "Problem: Numba doesn't support jitting native Python classes. There is a `jit_class` structure in Numba but it's still in early development.\n", 203 | "\n", 204 | "But it's nice to have attributes for literate programming.\n", 205 | "\n", 206 | "Solution: NumPy custom dtypes." 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": { 213 | "collapsed": true 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "particle_dtype = numpy.dtype({'names':['x','y','z','m','phi'], \n", 218 | " 'formats':[numpy.double, \n", 219 | " numpy.double, \n", 220 | " numpy.double, \n", 221 | " numpy.double, \n", 222 | " numpy.double]})" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": true 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "myarray = numpy.ones(3, dtype=particle_dtype)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "myarray" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "You can access an individual \"attribute\" like this:" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": { 258 | "collapsed": false 259 | }, 260 | "outputs": [], 261 | "source": [ 262 | "myarray[0]['x'] = 2.0" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": { 269 | "collapsed": false 270 | }, 271 | "outputs": [], 272 | "source": [ 273 | "myarray" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "## [Exercise 1](./exercises/04.Direct.Summation.Exercises.ipynb#Exercise-1)\n", 281 | "\n", 282 | "Write a function `create_n_random_particles` that takes the arguments `n` (number of particles), `m` (mass of every particle) and a domain within to generate a random number (as in the class above).\n", 283 | "It should create an array with `n` elements and `dtype=particle_dtype` and then return that array.\n", 284 | "\n", 285 | "For each particle, the mass should be initialized to the value of `m` and the potential `phi` initialized to zero." 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "collapsed": true 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "from numba import njit" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": { 303 | "collapsed": false 304 | }, 305 | "outputs": [], 306 | "source": [ 307 | "# %load snippets/nbody/create_n.py" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": {}, 313 | "source": [ 314 | "**Note**: You can use \"attribute\" access on dtypes but there's a caveat. If you need to debug this function without the decorator, you have to change them back to array access form. " 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": { 321 | "collapsed": false 322 | }, 323 | "outputs": [], 324 | "source": [ 325 | "parts = create_n_random_particles(1000, .001, 1)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": { 332 | "collapsed": false 333 | }, 334 | "outputs": [], 335 | "source": [ 336 | "parts[:3]" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": { 342 | "collapsed": true 343 | }, 344 | "source": [ 345 | "We don't have a `distance` method anymore, so we need to write a function to take care of that." 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "## [Exercise 2](./exercises/04.Direct.Summation.Exercises.ipynb#Exercise-2)\n", 353 | "Write a JITted function `distance` to calculate the distance between two particles of dtype `particle_dtype`" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": { 360 | "collapsed": true 361 | }, 362 | "outputs": [], 363 | "source": [ 364 | "# %load snippets/nbody/distance.py" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": { 371 | "collapsed": false 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "distance(parts[0], parts[1])" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": { 382 | "collapsed": false 383 | }, 384 | "outputs": [], 385 | "source": [ 386 | "%%timeit\n", 387 | "distance(parts[0], parts[1])" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "## [Exercise 3](./exercises/04.Direct.Summation.Exercises.ipynb#Exercise-3)\n", 395 | "Modify the `direct_sum` function above to instead work a NumPy array of particles. Loop over each element in the array and calculate its total potential." 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": { 402 | "collapsed": true 403 | }, 404 | "outputs": [], 405 | "source": [ 406 | "# %load snippets/nbody/direct_sum.py" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "metadata": { 413 | "collapsed": false 414 | }, 415 | "outputs": [], 416 | "source": [ 417 | "numba_time = %timeit -o direct_sum(parts)" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": { 424 | "collapsed": true 425 | }, 426 | "outputs": [], 427 | "source": [ 428 | "orig_time.best / numba_time.best" 429 | ] 430 | } 431 | ], 432 | "metadata": { 433 | "kernelspec": { 434 | "display_name": "Python 3", 435 | "language": "python", 436 | "name": "python3" 437 | }, 438 | "language_info": { 439 | "codemirror_mode": { 440 | "name": "ipython", 441 | "version": 3 442 | }, 443 | "file_extension": ".py", 444 | "mimetype": "text/x-python", 445 | "name": "python", 446 | "nbconvert_exporter": "python", 447 | "pygments_lexer": "ipython3", 448 | "version": "3.5.1" 449 | } 450 | }, 451 | "nbformat": 4, 452 | "nbformat_minor": 0 453 | } 454 | -------------------------------------------------------------------------------- /notebooks/05.1.Cavity_Flow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Application: Cavity Flow" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "One of the most common validation cases in CFD is the lid-driven cavity flow. We take a square cavity filled with a fluid and set the velocity of the lid to some constant value. The flow within the cavity is driven by the lid, a spiral flow pattern develops and two distinctive pressure zones are visible in the upper corners against the lid." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "import numpy" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "The Poisson equation is an elliptic PDE which almost always means using an iterative solver. We're going to use the Jacobi method. There are better ways, but that's beside the point. \n", 40 | "\n", 41 | "Here's the pressure Poisson equation:" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "\\begin{align}\n", 49 | "p_{i,j}^{n+1} &= \\frac{1}{4}\\left(p_{i+1,j}^{n}+p_{i-1,j}^{n}+p_{i,j+1}^{n}+p_{i,j-1}^{n}\\right) \\\\\n", 50 | "&-\\frac{\\rho \\Delta x}{16} \\left( \\frac{2}{\\Delta t} \\left(u_{i+1,j} - u_{i-1,j} + v_{i,j+1} - v_{i,j-1}\\right) \\right . \\\\\n", 51 | "&-\\frac{2}{\\Delta x}\\left(u_{i,j+1} - u_{i,j-1} \\right) \\left(v_{i+1,j} - v_{i-1,j} \\right) \\\\\n", 52 | "&- \\left . \\frac{\\left(u_{i+1,j} - u_{i-1,j} \\right)^2}{\\Delta x} \n", 53 | "- \\frac{ \\left(v_{i,j+1} - v_{i,j-1} \\right)^2 }{\\Delta x} \\right) \\\\\n", 54 | "\\end{align}" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "That looks a little nasty, but we only care about the top line when we iterate, since the bottom three lines depend only on values that don't change when we're correcting the pressure field. Because it doesn't change, we break it out into a separate function." 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": { 68 | "collapsed": false 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "def velocity_term(b, rho, dt, u, v, dx):\n", 73 | " b[1:-1, 1:-1] = (\n", 74 | " rho * dx / 16 * \n", 75 | " (2 / dt * (u[2:, 1:-1] - \n", 76 | " u[:-2, 1:-1] + \n", 77 | " v[1:-1, 2:] - \n", 78 | " v[1:-1, :-2]) - \n", 79 | " 2 / dx * (u[1:-1, 2:] - u[1:-1, :-2]) *\n", 80 | " (v[2:, 1:-1] - v[:-2, 1:-1]) - \n", 81 | " (u[2:, 1:-1] - u[:-2, 1:-1])**2 / dx - \n", 82 | " (v[1:-1, 2:] - v[1:-1, :-2])**2 / dx)\n", 83 | " )\n", 84 | "\n", 85 | " return b" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Now, to calculate the pressure field, we pass in the original pressure field, the value `b` (which is the result of the `velocity_term` function above) and a target value for difference between two iterates. We repeatedly update the pressure field until the difference of the L2 norm between two successive iterations is less than that target value." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "def pressure_poisson(p, b, l2_target):\n", 104 | " iter_diff = l2_target + 1\n", 105 | " n = 0\n", 106 | " while iter_diff > l2_target and n <= 500:\n", 107 | "\n", 108 | " pn = p.copy()\n", 109 | " p[1:-1,1:-1] = (.25 * (pn[2:, 1:-1] +\n", 110 | " pn[:-2, 1:-1] +\n", 111 | " pn[1:-1, 2:] +\n", 112 | " pn[1:-1, :-2]) -\n", 113 | " b[1:-1, 1:-1])\n", 114 | "\n", 115 | " p[:, 0] = p[:, 1] #dp/dy = 0 at y = 0\n", 116 | " p[:, -1] = 0 #p = 0 at y = 2\n", 117 | " p[0, :] = p[1, :] #dp/dx = 0 at x = 0\n", 118 | " p[-1, :] = p[-2, :] #dp/dy = 0 at x = 2\n", 119 | " \n", 120 | " \n", 121 | " if n % 10 == 0:\n", 122 | " iter_diff = numpy.sqrt(numpy.sum((p - pn)**2)/numpy.sum(pn**2))\n", 123 | " \n", 124 | " n += 1\n", 125 | " \n", 126 | " return p" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "In the interests of brevity, we're only going to worry about the pressure Poisson solver. The rest of the 2D Navier-Stokes solution is encapsulated in the function `cavity_flow`, which we've prepared ahead of time and saved in a helper file. We just need to import the function:" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": false 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "from snippets.ns_helper import cavity_flow" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "We'll also load up [pickled](https://docs.python.org/2/library/pickle.html) initial conditions, so we can reliably compare final solutions." 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": true 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "import pickle" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "def run_cavity():\n", 174 | " nx = 41\n", 175 | " ny = 41\n", 176 | " with open('IC.pickle', 'rb') as f:\n", 177 | " u, v, p, b = pickle.load(f)\n", 178 | "\n", 179 | " dx = 2 / (nx - 1)\n", 180 | " dt = .005\n", 181 | " nt = 1000\n", 182 | " \n", 183 | " u, v, p = cavity_flow(u, v, p, nt, dt, dx, \n", 184 | " velocity_term, \n", 185 | " pressure_poisson, \n", 186 | " rtol=1e-4)\n", 187 | " \n", 188 | " return u, v, p" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "So what does this all do? Let's check it out." 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": false 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "u, v, p = run_cavity()" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "%matplotlib inline\n", 218 | "from snippets.ns_helper import quiver_plot" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": false 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "quiver_plot(u, v, p)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "#### Save NumPy answers for comparison" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": { 243 | "collapsed": false 244 | }, 245 | "outputs": [], 246 | "source": [ 247 | "with open('numpy_ans.pickle', 'wb') as f:\n", 248 | " pickle.dump((u, v, p), f)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Let's profile the `cavity_flow` function and see if there's a specific place that's really hurting our performance." 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "%timeit run_cavity()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": { 273 | "collapsed": false 274 | }, 275 | "outputs": [], 276 | "source": [ 277 | "%load_ext line_profiler" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": { 284 | "collapsed": false 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "%lprun -f cavity_flow run_cavity()" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "## Where is the bottleneck?" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "Clearly the PPE is the problem here, so let's use `numba` to rewrite it. \n", 303 | "\n", 304 | "## [Exercise: Speed up the PPE](./exercises/05.Cavity.Flow.Exercises.ipynb#Exercise-1)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": { 311 | "collapsed": true 312 | }, 313 | "outputs": [], 314 | "source": [ 315 | "from numba import jit" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": { 322 | "collapsed": false 323 | }, 324 | "outputs": [], 325 | "source": [ 326 | "# %load snippets/ppe_numba.py" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": { 333 | "collapsed": false 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "u_numba, v_numba, p_numba = run_cavity()" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": { 344 | "collapsed": false 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "assert numpy.allclose(p, p_numba)\n", 349 | "assert numpy.allclose(u, u_numba)\n", 350 | "assert numpy.allclose(v, v_numba)" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": { 357 | "collapsed": false 358 | }, 359 | "outputs": [], 360 | "source": [ 361 | "quiver_plot(u_numba, v_numba, p_numba)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": { 368 | "collapsed": false 369 | }, 370 | "outputs": [], 371 | "source": [ 372 | "%timeit run_cavity()" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": null, 378 | "metadata": { 379 | "collapsed": false 380 | }, 381 | "outputs": [], 382 | "source": [ 383 | "%lprun -f cavity_flow run_cavity()" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "## One more bit of optimization?" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": { 397 | "collapsed": false 398 | }, 399 | "outputs": [], 400 | "source": [ 401 | "@jit(nopython=True)\n", 402 | "def velocity_term(b, rho, dt, u, v, dx):\n", 403 | " I, J = b.shape\n", 404 | " \n", 405 | " for i in range(1, I):\n", 406 | " for j in range(1, J):\n", 407 | " b[i, j] = (\n", 408 | " rho * dx / 16 * \n", 409 | " (2 / dt * (u[i + 1, j] - \n", 410 | " u[i - 1, j] + \n", 411 | " v[i, j + 1] - \n", 412 | " v[i, j - 1]) - \n", 413 | " 2 / dx * (u[i, j + 1] - u[i, j - 1]) * \n", 414 | " (v[i + 1, j] - v[i - 1, j]) - \n", 415 | " (u[i + 1, j] - u[i - 1, j])**2 / dx - \n", 416 | " (v[i, j + 1] - v[i, j - 1])**2 / dx)\n", 417 | " )\n", 418 | " return b" 419 | ] 420 | } 421 | ], 422 | "metadata": { 423 | "kernelspec": { 424 | "display_name": "Python 3", 425 | "language": "python", 426 | "name": "python3" 427 | }, 428 | "language_info": { 429 | "codemirror_mode": { 430 | "name": "ipython", 431 | "version": 3 432 | }, 433 | "file_extension": ".py", 434 | "mimetype": "text/x-python", 435 | "name": "python", 436 | "nbconvert_exporter": "python", 437 | "pygments_lexer": "ipython3", 438 | "version": "3.5.2" 439 | } 440 | }, 441 | "nbformat": 4, 442 | "nbformat_minor": 0 443 | } 444 | -------------------------------------------------------------------------------- /notebooks/07.Make.your.own.ufuncs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Defining `ufuncs` using `vectorize`" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "You have been able to define your own NumPy [`ufuncs`](http://docs.scipy.org/doc/numpy/reference/ufuncs.html) for quite some time, but it's a little involved. \n", 15 | "\n", 16 | "You can read through the [documentation](http://docs.scipy.org/doc/numpy/user/c-info.ufunc-tutorial.html), the example they post there is a ufunc to perform \n", 17 | "\n", 18 | "$$f(a) = \\log \\left(\\frac{a}{1-a}\\right)$$\n", 19 | "\n", 20 | "It looks like this:\n", 21 | "\n", 22 | "```c\n", 23 | "static void double_logit(char **args, npy_intp *dimensions,\n", 24 | " npy_intp* steps, void* data)\n", 25 | "{\n", 26 | " npy_intp i;\n", 27 | " npy_intp n = dimensions[0];\n", 28 | " char *in = args[0], *out = args[1];\n", 29 | " npy_intp in_step = steps[0], out_step = steps[1];\n", 30 | "\n", 31 | " double tmp;\n", 32 | "\n", 33 | " for (i = 0; i < n; i++) {\n", 34 | " /*BEGIN main ufunc computation*/\n", 35 | " tmp = *(double *)in;\n", 36 | " tmp /= 1-tmp;\n", 37 | " *((double *)out) = log(tmp);\n", 38 | " /*END main ufunc computation*/\n", 39 | "\n", 40 | " in += in_step;\n", 41 | " out += out_step;\n", 42 | " }\n", 43 | "}\n", 44 | "```" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "And **note**, that's just for a `double`. If you want `floats`, `long doubles`, etc... you have to write all of those, too. And then create a `setup.py` file to install it. And I left out a bunch of boilerplate stuff to set up the import hooks, etc..." 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "# Say \"thank you\" to the NumPy devs" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "We can use Numba to define ufuncs without all of the pain." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "import numpy\n", 77 | "import math" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "Let's define a function that operates on two inputs" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "def trig(a, b):\n", 96 | " return math.sin(a**2) * math.exp(b)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "trig(1, 1)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "Seems reasonable. However, the `math` library only works on scalars. If we try to pass in arrays, we'll get an error." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "a = numpy.ones((5,5))\n", 126 | "b = numpy.ones((5,5))" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": false 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "trig(a, b)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "from numba import vectorize" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": { 155 | "collapsed": false 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "vec_trig = vectorize()(trig)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "collapsed": false 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "vec_trig(a, b)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "And just like that, the scalar function `trig` is now a NumPy `ufunc` called `vec_trig`\n", 178 | "\n", 179 | "Note that this is a \"Dynamic UFunc\" with no signature given. " 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "How does it compare to just using NumPy? Let's check" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "def numpy_trig(a, b):\n", 198 | " return numpy.sin(a**2) * numpy.exp(b)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": true 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "a = numpy.random.random((1000, 1000))\n", 210 | "b = numpy.random.random((1000, 1000))" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "%timeit vec_trig(a, b)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "collapsed": false 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "%timeit numpy_trig(a, b)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "What happens if we do specify a signature? Is there a speed boost?" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": { 246 | "collapsed": false 247 | }, 248 | "outputs": [], 249 | "source": [ 250 | "vec_trig = vectorize('float64(float64, float64)')(trig)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "collapsed": false 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "%timeit vec_trig(a, b)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "No, not really. But(!), if we have a signature, then we can add the target `kwarg`." 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": false 276 | }, 277 | "outputs": [], 278 | "source": [ 279 | "vec_trig = vectorize('float64(float64, float64)', target='parallel')(trig)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": { 286 | "collapsed": false 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "%timeit vec_trig(a, b)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "Automatic multicore operations!\n", 298 | "\n", 299 | "**Note**: `target='parallel'` is not always the best option. There is overhead in setting up the threading, so if the individual scalar operations that make up a `ufunc` are simple you'll probably get better performance in serial. If the individual operations are more expensive (like trig!) then parallel is (usually) a good option." 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "### Passing multiple signatures" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "If you use multiple signatures, they have to be listed in order of most specific -> least specific" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": { 320 | "collapsed": false 321 | }, 322 | "outputs": [], 323 | "source": [ 324 | "@vectorize(['int32(int32, int32)',\n", 325 | " 'int64(int64, int64)',\n", 326 | " 'float32(float32, float32)',\n", 327 | " 'float64(float64, float64)'])\n", 328 | "def trig(a, b):\n", 329 | " return math.sin(a**2) * math.exp(b)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "collapsed": false 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "trig(1, 1)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "collapsed": false 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "trig(1., 1.)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": { 358 | "collapsed": false 359 | }, 360 | "outputs": [], 361 | "source": [ 362 | "trig.ntypes" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "## [Exercise: Clipping an array](./exercises/07.Vectorize.Exercises.ipynb#Exercise:-Clipping-an-array)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "Yes, NumPy has a `clip` ufunc already, but let's pretend it doesn't. \n", 377 | "\n", 378 | "Create a Numba vectorized ufunc that takes a vector `a`, a lower limit `amin` and an upper limit `amax`. It should return the vector `a` with all values clipped such that $a_{min} < a < a_{max}$:" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": { 385 | "collapsed": true 386 | }, 387 | "outputs": [], 388 | "source": [ 389 | "# %load snippets/clip.py" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": null, 395 | "metadata": { 396 | "collapsed": true 397 | }, 398 | "outputs": [], 399 | "source": [ 400 | "a = numpy.random.random((5000))" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "collapsed": true 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "amin = .2\n", 412 | "amax = .6" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": { 419 | "collapsed": false 420 | }, 421 | "outputs": [], 422 | "source": [ 423 | "%timeit vec_truncate_serial(a, amin, amax)" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": false 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "%timeit vec_truncate_par(a, amin, amax)" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": { 441 | "collapsed": false 442 | }, 443 | "outputs": [], 444 | "source": [ 445 | "%timeit numpy.clip(a, amin, amax)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": { 452 | "collapsed": true 453 | }, 454 | "outputs": [], 455 | "source": [ 456 | "a = numpy.random.random((100000))" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": { 463 | "collapsed": false 464 | }, 465 | "outputs": [], 466 | "source": [ 467 | "%timeit vec_truncate_serial(a, amin, amax)" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": { 474 | "collapsed": false 475 | }, 476 | "outputs": [], 477 | "source": [ 478 | "%timeit vec_truncate_par(a, amin, amax)" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": { 485 | "collapsed": false 486 | }, 487 | "outputs": [], 488 | "source": [ 489 | "%timeit numpy.clip(a, amin, amax)" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "metadata": {}, 495 | "source": [ 496 | "## [Exercise: Create `logit` ufunc](./exercises/07.Vectorize.Exercises.ipynb#Exercise:-Create-logit-ufunc)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": {}, 502 | "source": [ 503 | "Recall from above that this is a ufunc which performs this operation:\n", 504 | "\n", 505 | "$$f(a) = \\log \\left(\\frac{a}{1-a}\\right)$$" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": null, 511 | "metadata": { 512 | "collapsed": false 513 | }, 514 | "outputs": [], 515 | "source": [ 516 | "# %load snippets/logit.py" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": null, 522 | "metadata": { 523 | "collapsed": false 524 | }, 525 | "outputs": [], 526 | "source": [ 527 | "logit(a)" 528 | ] 529 | }, 530 | { 531 | "cell_type": "markdown", 532 | "metadata": {}, 533 | "source": [ 534 | "## Performance of `vectorize` vs. regular array-wide operations" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": null, 540 | "metadata": { 541 | "collapsed": false 542 | }, 543 | "outputs": [], 544 | "source": [ 545 | "@vectorize\n", 546 | "def discriminant(a, b, c):\n", 547 | " return b**2 - 4 * a * c" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": null, 553 | "metadata": { 554 | "collapsed": true 555 | }, 556 | "outputs": [], 557 | "source": [ 558 | "a = numpy.arange(10000)\n", 559 | "b = numpy.arange(10000)\n", 560 | "c = numpy.arange(10000)" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": null, 566 | "metadata": { 567 | "collapsed": false 568 | }, 569 | "outputs": [], 570 | "source": [ 571 | "%timeit discriminant(a, b, c)" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": { 578 | "collapsed": false 579 | }, 580 | "outputs": [], 581 | "source": [ 582 | "%timeit b**2 - 4 * a * c" 583 | ] 584 | }, 585 | { 586 | "cell_type": "markdown", 587 | "metadata": {}, 588 | "source": [ 589 | "What's going on?\n", 590 | "\n", 591 | "* Each array operation creates a temporary copy\n", 592 | "* Each of these arrays are loaded into and out of cache a whole bunch" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "metadata": { 599 | "collapsed": true 600 | }, 601 | "outputs": [], 602 | "source": [ 603 | "del a, b, c" 604 | ] 605 | } 606 | ], 607 | "metadata": { 608 | "kernelspec": { 609 | "display_name": "Python 3", 610 | "language": "python", 611 | "name": "python3" 612 | }, 613 | "language_info": { 614 | "codemirror_mode": { 615 | "name": "ipython", 616 | "version": 3 617 | }, 618 | "file_extension": ".py", 619 | "mimetype": "text/x-python", 620 | "name": "python", 621 | "nbconvert_exporter": "python", 622 | "pygments_lexer": "ipython3", 623 | "version": "3.5.1" 624 | } 625 | }, 626 | "nbformat": 4, 627 | "nbformat_minor": 0 628 | } 629 | -------------------------------------------------------------------------------- /notebooks/figures/2d_full_weighting_detail.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 29 | 35 | 36 | 45 | 51 | 52 | 60 | 66 | 67 | 75 | 81 | 82 | 90 | 96 | 97 | 105 | 111 | 112 | 121 | 127 | 128 | 137 | 143 | 144 | 152 | 158 | 159 | 160 | 183 | 185 | 186 | 188 | image/svg+xml 189 | 191 | 192 | 193 | 194 | 195 | 200 | 203 | 219 | 235 | 251 | 267 | 272 | 278 | 283 | 288 | 293 | 298 | 299 | 306 | 313 | 320 | 327 | 332 | 338 | 344 | 350 | 356 | 362 | 368 | 373 | 379 | 385 | 388 | 394 | 397 | 429 | 430 | 433 | 450 | 453 | 470 | 473 | 489 | 492 | 524 | 525 | 528 | 535 | 538 | 574 | 575 | 576 | 577 | --------------------------------------------------------------------------------