├── .devcontainer └── devcontainer.json ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .ruff.toml ├── 00_intro.ipynb ├── 00_numpy.ipynb ├── 00_pandas.ipynb ├── 01_fractal_accelerate.ipynb ├── 01_fractal_interactive.ipynb ├── 02_temperatures.ipynb ├── 03_mcmc.ipynb ├── 04_runge_kutta.ipynb ├── 05_distributed.ipynb ├── 06_jax.ipynb ├── 06b_jax.ipynb ├── 07_callables.ipynb ├── 08_pandas_covid.ipynb ├── README.md ├── check.py ├── classic └── 2-just-numpy.ipynb ├── conda-lock.yml ├── data ├── nasa-exoplanets-details.txt ├── nasa-exoplanets.csv ├── newark-days-ago.txt ├── newark-temperature-avg.txt ├── newark-temperature-max.txt ├── newark-temperature-min.txt └── newark-temperature.csv ├── environment.yml └── img ├── FastestGrowing.png ├── GitHubLang.png ├── PyPLLang.png ├── cards-chance-deck-19060.jpg ├── png-spec-chunks.png └── png-spec-scanline.png /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "performance-minicourse", 3 | "image": "mambaorg/micromamba", 4 | "postCreateCommand": "", 5 | "updateContentCommand": "MAMBA_ALWAYS_YES=true micromamba install -f conda-lock.yml -q", 6 | "waitFor": "onCreateCommand", 7 | "customizations": { 8 | "codespaces": { 9 | "openFiles": [] 10 | }, 11 | "vscode": { 12 | "extensions": [ 13 | "ms-toolsai.jupyter", 14 | "ms-python.python" 15 | ] 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb_checkpoints 2 | dask-worker-space/* 3 | mydask.png 4 | multiproc.py 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryiii/python-performance-minicourse/417a0c695c510fc7b78a28d4dbca6744896b5257/.gitmodules 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: "v4.4.0" 4 | hooks: 5 | - id: check-added-large-files 6 | - id: check-case-conflict 7 | - id: check-merge-conflict 8 | - id: check-symlinks 9 | - id: check-yaml 10 | - id: debug-statements 11 | - id: end-of-file-fixer 12 | - id: mixed-line-ending 13 | - id: requirements-txt-fixer 14 | - id: trailing-whitespace 15 | 16 | - repo: https://github.com/astral-sh/ruff-pre-commit 17 | rev: "v0.0.278" 18 | hooks: 19 | - id: ruff 20 | types_or: [python, pyi, jupyter] 21 | 22 | - repo: https://github.com/psf/black 23 | rev: 23.7.0 24 | hooks: 25 | - id: black-jupyter 26 | args: [--target-version=py311] 27 | 28 | - repo: https://github.com/kynan/nbstripout 29 | rev: 0.6.1 30 | hooks: 31 | - id: nbstripout 32 | 33 | - repo: https://github.com/codespell-project/codespell 34 | rev: "v2.2.5" 35 | hooks: 36 | - id: codespell 37 | args: ["-L", "nd,hist,whet"] 38 | exclude: \.csv$ 39 | 40 | - repo: local 41 | hooks: 42 | - id: disallow-caps 43 | name: Disallow improper capitalization 44 | language: pygrep 45 | entry: PyBind|Numpy|Cmake|CCache|Github|PyTest 46 | exclude: .pre-commit-config.yaml 47 | -------------------------------------------------------------------------------- /.ruff.toml: -------------------------------------------------------------------------------- 1 | select = [ 2 | "E", "F", "W", # flake8 3 | "B", # flake8-bugbear 4 | "I", # isort 5 | "ARG", # flake8-unused-arguments 6 | "C4", # flake8-comprehensions 7 | "EM", # flake8-errmsg 8 | "ICN", # flake8-import-conventions 9 | "ISC", # flake8-implicit-str-concat 10 | "G", # flake8-logging-format 11 | "PGH", # pygrep-hooks 12 | "PIE", # flake8-pie 13 | "PL", # pylint 14 | "PT", # flake8-pytest-style 15 | "RET", # flake8-return 16 | "RUF", # Ruff-specific 17 
| "SIM", # flake8-simplify 18 | "UP", # pyupgrade 19 | "YTT", # flake8-2020 20 | "EXE", # flake8-executable 21 | "NPY", # NumPy specific rules 22 | "PD", # pandas-vet 23 | ] 24 | extend-ignore = [ 25 | "PLR", # Design related pylint codes 26 | "E501", # Line too long 27 | "NPY002", # Replace legacy random 28 | "PD008", # False positive with Jax 29 | ] 30 | target-version = "py38" 31 | 32 | [per-file-ignores] 33 | "*.ipynb" = [ 34 | "B018", # Useless expression 35 | "F811", # Redefinition of unused 36 | "B015", # Pointless comparison 37 | "E402", # Module level import not at top of file 38 | "E703", # Notebooks often have "pointless" semicolons 39 | "I001", # Hard to get right with IPython 40 | "F704", # await outside function (allowed in IPython) 41 | "PLE1142", # await outside async function (allowed in IPython) 42 | "PD901", # df is a bad variable name 43 | ] 44 | "check.py" = [ 45 | "F401", # This files does this 46 | ] 47 | "02_temperatures.ipynb" = [ 48 | "F401", # Unused import is used in %%timeit 49 | "F821", # Can't look inside magic (yet) 50 | ] 51 | "03_mcmc.ipynb" = ["F821"] 52 | "08_pandas_covid.ipynb" = ["F821"] 53 | -------------------------------------------------------------------------------- /00_intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Why Python?\n", 8 | "\n", 9 | "Python is now the second most popular language on GitHub, after only JavaScript.\n", 10 | "\n", 11 | "![GitHub Languages](./img/GitHubLang.png)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "* Jupyter notebooks growth over 100% every year for the last three years! (in 2019)\n", 19 | "* Still in the top 10 growing languages! 
(in 2022)\n", 20 | "\n", 21 | "![GitHub Language Growth](./img/FastestGrowing.png)\n", 22 | "\n", 23 | "[State of the Octoverse, 2019 - 2022](https://octoverse.github.com)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "# Why Python?\n", 31 | "\n", 32 | "![PyPL Languages](./img/PyPLLang.png)\n", 33 | "\n", 34 | "[PyPL rankings](http://pypl.github.io/PYPL.html) of some of the most popular languages for data science. Quote: \"Worldwide, Python is the most popular language, Python grew the most in the last 5 years (6.9%)\" (March 2023)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "# Timeline of Python\n", 42 | "\n", 43 | "* 1994: Python 1.0 released\n", 44 | "* 1995: First array package: Numeric\n", 45 | "* 2003: Matplotlib\n", 46 | "* 2005: Numeric and numarray merged into NumPy\n", 47 | "* 2008: Pandas introduced, Python 3 released\n", 48 | "* 2012: The Anaconda Python distribution was born\n", 49 | "* 2014: IPython produces the Jupyter project and notebook\n", 50 | "* 2016: LIGO's discovery was shown in a Jupyter Notebook, and was written in Python\n", 51 | "* 2017: Google releases TensorFlow\n", 52 | "* 2019: All Machine Learning libraries are primarily or exclusively used through Python\n", 53 | "* 2020: Python 2 died, long live Python 3.6+!\n", 54 | "* 2022: The faster CPython project provides 25% speedup in 3.11!" 
55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "# Timeline of Python, key points\n", 62 | "\n", 63 | "\n", 64 | "## 2005: NumPy\n", 65 | "* Merged two competing codebases, created single ecosystem\n", 66 | "\n", 67 | "## 2008: Pandas\n", 68 | "* Took on specialized statistics languages (like R) with a *library* in a general purpose language\n", 69 | "* Pioneered \"Pythonic\" shortcuts, breaking down traditional design barriers\n", 70 | "\n", 71 | "## 2014: Jupyter\n", 72 | "* The notebook format, with code, outputs, and descriptions interleaved, became multilingual" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "# Python vs. a compiled language\n", 80 | "\n", 81 | "Python is an interpreted language. When we talk about Python, we usually mean CPython, which is not even Just In Time (JIT) compiled; it's purely interpreted.\n", 82 | "\n", 83 | "TLDR: Python is *slow*.\n", 84 | "\n", 85 | "Hundreds to thousands of times slower than C/C++/Fortran/Go/Swift/Rust/Haskell... You get the point.\n", 86 | "\n", 87 | "Python is like a car. Compiled languages are like a plane.\n", 88 | "\n", 89 | "So why use it?" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "# A hybrid approach\n", 97 | "\n", 98 | "If you want to get to South America, the fastest way to do so is take a car to get to the airport to catch a plane. \n", 99 | "\n", 100 | "Same idea for Python and compiled languages. You can do the big, common, easy tasks in compiled languages, and steer it with Python.\n", 101 | "\n", 102 | "And, as you'll see today, that's easier than you think!" 
103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "# Mini-courses\n", 110 | "\n", 111 | "\n", 112 | "## High Performance Python: CPU\n", 113 | "\n", 114 | "* Today's class\n", 115 | "* How to make Python code fast *without* fully leaving Python\n", 116 | "\n", 117 | "\n", 118 | "## [High Performance Python: GPU](https://github.com/henryiii/pygpu-minicourse)\n", 119 | "\n", 120 | "* Using a GPU to accelerate code\n", 121 | "* Using accelerators to boost your code\n", 122 | "\n", 123 | "\n", 124 | "## [Compiled code & Python](https://github.com/henryiii/python-compiled-minicourse)\n", 125 | "\n", 126 | "* How to interface and accelerate with compiled code (mostly C++)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "# Lessons\n", 134 | "\n", 135 | "* [00 Intro](./00_intro.ipynb): The introduction\n", 136 | "* [01 Fractal accelerate](./01_fractal_accelerate.ipynb): A look at a fractal computation, and ways to accelerate it with NumPy changes, numexpr, and Numba.\n", 137 | " - [01b Fractal interactive](./01_fractal_interactive.ipynb): An interactive example using Numba.\n", 138 | "* [02 Temperatures](./02_temperatures.ipynb): A look at reading files and array manipulation in NumPy and Pandas.\n", 139 | "* [03 MCMC](./03_mcmc.ipynb): A Markov Chain Monte Carlo generator (and Metropolis generator) in Python and Numba, with a focus on profiling.\n", 140 | "* [04 Runge-Kutta](./04_runge_kutta.ipynb): Implementing a popular integration algorithm in NumPy and Numba.\n", 141 | "* [05 Distributed](./05_distributed.ipynb): An exploration of ways to break up code (fractal) into chunks for multithreading, multiprocessing, and Dask distribution.\n", 142 | "* [06 Jax](./06_jax.ipynb): A look at Google's JAX.\n", 143 | " - [06b Jax](./06b_jax.ipynb): More JAX.\n", 144 | "* [07 Callables](./07_callables.ipynb): A look at SciPy's LowLevelCallable, and how to implement one with 
Numba.\n", 145 | "* [08 Pandas COVID data](./08_pandas_covid.ipynb): A further look at Pandas for a COVID dataset.\n", 146 | "\n", 147 | "We may not go through these in order." 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "## Survey\n", 155 | "\n", 156 | "Before we finish, please complete the survey. We will give you some time near the end to fill it out." 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "# Background\n", 173 | "\n", 174 | "> 5 minute pause: Please look through the following text, or ask for help getting setup. We will go over this quickly after the pause. Most of it should be review, except for some ufunc specifics.\n", 175 | "\n", 176 | "Python lists/tuples can contain any Python object, and so waste memory and layout:" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "import math\n", 186 | "\n", 187 | "import numpy as np" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "lst = [1, \"hi\", 3.0, \"🤣\"]" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "*Each* Python object stores *at least* a type and a reference count. They can be different sizes, so Python has to chase pointers down to get them. 
NumPy introduced an array class:" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "arr = np.array([1, 2, 3, 4])" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "The array object is a normal Python object (with refcounts and such), but the items *inside it* are stored nicely packed in memory, with a single \"dtype\" for all the data. You can use `dtype=object`, but if it is anything else, this is much nicer than pure Python for larger amounts of data. All the standard datatypes are present, rather than the simple 64-bit `float` and unlimited `int` that regular Python provides." 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "NumPy provides \"array\" processing, where operations and functions are applied to arrays rather than in loops, and this allows the operations to usually loop in a compiled language, skipping the type lookups and such Python would have to do. To facilitate this, NumPy introduced UFuncs, Generalized UFuncs, and functions that operate on arrays. They also helped Python 3 come up with a memory buffer interface for communicating data structure between libraries without NumPy, and an overload system for UFuncs (1.13) and later array functions (1.18).\n", 227 | "\n", 228 | "Out of all of that, let's peek at a UFunc:" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "vals = np.linspace(0, np.pi, 9)\n", 238 | "\n", 239 | "# Ufunc: np.sin\n", 240 | "print(np.sin(vals))" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "`np.sin` is a UFunc. 
It can be called on any dimension of array, and it will return the same dimensionality array, with the function (`sin`, in this case) transforming each element. If it took multiple arguments, each could be ND, and the output would be the broadcast combination of the inputs (fails if not compatible). There is a set of standard arguments, such as `out=` (use an existing array for the output), `where=` (mask items), `casting`, `order`, `dtype`, and `subok`. You can also call a set of standard methods, such as `accumulate`, `at`, `outer`, `reduce`, and `reduceat` - though some do not work on all ufuncs. There are some properties, too." 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "Let's use `out=` to pre-allocate our own output:" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "vals = np.linspace(0, np.pi, 9)\n", 264 | "out = np.empty_like(vals)\n", 265 | "np.sin(vals, out=out)\n", 266 | "print(out)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "The operators on arrays, along with most of the methods on arrays, are actually ufuncs and array functions defined elsewhere in NumPy:" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "out_simple = vals + vals\n", 283 | "\n", 284 | "out_inplace = np.empty_like(vals)\n", 285 | "np.add(vals, vals, out=out_inplace)\n", 286 | "\n", 287 | "np.testing.assert_array_equal(out_simple, out_inplace)" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "We will consider the simple form of this, array manipulation with the simple operations, to be the baseline. There is a \"simpler\" baseline, or maybe just an older one, of loops over arrays. 
I *think* most people who learn Python today or in the last few years start quite early with array programming, and that is the one most familiar, so we will start there." 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "# Array looping method, do not use\n", 304 | "\n", 305 | "vals = np.linspace(0, np.pi, 9)\n", 306 | "out = []\n", 307 | "for val in vals:\n", 308 | " out.append(math.sin(val))\n", 309 | "print(out)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "# Interesting projects\n", 317 | "\n", 318 | "I am part of [Scikit-HEP](http://scikit-hep.org), a project to build tools for High Energy Physicists in Python. Some of the projects are applicable outside of HEP:\n", 319 | "\n", 320 | "* [AwkwardArray](https://github.com/scikit-hep/awkward-array): Jagged array structures\n", 321 | "* [Vector](https://github.com/scikit-hep/vector): A package for 2D, 3D, and Lorentz vectors\n", 322 | "* [boost-histogram](https://github.com/scikit-hep/boost-histogram): A compiled package for powerful, fast histograms in Python\n", 323 | " - [hist](https://github.com/scikit-hep/hist), a package for fast analysis and plotting of histograms (in development)\n", 324 | "* [iMinuit](https://github.com/scikit-hep/iminuit): A powerful minimization package (used in HEP and Astrophysics)\n", 325 | "\n", 326 | "Other projects I am a developer on:\n", 327 | "\n", 328 | "* [scikit-build](https://github.com/scikit-build): A build backend for CMake code in Python\n", 329 | "* [pybind11](https://github.com/pybind/pybind11): Python Bindings in pure C++11+, no other tool needed!\n", 330 | "* [build](https://github.com/pypa/build): Build wheels and SDists for Python.\n", 331 | "* [cibuildwheel](https://github.com/pypa/cibuildwheel): Build redistributable binary wheels for Python!\n", 332 | "* 
[Plumbum](https://plumbum.readthedocs.io/en/latest/): A toolkit for bash-like scripting in Python\n", 333 | "* [CLI11](https://github.com/CLIUtils/CLI11): A command line parser for C++11" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "# Further reading\n", 341 | "\n", 342 | "## My Materials\n", 343 | "\n", 344 | "### Favorite posts and series\n", 345 | "\n", 346 | "[C++](https://iscinumpy.dev/tags/cppxx) [11](https://iscinumpy.dev/post/cpp-11) [14](https://iscinumpy.dev/post/cpp-14) [17](https://iscinumpy.dev/post/cpp-17) [20](https://iscinumpy.dev/post/cpp-20) •\n", 347 | "[macOS Setup](https://iscinumpy.dev/post/setup-a-new-mac) [(AS)](https://iscinumpy.dev/post/setup-apple-silicon) •\n", 348 | "[Azure DevOps](https://iscinumpy.dev/categories/azure-devops) ([Python Wheels](https://iscinumpy.dev/post/azure-devops-python-wheels)) •\n", 349 | "[Conda-Forge ROOT](https://iscinumpy.dev/post/root-conda) •\n", 350 | "[CLI11](https://iscinumpy.dev/tags/cli11) •\n", 351 | "[GooFit](https://iscinumpy.dev/tags/goofit) •\n", 352 | "[cibuildwheel](https://iscinumpy.dev/tags/cibuildwheel) •\n", 353 | "[Hist](https://iscinumpy.dev/tags/hist) •\n", 354 | "[Python Bindings](https://iscinumpy.dev/tags/bindings) •\n", 355 | "[Python 2→3](https://iscinumpy.dev/post/python-3-upgrade), [3.7](https://iscinumpy.dev/post/python-37), [3.8](https://iscinumpy.dev/post/python-38), [3.9](https://iscinumpy.dev/post/python-39), [3.10](https://iscinumpy.dev/post/python-310), [3.11](https://iscinumpy.dev/post/python-311) •\n", 356 | "[SSH](https://iscinumpy.dev/post/setting-up-ssh-forwarding/)\n", 357 | "\n", 358 | "### My classes and books\n", 359 | "\n", 360 | "[Modern CMake](https://cliutils.gitlab.io/modern-cmake/) •\n", 361 | "[CompClass](https://henryiii.github.io/compclass) •\n", 362 | "[se-for-sci](https://henryiii.github.io/se-for-sci)\n", 363 | "\n", 364 | "### My workshops\n", 365 | "\n", 366 | "[CMake 
Workshop](https://hsf-training.github.io/hsf-training-cmake-webpage/) •\n", 367 | "Python [CPU](https://github.com/henryiii/python-performance-minicourse), [GPU](https://github.com/henryiii/pygpu-minicourse), [Compiled](https://github.com/henryiii/python-compiled-minicourse) minicourses •\n", 368 | "[Level Up Your Python](https://henryiii.github.io/level-up-your-python) •\n", 369 | "[Packaging (WIP)](https://intersect-training.org/packaging/)\n", 370 | "\n", 371 | "### My projects\n", 372 | "\n", 373 | "[pybind11](https://pybind11.readthedocs.io) ([python_example](https://github.com/pybind/python_example), [cmake_example](https://github.com/pybind/cmake_example), [scikit_build_example](https://github.com/pybind/scikit_build_example)) •\n", 374 | "[cibuildwheel](https://cibuildwheel.readthedocs.io) •\n", 375 | "[build](https://pypa-build.readthedocs.io) •\n", 376 | "[scikit-build](https://github.com/scikit-build/scikit-build) ([core](https://github.com/scikit-build/scikit-build-core), [cmake](https://github.com/scikit-build/cmake-python-distributions), [ninja](https://github.com/scikit-build/ninja-python-distributions), [moderncmakedomain]()) •\n", 377 | "[boost-histogram](https://github.com/scikit-hep/boost-histogram) •\n", 378 | "[Hist](https://github.com/scikit-hep/hist) •\n", 379 | "[UHI](https://github.com/scikit-hep/uhi) •\n", 380 | "[Scikit-HEP/cookie](https://github.com/scikit-hep/cookie) •\n", 381 | "[Vector](https://github.com/scikit-hep/vector) •\n", 382 | "[CLI11](https://github.com/CLIUtils/CLI11) •\n", 383 | "[Plumbum](https://plumbum.readthedocs.io/en/latest) •\n", 384 | "[GooFit](https://github.com/GooFit/GooFit) •\n", 385 | "[Particle](https://github.com/scikit-hep/particle) •\n", 386 | "[DecayLanguage](https://github.com/scikit-hep/decaylanguage) •\n", 387 | "[Conda-Forge ROOT](https://github.com/conda-forge/root-feedstock) •\n", 388 | "[POVM](https://github.com/Princeton-Penn-Vents/princeton-penn-flowmeter) •\n", 389 | 
"[Jekyll-Indico](https://github.com/iris-hep/jekyll-indico) •\n", 390 | "[pytest GHA annotate-failures](https://github.com/utgwkk/pytest-github-actions-annotate-failures) •\n", 391 | "[uproot-browser](https://github.com/scikit-hep/uproot-browser) •\n", 392 | "[Scikit-HEP-repo-review](https://github.com/scikit-hep/repo-review) •\n", 393 | "[meson-python](https://github.com/mesonbuild/meson-python) •\n", 394 | "[flake8-errmsg](https://github.com/henryiii/flake8-errmsg) •\n", 395 | "[beautifulhugo](https://github.com/halogenica/beautifulhugo)\n", 396 | "\n", 397 | "\n", 398 | "### My sites\n", 399 | "\n", 400 | "[ISciNumPy](https://iscinumpy.dev) •\n", 401 | "[IRIS-HEP](https://iris-hep.org) •\n", 402 | "[Scikit-HEP](https://scikit-hep.org) ([Developer pages](https://scikit-hep.org/developer)) •\n", 403 | "[CLARIPHY](https://clariphy.org)\n" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "\n", 411 | "\n", 412 | "## Jim Pivarski's materials\n", 413 | "\n", 414 | "Jim taught earlier iterations of this mini-course, and his materials are great:\n", 415 | "\n", 416 | "* [Mini-course Fall 2018](https://github.com/jpivarski/python-numpy-mini-course)\n", 417 | "* [Mini-course Spring 2019](https://github.com/jpivarski/2019-04-08-picscie-numpy)\n", 418 | "* [CoDaS HEP Summer 2019](https://github.com/jpivarski/2019-07-23-codas-hep)\n", 419 | "* [DPF Summer 2019](https://github.com/jpivarski/2019-07-29-dpf-python)" 420 | ] 421 | } 422 | ], 423 | "metadata": { 424 | "kernelspec": { 425 | "display_name": "Python 3 (ipykernel)", 426 | "language": "python", 427 | "name": "python3" 428 | }, 429 | "language_info": { 430 | "codemirror_mode": { 431 | "name": "ipython", 432 | "version": 3 433 | }, 434 | "file_extension": ".py", 435 | "mimetype": "text/x-python", 436 | "name": "python", 437 | "nbconvert_exporter": "python", 438 | "pygments_lexer": "ipython3", 439 | "version": "3.11.4" 440 | }, 441 | "vscode": { 442 | "interpreter": { 
443 | "hash": "2d4e9b9c84dab3e1662173f95b81bd7f8a551068d04f5f3c42d164db7312a928" 444 | } 445 | } 446 | }, 447 | "nbformat": 4, 448 | "nbformat_minor": 4 449 | } 450 | -------------------------------------------------------------------------------- /00_numpy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy: Numeric computing\n", 8 | "\n", 9 | "The core of all modern scientific computing. You should know this one! This is NumPy + SciPy + Matplotlib, which form the core of the modern computing stack for [Scientific Python](https://scientific-python.org)." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import matplotlib.pyplot as plt\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "It is used for N-dimensional arrays:" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "v = np.array([1, 2, 3])\n", 36 | "v" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "The clever trick is that computations on an array are pre-compiled, and can be much faster than Python:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "v**2" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "Note that it was designed for large arrays; around 10 elements or less you _might_ be faster using plain Python (though it's still more expressive). You should \"vectorize\" your code (by making the arrays bigger with more dimensions) if your arrays are very small and you care about performance." 
60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "In the spirit of the course, I'll avoid covering the basics of NumPy here (arrays, simple dtypes, operations), and instead focus on intermediate features." 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## Slicing\n", 74 | "\n", 75 | "NumPy slicing does not copy an array's data; it just adjusts the start and strides of a view onto the same data. This is usually true of reshaping and adding empty (length 1) dimensions, as well. For example:" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "arr = np.zeros([2, 3, 4], dtype=int)\n", 85 | "print(arr.flags)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "arr[:1, :1, :1]" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Slicing produces a view, not a copy; we can see the effect by setting values through the slice, for example:" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "arr = np.zeros([2, 3, 4], dtype=int)\n", 111 | "x = arr[:, :2, 2:]\n", 112 | "x[...] 
= 1\n", 113 | "print(arr)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "x.strides" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "x.flags" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Flattening also avoids a copy if the data is contiguous:" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "arr = np.zeros([2, 3, 4], dtype=int)\n", 148 | "y = arr.ravel()\n", 149 | "y[:3] = 2\n", 150 | "print(arr)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "However, flattening / reshaping an array without a copy may not always be possible, such as if the data is non-contiguous:" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "y = arr.T.ravel()\n", 167 | "y[:3] = 3\n", 168 | "print(arr)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "## Selections\n", 176 | "\n", 177 | "You can also use a very similar syntax to select items from an array. Selections always *return* copies." 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "arr = np.zeros([12], dtype=int)\n", 187 | "v = arr[[1, 2, 3]]\n", 188 | "v[...] = [1, 2, 3]\n", 189 | "print(arr)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "On the left of an equals sign, they are a totally different overload, so that is supported (nothing is \"returned\", however, so there's no issue here)." 
197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "arr = np.zeros([12], dtype=int)\n", 206 | "arr[[1, 2, 3]] = [1, 2, 3]\n", 207 | "print(arr)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "You can also use a boolean mask:" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "arr = np.arange(4, dtype=int)\n", 224 | "arr[[True, False, True, False]]" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "arr = np.zeros([4], dtype=int)\n", 234 | "arr[[True, False, True, False]] = [1, 2]\n", 235 | "print(arr)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "An easy way to make a boolean array of the same shape? Use a comparison on the original array:" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "arr = np.arange(12, dtype=int)\n", 252 | "arr[arr > 5] = -1\n", 253 | "print(arr)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "Logical operators on arrays are actually overloaded bitwise operators (`&`, `|`, `~`), not actual logical operators (`and`, `or`, `not`)! This causes several problems for arrays:\n", 261 | "\n", 262 | "* `a < b < c` expands to `a < b and b < c`, which doesn't work on arrays\n", 263 | "* `a < b & b < c` is actually `a < (b & b) < c`, which is not what you wanted\n", 264 | "* `(a < b) & (b < c)` parentheses are required to make it work on arrays!\n", 265 | "\n", 266 | "There are some ideas to fix this, but nothing has made it into Python yet. So for now, parentheses it up!" 
267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "arr = np.arange(12, dtype=int)\n", 276 | "print(f\"{arr[(arr>5) & (arr<9)] = }\")" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": { 282 | "tags": [] 283 | }, 284 | "source": [ 285 | "## Random Numbers\n", 286 | "\n", 287 | "The simple random number interface `np.random.` should only be used for highly interactive work. Normal usage should use a more modern, object oriented approach; first construct a random number generator:" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "rng = np.random.default_rng()" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "You can pass an explicit seed if you want reproducibility. Then this generator has distributions as methods:" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "dist = rng.normal(0, 1, size=1_000_000)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "fix, ax = plt.subplots()\n", 322 | "ax.hist(dist, bins=100)\n", 323 | "plt.show()" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": { 329 | "jp-MarkdownHeadingCollapsed": true, 330 | "tags": [] 331 | }, 332 | "source": [ 333 | "The benefits include explicit control over the generator, reproducibility, and support for multiple independent generators." 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## Broadcasting\n", 341 | "\n", 342 | "One benefit of true 1D arrays (vs. languages that don't have 1D arrays) is that NumPy can support broadcasting. 
Broadcasting occurs whenever an operation (element-wise or matrix multiplication) encounters an array that is the wrong shape. The rules can be viewed two ways:\n", 343 | "\n", 344 | "1. If the number of dimensions does not match, prepend length-1 dimensions until they do.\n", 345 | "2. If the size of a dimension does not match and one of the dimensions is 1, duplicate along that axis to make it match.\n", 346 | "\n", 347 | "Or:\n", 348 | "1. Moving right to left, if a dimension is missing or 1, set the stride to 0 on that array, so the iteration pointer does not change along that dimension.\n", 349 | "\n", 350 | "The former is often easier to reason about, while the latter is how it's implemented (so it is very efficient)." 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "(np.ones((2, 3, 4)) * np.ones((1, 4))).shape" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "You can predict the final shape with `broadcast_shapes`:" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "np.broadcast_shapes((2, 3, 4), (1, 4))" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "You can also explicitly expand with `np.broadcast_arrays`.\n", 383 | "\n", 384 | "Broadcasting is extremely useful for array-at-a-time programming, such as for coordinate arrays and the like. If you organize your calculations to delay expansion, you can optimize a lot of computation out." 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "For example, we can use `ogrid` or `meshgrid(..., sparse=True)` to replace `mgrid` but with compressed arrays ready for broadcasting. 
Here we make a grid with 101 points on x from 2.5 to 3.5 and 501 points on y from 0 to 5 (ends inclusive):" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": null, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [ 400 | "x, y = np.mgrid[2.5:3.5:101j, 0:5:501j]\n", 401 | "radius = np.sqrt(x**2 + y**2)\n", 402 | "print(f\"{x.shape=}, {y.shape=}\")\n", 403 | "print(f\"{radius[50,400] = }\")" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [ 412 | "x, y = np.ogrid[2.5:3.5:101j, 0:5:501j]\n", 413 | "radius = np.sqrt(x**2 + y**2)\n", 414 | "print(f\"{x.shape=}, {y.shape=}\")\n", 415 | "print(f\"{radius[50,400] = }\")" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "metadata": {}, 421 | "source": [ 422 | "## Masked arrays\n", 423 | "\n", 424 | "A powerful and under supported feature is masked arrays. If values are missing, you can \"mask\" them.\n", 425 | "\n", 426 | "This is quite nice logically, but the downside is not all interfaces support masked arrays. You also use extra space for the boolean mask. Another trick is to use NaN's in a floating point array, or to use Pandas's support for None's in all arrays." 
427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": {}, 433 | "outputs": [], 434 | "source": [ 435 | "x, y = np.ogrid[-5:5:101j, -5:5:101j]\n", 436 | "radius = np.sqrt(x**2 + y**2)\n", 437 | "mradius = np.ma.masked_where(radius > 5, radius)" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": null, 443 | "metadata": {}, 444 | "outputs": [], 445 | "source": [ 446 | "plt.pcolormesh(x)" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [ 455 | "plt.pcolormesh(y)" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": [ 464 | "fix, ax = plt.subplots()\n", 465 | "ax.pcolormesh(x.T, y.T, mradius.T)\n", 466 | "ax.set_aspect(\"equal\")\n", 467 | "plt.show()" 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "## Record arrays (AKA preview for Pandas!)\n", 475 | "\n", 476 | "NumPy has support for complex DTypes. While Pandas or xarray handles structured dtypes more elegantly (and in a columnar form), if you actually have data from some source that is structured and in an array form, this can be incredibly useful." 
477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [ 485 | "arr = np.array([(1, 2.0), (3, 4.0)], dtype=[(\"id\", int), (\"val\", float)])\n", 486 | "arr" 487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "metadata": {}, 492 | "source": [ 493 | "You can select out a single named dtype (without copy) as well:" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": {}, 500 | "outputs": [], 501 | "source": [ 502 | "arr[\"val\"]" 503 | ] 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": [ 509 | "## Smaller features" 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": {}, 515 | "source": [ 516 | "### Buffer protocol\n", 517 | "\n", 518 | "There's a buffer protocol in Python 3 that allows different libraries to communicate this sort of data without copies. This is also useful to communicate things like shared memory or existing memory to NumPy." 
519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "import array" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": null, 533 | "metadata": {}, 534 | "outputs": [], 535 | "source": [ 536 | "python_array = array.array(\"d\", (1, 2, 3, 4))" 537 | ] 538 | }, 539 | { 540 | "cell_type": "markdown", 541 | "metadata": {}, 542 | "source": [ 543 | "We can explicitly convert from a buffer:" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "metadata": {}, 550 | "outputs": [], 551 | "source": [ 552 | "np.frombuffer(python_array)" 553 | ] 554 | }, 555 | { 556 | "cell_type": "markdown", 557 | "metadata": {}, 558 | "source": [ 559 | "Or most NumPy functions also work directly on buffers by converting them:" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": null, 565 | "metadata": {}, 566 | "outputs": [], 567 | "source": [ 568 | "arr = np.asarray(python_array)" 569 | ] 570 | }, 571 | { 572 | "cell_type": "markdown", 573 | "metadata": {}, 574 | "source": [ 575 | "This \"conversion\" does not copy! 
You still are looking at the buffer's original memory:" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "metadata": {}, 582 | "outputs": [], 583 | "source": [ 584 | "arr[1] = 42" 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": null, 590 | "metadata": {}, 591 | "outputs": [], 592 | "source": [ 593 | "python_array" 594 | ] 595 | }, 596 | { 597 | "cell_type": "markdown", 598 | "metadata": {}, 599 | "source": [ 600 | "You can verify that the NumPy array doesn't own the memory:" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": null, 606 | "metadata": {}, 607 | "outputs": [], 608 | "source": [ 609 | "arr.flags.owndata" 610 | ] 611 | }, 612 | { 613 | "cell_type": "markdown", 614 | "metadata": { 615 | "tags": [] 616 | }, 617 | "source": [ 618 | "## Custom arrays\n", 619 | "\n", 620 | "### UFuncs\n", 621 | "\n", 622 | "NumPy has the concept of UFuncs; functions that can take array arguments (broadcastable) and will return a broadcast result. 
For example:" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": {}, 629 | "outputs": [], 630 | "source": [ 631 | "np.sin(1)" 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": null, 637 | "metadata": {}, 638 | "outputs": [], 639 | "source": [ 640 | "np.sin(np.array([0, 1, 2]))" 641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": null, 646 | "metadata": {}, 647 | "outputs": [], 648 | "source": [ 649 | "np.add(7, np.arange(3))" 650 | ] 651 | }, 652 | { 653 | "cell_type": "markdown", 654 | "metadata": {}, 655 | "source": [ 656 | "UFuncs have several standard keyword arguments:\n", 657 | "\n", 658 | "* `out`: Output to existing array, skip allocation\n", 659 | "* `where`: Mask computation\n", 660 | "* `axes`, `axis`: axes to operate on (generalized UFuncs only)\n", 661 | "* `keepdims`: Keep reduced dims as length-1 axes (some generalized UFuncs only)\n", 662 | "* `casting='same_kind'`: rules for casting different DTypes\n", 663 | "* `order='K'`: memory layout (\"K\"eep)\n", 664 | "* `dtype=None`: Dtype for output array\n", 665 | "* `subok=True`: Output can be a subclass\n", 666 | "* `signature`/`extobj`: Exact control over dtypes/buffers\n", 667 | "\n", 668 | "(A generalized UFunc supports specific patterns, like matrix multiplication, rather than being element-wise)" 669 | ] 670 | }, 671 | { 672 | "cell_type": "markdown", 673 | "metadata": {}, 674 | "source": [ 675 | "UFuncs also support several methods and have a few properties. 
There are exactly six methods:\n", 676 | "\n", 677 | "* `__call__`: Elementwise computation\n", 678 | "* `at`: Local elementwise computation (provide indices)\n", 679 | "* `reduce`: A reduction\n", 680 | "* `reduceat`: A local reduction (provide indices)\n", 681 | "* `accumulate`: An accumulation\n", 682 | "* `outer`: An outer apply (`np.multiply.outer` is identical to `np.tensordot`)" 683 | ] 684 | }, 685 | { 686 | "cell_type": "markdown", 687 | "metadata": {}, 688 | "source": [ 689 | "### NEP 13/18\n", 690 | "\n", 691 | "If you have a custom class, you can also customize UFuncs via NEP 13!" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": {}, 698 | "outputs": [], 699 | "source": [ 700 | "# Aside: let's add pytest's raises decorator\n", 701 | "import pytest" 702 | ] 703 | }, 704 | { 705 | "cell_type": "code", 706 | "execution_count": null, 707 | "metadata": {}, 708 | "outputs": [], 709 | "source": [ 710 | "class SimpleUnc(np.ndarray):\n", 711 | " pass\n", 712 | "\n", 713 | "\n", 714 | "def simple_unc(val, unc):\n", 715 | " arr = np.array(list(zip(val, unc)), dtype=[(\"val\", float), (\"unc\", float)])\n", 716 | " return arr.view(SimpleUnc)" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": null, 722 | "metadata": {}, 723 | "outputs": [], 724 | "source": [ 725 | "a = simple_unc([1, 2, 3], [0.1, 0.1, 0.1])\n", 726 | "a" 727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "execution_count": null, 732 | "metadata": {}, 733 | "outputs": [], 734 | "source": [ 735 | "a == a" 736 | ] 737 | }, 738 | { 739 | "cell_type": "markdown", 740 | "metadata": {}, 741 | "source": [ 742 | "We can see that this does raise an error if you try to add it, though:" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": null, 748 | "metadata": {}, 749 | "outputs": [], 750 | "source": [ 751 | "with pytest.raises(TypeError):\n", 752 | " a + a" 753 | ] 754 | }, 755 | { 756 | "cell_type": 
"markdown", 757 | "metadata": {}, 758 | "source": [ 759 | "Now let's provide `__array_ufunc__`, which will allow us to customize the behavior of UFuncs. This will use Python 3.10's pattern matching for simplicity." 760 | ] 761 | }, 762 | { 763 | "cell_type": "code", 764 | "execution_count": null, 765 | "metadata": {}, 766 | "outputs": [], 767 | "source": [ 768 | "class SimpleUnc(np.ndarray):\n", 769 | " def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs):\n", 770 | " # Avoid infinite recursion\n", 771 | " raw_inputs = [np.asarray(x) for x in inputs]\n", 772 | "\n", 773 | " match (ufunc, method, raw_inputs):\n", 774 | " # Custom add / subtract\n", 775 | " case np.add | np.subtract, \"__call__\", (a, b):\n", 776 | " # This can waste an allocation\n", 777 | " (result,) = kwargs.pop(\"out\", [np.empty(self.shape, self.dtype)])\n", 778 | "\n", 779 | " ufunc(a[\"val\"], b[\"val\"], out=result[\"val\"], **kwargs)\n", 780 | " np.add(a[\"unc\"] ** 2, b[\"unc\"] ** 2, out=result[\"unc\"], **kwargs)\n", 781 | " np.sqrt(result[\"unc\"], out=result[\"unc\"], **kwargs)\n", 782 | "\n", 783 | " return result.view(self.__class__)\n", 784 | "\n", 785 | " # Fall back on whatever it would have done before - do not return this subclass\n", 786 | " case _:\n", 787 | " return super().__array_ufunc__(ufunc, method, *raw_inputs, **kwargs)" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": null, 793 | "metadata": {}, 794 | "outputs": [], 795 | "source": [ 796 | "a = simple_unc([1, 2, 3], [0.1, 0.1, 0.1])\n", 797 | "a + a" 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": null, 803 | "metadata": {}, 804 | "outputs": [], 805 | "source": [ 806 | "a - a" 807 | ] 808 | }, 809 | { 810 | "cell_type": "markdown", 811 | "metadata": {}, 812 | "source": [ 813 | "This is very simple and doesn't account for correlations, but it does show that it works." 
814 | ] 815 | }, 816 | { 817 | "cell_type": "code", 818 | "execution_count": null, 819 | "metadata": {}, 820 | "outputs": [], 821 | "source": [ 822 | "a == a" 823 | ] 824 | }, 825 | { 826 | "cell_type": "markdown", 827 | "metadata": {}, 828 | "source": [ 829 | "There's a lot more to NumPy; it's worth looking around in the docs. An example of a function you might find is `np.diff(a)`, which is equivalent to `a[1:] - a[:-1]` but reads better without repeating the array name. Also look at the huge number of useful methods in SciPy." 830 | ] 831 | } 832 | ], 833 | "metadata": { 834 | "kernelspec": { 835 | "display_name": "Python 3 (ipykernel)", 836 | "language": "python", 837 | "name": "python3" 838 | }, 839 | "language_info": { 840 | "codemirror_mode": { 841 | "name": "ipython", 842 | "version": 3 843 | }, 844 | "file_extension": ".py", 845 | "mimetype": "text/x-python", 846 | "name": "python", 847 | "nbconvert_exporter": "python", 848 | "pygments_lexer": "ipython3", 849 | "version": "3.11.4" 850 | }, 851 | "vscode": { 852 | "interpreter": { 853 | "hash": "2d4e9b9c84dab3e1662173f95b81bd7f8a551068d04f5f3c42d164db7312a928" 854 | } 855 | } 856 | }, 857 | "nbformat": 4, 858 | "nbformat_minor": 4 859 | } 860 | -------------------------------------------------------------------------------- /00_pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pandas: DataFrames for Python\n", 8 | "\n", 9 | "Python is a general purpose language. It doesn't have to be better than a specialized language, it just has to have a good enough library - it is better at all the other parts, like dealing with files, CLI/GUI, etc.\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "DataFrames (well known from R) are like Excel spreadsheets in Python. (In fact, it can open Excel files). 
They are for _structured data_. If a NumPy axis has a meaning you want to assign a name to, it's probably structured." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "We could make a DataFrame by hand, but most of the time you'll load them from various data sources. So let's make a CSV:" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "%%writefile tmp.csv\n", 42 | "id, version, os, arch\n", 43 | "cp37-macos_arm64, 3.7, macos, arm64\n", 44 | "cp38-macos_arm64, 3.8, macos, arm64\n", 45 | "cp39-macos_arm64, 3.9, macos, arm64\n", 46 | "cp37-macos_x86_64, 3.7, macos, x86_64\n", 47 | "cp38-macos_x86_64, 3.8, macos, x86_64\n", 48 | "cp39-macos_x86_64, 3.9, macos, x86_64" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "By default, pandas can read it, and even nicely format something for your screen:" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "pd.read_csv(\"tmp.csv\")" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "There are lots of powerful tools when reading and for later cleanup; let's do a better job of importing." 
72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "df = pd.read_csv(\n", 81 | " \"tmp.csv\",\n", 82 | " index_col=0,\n", 83 | " skipinitialspace=True,\n", 84 | " dtype={\"os\": \"category\", \"arch\": \"category\"},\n", 85 | ")\n", 86 | "df" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "df.info()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "We can query columns (or anything else):" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "df[\"os\"]" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "For simple names, columns can be even easier to access:" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "df.arch" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "You have quick, easy access to lots of analysis tools:" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "df.version.plot.bar();" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "You can select using a variety of methods, including NumPy style boolean arrays:" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "df[df.arch == \"arm64\"]" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "The powerful groupby lets you collect and analyze with ease. 
For example, to compute the mean for each possible arch:" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "df.groupby(\"arch\").version.mean()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Pandas pioneered a lot of DSL (Domain Specific Language) for Python, taking over the Python language to keep things simple and consistent within DataFrames. For example, it provides accessors, like the `.str` accessor, that apply normal methods to a series instead:" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "df.arch.str.upper()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "This is just scratching the surface. Besides manipulating these dataframes and series, Pandas also offers:\n", 199 | "\n", 200 | "* Fantastic date manipulation, including holidays, work weeks, and more\n", 201 | "* Great periodic tools, rolling calculations, and more\n", 202 | "\n", 203 | "Great Pandas, like vectorized NumPy, can be a little hard to write, taking a few iterations, but once you have it written, it is easy to read and very expressive." 
204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "## More reading" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "See this notebook that analyzes COVID data that runs daily on my website: " 218 | ] 219 | } 220 | ], 221 | "metadata": { 222 | "kernelspec": { 223 | "display_name": "Python [conda env:level-up-your-python]", 224 | "language": "python", 225 | "name": "conda-env-level-up-your-python-py" 226 | }, 227 | "language_info": { 228 | "codemirror_mode": { 229 | "name": "ipython", 230 | "version": 3 231 | }, 232 | "file_extension": ".py", 233 | "mimetype": "text/x-python", 234 | "name": "python", 235 | "nbconvert_exporter": "python", 236 | "pygments_lexer": "ipython3", 237 | "version": "3.10.5" 238 | } 239 | }, 240 | "nbformat": 4, 241 | "nbformat_minor": 4 242 | } 243 | -------------------------------------------------------------------------------- /01_fractal_accelerate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "tags": [] 16 | }, 17 | "source": [ 18 | "# Problem 1: The Mandelbrot Fractal" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "tags": [] 25 | }, 26 | "source": [ 27 | "This is a fun problem, for several reasons:\n", 28 | "\n", 29 | "* It produces pretty pictures\n", 30 | "* There are lots of variations to play with\n", 31 | "* The algorithm can exit early, making it non-trivial to vectorize\n", 32 | "\n", 33 | "Let's import some libraries. 
Note, to automatically see plots, sometimes you may have to do:\n", 34 | "```python\n", 35 | "%matplotlib inline\n", 36 | "```\n", 37 | "\n", 38 | "(or `notebook`, `widget`) - for the recommended setup, you should be fine without these." 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import matplotlib.pyplot as plt\n", 48 | "\n", 49 | "# Extra performance libraries for later\n", 50 | "# import numexpr\n", 51 | "import numba\n", 52 | "import numpy as np" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "You can generate a Mandelbrot fractal by applying the transform:\n", 60 | "\n", 61 | "$$\n", 62 | "z_{n+1}=z_{n}^{2}+c\n", 63 | "$$\n", 64 | "\n", 65 | "repeatedly to a regular matrix of complex numbers $c$, and recording the iteration number where the value $|z|$ surpassed some bound, usually 2. You start at $z_0 = c$.\n", 66 | "\n", 67 | "\n", 68 | "\n", 69 | "Let's set up some initial parameters and a helper matrix:" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "1j + 1" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "maxiterations = 50\n", 88 | "\n", 89 | "# 300 x 400 matrix of complex numbers from [-1.5j, 1.5j] x [-2, 2]\n", 90 | "c = np.sum(np.broadcast_arrays(*np.ogrid[-1.5j:1.5j:300j, -2:2:400j]), axis=0)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "> Note that in the interest of absolute brevity, I've taken advantage of the fact `ogrid` works with complex numbers; however, `mgrid` does not. 
`ogrid` is faster anyway.\n", 98 | "\n", 99 | "Let's make sure we have the correct `c`:" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "fig, axs = plt.subplots(1, 2, figsize=(6, 3))\n", 109 | "axs[0].pcolormesh(c.real, c.imag, c.real)\n", 110 | "axs[1].pcolormesh(c.real, c.imag, c.imag);" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "Okay, let's make the fractal as simply as possible in numpy:" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "fractal = np.zeros_like(c, dtype=np.int32)\n", 127 | "z = c.copy()\n", 128 | "\n", 129 | "for i in range(1, maxiterations + 1):\n", 130 | " z = z**2 + c # Compute z\n", 131 | " diverge = abs(z) > 2 # Divergence criteria\n", 132 | "\n", 133 | " z[diverge] = 2 # Keep number size small\n", 134 | " fractal[~diverge] = i # Fill in non-diverged iteration number" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "fig, ax = plt.subplots(figsize=(4, 3))\n", 144 | "ax.pcolormesh(c.real, c.imag, fractal);" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "In a notebook, you often start with raw code (like above) for easy debugging, but once it works, you put it in a function, like the function below:" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "def fractal_numpy(c, maxiterations):\n", 161 | " f = np.zeros_like(c, dtype=np.int32)\n", 162 | " z = c.copy()\n", 163 | "\n", 164 | " for i in range(1, maxiterations + 1):\n", 165 | " z = z * z + c # Compute z\n", 166 | " diverge = np.abs(z**2) > 2**2 # Divergence criteria\n", 167 | 
"\n", 168 | " z[diverge] = 2 # Keep number size small\n", 169 | " f[~diverge] = i # Fill in non-diverged iteration number\n", 170 | "\n", 171 | " return f" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "%%timeit\n", 181 | "fractal_numpy(c, maxiterations)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "While we wouldn't really do this normally, expecting it to be *much* slower, here is the pure Python version:" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "def fractal_pure(c, maxiterations):\n", 198 | " fractal = np.zeros_like(c, dtype=np.int32)\n", 199 | "\n", 200 | " for yi in range(c.shape[0]):\n", 201 | " for xi in range(c.shape[1]):\n", 202 | " z = cxy = c[yi, xi]\n", 203 | " for i in range(1, maxiterations + 1):\n", 204 | " z = z**2 + cxy\n", 205 | " if abs(z) > 2:\n", 206 | " break\n", 207 | " fractal[yi, xi] = i\n", 208 | " return fractal" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "%%timeit\n", 218 | "fractal_pure(c, maxiterations)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "I don't know about you, but that was much faster than I would have naively expected. Why? What is different about the *algorithm*?\n", 226 | "\n", 227 | "\n", 230 | "\n", 231 | "For later use, and for better design, let's break up the above function into two pieces; the \"on each\" part and the part that applies it to each element (vectorization)." 
232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "def on_each_python(cxy, maxiterations):\n", 241 | " z = cxy\n", 242 | " for i in range(1, maxiterations + 1):\n", 243 | " z = z * z + cxy\n", 244 | " if abs(z) > 2:\n", 245 | " return i\n", 246 | " return i\n", 247 | "\n", 248 | "\n", 249 | "def fractal_python(c, maxiterations):\n", 250 | " fractal = np.zeros_like(c, dtype=np.int32)\n", 251 | "\n", 252 | " for yi in range(c.shape[0]):\n", 253 | " for xi in range(c.shape[1]):\n", 254 | " fractal[yi, xi] = on_each_python(c[yi, xi], maxiterations)\n", 255 | "\n", 256 | " return fractal" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "%%timeit\n", 266 | "fractal_python(c, maxiterations)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "While we'll look at something much more interesting soon, here is NumPy's vectorize. This is not supposed to do much except replace the outer function we had to manually define (though I've actually found it to be noticeably faster)." 
274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "fractal_npvectorize = np.vectorize(on_each_python)" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "%%timeit\n", 292 | "fractal_npvectorize(c, maxiterations)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "We can plot any of these to make sure they work:" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "fractal = fractal_npvectorize(c, maxiterations)\n", 309 | "fig, ax = plt.subplots(figsize=(4, 3))\n", 310 | "ax.pcolormesh(c.real, c.imag, fractal)" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "### Profiling\n", 318 | "\n", 319 | "Never optimize until you have profiled! If code becomes uglier/harder to maintain, you *must* have a solid reason for doing so.\n", 320 | "\n", 321 | "Let's look at the `line_profiler` package, which has fairly nice IPython magic support. First let's load the magic:" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "%load_ext line_profiler" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "Now, we run the magic with `-f function_to_profile` and the command to profile. 
Only the lines of the function we specify will show up:" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "%lprun -f fractal_numpy fractal_numpy(c, maxiterations)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": null, 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "If you don't have external packages available, the built-in `cProfile` is also usable, though not as pretty.\n", 361 | "\n", 362 | "> #### Note\n", 363 | ">\n", 364 | "> Most standard library modules with names like `something, cSomething` were merged in Python 3, with the faster compiled implementation being selected automatically. This one was not, since `cProfile` and `profile` are not quite identical. `profile` is much slower." 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "import cProfile" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "cProfile.run(\"fractal_numpy(c, maxiterations)\", sort=2)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "markdown", 387 | "metadata": {}, 388 | "source": [ 389 | "(Note: Numba takes full advantage of the instruction set on your system, since it does not expect to be compiled and run on a different machine; thus often Numba will be faster than normally compiled code)." 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "## Numexpr\n", 397 | "\n", 398 | "How can we make NumPy faster? Expressions are slow in NumPy because they usually create lots of temporary arrays, and memory allocations are costly. 
To avoid this, you could manually reuse memory, but this would require lots of ugly rewriting, such as taking `a + b + c` and writing `t = a + b; t += c`. \n", 399 | "\n", 400 | "> Starting with NumPy 1.13, some simple expressions like the one above, will [avoid making memory copies](https://docs.scipy.org/doc/numpy/release.html#temporary-elision) (generally on Unix only)\n", 401 | "\n", 402 | "There's a second issue; even with avoiding unneeded temporaries, you still have to run multiple kernels (computation functions) - it would be nicer if you could just do the full calculation on each input and produce one output, with no in-between steps.\n", 403 | "\n", 404 | "One way to do this is with numexpr. This is an odd little library that can compile small expressions just-in-time (JIT). Here's what it looks like in practice:" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "import numexpr\n", 414 | "\n", 415 | "a, b = np.random.random_sample(size=(2, 100_000))\n", 416 | "\n", 417 | "print(\"classic\", 2 * a + 3 * b)\n", 418 | "print(\"numexpr\", numexpr.evaluate(\"2 * a + 3 * b\"))" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "%%timeit\n", 428 | "c = 2 * a + 3 * b # Try 2 * a**2 + 3 * b**3" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "%%timeit\n", 438 | "c = numexpr.evaluate(\"2 * a + 3 * b\")" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "However, numexpr is *very* limited. It has a small set of data types, a small collection of supported operators and basic functions, and works one line at a time. You can make it less magical with feed dictionaries if you want."
446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "## Numba\n", 453 | "\n", 454 | "If that managed to whet your appetite, let's look at Numba - a Python to LLVM JIT compiler! We'll see it again, but for now, here's a little demo:" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [ 463 | "@numba.vectorize\n", 464 | "def f(a, b):\n", 465 | " return 2 * a + 3 * b" 466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": null, 471 | "metadata": {}, 472 | "outputs": [], 473 | "source": [ 474 | "f" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": null, 480 | "metadata": {}, 481 | "outputs": [], 482 | "source": [ 483 | "%%time\n", 484 | "c = f(a, b)" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "metadata": {}, 491 | "outputs": [], 492 | "source": [ 493 | "%%timeit\n", 494 | "c = f(a, b)" 495 | ] 496 | }, 497 | { 498 | "cell_type": "markdown", 499 | "metadata": {}, 500 | "source": [ 501 | "It took the function we defined, pulled it apart, turned it into Low Level Virtual Machine (LLVM) code, and compiled it. No special strings or special syntax; it is just a (large) subset of Python and NumPy. And users and libraries can extend it too. It also supports:\n", 502 | "\n", 503 | "* Vectorized, general vectorized, or regular functions\n", 504 | "* Ahead of time compilation, JIT, or dynamic JIT\n", 505 | "* Parallelized targets\n", 506 | "* GPU targets via CUDA or ROCm\n", 507 | "* Nesting\n", 508 | "* Creating cfunction callbacks\n", 509 | "\n", 510 | "It is almost always as fast or faster than any other compiled solution (minus the JIT time). A couple of years ago it became much easier to install (via PIP with LLVMLite's lightweight and independent LLVM build)."
511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": {}, 516 | "source": [ 517 | "### Project: accelerate\n", 518 | "\n", 519 | "Try some of the following:\n", 520 | "\n", 521 | "* Use `numexpr` to replace parts of the above calculation. Why is this not very effective?\n", 522 | "* Try reducing the number of memory allocations by using numpy\n", 523 | "* Try accelerating using `@numba.njit`\n", 524 | "* Try accelerating using `@numba.vectorize`" 525 | ] 526 | }, 527 | { 528 | "cell_type": "markdown", 529 | "metadata": {}, 530 | "source": [ 531 | "Further reading:\n", 532 | "\n", 533 | "* [Christoph Deil's Numba talk](https://christophdeil.com/download/2019-07-11_Christoph_Deil_Numba.pdf)\n", 534 | "* [CompClass](https://github.com/henryiii/compclass): Several days visited this, including week 12\n", 535 | "* Any of Jim's classes (see intro talk)\n", 536 | "* The distributed lesson will revisit fractals" 537 | ] 538 | } 539 | ], 540 | "metadata": { 541 | "kernelspec": { 542 | "display_name": "Python [conda env:performance-minicourse] *", 543 | "language": "python", 544 | "name": "conda-env-performance-minicourse-py" 545 | }, 546 | "language_info": { 547 | "codemirror_mode": { 548 | "name": "ipython", 549 | "version": 3 550 | }, 551 | "file_extension": ".py", 552 | "mimetype": "text/x-python", 553 | "name": "python", 554 | "nbconvert_exporter": "python", 555 | "pygments_lexer": "ipython3", 556 | "version": "3.11.4" 557 | }, 558 | "vscode": { 559 | "interpreter": { 560 | "hash": "2d4e9b9c84dab3e1662173f95b81bd7f8a551068d04f5f3c42d164db7312a928" 561 | } 562 | } 563 | }, 564 | "nbformat": 4, 565 | "nbformat_minor": 4 566 | } 567 | -------------------------------------------------------------------------------- /01_fractal_interactive.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | 
"source": [ 9 | "# Make sure @jupyter-widgets/jupyterlab-manager\n", 10 | "# and jupyter-matplotlib\n", 11 | "# are installed and enabled in the extension manager.\n", 12 | "\n", 13 | "%matplotlib widget" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import matplotlib.pyplot as plt\n", 23 | "import numba\n", 24 | "\n", 25 | "# Extra performance libraries\n", 26 | "import numpy as np\n", 27 | "\n", 28 | "# Plotting\n", 29 | "from ipywidgets import interact" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "maxiterations = 50\n", 39 | "\n", 40 | "# 300 x 400 matrix of complex numbers from [-1.5j, 1.5j] x [-2, 2]\n", 41 | "c = np.sum(np.broadcast_arrays(*np.ogrid[-1.5j:1.5j:300j, -2:2:400j]), axis=0)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "def fractal_numpy(c, maxiterations):\n", 51 | " f = np.zeros_like(c, dtype=np.int32)\n", 52 | " z = c.copy()\n", 53 | "\n", 54 | " for i in range(1, maxiterations + 1):\n", 55 | " z = z**2 + c # Compute z\n", 56 | " diverge = abs(z**2) > 2**2 # Divergence criteria\n", 57 | "\n", 58 | " z[diverge] = 2 # Keep number size small\n", 59 | " f[~diverge] = i # Fill in non-diverged iteration number\n", 60 | "\n", 61 | " return f" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "@numba.njit((numba.complex128[:, :], numba.int32))\n", 71 | "def fractal_numba(c, maxiterations):\n", 72 | " fractal = np.zeros_like(c, dtype=np.int32)\n", 73 | "\n", 74 | " for yi in range(c.shape[0]):\n", 75 | " for xi in range(c.shape[1]):\n", 76 | " z = cxy = c[yi, xi]\n", 77 | " for i in range(1, maxiterations + 1):\n", 78 | " z = z**2 + cxy\n", 79 | " if abs(z) > 2:\n", 80 | " 
break\n", 81 | " fractal[yi, xi] = i\n", 82 | " return fractal" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Change the numpy calculation to the numba one. Do you see a difference?" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "fig, ax = plt.subplots()\n", 99 | "mesh = ax.imshow(c.real, vmin=0, vmax=1)\n", 100 | "ax.set_xlabel(\"Re(x)\")\n", 101 | "ax.set_ylabel(\"Im(y)\")\n", 102 | "\n", 103 | "\n", 104 | "@interact(centerx=(-2.0, 2.0, 0.01), centery=(-2.0, 2.0, 0.1), scale=(-5.0, 2, 0.01))\n", 105 | "def interactive_fractal(centerx=0.38, centery=-0.6, scale=0.25):\n", 106 | " maxiterations = 50\n", 107 | " scale = 10**scale\n", 108 | "\n", 109 | " c = np.sum(\n", 110 | " np.broadcast_arrays(\n", 111 | " *np.ogrid[\n", 112 | " (centery - scale) * 1j : (centery + scale) * 1j : 400j,\n", 113 | " (centerx - scale) : (centerx + scale) : 400j,\n", 114 | " ]\n", 115 | " ),\n", 116 | " axis=0,\n", 117 | " )\n", 118 | "\n", 119 | " f = fractal_numpy(c, maxiterations)\n", 120 | " mesh.set_data(f / 50)\n", 121 | " mesh.set_extent(\n", 122 | " (centerx - scale, centerx + scale, centery - scale, centery + scale)\n", 123 | " )" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python [conda env:performance-minicourse] *", 137 | "language": "python", 138 | "name": "conda-env-performance-minicourse-py" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.10.9" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 
4 155 | } 156 | -------------------------------------------------------------------------------- /02_temperatures.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Temperature dataset\n", 8 | "\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import numba\n", 27 | "import numexpr\n", 28 | "import numpy as np" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Let's start with a plausible problem. We have a dataset of all daily temperatures measured at Newark since 1893 and we want to analyze it. First, this is the \"pure Python\" way to open it:" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "slideshow": { 43 | "slide_type": "fragment" 44 | } 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "%%time\n", 49 | "with open(\"data/newark-temperature-avg.txt\") as file:\n", 50 | " temperatures = [float(line) for line in file]\n", 51 | "\n", 52 | "temperatures = np.array(temperatures)\n", 53 | "print(temperatures, len(temperatures), \"elements\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "> #### Note:\n", 61 | ">\n", 62 | "> Don't forget the *double* percent sign for cell magics! Single percent sign is a line magic, which measures the line (basically nothing if you were trying to measure a cell)." 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "We could easily then convert this to NumPy. 
Let's instead use NumPy directly, which will save memory:" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "%%time\n", 79 | "temperatures = np.loadtxt(\"data/newark-temperature-avg.txt\")\n", 80 | "print(temperatures, len(temperatures), \"elements\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "Sadly, this does not save time, since it's trying to be general and can support several things, like multiple columns and more. The extra time *should usually* be worth it for the gain in clarity and generality." 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "Let's load the rest of the data:" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "min_temperatures = np.loadtxt(\"data/newark-temperature-min.txt\")\n", 104 | "max_temperatures = np.loadtxt(\"data/newark-temperature-max.txt\")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "Now let's check the fraction of NaN values:" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "fraction_nan = np.sum(np.isnan(temperatures)) / len(temperatures)\n", 121 | "print(f\"Fraction of values that are NaN: {fraction_nan:.2%}\")" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Let's look at two ways of doing the same thing: Computing missing temperatures from the average of min and max temperatures:" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "%%timeit\n", 138 | "\n", 139 | "missing = np.isnan(temperatures)\n", 140 | "imputed_temperatures =
temperatures.copy()\n", 141 | "imputed_temperatures[missing] = 0.5 * (\n", 142 | " min_temperatures[missing] + max_temperatures[missing]\n", 143 | ")" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "%%timeit\n", 153 | "\n", 154 | "imputed_temperatures = np.where(\n", 155 | " np.isnan(temperatures), # condition\n", 156 | " 0.5 * (min_temperatures + max_temperatures), # if true\n", 157 | " temperatures, # if false\n", 158 | ")" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "Remember, timeit does not change the environment, so let's repeat that here. We will use np.mean, because it is more descriptive, even though it is slower. If we used `minmax_temps = np.stack([min_temperatures, max_temperatures])`, then it would be much closer in speed." 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "imputed_temperatures = np.where(\n", 175 | " np.isnan(temperatures), # condition\n", 176 | " np.mean([min_temperatures, max_temperatures], axis=0), # if true\n", 177 | " temperatures, # if false\n", 178 | ")" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "fraction_nan = np.sum(np.isnan(imputed_temperatures)) / len(imputed_temperatures)\n", 188 | "print(f\"Fraction of values that are NaN: {fraction_nan:.2%}\")" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "Now, let's try a more interesting calculation (we are limited in what we can find interesting to do here until we introduce Pandas, since it's a simple dataset).\n", 196 | "\n", 197 | "> #### Note:\n", 198 | "> \n", 199 | "> These are *very* simple calculations, but we can still see performance differences." 
200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "%%timeit\n", 209 | "c_temps = (imputed_temperatures - 32) * 5 / 9" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "Predict: Will this be slower, faster, or the same?" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "%%timeit\n", 226 | "c_temps = (imputed_temperatures - 32) * (5 / 9)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "On older NumPy, this used to be faster - due to fusion, it should be the same on Unix systems:" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "%%timeit\n", 243 | "c_temps = imputed_temperatures - 32\n", 244 | "c_temps *= 5 / 9" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "Sadly, this is too simple to get help from numexpr:" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "%%timeit\n", 261 | "c_temps = numexpr.evaluate(\"(imputed_temperatures - 32) * (5/9)\")" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "Even in this simple case, a properly compiled function can help out just a little:" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "@numba.vectorize((numba.float64(numba.float64),), target=\"parallel\")\n", 278 | "def convert(degrees):\n", 279 | " return (degrees - 32) * (5 / 9)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | 
"metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "%%timeit\n", 289 | "c_temps = convert(imputed_temperatures)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "## Pandas\n", 297 | "\n", 298 | "Let's try a little more analysis, but we will do it properly, in Pandas!\n", 299 | "\n", 300 | "The datasets above were really part of the newark-temperature csv file, so let's open that in Pandas:" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "import pandas as pd" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "df_orig = pd.read_csv(\n", 319 | " \"data/newark-temperature.csv\",\n", 320 | " index_col=\"DATE\",\n", 321 | " usecols=\"DATE TAVG TMAX TMIN\".split(),\n", 322 | " parse_dates=[\"DATE\"],\n", 323 | ")" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "df_orig.info();" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "Let's fill in the NAN values:" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "df = df_orig.copy()\n", 349 | "df.TAVG[df.TAVG.isna()] = df[df.TAVG.isna()][[\"TMAX\", \"TMIN\"]].mean(axis=1)\n", 350 | "df" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "Or, even better:" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "tavg = df_orig.TAVG.where(~df.TAVG.isna(), df_orig[[\"TMAX\", \"TMIN\"]].mean(axis=1))" 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": 
{}, 372 | "source": [ 373 | "Better still:" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "tavg = df_orig.TAVG.fillna(df_orig[[\"TMAX\", \"TMIN\"]].mean(axis=1))" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "tavg.plot(style=\".\");" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": null, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [ 400 | "tavg[\"1893-01-01\":\"1910-01-01\"].plot(style=\".\");" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [ 409 | "dfm = df.groupby(pd.Grouper(freq=\"M\")).mean()" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "dfm[\"1893-01-01\":\"1920-01-01\"].TAVG.plot(style=\".-\");" 419 | ] 420 | }, 421 | { 422 | "cell_type": "markdown", 423 | "metadata": {}, 424 | "source": [ 425 | "Another thing we can do is a rolling mean; let's average over three years:" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": null, 431 | "metadata": {}, 432 | "outputs": [], 433 | "source": [ 434 | "df.rolling(3 * 365).mean().plot();" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": {}, 440 | "source": [ 441 | "### Pandas: speed" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | "Pandas is not necessarily *faster* than raw NumPy. It is more descriptive and more powerful. 
When you need speed, ***profile*** it then write just what you need in numba or something similar.\n", 449 | "\n", 450 | "Here is the underlying array, as a PandasArray:" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [ 459 | "dfm.TAVG.array" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": {}, 465 | "source": [ 466 | "Note that a Series, the 1D array that makes up the columns of a DataFrame, actually stores two arrays; the data you see above and an index (reference)" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": {}, 472 | "source": [ 473 | "This supports the Python 3 memoryview / NumPy array protocol:" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "arr = np.asarray(dfm.TAVG.array)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "metadata": {}, 489 | "outputs": [], 490 | "source": [ 491 | "arr.flags[\"OWNDATA\"]" 492 | ] 493 | }, 494 | { 495 | "cell_type": "markdown", 496 | "metadata": {}, 497 | "source": [ 498 | "So no copies are involved. You can now take full advantage of anything you could on a NumPy array. Note that if you want a numpy array, you can use the shortcut:" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": null, 504 | "metadata": {}, 505 | "outputs": [], 506 | "source": [ 507 | "dfm.TAVG.to_numpy()" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "### Pandas: alternatives\n", 515 | "\n", 516 | "The Pandas DataFrame is wildly popular. 
So much so that it is being used as an API by projects that do things that normal Pandas does not do, such as out-of-memory DataFrames (Dask).\n", 517 | "\n", 518 | "There's also a Rust reimplementation called PolaRS (which supports Python or Rust usage, and is fast).\n", 519 | "\n", 520 | "Pandas is filling out full support for PyArrow backed storage in 1.5 & 2.0, which is also faster than current Pandas." 521 | ] 522 | }, 523 | { 524 | "cell_type": "markdown", 525 | "metadata": {}, 526 | "source": [ 527 | "## See also:\n", 528 | "\n", 529 | "* [CompClass: Structured data](https://nbviewer.jupyter.org/github/henryiii/compclass/blob/master/classes/week7/1_pandas.ipynb)\n", 530 | "\n" 531 | ] 532 | } 533 | ], 534 | "metadata": { 535 | "kernelspec": { 536 | "display_name": "Python [conda env:performance-minicourse] *", 537 | "language": "python", 538 | "name": "conda-env-performance-minicourse-py" 539 | }, 540 | "language_info": { 541 | "codemirror_mode": { 542 | "name": "ipython", 543 | "version": 3 544 | }, 545 | "file_extension": ".py", 546 | "mimetype": "text/x-python", 547 | "name": "python", 548 | "nbconvert_exporter": "python", 549 | "pygments_lexer": "ipython3", 550 | "version": "3.11.4" 551 | } 552 | }, 553 | "nbformat": 4, 554 | "nbformat_minor": 4 555 | } 556 | -------------------------------------------------------------------------------- /03_mcmc.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MCMC: sampler" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "%load_ext line_profiler" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import matplotlib.pyplot as plt\n", 26 | "import numba\n", 27 | "import numpy as np\n", 28 | "import 
scipy.stats" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Markov Chain Monte Carlo is a good example of a non-trivial algorithm we'd like to compute. To keep this tractable, we are just going to implement a gaussian sampler from MCMC. Don't worry about the code; we'll look at a much simpler Metropolis sampler at the end." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "def sampler(\n", 45 | " data, samples=4, mu_init=0.5, proposal_width=0.5, mu_prior_mu=0, mu_prior_sd=1.0\n", 46 | "):\n", 47 | " mu_current = mu_init\n", 48 | " posterior = [mu_current]\n", 49 | " for _ in range(samples):\n", 50 | " # suggest new position\n", 51 | " mu_proposal = scipy.stats.norm(mu_current, proposal_width).rvs()\n", 52 | "\n", 53 | " # Compute likelihood by multiplying probabilities of each data point\n", 54 | " likelihood_current = scipy.stats.norm(mu_current, 1).pdf(data).prod()\n", 55 | " likelihood_proposal = scipy.stats.norm(mu_proposal, 1).pdf(data).prod()\n", 56 | "\n", 57 | " # Compute prior probability of current and proposed mu\n", 58 | " prior_current = scipy.stats.norm(mu_prior_mu, mu_prior_sd).pdf(mu_current)\n", 59 | " prior_proposal = scipy.stats.norm(mu_prior_mu, mu_prior_sd).pdf(mu_proposal)\n", 60 | "\n", 61 | " p_current = likelihood_current * prior_current\n", 62 | " p_proposal = likelihood_proposal * prior_proposal\n", 63 | "\n", 64 | " # Accept proposal?\n", 65 | " p_accept = p_proposal / p_current\n", 66 | "\n", 67 | " # Usually would include prior probability, which we neglect here for simplicity\n", 68 | " accept = np.random.rand() < p_accept\n", 69 | "\n", 70 | " if accept:\n", 71 | " # Update position\n", 72 | " mu_current = mu_proposal\n", 73 | "\n", 74 | " posterior.append(mu_current)\n", 75 | "\n", 76 | " return posterior" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | 
"metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "%%time\n", 86 | "np.random.seed(123)\n", 87 | "data = np.random.randn(20)\n", 88 | "\n", 89 | "posterior = sampler(data, samples=15_000, mu_init=1.0)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "fig, axs = plt.subplots(1, 2, figsize=(12, 5))\n", 99 | "vals = posterior\n", 100 | "axs[0].plot(vals)\n", 101 | "axs[0].set_xlabel(\"Raw data\")\n", 102 | "axs[1].hist(vals[500:], bins=np.linspace(-1, 1, 100))\n", 103 | "axs[1].set_xlabel(\"Histogram\")\n", 104 | "plt.show()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "I made a big, fat performance mistake. I expect it might not be the one you might expect. If I profiled the code above, I could see it:" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "%lprun -f sampler sampler(data, samples=1_500, mu_init=1.)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "Now do you see it?" 
128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "Here's a much lighter weight norm:" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "def norm_pdf(loc, scale, x):\n", 144 | " y = (x - loc) / scale\n", 145 | " return np.exp(-(y**2) / 2) / np.sqrt(2 * np.pi) / scale" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "assert norm_pdf(0.4, 0.7, 0.2) == scipy.stats.norm(0.4, 0.7).pdf(0.2)\n", 155 | "assert scipy.stats.norm(0.3, 1).pdf(data).prod() == np.prod(norm_pdf(0.3, 1, data))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "Here's a new version. Let's remove the list appending while we are at it:" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "def sampler(\n", 172 | " data, samples=4, mu_init=0.5, proposal_width=0.5, mu_prior_mu=0, mu_prior_sd=1.0\n", 173 | "):\n", 174 | " mu_current = mu_init\n", 175 | "\n", 176 | " posterior = np.empty(samples + 1)\n", 177 | " posterior[0] = mu_current\n", 178 | "\n", 179 | " for i in range(samples):\n", 180 | " # suggest new position\n", 181 | " mu_proposal = np.random.normal(mu_current, proposal_width)\n", 182 | "\n", 183 | " # Compute likelihood by multiplying probabilities of each data point\n", 184 | " likelihood_current = np.prod(norm_pdf(mu_current, 1, data))\n", 185 | " likelihood_proposal = np.prod(norm_pdf(mu_proposal, 1, data))\n", 186 | "\n", 187 | " # Compute prior probability of current and proposed mu\n", 188 | " prior_current = norm_pdf(mu_prior_mu, mu_prior_sd, mu_current)\n", 189 | " prior_proposal = norm_pdf(mu_prior_mu, mu_prior_sd, mu_proposal)\n", 190 | "\n", 191 | " p_current = likelihood_current * 
prior_current\n", 192 | " p_proposal = likelihood_proposal * prior_proposal\n", 193 | "\n", 194 | " # Accept proposal?\n", 195 | " p_accept = p_proposal / p_current\n", 196 | "\n", 197 | " # Usually would include prior probability, which we neglect here for simplicity\n", 198 | " accept = np.random.rand() < p_accept\n", 199 | "\n", 200 | " if accept:\n", 201 | " # Update position\n", 202 | " mu_current = mu_proposal\n", 203 | "\n", 204 | " posterior[i + 1] = mu_current\n", 205 | "\n", 206 | " return posterior" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "%%time\n", 216 | "np.random.seed(123)\n", 217 | "data = np.random.randn(20)\n", 218 | "\n", 219 | "posterior = sampler(data, samples=15_000, mu_init=1.0)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "%lprun -f sampler sampler(data, samples=15_000, mu_init=1.)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "*Much* better. Instancing scipy distributions is *very* costly. But we'd love to be able to produce massive amounts of MC. Can we try Numba?" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "@numba.vectorize([numba.float64(numba.float64, numba.float64, numba.float64)])\n", 245 | "def norm_pdf(loc, scale, x):\n", 246 | " y = (x - loc) / scale\n", 247 | " return np.exp(-(y**2) / 2) / np.sqrt(2 * np.pi) / scale" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "> ### Python note\n", 255 | ">\n", 256 | "> Functions look up methods and values from their surrounding scope *when called*. 
This means that `norm_pdf` is now the new `norm_pdf`, even though we have not changed the surrounding function.\n", 257 | ">\n", 258 | "> You probably should not normally do this. And it makes this notebook depend on the order of execution, which is not ideal." 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "%%time\n", 268 | "np.random.seed(123)\n", 269 | "data = np.random.randn(20)\n", 270 | "\n", 271 | "posterior = sampler(data, samples=15000, mu_init=1.0)" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "%lprun -f sampler sampler(data, samples=15_000, mu_init=1.)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "Let's go all in and do Numba start to finish:" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "@numba.njit\n", 297 | "def norm_pdf(loc, scale, x):\n", 298 | " y = (x - loc) / scale\n", 299 | " return np.exp(-(y**2) / 2) / np.sqrt(2 * np.pi) / scale" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "Note: This is mostly redefined to show that it could be done with `njit` instead of `vectorize`; `vectorize` is actually a bit simpler/faster." 
307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "@numba.njit\n", 316 | "def sampler(\n", 317 | " data,\n", 318 | " samples=4,\n", 319 | " mu_init=0.5,\n", 320 | " proposal_width=0.5,\n", 321 | " mu_prior_mu=0,\n", 322 | " mu_prior_sd=1.0,\n", 323 | "):\n", 324 | " mu_current = mu_init\n", 325 | "\n", 326 | " posterior = np.empty(samples + 1)\n", 327 | " posterior[0] = mu_current\n", 328 | "\n", 329 | " for i in range(samples):\n", 330 | " # suggest new position\n", 331 | " mu_proposal = np.random.normal(mu_current, proposal_width)\n", 332 | "\n", 333 | " # Compute likelihood by multiplying probabilities of each data point\n", 334 | " likelihood_current = np.prod(norm_pdf(mu_current, 1, data))\n", 335 | " likelihood_proposal = np.prod(norm_pdf(mu_proposal, 1, data))\n", 336 | "\n", 337 | " # Compute prior probability of current and proposed mu\n", 338 | " prior_current = norm_pdf(mu_prior_mu, mu_prior_sd, mu_current)\n", 339 | " prior_proposal = norm_pdf(mu_prior_mu, mu_prior_sd, mu_proposal)\n", 340 | "\n", 341 | " p_current = likelihood_current * prior_current\n", 342 | " p_proposal = likelihood_proposal * prior_proposal\n", 343 | "\n", 344 | " # Accept proposal?\n", 345 | " p_accept = p_proposal / p_current\n", 346 | "\n", 347 | " # Usually would include prior probability, which we neglect here for simplicity\n", 348 | " accept = np.random.rand() < p_accept\n", 349 | "\n", 350 | " if accept:\n", 351 | " # Update position\n", 352 | " mu_current = mu_proposal\n", 353 | "\n", 354 | " posterior[i + 1] = mu_current\n", 355 | "\n", 356 | " return posterior" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "%%time\n", 366 | "np.random.seed(123)\n", 367 | "data = np.random.randn(20)\n", 368 | "\n", 369 | "posterior = sampler(data, samples=15_000, mu_init=1.0)" 370 | ] 371 
| }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "Ouch! Let's try that again:" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "%%time\n", 386 | "np.random.seed(123)\n", 387 | "data = np.random.randn(20)\n", 388 | "\n", 389 | "posterior = sampler(data, samples=15_000, mu_init=1.0)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "Sweet." 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "## Metropolis sampler\n", 404 | "\n", 405 | "Let's look at a simpler example, and one that you might find more useful/instructive." 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "## Metropolis sampler\n", 415 | "\n", 416 | "\n", 417 | "def p(x):\n", 418 | " \"Any function that you want to sample. Plot will look better if it is normalized.\"\n", 419 | " return 1 / (1 + x**2) / np.pi\n", 420 | " # return 1/(1 + x**2) * np.sin(x)**2 / (np.pi * np.sinh(1) / np.exp(1))\n", 421 | "\n", 422 | "\n", 423 | "def metropolis(p, samples=50_000, sigma=1):\n", 424 | " x = np.zeros(samples + 1)\n", 425 | " x[0] = np.random.rand()\n", 426 | "\n", 427 | " for i in range(samples):\n", 428 | " # suggest new position\n", 429 | " x_Star = np.random.normal(x[i], sigma)\n", 430 | "\n", 431 | " # Compute alpha - the fractional chance of moving to a new point\n", 432 | " alpha = p(x_Star) / p(x[i])\n", 433 | "\n", 434 | " # Accept/reject based on alpha\n", 435 | " accept = alpha > np.random.rand()\n", 436 | "\n", 437 | " # Add the current (moved?) 
point\n", 438 | " x[i + 1] = x_Star if accept else x[i]\n", 439 | "\n", 440 | " return x" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "metadata": {}, 447 | "outputs": [], 448 | "source": [ 449 | "vals = metropolis(p)" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "%%timeit\n", 459 | "vals = metropolis(p)" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": {}, 466 | "outputs": [], 467 | "source": [ 468 | "x = np.linspace(-10, 10, 200)\n", 469 | "fig, axs = plt.subplots(2, figsize=(10, 6))\n", 470 | "axs[0].plot(vals[:500], \"r\")\n", 471 | "axs[0].plot(np.arange(500, len(vals)), vals[500:], \"g\")\n", 472 | "\n", 473 | "axs[1].hist(vals[500:], bins=400, range=(-20, 20), density=True)\n", 474 | "axs[1].plot(x, p(x), lw=3)\n", 475 | "axs[1].set_xlim(-10, 10);" 476 | ] 477 | }, 478 | { 479 | "cell_type": "markdown", 480 | "metadata": {}, 481 | "source": [ 482 | "Try the other function, and try adding `@numba.njit` above." 
483 | ] 484 | } 485 | ], 486 | "metadata": { 487 | "kernelspec": { 488 | "display_name": "Python 3 (ipykernel)", 489 | "language": "python", 490 | "name": "python3" 491 | }, 492 | "language_info": { 493 | "codemirror_mode": { 494 | "name": "ipython", 495 | "version": 3 496 | }, 497 | "file_extension": ".py", 498 | "mimetype": "text/x-python", 499 | "name": "python", 500 | "nbconvert_exporter": "python", 501 | "pygments_lexer": "ipython3", 502 | "version": "3.10.9" 503 | }, 504 | "vscode": { 505 | "interpreter": { 506 | "hash": "2d4e9b9c84dab3e1662173f95b81bd7f8a551068d04f5f3c42d164db7312a928" 507 | } 508 | } 509 | }, 510 | "nbformat": 4, 511 | "nbformat_minor": 4 512 | } 513 | -------------------------------------------------------------------------------- /04_runge_kutta.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Runge–Kutta algorithm\n", 8 | "\n", 9 | "Let's try an ODE solver, the Runge-Kutta algorithm:" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "tags": [] 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import matplotlib.pyplot as plt\n", 21 | "import numba\n", 22 | "import numpy as np" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "Let's set up an ODE function to solve. 
We can write our ODE as a system of linear first order ODE equations:" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "The harmonic motion equation can be written in terms of $\\mathbf{f}(t, \\mathbf{y}) = \\dot{\\mathbf{y}}$, where this is in the standard form:\n", 37 | "\n", 38 | "$$\n", 39 | "\\mathbf{y} =\n", 40 | "\\left(\n", 41 | "\\begin{matrix}\n", 42 | "\\dot{x} \\\\\n", 43 | "x\n", 44 | "\\end{matrix}\n", 45 | "\\right)\n", 46 | "$$\n", 47 | "\n", 48 | "$$\n", 49 | "\\mathbf{f}(t, \\mathbf{y}) = \n", 50 | "\\dot{\\mathbf{y}}\n", 51 | "=\n", 52 | "\\left(\n", 53 | "\\begin{matrix}\n", 54 | "\\ddot{x} \\\\\n", 55 | "\\dot{x}\n", 56 | "\\end{matrix}\n", 57 | "\\right)\n", 58 | "=\n", 59 | "\\left(\n", 60 | "\\begin{matrix}\n", 61 | "-\\frac{k}{m} x \\\\\n", 62 | "\\dot{x}\n", 63 | "\\end{matrix}\n", 64 | "\\right)\n", 65 | "=\n", 66 | "\\left(\n", 67 | "\\begin{matrix}\n", 68 | "-\\frac{k}{m} y_1 \\\\\n", 69 | "y_0\n", 70 | "\\end{matrix}\n", 71 | "\\right)\n", 72 | "$$" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "tags": [] 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "x_max = 1 # Size of x max\n", 84 | "v_0 = 0\n", 85 | "koverm = 1 # k / m\n", 86 | "\n", 87 | "\n", 88 | "def f(_t, y):\n", 89 | " \"Y has two elements, x and v\"\n", 90 | " return np.array([-koverm * y[1], y[0]])" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "Now let's do the most basic, stupid thing possible: The Euler algorithm:" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "tags": [] 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "def euler_ivp(f, init_y, t):\n", 109 | " steps = len(t)\n", 110 | " order = len(init_y) # Number of equations\n", 111 | "\n", 112 | " y = np.empty((steps, order))\n", 113 | " y[0] = init_y # Note that this sets the elements of the first 
row\n", 114 | "\n", 115 | " for n in range(steps - 1):\n", 116 | " h = t[n + 1] - t[n]\n", 117 | "\n", 118 | " # Compute dydt based on *current* position\n", 119 | " dydt = f(t[n], y[n])\n", 120 | "\n", 121 | " # Compute next velocity and position\n", 122 | " y[n + 1] = y[n] + dydt * h\n", 123 | "\n", 124 | " return y" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "We can see, when compared to the actual solution, that the Euler method is very quickly destroyed by errors at each step (in this case, the frequency is not too bad, though):" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "tags": [] 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "ts = np.linspace(0, 40, 1000 + 1)\n", 143 | "y = euler_ivp(f, [x_max, v_0], ts)\n", 144 | "plt.plot(ts, np.cos(ts))\n", 145 | "plt.plot(ts, y[:, 0], \"--\");" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Runge-Kutta introduction\n", 153 | "\n", 154 | "Note that $h = t_{n+1} - t_n $.\n", 155 | "\n", 156 | "$$\n", 157 | "\\dot{y} = f(t,y)\n", 158 | "\\\\\n", 159 | "\\implies y = \\int f(t,y) \\, dt\n", 160 | "\\\\\n", 161 | "\\implies y_{n+1} = y_{n} + \\int_{t_n}^{t_{n+1}} f(t,y) \\, dt\n", 162 | "$$" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "Now, expand $f$ in a Taylor series around the *midpoint* of the interval:\n", 170 | "\n", 171 | "$$\n", 172 | "f(t,y) \\approx f(t_{n+\\frac{1}{2}},y_{n+\\frac{1}{2}})\n", 173 | " + \\left( t - t_{n+\\frac{1}{2}}\\right)\n", 174 | " \\dot{f}(t_{n+\\frac{1}{2}})\n", 175 | " + \\mathcal{O}(h^2)\n", 176 | "$$\n", 177 | "\n", 178 | "The second term here is antisymmetric about the midpoint of the interval, so it integrates to zero and all we have left is the first term in the integral:\n", 179 | "\n", 180 | "$$\n", 181 | "\\int_{t_n}^{t_{n+1}} f(t,y) \\, dt \\approx\n", 182 | " h\\, 
f(t_{n+\\frac{1}{2}},y_{n+\\frac{1}{2}}) + \\mathcal{O}(h^3)\n", 183 | "$$" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "Back into the original statement, we get:\n", 191 | "\n", 192 | "$$\n", 193 | "y_{n+1} \\approx \n", 194 | "\\color{blue}{\n", 195 | "y_{n}\n", 196 | "+ h\\, f(t_{n+\\frac{1}{2}},y_{n+\\frac{1}{2}})\n", 197 | "}\n", 198 | "+ \\mathcal{O}(h^3)\n", 199 | "\\tag{rk2}\n", 200 | "$$\n", 201 | "\n", 202 | "We've got one more problem! How do we calculate $f(t_{n+\\frac{1}{2}},y_{n+\\frac{1}{2}})$? We can use the Euler's algorithm that we saw last time:\n", 203 | "\n", 204 | "$$\n", 205 | "y_{n+\\frac{1}{2}}\n", 206 | "\\approx y_n + \\frac{1}{2} h \\dot{y}\n", 207 | "= \\color{red}{\n", 208 | "y_n + \\frac{1}{2} h f(t_{n},y_{n})\n", 209 | "}\n", 210 | "$$" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "Putting it together, this is our RK2 algorithm:\n", 218 | "\n", 219 | "$$\n", 220 | "\\mathbf{y}_{n+1} \\approx\n", 221 | "\\color{blue}{\n", 222 | "\\mathbf{y}_{n}\n", 223 | "+ \\mathbf{k}_2\n", 224 | "}\n", 225 | "\\tag{1.0}\n", 226 | "$$\n", 227 | "\n", 228 | "\n", 229 | "$$\n", 230 | "\\mathbf{k}_1 = h \\mathbf{f}(t_n,\\, \\mathbf{y}_n)\n", 231 | "\\tag{1.1}\n", 232 | "$$\n", 233 | "\n", 234 | "$$\n", 235 | "\\mathbf{k}_2 = h \\mathbf{f}(t_n + \\frac{h}{2},\\, \\color{red}{\\mathbf{y}_n\n", 236 | "+ \\frac{\\mathbf{k}_1}{2}})\n", 237 | "\\tag{1.2}\n", 238 | "$$\n", 239 | "\n", 240 | "We've picked up bold face to indicate that we can have a vector of ODEs." 
241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": { 247 | "tags": [] 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "def rk2_ivp(f, init_y, t):\n", 252 | " steps = len(t)\n", 253 | " order = len(init_y)\n", 254 | "\n", 255 | " y = np.empty((steps, order))\n", 256 | " y[0] = init_y\n", 257 | "\n", 258 | " for n in range(steps - 1):\n", 259 | " h = t[n + 1] - t[n]\n", 260 | "\n", 261 | " k1 = h * f(t[n], y[n]) # 1.1\n", 262 | " k2 = h * f(t[n] + h / 2, y[n] + k1 / 2) # 1.2\n", 263 | "\n", 264 | " y[n + 1] = y[n] + k2 # 1.0\n", 265 | "\n", 266 | " return y" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "Let's try this with the same grid as before:" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": { 280 | "tags": [] 281 | }, 282 | "outputs": [], 283 | "source": [ 284 | "ts = np.linspace(0, 40, 1000 + 1)\n", 285 | "y = rk2_ivp(f, [x_max, v_0], ts)\n", 286 | "plt.plot(ts, np.cos(ts))\n", 287 | "plt.plot(ts, y[:, 0], \"--\");" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "And, on a coarser grid:" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": { 301 | "tags": [] 302 | }, 303 | "outputs": [], 304 | "source": [ 305 | "ts = np.linspace(0, 40, 100 + 1)\n", 306 | "y = rk2_ivp(f, [x_max, v_0], ts)\n", 307 | "plt.plot(ts, np.cos(ts))\n", 308 | "plt.plot(ts, y[:, 0], \"--\");" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "We can get the RK4 algorithm by keeping another non-zero term in the Taylor series:\n", 316 | "\n", 317 | "$$\n", 318 | "\\mathbf{y}_{n+1} \\approx\n", 319 | "\\mathbf{y}_{n}\n", 320 | "+ \\frac{1}{6} (\\mathbf{k}_1 + 2 \\mathbf{k}_2 + 2 \\mathbf{k}_3 + \\mathbf{k}_4 )\n", 321 | "\\tag{2.0}\n", 322 | "$$\n", 323 | "\n", 324 | "$$\n", 
325 | "\\mathbf{k}_1 = h \\mathbf{f}(t_n,\\, \\mathbf{y}_n)\n", 326 | "\\tag{2.1}\n", 327 | "$$\n", 328 | "\n", 329 | "$$\n", 330 | "\\mathbf{k}_2 = h \\mathbf{f}(t_n + \\frac{h}{2},\\,\n", 331 | " \\mathbf{y}_n + \\frac{\\mathbf{k}_1}{2})\n", 332 | "\\tag{2.2}\n", 333 | "$$\n", 334 | "\n", 335 | "$$\n", 336 | "\\mathbf{k}_3 = h \\mathbf{f}(t_n + \\frac{h}{2},\\,\n", 337 | " \\mathbf{y}_n + \\frac{\\mathbf{k}_2}{2})\n", 338 | "\\tag{2.3}\n", 339 | "$$\n", 340 | "\n", 341 | "$$\n", 342 | "\\mathbf{k}_4 = h \\mathbf{f}(t_n + h,\\,\n", 343 | " \\mathbf{y}_n + \\mathbf{k}_3)\n", 344 | "\\tag{2.4}\n", 345 | "$$" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": { 352 | "tags": [] 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "def rk4_ivp(f, init_y, t):\n", 357 | " steps = len(t)\n", 358 | " order = len(init_y)\n", 359 | "\n", 360 | " y = np.empty((steps, order))\n", 361 | " y[0] = init_y\n", 362 | "\n", 363 | " for n in range(steps - 1):\n", 364 | " h = t[n + 1] - t[n]\n", 365 | "\n", 366 | " k1 = h * f(t[n], y[n]) # 2.1\n", 367 | " k2 = h * f(t[n] + h / 2, y[n] + k1 / 2) # 2.2\n", 368 | " k3 = h * f(t[n] + h / 2, y[n] + k2 / 2) # 2.3\n", 369 | " k4 = h * f(t[n] + h, y[n] + k3) # 2.4\n", 370 | "\n", 371 | " y[n + 1] = y[n] + 1 / 6 * (k1 + 2 * k2 + 2 * k3 + k4) # 2.0\n", 372 | "\n", 373 | " return y" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": {}, 379 | "source": [ 380 | "Let's try this with the same coarse grid as before:" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": { 387 | "tags": [] 388 | }, 389 | "outputs": [], 390 | "source": [ 391 | "ts = np.linspace(0, 40, 100 + 1)\n", 392 | "y = rk4_ivp(f, [x_max, v_0], ts)\n", 393 | "plt.plot(ts, np.cos(ts))\n", 394 | "plt.plot(ts, y[:, 0], \"--\");" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": { 401 | "tags": [] 402 | }, 403 | 
"outputs": [], 404 | "source": [ 405 | "%%timeit\n", 406 | "ts = np.linspace(0, 40, 1000 + 1)\n", 407 | "y = rk4_ivp(f, [x_max, v_0], ts)" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "metadata": { 414 | "tags": [] 415 | }, 416 | "outputs": [], 417 | "source": [ 418 | "f_jit = numba.njit(f)\n", 419 | "rk4_ivp_jit = numba.njit(rk4_ivp)" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": { 426 | "tags": [] 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "%%timeit\n", 431 | "ts = np.linspace(0, 40, 1000 + 1)\n", 432 | "y = rk4_ivp_jit(f_jit, np.array([x_max, v_0]), ts)" 433 | ] 434 | }, 435 | { 436 | "cell_type": "markdown", 437 | "metadata": {}, 438 | "source": [ 439 | "You can inspect the types if you'd like to add them after running once:" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [ 448 | "f_jit.inspect_types()" 449 | ] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | "## See also:\n", 456 | "\n", 457 | "* [CompClass: RK](https://nbviewer.jupyter.org/github/henryiii/compclass/blob/master/classes/week10/2_rk.ipynb)" 458 | ] 459 | } 460 | ], 461 | "metadata": { 462 | "kernelspec": { 463 | "display_name": "Python [conda env:performance-minicourse] *", 464 | "language": "python", 465 | "name": "conda-env-performance-minicourse-py" 466 | }, 467 | "language_info": { 468 | "codemirror_mode": { 469 | "name": "ipython", 470 | "version": 3 471 | }, 472 | "file_extension": ".py", 473 | "mimetype": "text/x-python", 474 | "name": "python", 475 | "nbconvert_exporter": "python", 476 | "pygments_lexer": "ipython3", 477 | "version": "3.10.9" 478 | } 479 | }, 480 | "nbformat": 4, 481 | "nbformat_minor": 4 482 | } 483 | -------------------------------------------------------------------------------- /05_distributed.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Multiprocessing\n", 8 | "\n", 9 | "Threads and processes" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Parallelism in Python is split into two groups: multithreading and multiprocessing.\n", 17 | "\n", 18 | "Multithreading means you have one Python process. Due to the way that Python is implemented with a Global Interpreter Lock (GIL), you can only run one Python instruction at a time, even from multiple threads. This is very limiting, but not the end of the world for multithreading. One loophole is that this is only valid for *Python* instructions; as long as they don't change Python's internal memory model (like changing refcounts), *compiled* code is allowed to escape the GIL. This includes JIT code like Numba!\n", 19 | "\n", 20 | "The other method is multiprocessing. This involves creating two or more Python *processes*, with their own memory space, then either transferring data (via Pickle) or sharing selected portions of memory (less common before Python 3.8, but possible). This is much heavier-weight than threading, but can be used effectively." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "#### Built-in libraries\n", 28 | "\n", 29 | "* **Thread**: The core low-level threading library.\n", 30 | "* **Threading**: A basic interface to thread, still rather low-level by modern standards.\n", 31 | "* **Multiprocessing**: Similar to threading, but with processes. Shared memory tools added in Python 3.8.\n", 32 | "* **Concurrent.futures:** Higher-level interface to both threading and multiprocessing. Introduced in Python 3.2 and backported on PyPI.\n", 33 | "* **Asyncio:** Explicit control over switching points." 
34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import asyncio\n", 43 | "import concurrent.futures\n", 44 | "import sys # noqa: F401\n", 45 | "import threading\n", 46 | "import time\n", 47 | "\n", 48 | "import matplotlib.pyplot as plt\n", 49 | "import numpy as np" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "def prepare(height, width):\n", 59 | " c = np.sum(\n", 60 | " np.broadcast_arrays(*np.ogrid[-1j : 0j : height * 1j, -1.5 : 0 : width * 1j]),\n", 61 | " axis=0,\n", 62 | " )\n", 63 | " fractal = np.zeros_like(c, dtype=np.int32)\n", 64 | " return c, fractal\n", 65 | "\n", 66 | "\n", 67 | "def run(c, fractal, maxiterations=20):\n", 68 | " z = c\n", 69 | "\n", 70 | " for i in range(1, maxiterations + 1):\n", 71 | " z = z**2 + c # Compute z\n", 72 | " diverge = abs(z) > 2 # Divergence criteria\n", 73 | "\n", 74 | " z[diverge] = 2 # Keep number size small\n", 75 | " fractal[~diverge] = i # Fill in non-diverged iteration number\n", 76 | "\n", 77 | " return fractal" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "size = 4000, 3000" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "c, fractal = prepare(*size)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "%%time\n", 105 | "fractal = run(c, fractal)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "fig, ax = plt.subplots(figsize=(12, 8))\n", 115 | "ax.imshow(fractal);" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | 
"source": [ 122 | "## Raw threading" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "import threading\n", 132 | "\n", 133 | "c, fractal = prepare(*size)\n", 134 | "\n", 135 | "\n", 136 | "def piece(i):\n", 137 | " ci = c[10 * i : 10 * (i + 1), :]\n", 138 | " fi = fractal[10 * i : 10 * (i + 1), :]\n", 139 | " run(ci, fi)\n", 140 | "\n", 141 | "\n", 142 | "workers = []\n", 143 | "for i in range(size[0] // 10):\n", 144 | " workers.append(threading.Thread(target=piece, args=(i,)))" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "Note: You can also use the OO interface, which I sometimes prefer.\n", 152 | "\n", 153 | "```python\n", 154 | "class Worker(threading.Thread):\n", 155 | " def __init__(self, c, fractal, i):\n", 156 | " super(Worker, self).__init__()\n", 157 | " self.c = c\n", 158 | " self.fractal = fractal\n", 159 | " self.i = i\n", 160 | " def run(self):\n", 161 | " run(self.c[10*self.i : 10*(self.i + 1), :], self.fractal[10*self.i : 10*(self.i + 1), :])\n", 162 | "\n", 163 | "workers = []\n", 164 | "for i in range(size[0]//10):\n", 165 | " workers.append(Worker(c, fractal, i))\n", 166 | "```" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "%%time\n", 176 | "for worker in workers:\n", 177 | " worker.start()\n", 178 | "for worker in workers:\n", 179 | " worker.join()" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "len(workers)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "fig, ax = plt.subplots(figsize=(12, 8))\n", 198 | "ax.imshow(fractal);" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 
203 | "metadata": {}, 204 | "source": [ 205 | "## High level: Executors" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": { 211 | "slideshow": { 212 | "slide_type": "slide" 213 | } 214 | }, 215 | "source": [ 216 | "Python 3 introduced an \"executor\" interface that manages workers for you. Instead of creating threads or processes with a `run` method, you create an executor and send work to it. It has the same interface for threads and processes." 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "slideshow": { 224 | "slide_type": "-" 225 | } 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "def piece(i):\n", 239 | " ci = c[10 * i : 10 * (i + 1), :]\n", 240 | " fi = fractal[10 * i : 10 * (i + 1), :]\n", 241 | " run(ci, fi)\n", 242 | "\n", 243 | "\n", 244 | "c, fractal = prepare(*size)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "%%time\n", 254 | "futures = executor.map(piece, range(size[0] // 10))\n", 255 | "for _ in futures: # iterating over them waits for the results\n", 256 | " pass" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "fig, ax = plt.subplots(figsize=(12, 8))\n", 266 | "ax.imshow(fractal);" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "## Async: no threads at all\n", 274 | "\n", 275 | "Python has native support for async functions, which don't use threads at all, and were originally even implemented on top of generators. 
This is a control scheme that makes all suspension points explicit, using the \"await\" keyword. This is much easier to program than multiple threads sharing data, since you can see the await points; you are running in a single thread except for being able to temporarily give control elsewhere when \"await\" is present.\n", 276 | "\n", 277 | "This _can_, however, be integrated with threading, and can take advantage of functions that release the GIL." 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": { 284 | "tags": [] 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "async def compute_async():\n", 289 | " await asyncio.gather(*(asyncio.to_thread(piece, x) for x in range(size[0] // 10)))" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "tags": [] 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "c, fractal = prepare(*size)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "IPython supports top-level await, but it does not support it in some magics, including the time-related magics. So we'll replace the time magic ourselves." 
308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": { 314 | "tags": [] 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "start = time.time()\n", 319 | "await compute_async()\n", 320 | "print(time.time() - start)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": { 327 | "tags": [] 328 | }, 329 | "outputs": [], 330 | "source": [ 331 | "fig, ax = plt.subplots(figsize=(12, 8))\n", 332 | "ax.imshow(fractal);" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "## Shared Memory and Multiprocessing" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "It's often difficult to get \"perfect scaling,\" N times more work from N threads, in real situations. Even though this problem is \"embarrassingly parallel\" (none of the workers need to know other workers' results), there can be scheduling overhead, contention for memory, or slow-downs due to Python's [Global Interpreter Lock](https://realpython.com/python-gil/)." 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": { 352 | "slideshow": { 353 | "slide_type": "slide" 354 | } 355 | }, 356 | "source": [ 357 | "One way to avoid the global interpreter lock is to send work to separate processes. Python interpreters in separate processes do not share memory and therefore do not need to coordinate." 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": { 363 | "slideshow": { 364 | "slide_type": "fragment" 365 | } 366 | }, 367 | "source": [ 368 | "However, that means that we can't send data by simply sharing variables. We have to send it through a `multiprocessing.Queue` (which serializes— pickles— the data so that it can go through a pipe). In Python 3.8, we have the new `multiprocessing.shared_memory` module!" 
369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": { 374 | "slideshow": { 375 | "slide_type": "slide" 376 | } 377 | }, 378 | "source": [ 379 | "You can share arrays among processes if you declare them as shared memory before launching the subprocesses. Python has a special type for this:" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": { 386 | "tags": [] 387 | }, 388 | "outputs": [], 389 | "source": [ 390 | "%%writefile multiproc.py\n", 391 | "\n", 392 | "import concurrent.futures\n", 393 | "import multiprocessing.shared_memory\n", 394 | "import time\n", 395 | "\n", 396 | "import numpy as np\n", 397 | "\n", 398 | "size = 4000, 3000\n", 399 | "\n", 400 | "\n", 401 | "def prepare(height, width):\n", 402 | " c = np.sum(\n", 403 | " np.broadcast_arrays(*np.ogrid[-1j : 0j : height * 1j, -1.5 : 0 : width * 1j]),\n", 404 | " axis=0,\n", 405 | " )\n", 406 | " fractal = np.zeros_like(c, dtype=np.int32)\n", 407 | " return c, fractal\n", 408 | "\n", 409 | "\n", 410 | "def run(c, fractal, maxiterations=20):\n", 411 | " z = c\n", 412 | "\n", 413 | " for i in range(1, maxiterations + 1):\n", 414 | " z = z**2 + c # Compute z\n", 415 | " diverge = abs(z) > 2 # Divergence criteria\n", 416 | "\n", 417 | " z[diverge] = 2 # Keep number size small\n", 418 | " fractal[~diverge] = i # Fill in non-diverged iteration number\n", 419 | "\n", 420 | " return fractal\n", 421 | "\n", 422 | "\n", 423 | "c, orig_fractal = prepare(*size)\n", 424 | "\n", 425 | "\n", 426 | "def piece(i):\n", 427 | " mem = multiprocessing.shared_memory.SharedMemory(name=\"perfdistnumpy\")\n", 428 | " fractal = np.ndarray(shape=c.shape, dtype=np.int32, buffer=mem.buf)\n", 429 | "\n", 430 | " ci = c[10 * i : 10 * (i + 1), :]\n", 431 | " fi = fractal[10 * i : 10 * (i + 1), :]\n", 432 | " run(ci, fi)\n", 433 | " mem.close()\n", 434 | "\n", 435 | "\n", 436 | "if __name__ == \"__main__\":\n", 437 | " d_size = np.int32().itemsize * 
np.prod(orig_fractal.size)\n", 438 | "\n", 439 | " mem = multiprocessing.shared_memory.SharedMemory(\n", 440 | " name=\"perfdistnumpy\", create=True, size=d_size\n", 441 | " )\n", 442 | " try:\n", 443 | " fractal = np.ndarray(shape=c.shape, dtype=np.int32, buffer=mem.buf)\n", 444 | " fractal[...] = orig_fractal\n", 445 | "\n", 446 | " executor = concurrent.futures.ProcessPoolExecutor(max_workers=8)\n", 447 | "\n", 448 | " start = time.time()\n", 449 | " futures = executor.map(piece, range(size[0] // 10))\n", 450 | " for _ in futures: # iterating over them waits for the results\n", 451 | " pass\n", 452 | " print(time.time() - start, \"seconds\")\n", 453 | " print(fractal)\n", 454 | " finally:\n", 455 | " mem.close()\n", 456 | " mem.unlink()" 457 | ] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": {}, 462 | "source": [ 463 | "This is shared across processes and can even outlive the owning process, so make _sure_ you close (per process) and unlink (once) the memory you take! Having a fixed name (like above) can be safer." 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": { 470 | "tags": [] 471 | }, 472 | "outputs": [], 473 | "source": [ 474 | "!{sys.executable} multiproc.py" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": {}, 480 | "source": [ 481 | "## Dask (external packages)" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "Now let's try an external package: Dask." 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": { 494 | "slideshow": { 495 | "slide_type": "slide" 496 | } 497 | }, 498 | "source": [ 499 | "Still, there needs to be a better way. Our array slices in `piece` are fragile: an indexing error can ruin the result. Can't the problem of scattering work be generalized?" 
500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": { 506 | "slideshow": { 507 | "slide_type": "slide" 508 | } 509 | }, 510 | "outputs": [], 511 | "source": [ 512 | "import dask.array" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "c, fractal = prepare(*size)\n", 522 | "\n", 523 | "c = dask.array.from_array(c, chunks=(10, size[1]))\n", 524 | "fractal = dask.array.from_array(fractal, chunks=(10, size[1]))" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": null, 530 | "metadata": {}, 531 | "outputs": [], 532 | "source": [ 533 | "%%time\n", 534 | "fractal = run(c, fractal)" 535 | ] 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "metadata": {}, 540 | "source": [ 541 | "That's AWESOME! So fast for so little. Let's take a look!" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": null, 547 | "metadata": {}, 548 | "outputs": [], 549 | "source": [ 550 | "fractal" 551 | ] 552 | }, 553 | { 554 | "cell_type": "markdown", 555 | "metadata": {}, 556 | "source": [ 557 | "What the heck? This is not an array: it is a description of how to make an array. Dask has stepped through our procedure and built an execution graph, encoding all the dependencies so that it can correctly apply it to individual chunks. When we execute this graph, Dask will send a chunk to each processor in the computer and combine results." 
558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "metadata": {}, 564 | "outputs": [], 565 | "source": [ 566 | "%%time\n", 567 | "fractal = fractal.compute()" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": null, 573 | "metadata": {}, 574 | "outputs": [], 575 | "source": [ 576 | "fig, ax = plt.subplots(figsize=(12, 8))\n", 577 | "ax.imshow(fractal);" 578 | ] 579 | }, 580 | { 581 | "cell_type": "markdown", 582 | "metadata": {}, 583 | "source": [ 584 | "Much better and less exciting. Dask now did our chunking for us. It's not any faster than single threaded, though; we need to point it to a scheduler/worker. This could be local, or it could be many computers anywhere in the world!" 585 | ] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": { 590 | "slideshow": { 591 | "slide_type": "slide" 592 | } 593 | }, 594 | "source": [ 595 | "We seem to have paid for this simplicity: it took twice as long as the carefully sliced `pieces` in the executor." 596 | ] 597 | }, 598 | { 599 | "cell_type": "markdown", 600 | "metadata": { 601 | "slideshow": { 602 | "slide_type": "fragment" 603 | } 604 | }, 605 | "source": [ 606 | "The reason is that our code is not as simple as it looks. It has masking and piecemeal assignments, which in principle could introduce complex dependencies. _We_ know that everything will be fine if you just chop up the array in independent sections— and thus we implemented our thread and executor-based solutions that way." 607 | ] 608 | }, 609 | { 610 | "cell_type": "markdown", 611 | "metadata": { 612 | "slideshow": { 613 | "slide_type": "slide" 614 | } 615 | }, 616 | "source": [ 617 | "Let me show you what Dask has to do for a 1×1 chunking of our problem." 
618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": null, 623 | "metadata": {}, 624 | "outputs": [], 625 | "source": [ 626 | "c, fractal = prepare(1, 1) # try 2, 2\n", 627 | "c = dask.array.from_array(c, chunks=(1, 1))\n", 628 | "fractal = dask.array.from_array(fractal, chunks=(1, 1))\n", 629 | "fractal = run(c, fractal, maxiterations=1) # try more iterations\n", 630 | "fractal.visualize()" 631 | ] 632 | }, 633 | { 634 | "cell_type": "markdown", 635 | "metadata": { 636 | "slideshow": { 637 | "slide_type": "slide" 638 | } 639 | }, 640 | "source": [ 641 | "If that were all, I'd probably stick to chopping up the grid by hand (when possible). However, _exactly the same interface_ that distributes work across cores in my laptop can distribute work around the world, just by pointing it to a remote scheduler.\n", 642 | "\n", 643 | "This is truly the ~~lazy~~ busy researcher approach!" 644 | ] 645 | }, 646 | { 647 | "cell_type": "markdown", 648 | "metadata": { 649 | "slideshow": { 650 | "slide_type": "fragment" 651 | } 652 | }, 653 | "source": [ 654 | "> Note to self: launch\n", 655 | "> \n", 656 | "> ```bash\n", 657 | "> dask-scheduler &\n", 658 | "> dask-worker --nthreads 8 127.0.0.1:8786 &\n", 659 | "> ```\n", 660 | "> \n", 661 | "> in a terminal now." 
662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": null, 667 | "metadata": { 668 | "slideshow": { 669 | "slide_type": "slide" 670 | } 671 | }, 672 | "outputs": [], 673 | "source": [ 674 | "import dask.distributed\n", 675 | "\n", 676 | "client = dask.distributed.Client(\"127.0.0.1:8786\")\n", 677 | "client" 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": null, 683 | "metadata": {}, 684 | "outputs": [], 685 | "source": [ 686 | "c, fractal = prepare(*size)\n", 687 | "\n", 688 | "c = dask.array.from_array(c, chunks=(100, size[1]))\n", 689 | "fractal = dask.array.from_array(fractal, chunks=(100, size[1]))\n", 690 | "fractal = run(c, fractal)" 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": null, 696 | "metadata": {}, 697 | "outputs": [], 698 | "source": [ 699 | "%%time\n", 700 | "fractal = client.compute(fractal, sync=True)" 701 | ] 702 | }, 703 | { 704 | "cell_type": "markdown", 705 | "metadata": { 706 | "slideshow": { 707 | "slide_type": "fragment" 708 | } 709 | }, 710 | "source": [ 711 | "Well, that was exciting!\n", 712 | "\n", 713 | "In the end, this example took longer than the single-core version, but it illustrates how array operations _can be_ distributed in a simple way." 714 | ] 715 | }, 716 | { 717 | "cell_type": "markdown", 718 | "metadata": { 719 | "slideshow": { 720 | "slide_type": "slide" 721 | } 722 | }, 723 | "source": [ 724 | "I haven't shown very much of what Dask can do. It's a general toolkit for delayed and distributed evaluation. 
As such, it provides a nice way to work on Pandas-like DataFrames that are too large for memory:" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": null, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [ 733 | "import dask.dataframe\n", 734 | "\n", 735 | "df = dask.dataframe.read_csv(\"data/nasa-exoplanets.csv\")\n", 736 | "df" 737 | ] 738 | }, 739 | { 740 | "cell_type": "markdown", 741 | "metadata": { 742 | "slideshow": { 743 | "slide_type": "fragment" 744 | } 745 | }, 746 | "source": [ 747 | "We don't see the data because they haven't been loaded. But we can get them if we need them." 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": null, 753 | "metadata": {}, 754 | "outputs": [], 755 | "source": [ 756 | "df[[\"pl_hostname\", \"pl_pnum\"]].compute()" 757 | ] 758 | }, 759 | { 760 | "cell_type": "markdown", 761 | "metadata": { 762 | "slideshow": { 763 | "slide_type": "slide" 764 | } 765 | }, 766 | "source": [ 767 | "Additionally, Dask isn't the only project filling this need. 
There's also:\n", 768 | "\n", 769 | " * **Joblib:** annotate functions to execute remotely with decorators.\n", 770 | " * **Parsl:** same, but work with conventional schedulers (Condor, Slurm, GRID); an academic project.\n", 771 | " * **PySpark:** Spark is a big, scalable project, though its Python interface has performance issues.\n", 772 | "\n", 773 | "and many smaller projects.\n", 774 | "\n", 775 | "(Distributed computing hasn't been fully figured out yet.)" 776 | ] 777 | } 778 | ], 779 | "metadata": { 780 | "kernelspec": { 781 | "display_name": "Python [conda env:performance-minicourse] *", 782 | "language": "python", 783 | "name": "conda-env-performance-minicourse-py" 784 | }, 785 | "language_info": { 786 | "codemirror_mode": { 787 | "name": "ipython", 788 | "version": 3 789 | }, 790 | "file_extension": ".py", 791 | "mimetype": "text/x-python", 792 | "name": "python", 793 | "nbconvert_exporter": "python", 794 | "pygments_lexer": "ipython3", 795 | "version": "3.11.4" 796 | } 797 | }, 798 | "nbformat": 4, 799 | "nbformat_minor": 4 800 | } 801 | -------------------------------------------------------------------------------- /06_jax.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# JAX\n", 8 | "\n", 9 | "Let's look at a problem that looks just a bit like machine learning: Curve fitting for unbinned data. We are going to ignore the actual minimizer, and instead just compute the negative log likelihood (nll)." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "jupyter": { 17 | "source_hidden": true 18 | }, 19 | "tags": [] 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# from jax.config import config\n", 24 | "# config.update(\"jax_enable_x64\", True)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np\n", 34 | "\n", 35 | "np.random.seed(42)\n", 36 | "\n", 37 | "dist = np.hstack(\n", 38 | " [\n", 39 | " np.random.normal(loc=1, scale=2.0, size=1_000_000),\n", 40 | " np.random.normal(loc=1, scale=0.5, size=1_000_000),\n", 41 | " ]\n", 42 | ")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "Let's start with NumPy, just to show how it would be done:" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "def gaussian(x, μ, σ):\n", 59 | " return 1 / np.sqrt(2 * np.pi * σ**2) * np.exp(-((x - μ) ** 2) / (2 * σ**2))\n", 60 | "\n", 61 | "\n", 62 | "def add(x, f_0, μ, σ, σ2):\n", 63 | " return f_0 * gaussian(x, μ, σ) + (1 - f_0) * gaussian(x, μ, σ2)\n", 64 | "\n", 65 | "\n", 66 | "def nll(x, f_0, μ, σ, σ2):\n", 67 | " return -np.sum(np.log(add(x, f_0, μ, σ, σ2)))" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "%%time\n", 77 | "nll(dist, *np.random.rand(4))" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "%%timeit\n", 87 | "nll(dist, *np.random.rand(4))" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "## Jax\n", 95 | "\n", 96 | "Jax is a tool from Google. It can target a wide variety of backends (CPU, GPU, TPU), can JIT compile, and can take gradients. 
It is _very_ powerful, and rather tricky, since it does quite a few things a bit differently. First let's try using it:" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "import jax\n", 106 | "import jax.numpy as jnp" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Now we'll just replace `np` with `jnp` everywhere in the above code, to produce:" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "def gaussian(x, μ, σ):\n", 123 | " return 1 / jnp.sqrt(2 * jnp.pi * σ**2) * jnp.exp(-((x - μ) ** 2) / (2 * σ**2))\n", 124 | "\n", 125 | "\n", 126 | "def add(x, f_0, μ, σ, σ2):\n", 127 | " return f_0 * gaussian(x, μ, σ) + (1 - f_0) * gaussian(x, μ, σ2)\n", 128 | "\n", 129 | "\n", 130 | "def nll(x, f_0, μ, σ, σ2):\n", 131 | " return -jnp.sum(jnp.log(add(x, f_0, μ, σ, σ2)))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Now we need just one more step - we need Jax arrays instead of NumPy arrays:" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "d_dist = jnp.asarray(dist)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "There's one more step, but let's just check this first:" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "%%time\n", 164 | "nll(d_dist, *np.random.rand(4)).block_until_ready()" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "%%timeit\n", 174 | "nll(d_dist, *np.random.rand(4)).block_until_ready()" 175 | 
] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "We probably are seeing a nice speedup here. File it away - we'll explain it later, and let's move on.\n", 182 | "\n", 183 | "Now we can JIT our function. Unlike numba, we just pass the top level function in." 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "nll_jit = jax.jit(nll)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "Now the first time we call it, JAX will \"trace\" the function and produce the XLA code for it. Like other tracers, it can't handle non-vectorized control flow." 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "%%time\n", 209 | "nll_jit(d_dist, *np.random.rand(4)).block_until_ready()" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "Now that it's primed, let's measure:" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "%%timeit\n", 226 | "nll_jit(d_dist, *np.random.rand(4)).block_until_ready()" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "This is very nice, but there is a caveat; this is in 32 bit mode. Uncomment the code at the top and _restart_ the kernel; compare the timings again." 
234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "#### Further reading:\n", 241 | "\n", 242 | "* [CompClass: Fitting](https://github.com/henryiii/compclass/blob/master/classes/week12/1_fitting.ipynb)" 243 | ] 244 | } 245 | ], 246 | "metadata": { 247 | "kernelspec": { 248 | "display_name": "Python [conda env:performance-minicourse] *", 249 | "language": "python", 250 | "name": "conda-env-performance-minicourse-py" 251 | }, 252 | "language_info": { 253 | "codemirror_mode": { 254 | "name": "ipython", 255 | "version": 3 256 | }, 257 | "file_extension": ".py", 258 | "mimetype": "text/x-python", 259 | "name": "python", 260 | "nbconvert_exporter": "python", 261 | "pygments_lexer": "ipython3", 262 | "version": "3.10.9" 263 | } 264 | }, 265 | "nbformat": 4, 266 | "nbformat_minor": 4 267 | } 268 | -------------------------------------------------------------------------------- /06b_jax.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "703c2864-e0a8-4640-ba92-2bbdb63a1b5b", 6 | "metadata": {}, 7 | "source": [ 8 | "# JAX\n", 9 | "\n", 10 | "## Grad\n", 11 | "\n", 12 | "Jax's tracer can compute gradients! 
Let's try:\n", 13 | "\n", 14 | "$$\n", 15 | "y = x^3 + x^2 + x \\\\\n", 16 | "y' = 3x^2 + 2x + 1 \\\\\n", 17 | "y'' = 6x + 2 \\\\\n", 18 | "$$" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "187ebafe-43c5-4192-b4a9-a1719eb1ace2", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import jax\n", 29 | "import jax.numpy as jnp\n", 30 | "import numpy as np" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "id": "d383b64a-6b86-4f63-938e-273f39901418", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "def f(x):\n", 41 | " return x**3 + x**2 + x" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "7442ccd4-a79b-4e87-b498-80593dca4f37", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "f(1.0)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "id": "6b445caf-50ba-4c8e-ab5c-f5e19967a7e2", 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "fp = jax.grad(f)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "id": "bab3d9f4-c21e-4311-8aa2-45d8e59e4879", 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "fp(1.0)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "id": "53f44bb6-9633-450d-8cea-bfd0aea6b34c", 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "fpp = jax.grad(fp)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "id": "cab68f79-75c1-4797-b85b-8093b914592b", 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "fpp(1.0)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "id": "4cd5a826-f100-47cb-8720-1ee591970371", 97 | "metadata": {}, 98 | "source": [ 99 | "## Tracer limitations\n" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "9c01cce6-5611-4fee-ade9-9768734aeee0", 105 | "metadata": {}, 106 | "source": [ 
107 | "Let's watch the tracer:" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "id": "9d77aaa0-e3ce-4a77-94f4-4b56ec67b9a9", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "def f(x):\n", 118 | " print(f\"{x = }\")\n", 119 | " y = x**2\n", 120 | " print(f\"{y = }\")\n", 121 | " return y" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "id": "e18d72f0-2977-4439-9671-e01687014216", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "f_jit = jax.jit(f)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "id": "c093d74b-9fc3-43d3-9d4e-8ddd99d8455b", 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "f_jit(2)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "f5f4759a-5c7c-4e32-bcef-06a65b727338", 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "f_jit(2)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "id": "4a795676-90f9-4028-aac6-f515fd893513", 157 | "metadata": {}, 158 | "source": [ 159 | "Notice that the Python code runs once, and something that is not an integer at all is being passed in. From then on, the function doesn't run the Python code anymore. 
Well, as long as you use the same input types / shapes:" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "id": "43479bc8-ba81-4829-9d83-606ee03027ea", 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "f_jit(1.0)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "id": "d45aa5d9-6649-483e-a6aa-b1be38990e5a", 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "f_jit(1.0)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "id": "7daef99c-533c-4bde-be1d-1229f4d941f7", 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "f_jit(1)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "id": "9c921c20-f61f-4d5e-a933-a7becaaab176", 195 | "metadata": {}, 196 | "source": [ 197 | "You can't trace through flow control that depends on the tracers, or dynamically change the shape of the array:" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "id": "47a0d168-eab8-45c2-820b-bbda150799f8", 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "@jax.jit\n", 208 | "def broken(x):\n", 209 | " if x == 3:\n", 210 | " return x**3\n", 211 | " return x" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "id": "ab207814-a2bb-497e-8e64-2a4086f19114", 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "broken(2)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "id": "7176568d-0651-453e-ae46-d63aa4785b1f", 227 | "metadata": {}, 228 | "source": [ 229 | "## Jax is functional\n", 230 | "\n", 231 | "Unlike NumPy, Jax arrays are immutable. You also should write pure functions (ones without side effects / state)." 
232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "id": "271e7b3c-9e10-442c-af5b-00eb2ea8df49", 237 | "metadata": {}, 238 | "source": [ 239 | "For example, you can't do an in-place set:" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "id": "8b2e0d2d-ec0c-4c62-810c-f58b2ae57ad2", 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "jarr = jnp.zeros((3, 3))\n", 250 | "jarr[np.diag(np.ones(3, dtype=bool))] = 1\n", 251 | "jarr" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "id": "c8d47edc-6cae-400e-8063-56edc29fac34", 257 | "metadata": {}, 258 | "source": [ 259 | "Jax provides a trick to make this easy to do while avoiding an in-place mutation:" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "id": "32967641-18ff-4b83-8a96-5cbc3afb7cbe", 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "j1 = jnp.zeros((3, 3))\n", 270 | "j2 = j1.at[np.diag(np.ones(3, dtype=bool))].set(1)\n", 271 | "j2" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "id": "3e0f8b99-e725-4211-a7cc-cc9890393faa", 277 | "metadata": {}, 278 | "source": [ 279 | "## Further reading\n", 280 | "\n", 281 | "See the Jax docs!\n", 282 | "\n", 283 | "https://jax.readthedocs.io/en/latest/notebooks/quickstart.html" 284 | ] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "performance-minicourse", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.10.9" 304 | }, 305 | "vscode": { 306 | "interpreter": { 307 | "hash": "493d32115352708ee205b0d097e176d1d360fe34639a9405e4a1e16a5d39b607" 308 | } 309 | } 310 | }, 311 | "nbformat": 4, 312 | 
"nbformat_minor": 5 313 | } 314 | -------------------------------------------------------------------------------- /07_callables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# LowLevelCallables" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Simple question: Let's say you have an array (nicely laid out in memory), but you *have* to loop over it. Which is faster, `np.log` or `math.log`?\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import ctypes\n", 24 | "import math\n", 25 | "\n", 26 | "import numba\n", 27 | "import numpy as np\n", 28 | "import scipy.integrate" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "arr = np.random.rand(10_000)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "%%timeit\n", 47 | "for item in arr:\n", 48 | " np.log(item)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "%%timeit\n", 58 | "for item in arr:\n", 59 | " math.log(item)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Of course, if we use array processing:" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "%%timeit\n", 76 | "np.log(arr)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "> ## Aside\n", 84 | ">\n", 85 | "> NumPy arrays were designed to work at larger scales. 
Under about 10 elements, you may even get higher performance from Python lists! Normally, the nice syntax is still worth it, and you can usually find a way to scale out to array processing by adding another dimension, but something to keep in mind." 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Let's cast the above problem in a slightly different use case, one you are much more likely to run into: functions that take a function. Let's say we have a processing function that takes an array and a function:" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "def array_map(array, func):\n", 102 | " out_array = np.empty_like(array)\n", 103 | "\n", 104 | " # Note the comma\n", 105 | " (size,) = array.shape\n", 106 | "\n", 107 | " for i in range(size):\n", 108 | " out_array[i] = func(array[i])\n", 109 | "\n", 110 | " return out_array" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "Quick check to see if the above effect is still true:" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "%%timeit\n", 127 | "array_map(arr, np.log)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "%%timeit\n", 137 | "array_map(arr, math.log)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Now, let's imagine that array_map actually contained a compiled loop. And our function was a compiled function. What will happen *inside the loop*, though, is:\n", 145 | "\n", 146 | "```\n", 147 | "Compiled -> Python -> Compiled\n", 148 | "```\n", 149 | "\n", 150 | "\n", 151 | "Which kills our performance. 
What we'd like to do is skip the Python middle man in this case.\n", 152 | "\n", 153 | "Hey, we have Numba, we don't have to imagine:" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "@numba.njit\n", 163 | "def numba_array_map(array, func):\n", 164 | " out_array = np.empty_like(array)\n", 165 | "\n", 166 | " # Note the comma\n", 167 | " (size,) = array.shape\n", 168 | "\n", 169 | " for i in range(size):\n", 170 | " out_array[i] = func(array[i])\n", 171 | "\n", 172 | " return out_array" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "Numba helpfully will not allow us to pass in a Python function. But if we pass in a numba function:" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "@numba.njit\n", 189 | "def numba_log(v):\n", 190 | " return math.log(v)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "%%timeit\n", 200 | "numba_array_map(arr, numba_log)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "Success! We can pass in a jit function into a jit function and they call each other *without* going back to Python! But wouldn't it be nice to be able to do this without a jit master function?" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "# Callbacks and Scipy\n", 215 | "\n", 216 | "Let's look at Scipy. It is a large library with lots of routines that can take functions and iterate with them. 
They have implemented a standard way to interact with compiled function pointers through what they call a LowLevelCallable interface; if you have a function pointer, you can completely skip the Python middle man!\n", 217 | "\n", 218 | "> Note that the LowLevelCallable is just a standard interface they proposed inside Scipy to handle callables from three different sources (PyCapsule, ctypes, and cffi), and to bundle in the idea of user data (absent in ctypes and cffi). The idea can be used in other places, usually with just the ctypes interface." 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "Let's try the following integral:\n", 226 | "\n", 227 | "$$\n", 228 | "\\int _{-\\infty} ^ {\\infty} e^{-a x ^2} dx = \\sqrt{\\frac{\\pi}{a}}\n", 229 | "$$" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "# @numba.vectorize([numba.double(numba.double, numba.double)])\n", 239 | "def integrand(x, a):\n", 240 | " return np.exp(-a * x**2)\n", 241 | "\n", 242 | "\n", 243 | "@np.vectorize\n", 244 | "def gauss_py(a):\n", 245 | " y, abserr = scipy.integrate.quad(integrand, -np.inf, np.inf, (a,))\n", 246 | " return y" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "> #### Note:\n", 254 | ">\n", 255 | "> Since you may not have seen it before, `np.vectorize` is a Python version of `numba.vectorize`; you don't get a performance benefit from it, but it simplifies calling this on an array." 
256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "%%time\n", 265 | "a = np.linspace(0.1, 10, 10_000)\n", 266 | "\n", 267 | "py_result = gauss_py(a)\n", 268 | "\n", 269 | "print(py_result)\n", 270 | "print(np.sqrt(np.pi / a))" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "Results are not bad, but the performance is not great, for just 10K points. Even if we add numba, not much changes. This is because we are calling integrand through Python in a loop inside the quad routine.\n", 278 | "\n", 279 | "Let's check the LowLevelCallable signature:" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "# scipy.integrate.quad?" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "Here's the key part:\n", 296 | "\n", 297 | "\n", 298 | "```\n", 299 | "func : {function, scipy.LowLevelCallable}\n", 300 | " A Python function or method to integrate. 
If `func` takes many\n", 301 | " arguments, it is integrated along the axis corresponding to the\n", 302 | " first argument.\n", 303 | "\n", 304 | " If the user desires improved integration performance, then `f` may\n", 305 | " be a `scipy.LowLevelCallable` with one of the signatures::\n", 306 | "\n", 307 | " double func(double x)\n", 308 | " double func(double x, void *user_data)\n", 309 | " double func(int n, double *xx)\n", 310 | " double func(int n, double *xx, void *user_data)\n", 311 | "\n", 312 | " The ``user_data`` is the data contained in the `scipy.LowLevelCallable`.\n", 313 | " In the call forms with ``xx``, ``n`` is the length of the ``xx``\n", 314 | " array which contains ``xx[0] == x`` and the rest of the items are\n", 315 | " numbers contained in the ``args`` argument of quad.\n", 316 | "\n", 317 | " In addition, certain ctypes call signatures are supported for\n", 318 | " backward compatibility, but those should not be used in new code.\n", 319 | "```" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": {}, 325 | "source": [ 326 | "Okay, double(double) sounds easy - but we need to pass in one more bit of information, the value of `a`. 
Let's try making that first using args (nicer), and then using user data (ugly):" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "#### Args" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "This is the signature we expect:\n", 341 | "\n", 342 | "```c\n", 343 | "double func(int n, double *xx)\n", 344 | "```" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "@numba.cfunc(numba.double(numba.int32, numba.types.CPointer(numba.double)))\n", 354 | "def integrand(n, x_ptr):\n", 355 | " x, a = numba.carray(x_ptr, (n,), np.double) # Fails if n != 2, but that's good\n", 356 | " return np.exp(-a * x**2)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "Now the numba function provides a ctypes interface through the `.ctypes` property, so we can use that in LowLevelCallable:" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "c = scipy.LowLevelCallable(integrand.ctypes)\n", 373 | "\n", 374 | "\n", 375 | "@np.vectorize\n", 376 | "def gauss_py(a):\n", 377 | " y, abserr = scipy.integrate.quad(c, -np.inf, np.inf, (a,))\n", 378 | " return y" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": {}, 385 | "outputs": [], 386 | "source": [ 387 | "%%time\n", 388 | "a = np.linspace(0.1, 10, 10_000)\n", 389 | "\n", 390 | "py_result = gauss_py(a)\n", 391 | "\n", 392 | "print(py_result)\n", 393 | "print(np.sqrt(np.pi / a))" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "Much better! We've now avoided calling Python at all once we enter the integrate loop. 
This should perform close to a full Fortran or C implementation, and it just took adding 2-3 lines of code." 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "#### User Data\n", 408 | "\n", 409 | "> Included as an example. Don't do it this way. Just don't." 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "This is the signature we expect:\n", 417 | "\n", 418 | "```c\n", 419 | "double func(double x, void *user_data)\n", 420 | "```" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "@numba.cfunc(numba.double(numba.double, numba.types.voidptr))\n", 430 | "def integrand(x, user_ptr):\n", 431 | " (a,) = numba.carray(user_ptr, (1,), np.double)\n", 432 | " return np.exp(-a * x**2)" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "a_array = np.array([0.0])\n", 442 | "c = scipy.LowLevelCallable(integrand.ctypes, a_array.ctypes.data_as(ctypes.c_void_p))\n", 443 | "\n", 444 | "\n", 445 | "@np.vectorize\n", 446 | "def gauss_py(a):\n", 447 | " a_array[0] = a\n", 448 | " y, abserr = scipy.integrate.quad(c, -np.inf, np.inf)\n", 449 | " return y" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "%%time\n", 459 | "a = np.linspace(0.1, 10, 10_000)\n", 460 | "\n", 461 | "py_result = gauss_py(a)\n", 462 | "\n", 463 | "print(py_result)\n", 464 | "print(np.sqrt(np.pi / a))" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "Note to self: you can get the address of the callable directly from the ctypes object with:\n", 472 | "\n", 473 | "```python\n", 474 | "ctypes.cast(integrand.ctypes, ctypes.c_void_p)\n", 475 | "```" 476 | ] 477 | 
}, 478 | { 479 | "cell_type": "markdown", 480 | "metadata": {}, 481 | "source": [ 482 | "## Further reading\n", 483 | "\n", 484 | "* [Cython example](https://tjol.eu/blog/lowlevelcallable-magic.html) (this lesson was based generally on this)\n", 485 | "* [Numba example](https://ilovesymposia.com/2017/03/12/scipys-new-lowlevelcallable-is-a-game-changer/)" 486 | ] 487 | } 488 | ], 489 | "metadata": { 490 | "kernelspec": { 491 | "display_name": "Python [conda env:performance-minicourse] *", 492 | "language": "python", 493 | "name": "conda-env-performance-minicourse-py" 494 | }, 495 | "language_info": { 496 | "codemirror_mode": { 497 | "name": "ipython", 498 | "version": 3 499 | }, 500 | "file_extension": ".py", 501 | "mimetype": "text/x-python", 502 | "name": "python", 503 | "nbconvert_exporter": "python", 504 | "pygments_lexer": "ipython3", 505 | "version": "3.10.9" 506 | }, 507 | "vscode": { 508 | "interpreter": { 509 | "hash": "2d4e9b9c84dab3e1662173f95b81bd7f8a551068d04f5f3c42d164db7312a928" 510 | } 511 | } 512 | }, 513 | "nbformat": 4, 514 | "nbformat_minor": 4 515 | } 516 | -------------------------------------------------------------------------------- /08_pandas_covid.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "3dbf4b0c-a305-48f3-b31b-34b71242dd61", 6 | "metadata": {}, 7 | "source": [ 8 | "# Johns Hopkins COVID-19 Dataset in Pandas\n", 9 | "\n", 10 | "COVID-19 is ravaging the globe. Let's look at the excellent Johns Hopkins\n", 11 | "dataset using Pandas. This will serve both as a guideline for getting the data\n", 12 | "and exploring on your own, as well as an example of Pandas multi-indexing in an\n", 13 | "easy to understand situation." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "id": "42355661-3b32-4221-9fdc-3bfbe534cea2", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Display final value in cell even if assigned\n", 24 | "%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "a5aa090f-3c57-412a-bac6-521477d8d85a", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from typing import Optional\n", 35 | "from urllib.error import HTTPError\n", 36 | "\n", 37 | "import matplotlib.dates as mdates\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "import numpy as np\n", 40 | "import pandas as pd\n", 41 | "\n", 42 | "plt.style.use(\"ggplot\")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "7063f140-fc5a-4366-b9bf-c8859a264840", 48 | "metadata": {}, 49 | "source": [ 50 | "Now that we've made some basic imports, let's write a function that can\n", 51 | "read in a datafile from GitHub:" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "id": "620612f7-d807-4227-9e28-14b55e1fa1c8", 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "def get_day(day: pd.Timestamp) -> pd.DataFrame:\n", 62 | " # Read in a datafile from GitHub\n", 63 | " try:\n", 64 | " table = pd.read_csv(\n", 65 | " \"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/\"\n", 66 | " \"master/csse_covid_19_data/csse_covid_19_daily_reports/\"\n", 67 | " f\"{day:%m-%d-%Y}.csv\",\n", 68 | " )\n", 69 | " except HTTPError:\n", 70 | " return pd.DataFrame()\n", 71 | "\n", 72 | " # Cleanup - sadly, the format has changed a bit over time - we can normalize that here\n", 73 | " table.columns = [\n", 74 | " f.replace(\"/\", \"_\")\n", 75 | " .replace(\" \", \"_\")\n", 76 | " .replace(\"Latitude\", \"Lat\")\n", 77 | " .replace(\"Longitude\", \"Long_\")\n", 78 | " for f in table.columns\n", 79 | " ]\n", 80 | "\n", 
81 | " # This column is new in recent datasets\n", 82 | " if \"Admin2\" not in table.columns:\n", 83 | " table[\"Admin2\"] = None\n", 84 | "\n", 85 | " # New datasets have these, but they are not very useful for now\n", 86 | " table = table.drop(\n", 87 | " columns=[\"FIPS\", \"Combined_Key\", \"Lat\", \"Long_\"], errors=\"ignore\"\n", 88 | " )\n", 89 | "\n", 90 | " # If the last update time was useful, we would make this day only, rather than day + time\n", 91 | " # table[\"Last_Update\"] = pd.to_datetime(table[\"Last_Update\"]).dt.normalize()\n", 92 | " #\n", 93 | " # However, last update is odd, let's just make this the current day\n", 94 | " table[\"Last_Update\"] = day\n", 95 | "\n", 96 | " # Make sure indexes are not NaN, which causes later bits to not work. 0 isn't\n", 97 | " # perfect, but good enough.\n", 98 | " # Return as a multindex\n", 99 | " return table.fillna(0).set_index(\n", 100 | " [\"Last_Update\", \"Country_Region\", \"Province_State\", \"Admin2\"], drop=True\n", 101 | " )" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "id": "faf76b61-89d5-4402-8fec-471fd681d3f7", 107 | "metadata": {}, 108 | "source": [ 109 | "Now let's loop over all days and build a multi-index DataFrame with the whole\n", 110 | "dataset. We'll be doing quite a bit of cleanup here as well. If you do this\n", 111 | "outside of a function, you should never modify an object in multiple cells;\n", 112 | "ideally you create an object like `df`, and make any modifications and\n", 113 | "replacements in the same cell. That way, running any cell again or running a\n", 114 | "cell multiple times will not cause unusual errors and problems to show up." 
115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "id": "c013d07c-6174-47e2-b428-5d1543a6ebcf", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "def get_all_days(\n", 125 | " *, start_day: Optional[str] = None, end_day: Optional[str] = None\n", 126 | ") -> pd.DataFrame:\n", 127 | " # Make a list of all dates\n", 128 | " date_range = pd.date_range(\n", 129 | " start_day or \"2020-01-22\", end_day or pd.Timestamp.now().normalize()\n", 130 | " )\n", 131 | "\n", 132 | " # Create a generator that returns each day's dataframe\n", 133 | " day_gen = (get_day(day) for day in date_range)\n", 134 | "\n", 135 | " # Make a big dataframe, NaN is 0\n", 136 | " df = pd.concat(day_gen).fillna(0).astype(int)\n", 137 | "\n", 138 | " # Remove a few duplicate keys\n", 139 | " df = df.groupby(level=df.index.names).sum()\n", 140 | "\n", 141 | " # Sometimes active is not filled in; we can compute easily\n", 142 | " df[\"Active\"] = np.clip(df[\"Confirmed\"] - df[\"Deaths\"] - df[\"Recovered\"], 0, None)\n", 143 | "\n", 144 | " # Change in confirmed cases (placed in a pleasing location in the table)\n", 145 | " df.insert(\n", 146 | " 1,\n", 147 | " \"ΔConfirmed\",\n", 148 | " df.groupby(level=(\"Country_Region\", \"Province_State\", \"Admin2\"))[\"Confirmed\"]\n", 149 | " .diff()\n", 150 | " .fillna(0)\n", 151 | " .astype(int),\n", 152 | " )\n", 153 | "\n", 154 | " # Change in deaths\n", 155 | " df.insert(\n", 156 | " 3,\n", 157 | " \"ΔDeaths\",\n", 158 | " df.groupby(level=(\"Country_Region\", \"Province_State\", \"Admin2\"))[\"Deaths\"]\n", 159 | " .diff()\n", 160 | " .fillna(0)\n", 161 | " .astype(int),\n", 162 | " )\n", 163 | "\n", 164 | " return df" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "id": "0d6ddef1-21fc-4e2b-b034-66524e08e13a", 170 | "metadata": {}, 171 | "source": [ 172 | "If this were a larger/real project, it would be time to bundle up the functions\n", 173 | "above and put them into a `.py` file 
- notebooks are for experimentation,\n", 174 | "teaching, and high level manipulation. Functions and classes should normally\n", 175 | "move to normal Python files when ready.\n", 176 | "\n", 177 | "Let's look at a few lines of this DataFrame to see what we have:" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "id": "5443d282-f3d1-4499-ba4d-fcedb43193e1", 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "%%time\n", 188 | "df = get_all_days(end_day=\"2020-08-01\")" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "id": "83367b8c-1b89-4f85-83f2-876f87a00050", 194 | "metadata": {}, 195 | "source": [ 196 | "The benefit of doing this all at once, in one DataFrame, should quickly become\n", 197 | "apparent. We can now use simple selection and grouping to \"ask\" almost anything\n", 198 | "about our dataset.\n", 199 | "\n", 200 | "As an example, let's look at just the US portion of the dataset. We'll use the\n", 201 | "pandas selection `.xs`:" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "id": "888f5508-9784-44a5-8206-ad04844865fc", 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "us = df.xs(\"US\", level=\"Country_Region\")" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "id": "c2814db8-b36b-4db7-b27c-c8f19f586446", 217 | "metadata": {}, 218 | "source": [ 219 | "Notice we have counties (early datasets just have one \"county\" called `\"0\"`). 
If\n", 220 | "we were only interested in states, we can group by the remaining levels and sum\n", 221 | "out the `\"Admin2\"` (county and similar) dimension:" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "id": "107fd0a0-a8c8-4c76-b731-c56233b064a7", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "by_state = us.groupby(level=(\"Last_Update\", \"Province_State\")).sum()" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "id": "db492713-9b02-403c-8f60-59905b9f0e03", 237 | "metadata": {}, 238 | "source": [ 239 | "Using the same selector as before, we can pick out North Carolina:" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "id": "e141948f-7336-4c9e-a61d-550be7a99a72", 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "by_state.xs(\"North Carolina\", level=\"Province_State\")" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "id": "238742c2-5f62-4681-bfb2-37a70d2553d9", 255 | "metadata": {}, 256 | "source": [ 257 | "We can look at all of US, as well:" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "id": "3189f9e1-0ae0-4968-822b-77fbfd71ac7d", 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "all_states = by_state.groupby(level=\"Last_Update\").sum()" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "id": "68a20b3a-6acc-415f-8705-334de7b780db", 273 | "metadata": {}, 274 | "source": [ 275 | "#### US total cases\n", 276 | "\n", 277 | "Let's try a simple plot first; this is the one you see quite often." 
278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "id": "b9054b72-e49a-4d9d-96dc-1f543f064ca2", 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "plt.figure(figsize=(10, 5))\n", 288 | "all_states.Confirmed.plot(logy=True, style=\"o\");" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "id": "624936a4-e403-4d1f-bf40-d2c63be2cb71", 294 | "metadata": {}, 295 | "source": [ 296 | "#### Italy, new cases per day\n", 297 | "\n", 298 | "As another example, let's view the new cases per day for Italy. We will add a\n", 299 | "rolling mean, just to help guide the eye through the fluctuations - it is not a\n", 300 | "fit or anything fancy." 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "id": "319bb91e-a4ab-487d-8ccc-3839ae8cce42", 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [ 310 | "interesting = df.xs(\"Italy\", level=\"Country_Region\").groupby(level=\"Last_Update\").sum()\n", 311 | "\n", 312 | "plt.figure(figsize=(10, 5))\n", 313 | "interesting.ΔConfirmed.rolling(5, center=True).mean().plot(\n", 314 | " style=\"-\", label=\"Rolling mean\"\n", 315 | ")\n", 316 | "interesting.ΔConfirmed.plot(style=\"o\", label=\"Data\")\n", 317 | "plt.ylabel(\"New cases per day\")\n", 318 | "plt.legend();" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "id": "bf7eab76-3caf-43ae-8620-a1d636ac0862", 324 | "metadata": {}, 325 | "source": [ 326 | "#### Italy, transmission rate\n", 327 | "\n", 328 | "It's more interesting to instead look at the transmission rate per day, which is\n", 329 | "new cases / active cases. The colors in the plot start changing when Italy\n", 330 | "implemented a lockdown on the 11th, and change over 14 days, which is roughly 1x\n", 331 | "the time to first symptoms. The lockdown may take longer than that to take full\n", 332 | "effect. There were several partial steps taken before the full lockdown on the\n", 333 | "
Notice the transmission is slowing noticeably!" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "id": "70351a27-d680-491f-a24c-d3b0cfe7e395", 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "interesting = df.xs(\"Italy\", level=\"Country_Region\").groupby(level=\"Last_Update\").sum()\n", 344 | "growth = interesting.ΔConfirmed / interesting.Active\n", 345 | "growth = growth[\"2020-02-24\":]\n", 346 | "\n", 347 | "# Color based on lockdown (which happened in 3 stages, 4th, 9th, and 11th)\n", 348 | "lockdown = growth.index - pd.Timestamp(\"2020-03-11\")\n", 349 | "lockdown = np.clip(lockdown.days, 0, 14) / 14\n", 350 | "\n", 351 | "fix, ax = plt.subplots(figsize=(10, 5))\n", 352 | "ax.scatter(growth.index, growth, cmap=\"cool\", c=lockdown)\n", 353 | "\n", 354 | "ax.set_ylabel(\"new cases / active cases\")\n", 355 | "\n", 356 | "# set ticks every week\n", 357 | "ax.xaxis.set_major_locator(mdates.WeekdayLocator())\n", 358 | "# set major ticks format\n", 359 | "ax.xaxis.set_major_formatter(mdates.DateFormatter(\"%b %d\"));" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "id": "6e9d292c-2e8b-4c96-b9ad-f1989bf92ea1", 365 | "metadata": {}, 366 | "source": [ 367 | "#### US, transmission rate\n", 368 | "\n", 369 | "Same plot for the US. The colors in the plot start changing when the US started\n", 370 | "the 15 plan to slow the spread, and change over 14 days, which is roughly 1x the\n", 371 | "time to first symptoms. Each state has implemented different guidelines, so the\n", 372 | "effect will be spread out even further. Again, we are see the effect of the\n", 373 | "lockdown!" 
374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "id": "1449072b-512b-426d-8bc1-40391a1905b9", 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "interesting = df.xs(\"US\", level=\"Country_Region\").groupby(level=\"Last_Update\").sum()\n", 384 | "growth = interesting.ΔConfirmed / interesting.Active\n", 385 | "growth = growth[\"2020-03-01\":]\n", 386 | "\n", 387 | "# Not really a full lockdown, just a distancing guideline + local lockdowns later\n", 388 | "lockdown = growth.index - pd.Timestamp(\"2020-03-15\")\n", 389 | "lockdown = np.clip(lockdown.days, 0, 14) / 14\n", 390 | "\n", 391 | "fix, ax = plt.subplots(figsize=(10, 5))\n", 392 | "ax.scatter(growth.index, growth, cmap=\"cool\", c=lockdown)\n", 393 | "\n", 394 | "ax.set_ylabel(\"new cases / active cases\")\n", 395 | "\n", 396 | "# set ticks every week\n", 397 | "ax.xaxis.set_major_locator(mdates.WeekdayLocator())\n", 398 | "# set major ticks format\n", 399 | "ax.xaxis.set_major_formatter(mdates.DateFormatter(\"%b %d\"));" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "id": "ec516769-7f7f-4ad4-9162-d00570d0be93", 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "id": "7d281bb7-c46f-4222-a210-efd0deccaace", 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "id": "44c4bf65-5d1f-4274-afb5-a63ba877efcb", 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [] 425 | } 426 | ], 427 | "metadata": { 428 | "kernelspec": { 429 | "display_name": "Python [conda env:performance-minicourse] *", 430 | "language": "python", 431 | "name": "conda-env-performance-minicourse-py" 432 | }, 433 | "language_info": { 434 | "codemirror_mode": { 435 | "name": "ipython", 436 | "version": 3 437 | }, 438 | "file_extension": ".py", 439 | "mimetype": 
"text/x-python", 440 | "name": "python", 441 | "nbconvert_exporter": "python", 442 | "pygments_lexer": "ipython3", 443 | "version": "3.10.9" 444 | } 445 | }, 446 | "nbformat": 4, 447 | "nbformat_minor": 5 448 | } 449 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # High Performance Python 2 | ## Princeton mini-course 3 | ### By Henry Schreiner, with Jim Pivarski 4 | 5 | ## Installation 6 | 7 | #### Binder 8 | 9 | In the minicourse, if you haven't prepared beforehand, please use this link to run online via Binder: [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/henryiii/python-performance-minicourse/master?urlpath=lab) 10 | 11 | ##### Codespaces 12 | 13 | GitHub provides 120 core-hours (60 real-time hours if you use the smallest (2-core) setting) of CodeSpaces usage every month. You can run this in a codespace: [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/henryiii/python-performance-minicourse?quickstart=1) 14 | 15 | Note that you should currently start `jupyter lab` manually from the VSCode terminal once it's built (3-5 minutes after starting it for the first time). 16 | 17 | 18 | #### Local install: 19 | 20 | If you are reading this at least 10 minutes before the course starts or you have anaconda 21 | or miniconda installed, you will probably be best off installing miniconda. 22 | This way you will keep local edits and will have an environment to play with. 23 | 24 | Get the repository: 25 | 26 | ```bash 27 | git clone https://github.com/henryiii/python-performance-minicourse.git 28 | cd python-performance-minicourse 29 | ``` 30 | 31 | Download and install 32 | [miniconda](https://docs.conda.io/en/latest/miniconda.html). 
On macOS with 33 | homebrew, just run `brew cask install miniconda` [(see my 34 | recommendations)](https://iscinumpy.gitlab.io/post/setup-a-new-mac/). 35 | 36 | Run: 37 | 38 | ```bash 39 | conda env create 40 | ``` 41 | 42 | from this directory. This will create an environment `performance-minicourse`. To use: 43 | 44 | ```bash 45 | conda activate performance-minicourse 46 | ./check.py # Check to see if you've installed this correctly 47 | jupyter lab 48 | ``` 49 | 50 | And, to disable: 51 | 52 | ```bash 53 | conda deactivate 54 | ``` 55 | 56 | or restart your terminal. 57 | 58 | 59 | > If you want to add a package, modify `environment.yml` then run: 60 | > 61 | > ```bash 62 | > conda env update 63 | > ``` 64 | 65 | 66 | ## Lessons 67 | 68 | * [00 Intro](./00_intro.ipynb): The introduction 69 | * [01 Fractal accelerate](./01_fractal_accelerate.ipynb): A look at a fractal computation, and ways to accelerate it with NumPy changes, numexpr, and numba. 70 | - [01b Fractal interactive](./01_fractal_interactive.ipynb): An interactive example using Numba. 71 | * [02 Temperatures](./02_temperatures.ipynb): A look at reading files and array manipulation in NumPy and Pandas. 72 | * [03 MCMC](./03_mcmc.ipynb): A Markov Chain Monte Carlo generator (and metropolis generator) in Python and Numba, with a focus on profiling. 73 | * [04 Runge-Kutta](./04_runge_kutta.ipynb): Implementing a popular integration algorithm in NumPy and Numba. 74 | * [05 Distributed](./05_distributed.ipynb): An exploration of ways to break up code (fractal) into chunks for multithreading, multiprocessing, and Dask distribution. 75 | * [06 Tensorflow](./06_tensorflow.ipynb): A look at implementing a Negative Log Likelihood function (used for unbinned fitting) in NumPy and Google's Tensorflow. 76 | * [07 Callables](./07_callables.ipynb): A look at Scipy's LowLevelCallable, and how to implement one with Numba. 77 | 78 | 79 | Class participants: please complete the survey that will be posted.
80 | -------------------------------------------------------------------------------- /check.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import importlib.util 4 | import sys 5 | 6 | assert sys.version_info >= (3, 8), "Python should be 3.8+" 7 | 8 | 9 | for lib in ["pandas", "numba", "line_profiler"]: 10 | if importlib.util.find_spec(lib) is None: 11 | print("Environment not setup property! Use conda, see readme.") 12 | print("Maybe `conda activate performance-minicourse` is missing?") 13 | print("At least", lib, "is missing") 14 | sys.exit(1) 15 | 16 | print("Environment appears correct, congratulations!") 17 | -------------------------------------------------------------------------------- /data/nasa-exoplanets-details.txt: -------------------------------------------------------------------------------- 1 | # This file was produced by the NASA Exoplanet Archive http://exoplanetarchive.ipac.caltech.edu 2 | # Fri Nov 9 17:35:38 2018 3 | # 4 | # COLUMN pl_hostname: Host Name 5 | # COLUMN pl_letter: Planet Letter 6 | # COLUMN pl_name: Planet Name 7 | # COLUMN pl_discmethod: Discovery Method 8 | # COLUMN pl_pnum: Number of Planets in System 9 | # COLUMN pl_orbper: Orbital Period [days] 10 | # COLUMN pl_orbsmax: Orbit Semi-Major Axis [AU]) 11 | # COLUMN pl_orbeccen: Eccentricity 12 | # COLUMN pl_orbincl: Inclination [deg] 13 | # COLUMN pl_bmassj: Planet Mass or M*sin(i) [Jupiter mass] 14 | # COLUMN pl_bmassprov: Planet Mass or M*sin(i) Provenance 15 | # COLUMN pl_radj: Planet Radius [Jupiter radii] 16 | # COLUMN pl_dens: Planet Density [g/cm**3] 17 | # COLUMN pl_ttvflag: TTV Flag 18 | # COLUMN pl_kepflag: Kepler Field Flag 19 | # COLUMN pl_k2flag: K2 Mission Flag 20 | # COLUMN pl_nnotes: Number of Notes 21 | # COLUMN ra_str: RA [sexagesimal] 22 | # COLUMN ra: RA [decimal degrees] 23 | # COLUMN dec_str: Dec [sexagesimal] 24 | # COLUMN dec: Dec [decimal degrees] 25 | # COLUMN st_dist: Distance 
[pc] 26 | # COLUMN st_optmag: Optical Magnitude [mag] 27 | # COLUMN st_optband: Optical Magnitude Band 28 | # COLUMN gaia_gmag: G-band (Gaia) [mag] 29 | # COLUMN st_teff: Effective Temperature [K] 30 | # COLUMN st_mass: Stellar Mass [Solar mass] 31 | # COLUMN st_rad: Stellar Radius [Solar radii] 32 | # COLUMN rowupdate: Date of Last Update 33 | # COLUMN pl_tranflag: Planet Transit Flag 34 | # COLUMN pl_rvflag: Planet RV Flag 35 | # COLUMN pl_imgflag: Planet Imaging Flag 36 | # COLUMN pl_astflag: Planet Astrometry Flag 37 | # COLUMN pl_omflag: Planet Orbital Modulation Flag 38 | # COLUMN pl_cbflag: Planet Circumbinary Flag 39 | # COLUMN pl_angsep: Calculated Angular Separation [mas] 40 | # COLUMN pl_orbtper: Time of Periastron [days] 41 | # COLUMN pl_orblper: Long. of Periastron [deg] 42 | # COLUMN pl_rvamp: Radial Velocity Amplitude [m/s] 43 | # COLUMN pl_eqt: Equilibrium Temperature [K] 44 | # COLUMN pl_insol: Insolation Flux [Earth flux] 45 | # COLUMN pl_massj: Planet Mass [Jupiter mass] 46 | # COLUMN pl_msinij: Planet M*sin(i) [Jupiter mass] 47 | # COLUMN pl_masse: Planet Mass [Earth mass] 48 | # COLUMN pl_msinie: Planet M*sin(i) [Earth mass] 49 | # COLUMN pl_bmasse: Planet Mass or M*sin(i) [Earth mass] 50 | # COLUMN pl_rade: Planet Radius [Earth radii] 51 | # COLUMN pl_rads: Planet Radius [Solar radii] 52 | # COLUMN pl_trandep: Transit Depth [percent] 53 | # COLUMN pl_trandur: Transit Duration [days] 54 | # COLUMN pl_tranmid: Transit Midpoint [days] 55 | # COLUMN pl_tsystemref: Time System Reference 56 | # COLUMN pl_imppar: Impact Parameter 57 | # COLUMN pl_occdep: Occultation Depth [percentage] 58 | # COLUMN pl_ratdor: Ratio of Distance to Stellar Radius 59 | # COLUMN pl_ratror: Ratio of Planet to Stellar Radius 60 | # COLUMN pl_def_reflink: Default Reference 61 | # COLUMN pl_disc: Year of Discovery 62 | # COLUMN pl_disc_reflink: Discovery Reference 63 | # COLUMN pl_locale: Discovery Locale 64 | # COLUMN pl_facility: Discovery Facility 65 | # COLUMN 
pl_telescope: Discovery Telescope 66 | # COLUMN pl_instrument: Discovery Instrument 67 | # COLUMN pl_status: Status 68 | # COLUMN pl_mnum: Number of Moons in System 69 | # COLUMN pl_st_npar: Number of Stellar and Planet Parameters 70 | # COLUMN pl_st_nref: Number of Stellar and Planet References 71 | # COLUMN pl_pelink: Link to Exoplanet Encyclopaedia 72 | # COLUMN pl_edelink: Link to Exoplanet Data Explorer 73 | # COLUMN pl_publ_date: Publication Date 74 | # COLUMN hd_name: HD Name 75 | # COLUMN hip_name: HIP Name 76 | # COLUMN st_rah: RA [hrs] 77 | # COLUMN st_glon: Galactic Longitude [deg] 78 | # COLUMN st_glat: Galactic Latitude [deg] 79 | # COLUMN st_elon: Ecliptic Longitude [deg] 80 | # COLUMN st_elat: Ecliptic Latitude [deg] 81 | # COLUMN st_plx: Parallax [mas] 82 | # COLUMN gaia_plx: Gaia Parallax [mas] 83 | # COLUMN gaia_dist: Gaia Distance [pc] 84 | # COLUMN st_pmra: Proper Motion (RA) [mas/yr] 85 | # COLUMN st_pmdec: Proper Motion (Dec) [mas/yr] 86 | # COLUMN st_pm: Total Proper Motion [mas/yr] 87 | # COLUMN gaia_pmra: Gaia Proper Motion (RA) [mas/yr] 88 | # COLUMN gaia_pmdec: Gaia Proper Motion (Dec) [mas/yr] 89 | # COLUMN gaia_pm: Gaia Total Proper Motion [mas/yr] 90 | # COLUMN st_radv: Radial Velocity [km/s] 91 | # COLUMN st_sp: Spectral Type 92 | # COLUMN st_spstr: Spectral Type 93 | # COLUMN st_logg: Stellar Surface Gravity [log10(cm/s**2)] 94 | # COLUMN st_lum: Stellar Luminosity [log(Solar)] 95 | # COLUMN st_dens: Stellar Density [g/cm**3] 96 | # COLUMN st_metfe: Stellar Metallicity [dex] 97 | # COLUMN st_metratio: Metallicity Ratio 98 | # COLUMN st_age: Stellar Age [Gyr] 99 | # COLUMN st_vsini: Rot. 
Velocity V*sin(i) [km/s] 100 | # COLUMN st_acts: Stellar Activity S-index 101 | # COLUMN st_actr: Stellar Activity log(R'HK) 102 | # COLUMN st_actlx: X-ray Activity log(Lx) 103 | # COLUMN swasp_id: SWASP Identifier 104 | # COLUMN st_nts: Number of Time Series 105 | # COLUMN st_nplc: Number of Planet Transit Light Curves 106 | # COLUMN st_nglc: Number of General Light Curves 107 | # COLUMN st_nrvc: Number of Radial Velocity Time Series 108 | # COLUMN st_naxa: Number of Amateur Light Curves 109 | # COLUMN st_nimg: Number of Images 110 | # COLUMN st_nspec: Number of Spectra 111 | # COLUMN st_uj: U-band (Johnson) [mag] 112 | # COLUMN st_vj: V-band (Johnson) [mag] 113 | # COLUMN st_bj: B-band (Johnson) [mag] 114 | # COLUMN st_rc: R-band (Cousins) [mag] 115 | # COLUMN st_ic: I-band (Cousins) [mag] 116 | # COLUMN st_j: J-band (2MASS) [mag] 117 | # COLUMN st_h: H-band (2MASS) [mag] 118 | # COLUMN st_k: Ks-band (2MASS) [mag] 119 | # COLUMN st_wise1: WISE 3.4um [mag] 120 | # COLUMN st_wise2: WISE 4.6um [mag] 121 | # COLUMN st_wise3: WISE 12.um [mag] 122 | # COLUMN st_wise4: WISE 22.um [mag] 123 | # COLUMN st_irac1: IRAC 3.6um [mag] 124 | # COLUMN st_irac2: IRAC 4.5um [mag] 125 | # COLUMN st_irac3: IRAC 5.8um [mag] 126 | # COLUMN st_irac4: IRAC 8.0um [mag] 127 | # COLUMN st_mips1: MIPS 24um [mag] 128 | # COLUMN st_mips2: MIPS 70um [mag] 129 | # COLUMN st_mips3: MIPS 160um [mag] 130 | # COLUMN st_iras1: IRAS 12um Flux [Jy] 131 | # COLUMN st_iras2: IRAS 25um Flux [Jy] 132 | # COLUMN st_iras3: IRAS 60um Flux [Jy] 133 | # COLUMN st_iras4: IRAS 100um Flux [Jy] 134 | # COLUMN st_photn: Number of Photometry Measurements 135 | # COLUMN st_umbj: U-B (Johnson) [mag] 136 | # COLUMN st_bmvj: B-V (Johnson) [mag] 137 | # COLUMN st_vjmic: V-I (Johnson-Cousins) [mag] 138 | # COLUMN st_vjmrc: V-R (Johnson-Cousins) [mag] 139 | # COLUMN st_jmh2: J-H (2MASS) [mag] 140 | # COLUMN st_hmk2: H-Ks (2MASS) [mag] 141 | # COLUMN st_jmk2: J-Ks (2MASS) [mag] 142 | # COLUMN st_bmy: b-y (Stromgren) [mag] 
143 | # COLUMN st_m1: m1 (Stromgren) [mag] 144 | # COLUMN st_c1: c1 (Stromgren) [mag] 145 | # COLUMN st_colorn: Number of Color Measurements 146 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: performance-minicourse 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python >=3.10 6 | - numpy >=1.22.0 7 | - matplotlib >=3.5 8 | - pandas >=1.4 9 | - scipy >=1.8 10 | - jupyterlab >=3 11 | - plumbum >=1.7 12 | - numba >=0.55 13 | - numexpr >=2.8 14 | - jax >=0.3 15 | - dask 16 | - distributed 17 | - line_profiler 18 | - ipywidgets 19 | - nodejs 20 | - ipympl 21 | - python-graphviz 22 | - nb_conda_kernels 23 | - jupyterlab_code_formatter 24 | - black 25 | - isort 26 | - rich 27 | -------------------------------------------------------------------------------- /img/FastestGrowing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryiii/python-performance-minicourse/417a0c695c510fc7b78a28d4dbca6744896b5257/img/FastestGrowing.png -------------------------------------------------------------------------------- /img/GitHubLang.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryiii/python-performance-minicourse/417a0c695c510fc7b78a28d4dbca6744896b5257/img/GitHubLang.png -------------------------------------------------------------------------------- /img/PyPLLang.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryiii/python-performance-minicourse/417a0c695c510fc7b78a28d4dbca6744896b5257/img/PyPLLang.png -------------------------------------------------------------------------------- /img/cards-chance-deck-19060.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/henryiii/python-performance-minicourse/417a0c695c510fc7b78a28d4dbca6744896b5257/img/cards-chance-deck-19060.jpg -------------------------------------------------------------------------------- /img/png-spec-chunks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryiii/python-performance-minicourse/417a0c695c510fc7b78a28d4dbca6744896b5257/img/png-spec-chunks.png -------------------------------------------------------------------------------- /img/png-spec-scanline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryiii/python-performance-minicourse/417a0c695c510fc7b78a28d4dbca6744896b5257/img/png-spec-scanline.png --------------------------------------------------------------------------------