├── .gitignore
├── 1-intro.pdf
├── 2-just-numpy.ipynb
├── 3-ecosystem.pdf
├── 4-pandas.ipynb
├── 5-dask.ipynb
├── 6-compilers.ipynb
├── 7-gpu.ipynb
├── 8-low-level.ipynb
├── LICENSE
├── README.md
├── data
├── nasa-exoplanets-details.txt
├── nasa-exoplanets.csv
├── newark-days-ago.txt
├── newark-temperature-avg.txt
├── newark-temperature-max.txt
├── newark-temperature-min.txt
└── newark-temperature.csv
├── img
├── cards-chance-deck-19060.jpg
├── clock-rate.jpg
├── plan-for-the-day.png
├── plan-for-the-day.svg
├── png-spec-chunks.png
├── png-spec-scanline.png
├── vectorization-example.png
└── vectorization-example.svg
├── notes.md
└── tex
├── 1-intro.tex
├── 3-ecosystem.tex
├── apl-timeline.pdf
├── caffe2-logo.png
├── cesium-logo.png
├── chainer-logo.png
├── cntk-logo.png
├── commute-by-plane.png
├── cupy.png
├── gluon-logo.png
├── hurdle9.jpg
├── keras-logo.png
├── lasagne-logo.png
├── lsst-notebook.png
├── mentions-of-programming-languages.png
├── numpy-logo.png
├── onnx-logo.png
├── pandas-logo.png
├── princeton-logo-long.png
├── princeton-logo-long.svg
├── princeton-logo.png
├── princeton-logo.svg
├── pyminuit.png
├── pypl-popularity.png
├── python-r-cpp-googletrends-data.png
├── python-r-cpp-googletrends-datascience.png
├── python-r-cpp-googletrends-dataset.png
├── python-r-cpp-googletrends-machinelearning.png
├── pytorch-logo.png
├── quantstack.png
├── root-spark-pandas-google-trends.png
├── shells-1.png
├── shells-2.png
├── shells-3.png
├── shells-4.png
├── shells-5.png
├── sklearn-logo.png
├── tensorflow-logo.png
├── thesis-code-flow.pdf
├── tshirt.jpg
├── unreasonable-effectiveness.png
└── xgboost-logo.png
/.gitignore:
--------------------------------------------------------------------------------
1 | tex/*.aux
2 | tex/*.nav
3 | tex/*.out
4 | tex/*.snm
5 | tex/*.toc
 6 | tex/1-intro.pdf
 7 | tex/3-ecosystem.pdf
8 |
9 | # Byte-compiled / optimized / DLL files
10 | __pycache__/
11 | *.py[cod]
12 | *$py.class
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | .hypothesis/
56 | .pytest_cache/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # pyenv
84 | .python-version
85 |
86 | # celery beat schedule file
87 | celerybeat-schedule
88 |
89 | # SageMath parsed files
90 | *.sage.py
91 |
92 | # Environments
93 | .env
94 | .venv
95 | env/
96 | venv/
97 | ENV/
98 | env.bak/
99 | venv.bak/
100 |
101 | # Spyder project settings
102 | .spyderproject
103 | .spyproject
104 |
105 | # Rope project settings
106 | .ropeproject
107 |
108 | # mkdocs documentation
109 | /site
110 |
111 | # mypy
112 | .mypy_cache/
113 |
--------------------------------------------------------------------------------
/1-intro.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/1-intro.pdf
--------------------------------------------------------------------------------
/3-ecosystem.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/3-ecosystem.pdf
--------------------------------------------------------------------------------
/4-pandas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Pandas"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "slideshow": {
18 | "slide_type": "fragment"
19 | }
20 | },
21 | "source": [
22 | "```\n",
23 | "conda install pandas matplotlib\n",
24 | "```\n",
25 | "\n",
26 | "_(and numpy from before)_"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "slideshow": {
33 | "slide_type": "slide"
34 | }
35 | },
36 | "source": [
37 | "Let's go back to the very first problem from this morning. We have an incomplete record of Newark temperatures since 1893."
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "import numpy\n",
47 | "temperatures = numpy.loadtxt(\"data/newark-temperature-avg.txt\")\n",
48 | "temperatures"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {
54 | "slideshow": {
55 | "slide_type": "fragment"
56 | }
57 | },
58 | "source": [
59 | "But instead of analyzing it with raw Numpy, let's use Pandas."
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {
66 | "scrolled": true
67 | },
68 | "outputs": [],
69 | "source": [
70 | "import pandas\n",
71 | "temperatures = pandas.Series(temperatures)\n",
72 | "temperatures"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {
78 | "slideshow": {
79 | "slide_type": "slide"
80 | }
81 | },
82 | "source": [
83 | "Numpy was designed to do fast calculations with minimal dependencies.\n",
84 | "\n",
85 | "Pandas was designed to make a data analyst's life easier."
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "%matplotlib inline"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "temperatures.plot()"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {
109 | "slideshow": {
110 | "slide_type": "fragment"
111 | }
112 | },
113 | "source": [
114 | "Behold the turning of the seasons!"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {
120 | "slideshow": {
121 | "slide_type": "slide"
122 | }
123 | },
124 | "source": [
125 | "You can think of a Pandas Series as a Numpy array with bells and whistles, but it's more than that."
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {
131 | "slideshow": {
132 | "slide_type": "fragment"
133 | }
134 | },
135 | "source": [
136 | "It is an _indexed_ Numpy array with bells and whistles."
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "metadata": {
143 | "slideshow": {
144 | "slide_type": "slide"
145 | }
146 | },
147 | "outputs": [],
148 | "source": [
149 | "temperatures.index"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {
156 | "slideshow": {
157 | "slide_type": "fragment"
158 | }
159 | },
160 | "outputs": [],
161 | "source": [
162 | "temperatures.index = numpy.loadtxt(\"data/newark-days-ago.txt\")\n",
163 | "temperatures.index.name = \"days ago\""
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {},
170 | "outputs": [],
171 | "source": [
172 | "temperatures.index"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {
179 | "slideshow": {
180 | "slide_type": "fragment"
181 | }
182 | },
183 | "outputs": [],
184 | "source": [
185 | "temperatures.plot()"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {
192 | "slideshow": {
193 | "slide_type": "slide"
194 | }
195 | },
196 | "outputs": [],
197 | "source": [
198 | "temperatures.index = pandas.to_datetime(temperatures.index, unit=\"D\", origin=pandas.Timestamp(\"2018-11-04\"))\n",
199 | "temperatures.index.name = \"date\"\n",
200 | "temperatures.index"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "metadata": {},
207 | "outputs": [],
208 | "source": [
209 | "temperatures.plot()"
210 | ]
211 | },
212 | {
213 | "cell_type": "markdown",
214 | "metadata": {
215 | "slideshow": {
216 | "slide_type": "slide"
217 | }
218 | },
219 | "source": [
220 | "Now let's return to the problem of imputing the missing temperature data."
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": null,
226 | "metadata": {},
227 | "outputs": [],
228 | "source": [
229 | "min_temperatures = pandas.Series(numpy.loadtxt(\"data/newark-temperature-min.txt\"))\n",
230 | "max_temperatures = pandas.Series(numpy.loadtxt(\"data/newark-temperature-max.txt\"))\n",
231 | "min_temperatures.index = temperatures.index\n",
232 | "max_temperatures.index = temperatures.index"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": null,
238 | "metadata": {},
239 | "outputs": [],
240 | "source": [
241 | "min_temperatures.plot()"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": null,
247 | "metadata": {},
248 | "outputs": [],
249 | "source": [
250 | "max_temperatures.plot()"
251 | ]
252 | },
253 | {
254 | "cell_type": "markdown",
255 | "metadata": {
256 | "slideshow": {
257 | "slide_type": "slide"
258 | }
259 | },
260 | "source": [
261 | "It would be more convenient if these were in the same object. A DataFrame is several Series glued together with a common index."
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "metadata": {
268 | "scrolled": true
269 | },
270 | "outputs": [],
271 | "source": [
272 | "df = pandas.concat([temperatures, min_temperatures, max_temperatures], axis=\"columns\")\n",
273 | "df.columns = [\"avg\", \"min\", \"max\"] # name them!\n",
274 | "df"
275 | ]
276 | },
277 | {
278 | "cell_type": "markdown",
279 | "metadata": {
280 | "slideshow": {
281 | "slide_type": "slide"
282 | }
283 | },
284 | "source": [
285 | "Performing calculations on columns of a DataFrame is very much like Numpy: all of the elementwise operations, masking, and fancy indexing apply. In fact, Pandas usually just passes off this work on Numpy. However, it _organizes_ that work."
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "metadata": {
292 | "scrolled": true
293 | },
294 | "outputs": [],
295 | "source": [
296 | "df[\"min-max avg\"] = (df[\"min\"] + df[\"max\"]) / 2\n",
297 | "df"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "metadata": {
303 | "slideshow": {
304 | "slide_type": "slide"
305 | }
306 | },
307 | "source": [
308 | "Although we could have repeated the Numpy method of masking by `numpy.isnan(df[\"avg\"])`, Pandas has functions for dealing with missing data. (A lot of them, actually.)"
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "execution_count": null,
314 | "metadata": {
315 | "scrolled": true
316 | },
317 | "outputs": [],
318 | "source": [
319 | "df[\"imputed\"] = df[\"avg\"].fillna(df[\"min-max avg\"])\n",
320 | "df"
321 | ]
322 | },
323 | {
324 | "cell_type": "markdown",
325 | "metadata": {
326 | "slideshow": {
327 | "slide_type": "slide"
328 | }
329 | },
330 | "source": [
331 | "We can select columns by strings in square brackets (like Numpy's record arrays, a feature I didn't show you), but rows cannot be selected by integer index."
332 | ]
333 | },
334 | {
335 | "cell_type": "markdown",
336 | "metadata": {
337 | "slideshow": {
338 | "slide_type": "fragment"
339 | }
340 | },
341 | "source": [
342 | "Whereas an array of length `N` is a function `[0, N) → V`, a DataFrame is a function `K → V1×V2×V3`, where `K` is the index and `V1×V2×V3` are the columns. Integer indexing won't work unless the index has integer type:"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": null,
348 | "metadata": {
349 | "slideshow": {
350 | "slide_type": "fragment"
351 | }
352 | },
353 | "outputs": [],
354 | "source": [
355 | "try:\n",
356 | " df[-1]\n",
357 | "except KeyError:\n",
358 | " print(\"Nope, -1 is not a valid index for this DataFrame.\")"
359 | ]
360 | },
361 | {
362 | "cell_type": "markdown",
363 | "metadata": {
364 | "slideshow": {
365 | "slide_type": "slide"
366 | }
367 | },
368 | "source": [
369 | "But unlike an SQL table, a DataFrame index has an ordering. We should be able to ask for the last element, so there's a special accessor for that: `iloc`."
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": null,
375 | "metadata": {},
376 | "outputs": [],
377 | "source": [
378 | "df.iloc[-1]"
379 | ]
380 | },
381 | {
382 | "cell_type": "markdown",
383 | "metadata": {
384 | "slideshow": {
385 | "slide_type": "fragment"
386 | }
387 | },
388 | "source": [
389 | "But generally, the index type should be more meaningful than the positions, so we'd want to access rows by index elements. That's `loc`."
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": null,
395 | "metadata": {},
396 | "outputs": [],
397 | "source": [
398 | "df.loc[pandas.Timestamp(\"2018-11-04\")]"
399 | ]
400 | },
401 | {
402 | "cell_type": "markdown",
403 | "metadata": {
404 | "slideshow": {
405 | "slide_type": "slide"
406 | }
407 | },
408 | "source": [
409 | "We can slice regions of time just as we slice integer indexes."
410 | ]
411 | },
412 | {
413 | "cell_type": "code",
414 | "execution_count": null,
415 | "metadata": {},
416 | "outputs": [],
417 | "source": [
418 | "df.loc[pandas.Timestamp(\"2017-11-04\"):].plot()"
419 | ]
420 | },
421 | {
422 | "cell_type": "markdown",
423 | "metadata": {
424 | "slideshow": {
425 | "slide_type": "fragment"
426 | }
427 | },
428 | "source": [
429 | "And that lets us zoom into interesting regions in the data space."
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": null,
435 | "metadata": {},
436 | "outputs": [],
437 | "source": [
438 | "df[[\"imputed\", \"avg\"]].loc[pandas.Timestamp(\"2011-01-01\"):pandas.Timestamp(\"2015-01-01\")].plot()"
439 | ]
440 | },
441 | {
442 | "cell_type": "markdown",
443 | "metadata": {
444 | "slideshow": {
445 | "slide_type": "slide"
446 | }
447 | },
448 | "source": [
449 | "Pandas has oodles of statistical functions. Whenever I want to do something, I do a web search to find out what it's named— _somebody's_ asked about it on StackOverflow."
450 | ]
451 | },
452 | {
453 | "cell_type": "markdown",
454 | "metadata": {
455 | "slideshow": {
456 | "slide_type": "fragment"
457 | }
458 | },
459 | "source": [
460 | "Let's look for local warming! (Not global; this is just Newark.)"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": null,
466 | "metadata": {},
467 | "outputs": [],
468 | "source": [
469 | "df[\"imputed\"].plot()"
470 | ]
471 | },
472 | {
473 | "cell_type": "markdown",
474 | "metadata": {
475 | "slideshow": {
476 | "slide_type": "fragment"
477 | }
478 | },
479 | "source": [
480 | "Day-to-day variations are drowning out any effect. Can we smooth these data?"
481 | ]
482 | },
483 | {
484 | "cell_type": "markdown",
485 | "metadata": {
486 | "slideshow": {
487 | "slide_type": "slide"
488 | }
489 | },
490 | "source": [
491 | "Yes. It's called a rolling mean."
492 | ]
493 | },
494 | {
495 | "cell_type": "code",
496 | "execution_count": null,
497 | "metadata": {},
498 | "outputs": [],
499 | "source": [
500 | "df[\"imputed\"].rolling(3*365).mean().plot()"
501 | ]
502 | },
503 | {
504 | "cell_type": "markdown",
505 | "metadata": {
506 | "slideshow": {
507 | "slide_type": "slide"
508 | }
509 | },
510 | "source": [
511 | "The input files we've been working with are artificial (one number per line as text). The original file that came from the NCDC was a CSV with redundant information."
512 | ]
513 | },
514 | {
515 | "cell_type": "code",
516 | "execution_count": null,
517 | "metadata": {
518 | "scrolled": true
519 | },
520 | "outputs": [],
521 | "source": [
522 | "df2 = pandas.read_csv(\"data/newark-temperature.csv\")\n",
523 | "df2"
524 | ]
525 | },
526 | {
527 | "cell_type": "markdown",
528 | "metadata": {
529 | "slideshow": {
530 | "slide_type": "slide"
531 | }
532 | },
533 | "source": [
534 | "The first two columns have only one value because this was a database selection. Pandas's `read_csv` function recognized that the temperature columns are numbers, but not that the date is a date."
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "execution_count": null,
540 | "metadata": {
541 | "slideshow": {
542 | "slide_type": "fragment"
543 | }
544 | },
545 | "outputs": [],
546 | "source": [
547 | "df2[\"DATE\"].dtype"
548 | ]
549 | },
550 | {
551 | "cell_type": "markdown",
552 | "metadata": {
553 | "slideshow": {
554 | "slide_type": "fragment"
555 | }
556 | },
557 | "source": [
558 | "The `'O'` type (object) is for cases when Pandas doesn't recognize the type of a field. In principle, it could be any Python object, but from a CSV file, it's a string.\n",
559 | "\n",
560 | "We can try to read it again, this time providing a hint that this column is supposed to be a date."
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": null,
566 | "metadata": {
567 | "slideshow": {
568 | "slide_type": "slide"
569 | }
570 | },
571 | "outputs": [],
572 | "source": [
573 | "df2 = pandas.read_csv(\"data/newark-temperature.csv\", parse_dates=[\"DATE\"])\n",
574 | "df2.columns"
575 | ]
576 | },
577 | {
578 | "cell_type": "markdown",
579 | "metadata": {
580 | "slideshow": {
581 | "slide_type": "fragment"
582 | }
583 | },
584 | "source": [
585 | "It has the same columns, but now the date is a date."
586 | ]
587 | },
588 | {
589 | "cell_type": "code",
590 | "execution_count": null,
591 | "metadata": {},
592 | "outputs": [],
593 | "source": [
594 | "df2[\"DATE\"].dtype"
595 | ]
596 | },
597 | {
598 | "cell_type": "markdown",
599 | "metadata": {
600 | "slideshow": {
601 | "slide_type": "slide"
602 | }
603 | },
604 | "source": [
605 | "We'd like this date column to be the index of the whole table, so we say so."
606 | ]
607 | },
608 | {
609 | "cell_type": "code",
610 | "execution_count": null,
611 | "metadata": {},
612 | "outputs": [],
613 | "source": [
614 | "df2.index = df2[\"DATE\"]"
615 | ]
616 | },
617 | {
618 | "cell_type": "markdown",
619 | "metadata": {
620 | "slideshow": {
621 | "slide_type": "fragment"
622 | }
623 | },
624 | "source": [
625 | "Now we can do the same analysis we did before, but directly on the DataFrame from the CSV file, not by gluing together Series derived from Numpy arrays."
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": null,
631 | "metadata": {},
632 | "outputs": [],
633 | "source": [
634 | "df2[\"imputed\"] = df2[\"TAVG\"].fillna((df2[\"TMIN\"] + df2[\"TMAX\"]) / 2)\n",
635 | "df2[\"imputed\"].rolling(3*365).mean().plot()"
636 | ]
637 | },
638 | {
639 | "cell_type": "markdown",
640 | "metadata": {
641 | "slideshow": {
642 | "slide_type": "slide"
643 | }
644 | },
645 | "source": [
646 | "Temperature data are fairly simple: a one-dimensional time series. Let's consider something with a bit more structure— exoplanets.\n",
647 | "\n",
648 | "Each star may have a different number of planets, which complicates the indexing."
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": null,
654 | "metadata": {
655 | "scrolled": true
656 | },
657 | "outputs": [],
658 | "source": [
659 | "planets = pandas.read_csv(\"data/nasa-exoplanets.csv\")\n",
660 | "planets"
661 | ]
662 | },
663 | {
664 | "cell_type": "markdown",
665 | "metadata": {
666 | "slideshow": {
667 | "slide_type": "slide"
668 | }
669 | },
670 | "source": [
671 | "Not knowing much about this dataset, let's get a quick summary of the columns."
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "execution_count": null,
677 | "metadata": {
678 | "scrolled": true
679 | },
680 | "outputs": [],
681 | "source": [
682 | "planets.describe().transpose()"
683 | ]
684 | },
685 | {
686 | "cell_type": "markdown",
687 | "metadata": {
688 | "slideshow": {
689 | "slide_type": "slide"
690 | }
691 | },
692 | "source": [
693 | "Fortunately, NASA gave us longer explanations of the column names."
694 | ]
695 | },
696 | {
697 | "cell_type": "code",
698 | "execution_count": null,
699 | "metadata": {},
700 | "outputs": [],
701 | "source": [
702 | "!cat data/nasa-exoplanets-details.txt"
703 | ]
704 | },
705 | {
706 | "cell_type": "markdown",
707 | "metadata": {
708 | "slideshow": {
709 | "slide_type": "fragment"
710 | }
711 | },
712 | "source": [
713 | "Number of moons? (\"`pl_mnum`: Number of Moons in System\") Awesome!"
714 | ]
715 | },
716 | {
717 | "cell_type": "code",
718 | "execution_count": null,
719 | "metadata": {
720 | "slideshow": {
721 | "slide_type": "slide"
722 | }
723 | },
724 | "outputs": [],
725 | "source": [
726 | "planets[planets[\"pl_mnum\"] > 0]"
727 | ]
728 | },
729 | {
730 | "cell_type": "markdown",
731 | "metadata": {
732 | "slideshow": {
733 | "slide_type": "fragment"
734 | }
735 | },
736 | "source": [
737 | "I guess not yet. They're just being hopeful."
738 | ]
739 | },
740 | {
741 | "cell_type": "markdown",
742 | "metadata": {
743 | "slideshow": {
744 | "slide_type": "slide"
745 | }
746 | },
747 | "source": [
748 | "In this dataset, one row is one planet. Thus, star data for stars with multiple known planets are duplicated (which effectively weights star data by their number of planets in `planets.describe()`).\n",
749 | "\n",
750 | "To get a table of stars only, we'd have to do a group-by. We expect the star data to be the same for each planet associated with a star, so the mean is an appropriate summary."
751 | ]
752 | },
753 | {
754 | "cell_type": "code",
755 | "execution_count": null,
756 | "metadata": {
757 | "scrolled": true
758 | },
759 | "outputs": [],
760 | "source": [
761 | "planets.groupby(\"pl_hostname\").mean()"
762 | ]
763 | },
764 | {
765 | "cell_type": "markdown",
766 | "metadata": {
767 | "slideshow": {
768 | "slide_type": "slide"
769 | }
770 | },
771 | "source": [
772 | "**Exercise:** This reduction averages all values per star, which is not meaningful for planet variables (which start with `\"pl_\"`). After all, what does it mean to average their \"semi-major axes?\" Or their \"discovery methods?\"\n",
773 | "\n",
774 | "To avoid confusion, repeat the group-by with only the columns that start with `\"st_\"`."
775 | ]
776 | },
777 | {
778 | "cell_type": "code",
779 | "execution_count": null,
780 | "metadata": {
781 | "scrolled": true
782 | },
783 | "outputs": [],
784 | "source": [
785 | "???"
786 | ]
787 | },
788 | {
789 | "cell_type": "markdown",
790 | "metadata": {
791 | "slideshow": {
792 | "slide_type": "slide"
793 | }
794 | },
795 | "source": [
796 | "Wouldn't it be great if the index encapsulated the hierarchical relationship between stars and planets?\n",
797 | "\n",
798 | "Pandas has a `MultiIndex`, which allows sub-indexes to be nested within outer indexes."
799 | ]
800 | },
801 | {
802 | "cell_type": "code",
803 | "execution_count": null,
804 | "metadata": {
805 | "scrolled": true
806 | },
807 | "outputs": [],
808 | "source": [
809 | "planets.index = pandas.MultiIndex.from_arrays([planets[\"pl_hostname\"], planets[\"pl_letter\"]])\n",
810 | "planets"
811 | ]
812 | },
813 | {
814 | "cell_type": "markdown",
815 | "metadata": {
816 | "slideshow": {
817 | "slide_type": "slide"
818 | }
819 | },
820 | "source": [
821 | "The transpose of this multiindexed table is a table with two levels of columns."
822 | ]
823 | },
824 | {
825 | "cell_type": "code",
826 | "execution_count": null,
827 | "metadata": {
828 | "scrolled": true
829 | },
830 | "outputs": [],
831 | "source": [
832 | "planets.transpose()"
833 | ]
834 | },
835 | {
836 | "cell_type": "markdown",
837 | "metadata": {
838 | "slideshow": {
839 | "slide_type": "slide"
840 | }
841 | },
842 | "source": [
843 | "To simplify this table, let's reduce it to the following fields:\n",
844 | "\n",
845 | " * `pl_discmethod`: Discovery Method\n",
846 | " * `pl_orbper`: Orbital Period [days]\n",
847 |     " * `pl_orbsmax`: Orbit Semi-Major Axis [AU]\n",
848 | " * `pl_orbeccen`: Eccentricity\n",
849 | " * `pl_orbincl`: Inclination [deg]\n",
850 | " * `pl_bmassj`: Planet Mass or M*sin(i) [Jupiter mass]\n",
851 | " * `pl_bmassprov`: Planet Mass or M*sin(i) Provenance\n",
852 | " * `pl_disc`: Year of Discovery\n",
853 | " * `pl_telescope`: Discovery Telescope"
854 | ]
855 | },
856 | {
857 | "cell_type": "code",
858 | "execution_count": null,
859 | "metadata": {
860 | "scrolled": true
861 | },
862 | "outputs": [],
863 | "source": [
864 | "simple = planets[[\"pl_discmethod\", \"pl_orbper\", \"pl_orbsmax\", \"pl_orbeccen\", \"pl_bmassj\", \"pl_bmassprov\", \"pl_disc\", \"pl_telescope\"]]\n",
865 | "simple"
866 | ]
867 | },
868 | {
869 | "cell_type": "markdown",
870 | "metadata": {
871 | "slideshow": {
872 | "slide_type": "slide"
873 | }
874 | },
875 | "source": [
876 | "The syntax for selecting rows by star name now selects subtables of all planets associated with a star."
877 | ]
878 | },
879 | {
880 | "cell_type": "code",
881 | "execution_count": null,
882 | "metadata": {},
883 | "outputs": [],
884 | "source": [
885 | "simple.loc[\"tau Boo\"]"
886 | ]
887 | },
888 | {
889 | "cell_type": "code",
890 | "execution_count": null,
891 | "metadata": {},
892 | "outputs": [],
893 | "source": [
894 | "simple.loc[\"tau Cet\"]"
895 | ]
896 | },
897 | {
898 | "cell_type": "markdown",
899 | "metadata": {
900 | "slideshow": {
901 | "slide_type": "slide"
902 | }
903 | },
904 | "source": [
905 | "How about the opposite selection? To get all planets labeled `\"b\"`— this is one planet per star. The method for that is `xs` (for \"cross-section\")."
906 | ]
907 | },
908 | {
909 | "cell_type": "code",
910 | "execution_count": null,
911 | "metadata": {
912 | "scrolled": true
913 | },
914 | "outputs": [],
915 | "source": [
916 | "simple.xs(\"b\", level=\"pl_letter\")"
917 | ]
918 | },
919 | {
920 | "cell_type": "markdown",
921 | "metadata": {
922 | "slideshow": {
923 | "slide_type": "slide"
924 | }
925 | },
926 | "source": [
927 | "In a sense, the multiindexed table represents three dimensional information: (1) star, (2) planet letter, (3) columns. Some combinations of star and planet letter do not exist: these two dimensions are not completely filled. Some stars have only one planet, while others have as many as..."
928 | ]
929 | },
930 | {
931 | "cell_type": "code",
932 | "execution_count": null,
933 | "metadata": {
934 | "slideshow": {
935 | "slide_type": "fragment"
936 | }
937 | },
938 | "outputs": [],
939 | "source": [
940 | "planets[\"pl_letter\"].groupby(\"pl_hostname\").count().max()"
941 | ]
942 | },
943 | {
944 | "cell_type": "markdown",
945 | "metadata": {},
946 | "source": [
947 | "...8 planets. Most have only one. I would call this dimension \"jagged\" or \"ragged.\" Pandas represents this _sparsely,_ by enumerating only the combinations that do exist."
948 | ]
949 | },
950 | {
951 | "cell_type": "markdown",
952 | "metadata": {
953 | "slideshow": {
954 | "slide_type": "fragment"
955 | }
956 | },
957 | "source": [
958 | "Pandas used to have a type to handle 3+ dimensional data (Series is 1D, DataFrame is 2D, ...), but this was dropped in favor of multiindexes."
959 | ]
960 | },
961 | {
962 | "cell_type": "markdown",
963 | "metadata": {
964 | "slideshow": {
965 | "slide_type": "fragment"
966 | }
967 | },
968 | "source": [
969 | "There's another library called xarray to fill this niche, but generally, I'd rather deal with 3+ dimensions sparsely than densely."
970 | ]
971 | },
972 | {
973 | "cell_type": "markdown",
974 | "metadata": {
975 | "slideshow": {
976 | "slide_type": "slide"
977 | }
978 | },
979 | "source": [
980 | "If we want to pass these data to a machine learning model or something, we'll have to flatten the star-planet structure.\n",
981 | "\n",
982 | "A lossless way to do that is to pivot planet letter keys into columns. (Note: the `stack` and `unstack` functions do a similar thing.)"
983 | ]
984 | },
985 | {
986 | "cell_type": "code",
987 | "execution_count": null,
988 | "metadata": {},
989 | "outputs": [],
990 | "source": [
991 | "simple.pivot_table(index=\"pl_hostname\", columns=\"pl_letter\")"
992 | ]
993 | },
994 | {
995 | "cell_type": "markdown",
996 | "metadata": {
997 | "slideshow": {
998 | "slide_type": "fragment"
999 | }
1000 | },
1001 | "source": [
1002 | "There's now a column for each letter-column combination. Most of them are empty because there was one star with 8 planets but most have 1 planet."
1003 | ]
1004 | },
1005 | {
1006 | "cell_type": "markdown",
1007 | "metadata": {
1008 | "slideshow": {
1009 | "slide_type": "slide"
1010 | }
1011 | },
1012 | "source": [
1013 | "If you want to simplify this table, you'll have to make choices because anything you do from here on loses information.\n",
1014 | "\n",
1015 | " * Do you fill in NaN with `fillna`?\n",
1016 | " * Do you pick a few of the most common planet letters, like `[\"b\", \"c\", \"d\"]`?\n",
1017 | " * Do you average over all planets?\n",
1018 | " * Do you turn the original into two tables, one for each star and another for each planet, with identifiers linking them?"
1019 | ]
1020 | },
1021 | {
1022 | "cell_type": "markdown",
1023 | "metadata": {
1024 | "slideshow": {
1025 | "slide_type": "slide"
1026 | }
1027 | },
1028 | "source": [
1029 | "**One last topic:** This dataset has a lot of strings. When we read it from the CSV file, Pandas left them as Python strings. This can be inefficient for very large tables (in storage and comparison speed). A common technique is to find unique strings and replace each value with an integer. This is exactly what we did with the Gettysburg Address this morning (\"dictionary encoding\"). Pandas has an automated way to do it."
1030 | ]
1031 | },
1032 | {
1033 | "cell_type": "code",
1034 | "execution_count": null,
1035 | "metadata": {
1036 | "slideshow": {
1037 | "slide_type": "fragment"
1038 | }
1039 | },
1040 | "outputs": [],
1041 | "source": [
1042 | "simple[\"pl_discmethod\"].dtype"
1043 | ]
1044 | },
1045 | {
1046 | "cell_type": "markdown",
1047 | "metadata": {
1048 | "slideshow": {
1049 | "slide_type": "fragment"
1050 | }
1051 | },
1052 | "source": [
1053 | "Type `'O'` is \"object,\" for arbitrary Python objects (usually strings). We want it to be a \"category\" type. Let's convert it (`astype`) and assign it."
1054 | ]
1055 | },
1056 | {
1057 | "cell_type": "code",
1058 | "execution_count": null,
1059 | "metadata": {},
1060 | "outputs": [],
1061 | "source": [
1062 | "simple[\"pl_discmethod\"] = simple[\"pl_discmethod\"].astype(\"category\")"
1063 | ]
1064 | },
1065 | {
1066 | "cell_type": "markdown",
1067 | "metadata": {
1068 | "slideshow": {
1069 | "slide_type": "fragment"
1070 | }
1071 | },
1072 | "source": [
1073 | "Uh oh... what's that warning?"
1074 | ]
1075 | },
1076 | {
1077 | "cell_type": "markdown",
1078 | "metadata": {
1079 | "slideshow": {
1080 | "slide_type": "slide"
1081 | }
1082 | },
1083 | "source": [
1084 | "It's one of the most common you get with Pandas ([here's a whole blog on it](https://www.dataquest.io/blog/settingwithcopywarning/)). Remember the \"view vs copy\" discussion this morning? This is the same thing in a Pandas, rather than Numpy, context."
1085 | ]
1086 | },
1087 | {
1088 | "cell_type": "markdown",
1089 | "metadata": {
1090 | "slideshow": {
1091 | "slide_type": "fragment"
1092 | }
1093 | },
1094 | "source": [
1095 | "Unlike Numpy, Pandas gives you a warning. We really should address that warning."
1096 | ]
1097 | },
1098 | {
1099 | "cell_type": "markdown",
1100 | "metadata": {
1101 | "slideshow": {
1102 | "slide_type": "fragment"
1103 | }
1104 | },
1105 | "source": [
1106 | "The issue was that `simple` is a view of `planets`, so modifying a column in `simple` modifies `planets`. That could lead to surprising results.\n",
1107 | "\n",
1108 | "The solution? Turn `simple` into a copy."
1109 | ]
1110 | },
1111 | {
1112 | "cell_type": "code",
1113 | "execution_count": null,
1114 | "metadata": {},
1115 | "outputs": [],
1116 | "source": [
1117 | "simple = simple.copy()"
1118 | ]
1119 | },
1120 | {
1121 | "cell_type": "markdown",
1122 | "metadata": {
1123 | "slideshow": {
1124 | "slide_type": "slide"
1125 | }
1126 | },
1127 | "source": [
1128 | "Now there's no warning because everything's fine."
1129 | ]
1130 | },
1131 | {
1132 | "cell_type": "code",
1133 | "execution_count": null,
1134 | "metadata": {},
1135 | "outputs": [],
1136 | "source": [
1137 | "simple[\"pl_discmethod\"] = simple[\"pl_discmethod\"].astype(\"category\")"
1138 | ]
1139 | },
1140 | {
1141 | "cell_type": "code",
1142 | "execution_count": null,
1143 | "metadata": {},
1144 | "outputs": [],
1145 | "source": [
1146 | "simple[\"pl_discmethod\"].dtype"
1147 | ]
1148 | },
1149 | {
1150 | "cell_type": "markdown",
1151 | "metadata": {
1152 | "slideshow": {
1153 | "slide_type": "fragment"
1154 | }
1155 | },
1156 | "source": [
1157 | "Now when we do analysis on `\"pl_discmethod\"`, it's number-crunching, rather than string-crunching."
1158 | ]
1159 | },
1160 | {
1161 | "cell_type": "code",
1162 | "execution_count": null,
1163 | "metadata": {},
1164 | "outputs": [],
1165 | "source": [
1166 | "simple[\"pl_discmethod\"].value_counts().plot.bar(logy=True)"
1167 | ]
1168 | },
1169 | {
1170 | "cell_type": "markdown",
1171 | "metadata": {
1172 | "slideshow": {
1173 | "slide_type": "slide"
1174 | }
1175 | },
1176 | "source": [
1177 | "Just poking around now... discovery method by date?"
1178 | ]
1179 | },
1180 | {
1181 | "cell_type": "code",
1182 | "execution_count": null,
1183 | "metadata": {},
1184 | "outputs": [],
1185 | "source": [
1186 | "simple.reset_index().groupby([\"pl_disc\", \"pl_discmethod\"]).count()[\"pl_letter\"].unstack(\"pl_discmethod\").fillna(0).plot()"
1187 | ]
1188 | },
1189 | {
1190 | "cell_type": "markdown",
1191 | "metadata": {
1192 | "slideshow": {
1193 | "slide_type": "fragment"
1194 | }
1195 | },
1196 | "source": [
1197 | "Wow— a lot of transits! Why is that?"
1198 | ]
1199 | },
1200 | {
1201 | "cell_type": "code",
1202 | "execution_count": null,
1203 | "metadata": {
1204 | "slideshow": {
1205 | "slide_type": "slide"
1206 | }
1207 | },
1208 | "outputs": [],
1209 | "source": [
1210 | "planets[planets[\"pl_discmethod\"] == \"Transit\"].groupby([\"pl_disc\", \"pl_telescope\"]).count()[\"pl_letter\"].unstack(\"pl_telescope\").fillna(0).plot(figsize=(10, 7))"
1211 | ]
1212 | },
1213 | {
1214 | "cell_type": "markdown",
1215 | "metadata": {
1216 | "slideshow": {
1217 | "slide_type": "fragment"
1218 | }
1219 | },
1220 | "source": [
1221 | "Oh. Kepler. The first peak corresponds to the Kepler telescope's first mission, which ended early due to an instrument failure. The second mission, K2, worked around the failure to get results until earlier this year."
1222 | ]
1223 | },
1224 | {
1225 | "cell_type": "markdown",
1226 | "metadata": {
1227 | "slideshow": {
1228 | "slide_type": "slide"
1229 | }
1230 | },
1231 | "source": [
1232 | "Other than transits, what are the most popular methods?"
1233 | ]
1234 | },
1235 | {
1236 | "cell_type": "code",
1237 | "execution_count": null,
1238 | "metadata": {
1239 | "slideshow": {
1240 | "slide_type": "-"
1241 | }
1242 | },
1243 | "outputs": [],
1244 | "source": [
1245 | "simple[simple[\"pl_discmethod\"] != \"Transit\"].reset_index().groupby([\"pl_disc\", \"pl_discmethod\"]).count()[\"pl_letter\"].unstack(\"pl_discmethod\").fillna(0).plot(figsize=(10, 7))"
1246 | ]
1247 | },
1248 | {
1249 | "cell_type": "markdown",
1250 | "metadata": {
1251 | "slideshow": {
1252 | "slide_type": "fragment"
1253 | }
1254 | },
1255 | "source": [
1256 | "Pulsar timing, apparently."
1257 | ]
1258 | },
1259 | {
1260 | "cell_type": "markdown",
1261 | "metadata": {
1262 | "slideshow": {
1263 | "slide_type": "slide"
1264 | }
1265 | },
1266 | "source": [
1267 | "On to Dask!"
1268 | ]
1269 | }
1270 | ],
1271 | "metadata": {
1272 | "celltoolbar": "Slideshow",
1273 | "kernelspec": {
1274 | "display_name": "Python 3",
1275 | "language": "python",
1276 | "name": "python3"
1277 | },
1278 | "language_info": {
1279 | "codemirror_mode": {
1280 | "name": "ipython",
1281 | "version": 3
1282 | },
1283 | "file_extension": ".py",
1284 | "mimetype": "text/x-python",
1285 | "name": "python",
1286 | "nbconvert_exporter": "python",
1287 | "pygments_lexer": "ipython3",
1288 | "version": "3.7.0"
1289 | }
1290 | },
1291 | "nbformat": 4,
1292 | "nbformat_minor": 2
1293 | }
1294 |
--------------------------------------------------------------------------------
/5-dask.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Dask & multiprocessing"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "slideshow": {
18 | "slide_type": "fragment"
19 | }
20 | },
21 | "source": [
22 | "```\n",
23 | "conda install dask distributed -c conda-forge\n",
24 | "```\n",
25 | "\n",
26 | "_(and numpy, matplotlib from before)_"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "slideshow": {
33 | "slide_type": "slide"
34 | }
35 | },
36 | "source": [
37 | "Computers aren't getting any _faster._\n",
38 | "\n",
39 | "![clock rate](img/clock-rate.jpg)\n\n*Computer Architecture: A Quantitative Approach, David A. Patterson and John L. Hennessy*"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {
45 | "slideshow": {
46 | "slide_type": "slide"
47 | }
48 | },
49 | "source": [
50 | "But Moore's Law is still in effect: the number of transistors per square inch continues to grow exponentially (for now). In the 21st century, however, those extra transistors are used to make more execution units, not to increase the rate through smaller pipelines."
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {
56 | "slideshow": {
57 | "slide_type": "fragment"
58 | }
59 | },
60 | "source": [
61 | "(The scaling of clock rates— Dennard scaling— ended because power dissipation scales with clock rate squared: anything faster than 3 GHz _cooks_ the chip!)"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {
67 | "slideshow": {
68 | "slide_type": "fragment"
69 | }
70 | },
71 | "source": [
72 | "Most programming languages, Python among them, cannot be transparently parallelized. You'll have to change your programs to use the extra processors."
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {
78 | "slideshow": {
79 | "slide_type": "slide"
80 | }
81 | },
82 | "source": [
83 | "Let's take a moderately complex problem as an example."
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {},
90 | "outputs": [],
91 | "source": [
92 | "%matplotlib inline\n",
93 | "import matplotlib.pyplot as plt\n",
94 | "import numpy\n",
95 | "import time\n",
96 | "\n",
97 | "def prepare(height, width):\n",
98 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
99 | " c = x + y*1j\n",
100 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32)\n",
101 | " return c, fractal\n",
102 | "\n",
103 | "def run(c, fractal, maxiterations=20):\n",
104 | " fractal *= 0 # set fractal to maxiterations without replacing it\n",
105 | " fractal += maxiterations\n",
106 | " z = c\n",
107 | " for i in range(maxiterations):\n",
108 | " z = z**2 + c\n",
109 | " diverge = numpy.absolute(z) > 2\n",
110 | " divnow = diverge & (fractal == maxiterations)\n",
111 | " fractal[divnow] = i\n",
112 | " z[diverge] = 2\n",
113 | " return fractal"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {
120 | "slideshow": {
121 | "slide_type": "slide"
122 | }
123 | },
124 | "outputs": [],
125 | "source": [
126 | "c, fractal = prepare(8000, 12000)\n",
127 | "\n",
128 | "starttime = time.time()\n",
129 | "fractal = run(c, fractal)\n",
130 | "time.time() - starttime"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
140 | "ax.imshow(fractal)\n",
141 | "# ax.imshow(fractal[-200:, :300])"
142 | ]
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "metadata": {
147 | "slideshow": {
148 | "slide_type": "slide"
149 | }
150 | },
151 | "source": [
152 | "Python has built-in libraries for parallel processing:\n",
153 | "\n",
154 | " * **threading:** lets you launch individual threads; you manage coordination.\n",
155 | " * **multiprocessing:** same interface but it launches processes. Pro: can't make common mistakes due to shared memory. Con: memory isn't shared; have to ship data to and from workers.\n",
156 | " * **concurrent.futures:** higher-level interface: Python manages workers; you send work."
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {
162 | "slideshow": {
163 | "slide_type": "slide"
164 | }
165 | },
166 | "source": [
167 | "Here's an illustration of the threading interface. Since memory is shared, we don't have to send data to the workers or send results back— they can all see and modify the same array."
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": [
176 | "import threading\n",
177 | "\n",
178 | "class Worker(threading.Thread):\n",
179 | " def __init__(self, c, fractal, i):\n",
180 | " super(Worker, self).__init__()\n",
181 | " self.c, self.fractal, self.i = c, fractal, i\n",
182 | " def run(self):\n",
183 | " run(self.c[10*self.i : 10*(self.i + 1), :], self.fractal[10*self.i : 10*(self.i + 1), :])\n",
184 | "\n",
185 | "c, fractal = prepare(8000, 12000)\n",
186 | "workers = []\n",
187 | "for i in range(800):\n",
188 | " workers.append(Worker(c, fractal, i))\n",
189 | "\n",
190 | "starttime = time.time()\n",
191 | "\n",
192 | "for worker in workers:\n",
193 | " worker.start()\n",
194 | "for worker in workers:\n",
195 | " worker.join()\n",
196 | "\n",
197 | "time.time() - starttime"
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {
203 | "slideshow": {
204 | "slide_type": "slide"
205 | }
206 | },
207 | "source": [
208 | "Now we have to check the result because it's easy to screw this up. (I did many times, preparing this talk.)"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "scrolled": false
216 | },
217 | "outputs": [],
218 | "source": [
219 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
220 | "ax.imshow(fractal)"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {
226 | "slideshow": {
227 | "slide_type": "slide"
228 | }
229 | },
230 | "source": [
231 | "1 thread took 35 seconds to complete.\n",
232 | "\n",
233 | "8 threads took 12 seconds to complete."
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {
240 | "slideshow": {
241 | "slide_type": "fragment"
242 | }
243 | },
244 | "outputs": [],
245 | "source": [
246 | "35 / 12"
247 | ]
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "metadata": {
252 | "slideshow": {
253 | "slide_type": "fragment"
254 | }
255 | },
256 | "source": [
257 | "3 ≠ 8.\n",
258 | "\n",
259 | "It's often difficult to get \"perfect scaling,\" N times more work from N threads, in real situations. Even though this problem is \"embarrassingly parallel\" (none of the workers need to know other workers' results), there can be scheduling overhead, contention for memory, or slow-downs due to Python's [Global Interpreter Lock](https://realpython.com/python-gil/)."
260 | ]
261 | },
262 | {
263 | "cell_type": "markdown",
264 | "metadata": {
265 | "slideshow": {
266 | "slide_type": "slide"
267 | }
268 | },
269 | "source": [
270 | "One way to avoid the global interpreter lock is to send work to separate processes. Python interpreters in separate processes do not share memory and therefore do not need to coordinate."
271 | ]
272 | },
273 | {
274 | "cell_type": "markdown",
275 | "metadata": {
276 | "slideshow": {
277 | "slide_type": "fragment"
278 | }
279 | },
280 | "source": [
281 | "However, that means that we can't send data by simply sharing variables. We have to send it through a `multiprocessing.Queue` (which serializes— pickles— the data so that it can go through a pipe)."
282 | ]
283 | },
284 | {
285 | "cell_type": "markdown",
286 | "metadata": {
287 | "slideshow": {
288 | "slide_type": "slide"
289 | }
290 | },
291 | "source": [
292 | "...usually. There's an exception to this: you can share arrays among processes if you declare them as shared memory before launching the subprocesses. Python has a special type for this:"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": null,
298 | "metadata": {},
299 | "outputs": [],
300 | "source": [
301 | "import multiprocessing\n",
302 | "import ctypes\n",
303 | "\n",
304 | "sharedarray = multiprocessing.RawArray(ctypes.c_double, 100)\n",
305 | "sharedarray"
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {
311 | "slideshow": {
312 | "slide_type": "fragment"
313 | }
314 | },
315 | "source": [
316 | "This is not a Numpy array, but it can be cast as a Numpy array (in the forked process) like this:"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": null,
322 | "metadata": {},
323 | "outputs": [],
324 | "source": [
325 | "numpy.frombuffer(sharedarray, dtype=numpy.float64)"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {
331 | "slideshow": {
332 | "slide_type": "slide"
333 | }
334 | },
335 | "source": [
336 | "If the forked processes are not careful to write to different parts of the array, they can seriously garble the data by writing to the same element at the same time.\n",
337 | "\n",
338 | "\n",
339 | "It's not for the faint of heart, but it can be the fastest way to communicate between processes, and separate processes are the only way to fully escape synchronization delays due to Python's global interpreter lock."
340 | ]
341 | },
342 | {
343 | "cell_type": "markdown",
344 | "metadata": {
345 | "slideshow": {
346 | "slide_type": "fragment"
347 | }
348 | },
349 | "source": [
350 | "By now, you may be wondering if there's a more \"high level\" approach."
351 | ]
352 | },
353 | {
354 | "cell_type": "markdown",
355 | "metadata": {
356 | "slideshow": {
357 | "slide_type": "slide"
358 | }
359 | },
360 | "source": [
361 | "Python 3 introduced an \"executor\" interface that manages workers for you. Instead of creating threads or processes with a `run` method, you create an executor and send work to it."
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": null,
367 | "metadata": {
368 | "slideshow": {
369 | "slide_type": "-"
370 | }
371 | },
372 | "outputs": [],
373 | "source": [
374 | "import concurrent.futures\n",
375 | "executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": null,
381 | "metadata": {},
382 | "outputs": [],
383 | "source": [
384 | "c, fractal = prepare(8000, 12000)\n",
385 | "# fractal = numpy.asfortranarray(fractal)\n",
386 | "\n",
387 | "def piece(i):\n",
388 | " ci = c[10*i : 10*(i + 1), :]\n",
389 | " fi = fractal[10*i : 10*(i + 1), :]\n",
390 | " run(ci, fi)\n",
391 | "\n",
392 | "starttime = time.time()\n",
393 | "\n",
394 | "futures = executor.map(piece, range(800))\n",
395 | "for future in futures: # iterating over them waits for the results\n",
396 | " pass\n",
397 | "\n",
398 | "time.time() - starttime"
399 | ]
400 | },
401 | {
402 | "cell_type": "markdown",
403 | "metadata": {
404 | "slideshow": {
405 | "slide_type": "fragment"
406 | }
407 | },
408 | "source": [
409 | "Yay! A tiny bit better! What happens when we change to Fortran order? Why?"
410 | ]
411 | },
412 | {
413 | "cell_type": "markdown",
414 | "metadata": {
415 | "slideshow": {
416 | "slide_type": "slide"
417 | }
418 | },
419 | "source": [
420 | "Always make sure we haven't screwed things up."
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": null,
426 | "metadata": {
427 | "slideshow": {
428 | "slide_type": "-"
429 | }
430 | },
431 | "outputs": [],
432 | "source": [
433 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
434 | "ax.imshow(fractal)"
435 | ]
436 | },
437 | {
438 | "cell_type": "markdown",
439 | "metadata": {
440 | "slideshow": {
441 | "slide_type": "slide"
442 | }
443 | },
444 | "source": [
445 | "Still, there needs to be a better way. Our array slices in `piece` are fragile: an indexing error can ruin the result. Can't the problem of scattering work be generalized?"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": null,
451 | "metadata": {
452 | "slideshow": {
453 | "slide_type": "slide"
454 | }
455 | },
456 | "outputs": [],
457 | "source": [
458 | "import dask.array"
459 | ]
460 | },
461 | {
462 | "cell_type": "code",
463 | "execution_count": null,
464 | "metadata": {},
465 | "outputs": [],
466 | "source": [
467 | "c, fractal = prepare(8000, 12000)\n",
468 | "\n",
469 | "c = dask.array.from_array(c, chunks=(10, 12000))\n",
470 | "fractal = dask.array.from_array(fractal, chunks=(10, 12000))\n",
471 | "\n",
472 | "starttime = time.time()\n",
473 | "fractal = run(c, fractal)\n",
474 | "time.time() - starttime"
475 | ]
476 | },
477 | {
478 | "cell_type": "markdown",
479 | "metadata": {
480 | "slideshow": {
481 | "slide_type": "fragment"
482 | }
483 | },
484 | "source": [
485 | "That was too fast: too good to be true."
486 | ]
487 | },
488 | {
489 | "cell_type": "code",
490 | "execution_count": null,
491 | "metadata": {},
492 | "outputs": [],
493 | "source": [
494 | "fractal"
495 | ]
496 | },
497 | {
498 | "cell_type": "markdown",
499 | "metadata": {
500 | "slideshow": {
501 | "slide_type": "fragment"
502 | }
503 | },
504 | "source": [
505 | "This is not an array: it is a description of how to make an array. Dask has stepped through our procedure and built an execution graph, encoding all the dependencies so that it can correctly apply it to individual chunks. When we execute this graph, Dask will send a chunk to each processor in the computer and combine results."
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": null,
511 | "metadata": {
512 | "slideshow": {
513 | "slide_type": "slide"
514 | }
515 | },
516 | "outputs": [],
517 | "source": [
518 | "starttime = time.time()\n",
519 | "fractal = fractal.compute() # replace `fractal` the execution graph with `fractal` the array result\n",
520 | "time.time() - starttime"
521 | ]
522 | },
523 | {
524 | "cell_type": "markdown",
525 | "metadata": {
526 | "slideshow": {
527 | "slide_type": "fragment"
528 | }
529 | },
530 | "source": [
531 | "Now this check is a formality: Dask has managed the chunking, so we won't accidentally miss a slice."
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "execution_count": null,
537 | "metadata": {},
538 | "outputs": [],
539 | "source": [
540 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
541 | "ax.imshow(fractal)"
542 | ]
543 | },
544 | {
545 | "cell_type": "markdown",
546 | "metadata": {
547 | "slideshow": {
548 | "slide_type": "slide"
549 | }
550 | },
551 | "source": [
552 | "We seem to have paid for this simplicity: it took twice as long as the carefully sliced `piece` chunks in the executor."
553 | ]
554 | },
555 | {
556 | "cell_type": "markdown",
557 | "metadata": {
558 | "slideshow": {
559 | "slide_type": "fragment"
560 | }
561 | },
562 | "source": [
563 | "The reason is that our code is not as simple as it looks. It has masking and piecemeal assignments, which in principle could introduce complex dependencies. _We_ know that everything will be fine if you just chop up the array in independent sections— and thus we implemented our thread and executor-based solutions that way."
564 | ]
565 | },
566 | {
567 | "cell_type": "markdown",
568 | "metadata": {
569 | "slideshow": {
570 | "slide_type": "slide"
571 | }
572 | },
573 | "source": [
574 | "Let me show you what Dask has to do for a 1×1 chunking of our problem."
575 | ]
576 | },
577 | {
578 | "cell_type": "code",
579 | "execution_count": null,
580 | "metadata": {
581 | "scrolled": true
582 | },
583 | "outputs": [],
584 | "source": [
585 | "c, fractal = prepare(1, 1) # try 2, 2\n",
586 | "c = dask.array.from_array(c, chunks=(1, 1))\n",
587 | "fractal = dask.array.from_array(fractal, chunks=(1, 1))\n",
588 | "fractal = run(c, fractal, maxiterations=1) # try more iterations\n",
589 | "fractal.visualize()"
590 | ]
591 | },
592 | {
593 | "cell_type": "markdown",
594 | "metadata": {
595 | "slideshow": {
596 | "slide_type": "slide"
597 | }
598 | },
599 | "source": [
600 | "If that were all, I'd probably stick to chopping up the grid by hand (when possible). However, _exactly the same interface_ that distributes work across cores in my laptop can distribute work around the world, just by pointing it to a remote scheduler.\n",
601 | "\n",
602 | "This is truly the ~~lazy~~ busy researcher approach!"
603 | ]
604 | },
605 | {
606 | "cell_type": "markdown",
607 | "metadata": {
608 | "slideshow": {
609 | "slide_type": "fragment"
610 | }
611 | },
612 | "source": [
613 | "Note to self: launch\n",
614 | "\n",
615 | "`dask-scheduler &`\n",
616 | "\n",
617 | "and\n",
618 | "\n",
619 | "`dask-worker --nthreads 8 127.0.0.1:8786 &`\n",
620 | "\n",
621 | "in a terminal now."
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": null,
627 | "metadata": {
628 | "slideshow": {
629 | "slide_type": "slide"
630 | }
631 | },
632 | "outputs": [],
633 | "source": [
634 | "import dask.distributed\n",
635 | "client = dask.distributed.Client(\"127.0.0.1:8786\")\n",
636 | "client"
637 | ]
638 | },
639 | {
640 | "cell_type": "code",
641 | "execution_count": null,
642 | "metadata": {},
643 | "outputs": [],
644 | "source": [
645 | "c, fractal = prepare(8000, 12000)\n",
646 | "\n",
647 | "c = dask.array.from_array(c, chunks=(100, 12000))\n",
648 | "fractal = dask.array.from_array(fractal, chunks=(100, 12000))\n",
649 | "fractal = run(c, fractal)\n",
650 | "\n",
651 | "starttime = time.time()\n",
652 | "fractal = client.compute(fractal, sync=True)\n",
653 | "time.time() - starttime"
654 | ]
655 | },
656 | {
657 | "cell_type": "markdown",
658 | "metadata": {
659 | "slideshow": {
660 | "slide_type": "fragment"
661 | }
662 | },
663 | "source": [
664 | "Well, that was exciting!\n",
665 | "\n",
666 | "In the end, this example took longer than the single-core version, but it illustrates how array operations _can be_ distributed in a simple way."
667 | ]
668 | },
669 | {
670 | "cell_type": "markdown",
671 | "metadata": {
672 | "slideshow": {
673 | "slide_type": "slide"
674 | }
675 | },
676 | "source": [
677 | "I haven't shown very much of what Dask can do. It's a general toolkit for delayed and distributed evaluation. As such, it provides a nice way to work on Pandas-like DataFrames that are too large for memory:"
678 | ]
679 | },
680 | {
681 | "cell_type": "code",
682 | "execution_count": null,
683 | "metadata": {},
684 | "outputs": [],
685 | "source": [
686 | "import dask.dataframe\n",
687 | "\n",
688 | "df = dask.dataframe.read_csv(\"data/nasa-exoplanets.csv\")\n",
689 | "df"
690 | ]
691 | },
692 | {
693 | "cell_type": "markdown",
694 | "metadata": {
695 | "slideshow": {
696 | "slide_type": "fragment"
697 | }
698 | },
699 | "source": [
700 | "We don't see the data because they haven't been loaded. But we can get them if we need them."
701 | ]
702 | },
703 | {
704 | "cell_type": "code",
705 | "execution_count": null,
706 | "metadata": {
707 | "scrolled": true
708 | },
709 | "outputs": [],
710 | "source": [
711 | "df[[\"pl_hostname\", \"pl_pnum\"]].compute()"
712 | ]
713 | },
714 | {
715 | "cell_type": "markdown",
716 | "metadata": {
717 | "slideshow": {
718 | "slide_type": "slide"
719 | }
720 | },
721 | "source": [
722 | "Additionally, Dask isn't the only project filling this need. There's also:\n",
723 | "\n",
724 | " * **Joblib:** annotate functions to execute remotely with decorators.\n",
725 | " * **Parsl:** same, but work with conventional schedulers (Condor, Slurm, GRID); an academic project.\n",
726 | " * **PySpark:** Spark is a big, scalable project, though its Python interface has performance issues.\n",
727 | "\n",
728 | "and many smaller projects.\n",
729 | "\n",
730 | "(Distributed computing hasn't been fully figured out yet.)"
731 | ]
732 | }
733 | ],
734 | "metadata": {
735 | "celltoolbar": "Slideshow",
736 | "kernelspec": {
737 | "display_name": "Python 3",
738 | "language": "python",
739 | "name": "python3"
740 | },
741 | "language_info": {
742 | "codemirror_mode": {
743 | "name": "ipython",
744 | "version": 3
745 | },
746 | "file_extension": ".py",
747 | "mimetype": "text/x-python",
748 | "name": "python",
749 | "nbconvert_exporter": "python",
750 | "pygments_lexer": "ipython3",
751 | "version": "3.7.0"
752 | }
753 | },
754 | "nbformat": 4,
755 | "nbformat_minor": 2
756 | }
757 |
--------------------------------------------------------------------------------
/6-compilers.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Compilers: Numba, Cython, ~~pybind11~~"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "slideshow": {
18 | "slide_type": "fragment"
19 | }
20 | },
21 | "source": [
22 | "```\n",
23 | "conda install numba cython\n",
24 | "```\n",
25 | "\n",
26 | "_(and numpy, matplotlib, dask from before)_"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "slideshow": {
33 | "slide_type": "slide"
34 | }
35 | },
36 | "source": [
37 | "Speeding things up through parallel processing is called \"horizontal scaling.\" Often, analysis code can also be accelerated on a single thread, known as \"vertical scaling.\"\n",
38 | "\n",
39 | "Horizontal and vertical scaling are complementary."
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {
45 | "slideshow": {
46 | "slide_type": "slide"
47 | }
48 | },
49 | "source": [
50 | "Let's illustrate this with the fractal example from last time."
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "%matplotlib inline\n",
60 | "import matplotlib.pyplot as plt\n",
61 | "import numpy\n",
62 | "import time\n",
63 | "\n",
64 | "def run_numpy(height, width, maxiterations=20):\n",
65 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j] # ask Numpy to make an x, y grid for us\n",
66 | " c = x + y*1j # c is a constant: a grid of complex coordinates\n",
67 | " z = c\n",
68 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations # the fractal image starts as \"20\" everywhere\n",
69 | " for i in range(maxiterations):\n",
70 | " z = z**2 + c # iteratively apply z -> z**2 + c\n",
71 | " diverge = numpy.absolute(z) > 2 # define \"divergence\" by |z| > 2\n",
72 | " divnow = diverge & (fractal == maxiterations) # the pixels that are diverging in this iteration\n",
73 | " fractal[divnow] = i # the fractal image is a plot of the iteration number\n",
74 | " z[diverge] = 2 # clamp to 2 so they don't diverge too much\n",
75 | " return fractal"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {
81 | "slideshow": {
82 | "slide_type": "fragment"
83 | }
84 | },
85 | "source": [
86 | "Stare at this code: it performs operations across the whole grid, identifies pixels that have diverged, and repeats everything 20 times, even though the parts that have already diverged are \"done.\""
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {
92 | "slideshow": {
93 | "slide_type": "slide"
94 | }
95 | },
96 | "source": [
97 | "More significant than the unnecessary work, though, is the memory movement. Each `z**2 + c` creates new intermediate arrays, moving a lot of memory, flushing CPU caches. Nowadays, mathematical operations are much faster than memory movement."
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {
103 | "slideshow": {
104 | "slide_type": "slide"
105 | }
106 | },
107 | "source": [
108 | "As a reminder, this took 35 seconds to run."
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "starttime = time.time()\n",
118 | "fractal = run_numpy(8000, 12000)\n",
119 | "time.time() - starttime"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {
125 | "slideshow": {
126 | "slide_type": "slide"
127 | }
128 | },
129 | "source": [
130 | "If we weren't using Numpy, we'd write the algorithm differently: we'd deal with one pixel at a time. Once the pixel has diverged, we'd move on to the next, saving some work. But more importantly, we make only one pass over the image, avoiding repeated memory access."
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "def run_python(height, width, maxiterations=20):\n",
140 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
141 | " c = x + y*1j\n",
142 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
143 | " for h in range(height):\n",
144 | " for w in range(width):\n",
145 | " z = c[h, w]\n",
146 | " for i in range(maxiterations):\n",
147 | " z = z**2 + c[h, w]\n",
148 | " if abs(z) > 2:\n",
149 | " fractal[h, w] = i\n",
150 | " break\n",
151 | " return fractal"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {
157 | "slideshow": {
158 | "slide_type": "slide"
159 | }
160 | },
161 | "source": [
162 | "Before I run this, I'm going to drop the number of pixels from 8000×12000 to 800×1200, a factor of 100.\n",
163 | "\n",
164 | "We don't want to wait 900 seconds (15 minutes)!"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": null,
170 | "metadata": {},
171 | "outputs": [],
172 | "source": [
173 | "starttime = time.time()\n",
174 | "fractal = run_python(800, 1200)\n",
175 | "time.time() - starttime"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {
182 | "slideshow": {
183 | "slide_type": "slide"
184 | }
185 | },
186 | "outputs": [],
187 | "source": [
188 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
189 | "ax.imshow(fractal)\n",
190 | "# ax.imshow(fractal[-200:, :300])"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {
196 | "slideshow": {
197 | "slide_type": "fragment"
198 | }
199 | },
200 | "source": [
201 | "It works, but it's _super slow!_ This is how an efficient algorithm would go, but stepping through each pixel in Python code kills performance due to all the type-checking, numeric boxing, and virtualization that Python does."
202 | ]
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {
207 | "slideshow": {
208 | "slide_type": "slide"
209 | }
210 | },
211 | "source": [
212 | "At this point, we'd normally start thinking about compiled code. And we should: compilation is exactly how to avoid all the aforementioned issues."
213 | ]
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {
218 | "slideshow": {
219 | "slide_type": "fragment"
220 | }
221 | },
222 | "source": [
223 | "However, we _don't_ need to rewrite our code in another language."
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "metadata": {
230 | "slideshow": {
231 | "slide_type": "slide"
232 | }
233 | },
234 | "outputs": [],
235 | "source": [
236 | "import numba\n",
237 | "\n",
238 | "@numba.jit\n",
239 | "def run_numba_loop(height, width, maxiterations, c, fractal):\n",
240 | " for h in range(height):\n",
241 | " for w in range(width):\n",
242 | " z = c[h, w]\n",
243 | " for i in range(maxiterations):\n",
244 | " z = z**2 + c[h, w]\n",
245 | " if abs(z) > 2:\n",
246 | " fractal[h, w] = i\n",
247 | " break\n",
248 | " return fractal\n",
249 | "\n",
250 | "def run_numba(height, width, maxiterations=20):\n",
251 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
252 | " c = x + y*1j\n",
253 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
254 | " return run_numba_loop(height, width, maxiterations, c, fractal)"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": null,
260 | "metadata": {
261 | "slideshow": {
262 | "slide_type": "fragment"
263 | }
264 | },
265 | "outputs": [],
266 | "source": [
267 | "starttime = time.time()\n",
268 | "fractal = run_numba(8000, 12000, maxiterations=20)\n",
269 | "time.time() - starttime"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {
275 | "slideshow": {
276 | "slide_type": "slide"
277 | }
278 | },
279 | "source": [
280 | "Numba is a \"just in time\" compiler (JIT) for numeric Python. That is, it compiles the Python code as soon as it knows the data types of the inputs, just before execution. (Remember that the compilation time is included in the measurement— it's small compared to 10 seconds, though.)"
281 | ]
282 | },
283 | {
284 | "cell_type": "markdown",
285 | "metadata": {
286 | "slideshow": {
287 | "slide_type": "fragment"
288 | }
289 | },
290 | "source": [
291 | "Numba knew to compile the `run_numba_loop` function because it was preceded by the decorator `@numba.jit`. It is now a wrapped function."
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": null,
297 | "metadata": {},
298 | "outputs": [],
299 | "source": [
300 | "run_numba_loop"
301 | ]
302 | },
303 | {
304 | "cell_type": "markdown",
305 | "metadata": {
306 | "slideshow": {
307 | "slide_type": "slide"
308 | }
309 | },
310 | "source": [
311 | "Its \"overloads\" are the saved, compiled functions for each signature. There's only one so far: `int, int, int, array(complex), array(int)`."
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": null,
317 | "metadata": {
318 | "scrolled": false
319 | },
320 | "outputs": [],
321 | "source": [
322 | "run_numba_loop.overloads"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {
328 | "slideshow": {
329 | "slide_type": "slide"
330 | }
331 | },
332 | "source": [
333 | "Notice that we split the process into `run_numba`, a plain function, and `run_numba_loop`, a JIT-compiled function. Not all Python can be compiled, or we'd be doing it all the time! Python is a highly dynamic language (did you know you can change an object's class after it's created?), so there will always be things Python can do that a compiled language can't do. There will always be data types Numba doesn't recognize."
334 | ]
335 | },
336 | {
337 | "cell_type": "markdown",
338 | "metadata": {
339 | "slideshow": {
340 | "slide_type": "fragment"
341 | }
342 | },
343 | "source": [
344 | "Numba lists the [Python language features](https://numba.pydata.org/numba-doc/latest/reference/pysupported.html) and [Numpy types and functions](https://numba.pydata.org/numba-doc/latest/reference/numpysupported.html) that it recognizes on its website. This is a growing list, but it will never converge to the entirety of Python and all its libraries."
345 | ]
346 | },
347 | {
348 | "cell_type": "markdown",
349 | "metadata": {
350 | "slideshow": {
351 | "slide_type": "fragment"
352 | }
353 | },
354 | "source": [
355 | "For the most part, Numba recognizes numbers and arrays, and even if it can handle a given language feature (iterators, classes), it will perform best on simple loops and straightforward code. Generally, you only want to wrap the most arithmetically intense part of your calculation.\n",
356 | "\n",
357 | "In the above example, I didn't include the array-creation steps because the first one was an unsupported function (`numpy.ogrid`)."
358 | ]
359 | },
360 | {
361 | "cell_type": "markdown",
362 | "metadata": {
363 | "slideshow": {
364 | "slide_type": "slide"
365 | }
366 | },
367 | "source": [
368 | "One of the early reasons for Numba's existence was to write new Numpy universal functions (\"ufuncs\")."
369 | ]
370 | },
371 | {
372 | "cell_type": "code",
373 | "execution_count": null,
374 | "metadata": {},
375 | "outputs": [],
376 | "source": [
377 | "@numba.vectorize\n",
378 | "def as_ufunc(c, maxiterations):\n",
379 | " z = c\n",
380 | " for i in range(maxiterations):\n",
381 | " z = z**2 + c\n",
382 | " if abs(z) > 2:\n",
383 | " return i\n",
384 | " return maxiterations\n",
385 | "\n",
386 | "def run_numba_2(height, width, maxiterations=20):\n",
387 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
388 | " c = x + y*1j\n",
389 | " return as_ufunc(c, maxiterations)"
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": null,
395 | "metadata": {
396 | "slideshow": {
397 | "slide_type": "fragment"
398 | }
399 | },
400 | "outputs": [],
401 | "source": [
402 | "starttime = time.time()\n",
403 | "fractal = run_numba_2(8000, 12000, maxiterations=20)\n",
404 | "time.time() - starttime"
405 | ]
406 | },
407 | {
408 | "cell_type": "code",
409 | "execution_count": null,
410 | "metadata": {
411 | "slideshow": {
412 | "slide_type": "fragment"
413 | }
414 | },
415 | "outputs": [],
416 | "source": [
417 | "type(as_ufunc)"
418 | ]
419 | },
420 | {
421 | "cell_type": "markdown",
422 | "metadata": {
423 | "slideshow": {
424 | "slide_type": "slide"
425 | }
426 | },
427 | "source": [
428 | "This is only possible if the process we want to apply is elementwise— we do an independent thing to each element, and the output shape is the same as the input shape— because that's what a ufunc does. The function definition is much simpler since the input argument `c` is now a (complex) number, rather than an array. We don't need to write the for loops."
429 | ]
430 | },
431 | {
432 | "cell_type": "markdown",
433 | "metadata": {
434 | "slideshow": {
435 | "slide_type": "slide"
436 | }
437 | },
438 | "source": [
439 | "It even has the funky ufunc methods, like `.at` and `.reduce`:"
440 | ]
441 | },
442 | {
443 | "cell_type": "code",
444 | "execution_count": null,
445 | "metadata": {},
446 | "outputs": [],
447 | "source": [
448 | "a = numpy.arange(0, 2, 0.1) * 1j\n",
449 | "a"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": null,
455 | "metadata": {},
456 | "outputs": [],
457 | "source": [
458 | "as_ufunc.at(a, [0, 2, 4, 6, 8, 10, 12, 14], 20)\n",
459 | "a"
460 | ]
461 | },
462 | {
463 | "cell_type": "markdown",
464 | "metadata": {
465 | "slideshow": {
466 | "slide_type": "slide"
467 | }
468 | },
469 | "source": [
470 | "Remember when I said horizontal and vertical scaling are complementary? I didn't say that they're multiplicative because it's sometimes much better than that. Let's put this Numba-compiled ufunc into Dask:"
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": null,
476 | "metadata": {},
477 | "outputs": [],
478 | "source": [
479 | "import dask.array\n",
480 | "\n",
481 | "def run_dask(height, width, maxiterations=20):\n",
482 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
483 | " c = dask.array.from_array(x + y*1j, chunks=(100, 12000))\n",
484 | " return as_ufunc(c, maxiterations)"
485 | ]
486 | },
487 | {
488 | "cell_type": "code",
489 | "execution_count": null,
490 | "metadata": {
491 | "slideshow": {
492 | "slide_type": "fragment"
493 | }
494 | },
495 | "outputs": [],
496 | "source": [
497 | "starttime = time.time()\n",
498 | "fractal = run_dask(8000, 12000, maxiterations=20).compute()\n",
499 | "time.time() - starttime"
500 | ]
501 | },
502 | {
503 | "cell_type": "markdown",
504 | "metadata": {
505 | "slideshow": {
506 | "slide_type": "slide"
507 | }
508 | },
509 | "source": [
510 | "It took\n",
511 | "\n",
512 | " * 35 seconds to run in Numpy on 1 core.\n",
513 | " * 21 seconds to run in Numpy on 12 cores with Dask.\n",
514 | " * 10 seconds to run as a Numba-compiled ufunc on 1 core.\n",
515 | " * 3.7 seconds to run as a Numba-compiled ufunc on 12 cores with Dask.\n",
516 | "\n",
517 | "Dask multiprocessing scales better with the Numba-compiled ufunc because it's a much simpler computation graph. Dask can't see inside `as_ufunc` to worry about interdependencies."
518 | ]
519 | },
520 | {
521 | "cell_type": "code",
522 | "execution_count": null,
523 | "metadata": {
524 | "slideshow": {
525 | "slide_type": "slide"
526 | }
527 | },
528 | "outputs": [],
529 | "source": [
530 | "c = dask.array.from_array(numpy.array([[0j, 1j], [0j, 1j]]), chunks=(1, 1))\n",
531 | "as_ufunc(c, 20).visualize()"
532 | ]
533 | },
534 | {
535 | "cell_type": "markdown",
536 | "metadata": {
537 | "slideshow": {
538 | "slide_type": "fragment"
539 | }
540 | },
541 | "source": [
542 | "Remember how it used to look? How its complexity scaled with the number of iterations? Now all of the complexity of our algorithm is internal to `as_ufunc`."
543 | ]
544 | },
545 | {
546 | "cell_type": "markdown",
547 | "metadata": {
548 | "slideshow": {
549 | "slide_type": "slide"
550 | }
551 | },
552 | "source": [
553 | "(Incidentally, the reason it's not scaling beyond 3 cores is likely memory bandwidth: the above example was fetching memory at 1.5 GB/sec, which I've found to be an approximate limit on all systems I've encountered except for Knight's Landing's MCDRAM.)"
554 | ]
555 | },
556 | {
557 | "cell_type": "markdown",
558 | "metadata": {
559 | "slideshow": {
560 | "slide_type": "fragment"
561 | }
562 | },
563 | "source": [
564 | "I presented Numba first because it involves the least change to your code— the orthodox mantra is to get your code working first, profile it to find the slowest parts, and only accelerate those parts. Numba lets you do that with the least effort."
565 | ]
566 | },
567 | {
568 | "cell_type": "markdown",
569 | "metadata": {
570 | "slideshow": {
571 | "slide_type": "fragment"
572 | }
573 | },
574 | "source": [
575 | "But sometimes you need something more: features that are only available in C++, for instance. Python is unable to express some concepts related to performance tuning (deliberately: to keep the language simple) and compilers aren't magical— Numba can't always guess what you mean."
576 | ]
577 | },
578 | {
579 | "cell_type": "markdown",
580 | "metadata": {
581 | "slideshow": {
582 | "slide_type": "slide"
583 | }
584 | },
585 | "source": [
586 | "Cython is a halfway language, part Python and part C/C++. It translates to C or C++ and uses a conventional compiler to turn into a Python extension module. They also have a Jupyter extension, which I'll use for this demo."
587 | ]
588 | },
589 | {
590 | "cell_type": "code",
591 | "execution_count": null,
592 | "metadata": {},
593 | "outputs": [],
594 | "source": [
595 | "%load_ext Cython"
596 | ]
597 | },
598 | {
599 | "cell_type": "markdown",
600 | "metadata": {
601 | "slideshow": {
602 | "slide_type": "slide"
603 | }
604 | },
605 | "source": [
606 | "The following cell creates C++ code from Python, compiles it, and loads the resulting Python module."
607 | ]
608 | },
609 | {
610 | "cell_type": "code",
611 | "execution_count": null,
612 | "metadata": {},
613 | "outputs": [],
614 | "source": [
615 | "%%cython --cplus\n",
616 | "import numpy\n",
617 | "\n",
618 | "def run_cython(height, width, maxiterations=20):\n",
619 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
620 | " c = x + y*1j\n",
621 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
622 | " for h in range(height):\n",
623 | " for w in range(width):\n",
624 | " z = c[h, w]\n",
625 | " for i in range(maxiterations):\n",
626 | " z = z**2 + c[h, w]\n",
627 | " if abs(z) > 2:\n",
628 | " fractal[h, w] = i\n",
629 | " break\n",
630 | " return fractal"
631 | ]
632 | },
633 | {
634 | "cell_type": "code",
635 | "execution_count": null,
636 | "metadata": {
637 | "slideshow": {
638 | "slide_type": "slide"
639 | }
640 | },
641 | "outputs": [],
642 | "source": [
643 | "starttime = time.time()\n",
644 | "fractal = run_cython(800, 1200) # note: small grid; 100× smaller problem\n",
645 | "time.time() - starttime"
646 | ]
647 | },
648 | {
649 | "cell_type": "markdown",
650 | "metadata": {
651 | "slideshow": {
652 | "slide_type": "fragment"
653 | }
654 | },
655 | "source": [
656 | "But the resulting _compiled_ module runs almost as slowly as Python itself: 7 sec vs 9.5 sec (note: we're using the smaller grid again, so this is hundreds of times slower than Numpy or Numba). Why is that? Isn't this compiled?"
657 | ]
658 | },
659 | {
660 | "cell_type": "markdown",
661 | "metadata": {
662 | "slideshow": {
663 | "slide_type": "fragment"
664 | }
665 | },
666 | "source": [
667 | "The issue is that Cython does nothing about all the runtime type-checking of Python objects. Numba replaced Python objects with raw numbers, where possible, which makes the real difference."
668 | ]
669 | },
670 | {
671 | "cell_type": "markdown",
672 | "metadata": {
673 | "slideshow": {
674 | "slide_type": "fragment"
675 | }
676 | },
677 | "source": [
678 | "Cython chose to cover the entire Python language and make naive translations by default. Numba chose to make optimized translations by default but not cover the entire Python language."
679 | ]
680 | },
681 | {
682 | "cell_type": "markdown",
683 | "metadata": {
684 | "slideshow": {
685 | "slide_type": "slide"
686 | }
687 | },
688 | "source": [
689 | "To get optimizations, we have to introduce C++ by hand."
690 | ]
691 | },
692 | {
693 | "cell_type": "code",
694 | "execution_count": null,
695 | "metadata": {},
696 | "outputs": [],
697 | "source": [
698 | "%%cython --cplus --annotate\n",
699 | "import cython\n",
700 | "import numpy # load Python interface to Numpy\n",
701 | "cimport numpy # load C++ interface to Numpy (types end in _t)\n",
702 | "\n",
703 | "@cython.boundscheck(False) # turn off bounds-checking\n",
704 | "@cython.wraparound(False) # turn off negative index wrapping (e.g. -1 for last element)\n",
705 | "def run_cython(int height, int width, int maxiterations=20):\n",
706 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
707 | " c = x + y*1j\n",
708 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
709 | "\n",
710 | " cdef numpy.ndarray[numpy.complex128_t, ndim=2, mode=\"c\"] c_array = c\n",
711 | " cdef numpy.ndarray[numpy.int32_t, ndim=2, mode=\"c\"] fractal_array = fractal\n",
712 | " cdef numpy.complex128_t z\n",
713 | " \n",
714 | " for h in range(height):\n",
715 | " for w in range(width):\n",
716 | " z = c_array[h, w]\n",
717 | " for i in range(maxiterations):\n",
718 | " z = z**2 + c_array[h, w]\n",
719 | " if abs(z) > 2:\n",
720 | " fractal_array[h, w] = i\n",
721 | " break\n",
722 | " return fractal"
723 | ]
724 | },
725 | {
726 | "cell_type": "markdown",
727 | "metadata": {
728 | "slideshow": {
729 | "slide_type": "slide"
730 | }
731 | },
732 | "source": [
733 | "(Still using the small grid; still unable to scale to native speeds.)"
734 | ]
735 | },
736 | {
737 | "cell_type": "code",
738 | "execution_count": null,
739 | "metadata": {},
740 | "outputs": [],
741 | "source": [
742 | "starttime = time.time()\n",
743 | "fractal = run_cython(800, 1200) # note: small grid; 100× smaller problem\n",
744 | "time.time() - starttime"
745 | ]
746 | },
747 | {
748 | "cell_type": "markdown",
749 | "metadata": {
750 | "slideshow": {
751 | "slide_type": "slide"
752 | }
753 | },
754 | "source": [
755 | "In addition to importing Python libraries, Cython can include C++ headers. A hidden feature in the `cdef extern` syntax for including C++ allows you to write literal C++ in your Cython."
756 | ]
757 | },
758 | {
759 | "cell_type": "code",
760 | "execution_count": null,
761 | "metadata": {},
762 | "outputs": [],
763 | "source": [
764 | "%%cython --cplus -c-O3\n",
765 | "import numpy\n",
766 | "\n",
767 | "cdef extern from *:\n",
768 | " \"\"\"\n",
769 | "    #include <complex>\n",
770 | " void quick(int height, int width, int maxiterations, double* c, int* fractal) {\n",
771 | " for (int h = 0; h < height; h++) {\n",
772 | " for (int w = 0; w < width; w++) {\n",
773 | " double creal = c[2 * (h + height*w)];\n",
774 | " double cimag = c[2 * (h + height*w) + 1];\n",
775 | "            std::complex<double> ci = std::complex<double>(creal, cimag);\n",
776 | "            std::complex<double> z = ci;\n",
777 | " for (int i = 0; i < maxiterations; i++) {\n",
778 | " z = z * z + ci;\n",
779 | " if (std::abs(z) > 2) {\n",
780 | " fractal[h + height*w] = i;\n",
781 | " break;\n",
782 | " }\n",
783 | " }\n",
784 | " }\n",
785 | " }\n",
786 | " }\n",
787 | " \"\"\"\n",
788 | " void quick(int height, int width, int maxiterations, double* c, int* fractal)\n",
789 | "\n",
790 | "def run_cython(int height, int width, int maxiterations=20):\n",
791 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
792 | " c = x + y*1j\n",
793 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
794 | "    quick(height, width, maxiterations, <double*> (c.ctypes.data), <int*> (fractal.ctypes.data))\n",
795 | " return fractal"
796 | ]
797 | },
798 | {
799 | "cell_type": "markdown",
800 | "metadata": {
801 | "slideshow": {
802 | "slide_type": "slide"
803 | }
804 | },
805 | "source": [
806 | "Now we can use the full grid. This is\n",
807 | "\n",
808 | " * 2× slower than Numba,\n",
809 | " * 2× faster than Numpy, and\n",
810 | " * 45× faster than Python."
811 | ]
812 | },
813 | {
814 | "cell_type": "code",
815 | "execution_count": null,
816 | "metadata": {},
817 | "outputs": [],
818 | "source": [
819 | "starttime = time.time()\n",
820 | "fractal = run_cython(8000, 12000)\n",
821 | "time.time() - starttime"
822 | ]
823 | },
824 | {
825 | "cell_type": "markdown",
826 | "metadata": {
827 | "slideshow": {
828 | "slide_type": "slide"
829 | }
830 | },
831 | "source": [
832 | "Although Cython was originally intended as a code optimizer (you can see that it's difficult to use it that way!), it has come to be used to _bind_ C++ libraries as Python extensions, since it can speak both languages."
833 | ]
834 | },
835 | {
836 | "cell_type": "markdown",
837 | "metadata": {
838 | "slideshow": {
839 | "slide_type": "fragment"
840 | }
841 | },
842 | "source": [
843 | "Today, there's another alternative: pybind11 is a C++ header for binding to Python (coming from the other direction, from C++ to Python)."
844 | ]
845 | },
846 | {
847 | "cell_type": "markdown",
848 | "metadata": {
849 | "slideshow": {
850 | "slide_type": "slide"
851 | }
852 | },
853 | "source": [
854 | "I would recommend:\n",
855 | "\n",
856 | " * **Numba** for accelerating small bits of numerical code.\n",
857 | " * **Cython** for mixing C++ into a mostly Python script: for instance, to access C++ only libraries.\n",
858 | " * **pybind11** for wrapping C++ cleanly as Python modules: for instance, you're distributing a C++ library for use in Python."
859 | ]
860 | },
861 | {
862 | "cell_type": "markdown",
863 | "metadata": {
864 | "slideshow": {
865 | "slide_type": "fragment"
866 | }
867 | },
868 | "source": [
869 | "(I've dropped pybind11 content because I'm sure we'd run out of time.)"
870 | ]
871 | },
872 | {
873 | "cell_type": "markdown",
874 | "metadata": {
875 | "slideshow": {
876 | "slide_type": "slide"
877 | }
878 | },
879 | "source": [
880 | "Last minute addition: since you've installed Pandas, you have NumExpr. This is a very easy way to accelerate \"one in, one out\" formulae. Our fractal is more complex than that, but many formulas aren't."
881 | ]
882 | },
883 | {
884 | "cell_type": "code",
885 | "execution_count": null,
886 | "metadata": {},
887 | "outputs": [],
888 | "source": [
889 | "import numexpr"
890 | ]
891 | },
892 | {
893 | "cell_type": "code",
894 | "execution_count": null,
895 | "metadata": {},
896 | "outputs": [],
897 | "source": [
898 | "a = numpy.arange(1e6)\n",
899 | "b = numpy.arange(1e6)\n",
900 | "numexpr.evaluate(\"sin(a) + arcsinh(b)\")"
901 | ]
902 | }
903 | ],
904 | "metadata": {
905 | "celltoolbar": "Slideshow",
906 | "kernelspec": {
907 | "display_name": "Python 3",
908 | "language": "python",
909 | "name": "python3"
910 | },
911 | "language_info": {
912 | "codemirror_mode": {
913 | "name": "ipython",
914 | "version": 3
915 | },
916 | "file_extension": ".py",
917 | "mimetype": "text/x-python",
918 | "name": "python",
919 | "nbconvert_exporter": "python",
920 | "pygments_lexer": "ipython3",
921 | "version": "3.7.0"
922 | }
923 | },
924 | "nbformat": 4,
925 | "nbformat_minor": 2
926 | }
927 |
--------------------------------------------------------------------------------
/7-gpu.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# GPU: CuPy, Numba-GPU, PyCUDA"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "slideshow": {
18 | "slide_type": "fragment"
19 | }
20 | },
21 | "source": [
22 | "```\n",
23 | "conda install cupy cudatoolkit\n",
24 | "```\n",
25 | "\n",
26 | "```\n",
27 | "export CFLAGS=-fpermissive\n",
28 | "pip install --no-cache-dir pycuda # I have more luck with this one in pip\n",
29 | "```\n",
30 | "\n",
31 | "_(and numpy, matplotlib, numba from before)_"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "slideshow": {
38 | "slide_type": "slide"
39 | }
40 | },
41 | "source": [
42 | "If you can get better memory efficiency using rowwise code (e.g. compiled for loops), why would you ever write columnar code (e.g. Numpy)?"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {
48 | "slideshow": {
49 | "slide_type": "fragment"
50 | }
51 | },
52 | "source": [
53 | "**Answer:** vectorization!"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {
59 | "slideshow": {
60 | "slide_type": "fragment"
61 | }
62 | },
63 | "source": [
64 | "Vectorization is a vertical scaling technique that uses a single CPU core or a GPU more effectively. You can compute N operations at the same time _if they are all the same operation._\n",
65 | "\n",
66 | "<img src=\"img/vectorization-example.png\" width=\"100%\">"
67 | ]
68 | },
69 | {
70 | "cell_type": "markdown",
71 | "metadata": {
72 | "slideshow": {
73 | "slide_type": "slide"
74 | }
75 | },
76 | "source": [
77 | "If you don't fully utilize all cores, that's okay; someone else's work can fill the gaps.\n",
78 | "\n",
79 | "If you don't fully utilize the core's vector unit, no one else can use them."
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {
85 | "slideshow": {
86 | "slide_type": "fragment"
87 | }
88 | },
89 | "source": [
90 | "A GPU is a computational device designed around vector units."
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {
96 | "slideshow": {
97 | "slide_type": "slide"
98 | }
99 | },
100 | "source": [
101 | "Like parallel processing, this is another computing detail that is visible to you as a data analyst.\n",
102 | "\n",
103 | "Rowwise code like"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {
110 | "slideshow": {
111 | "slide_type": "-"
112 | }
113 | },
114 | "outputs": [],
115 | "source": [
116 | "import numba\n",
117 | "@numba.jit\n",
118 | "def run_numba_loop(height, width, maxiterations, c, fractal):\n",
119 | " for h in range(height):\n",
120 | " for w in range(width):\n",
121 | " z = c[h, w]\n",
122 | " for i in range(maxiterations):\n",
123 | " z = z**2 + c[h, w]\n",
124 | " if abs(z) > 2:\n",
125 | " fractal[h, w] = i\n",
126 | " break\n",
127 | " return fractal"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {
133 | "slideshow": {
134 | "slide_type": "-"
135 | }
136 | },
137 | "source": [
138 | "does not use vector units effectively because each array element may be in a different stage of processing— some may have diverged before others."
139 | ]
140 | },
141 | {
142 | "cell_type": "markdown",
143 | "metadata": {
144 | "slideshow": {
145 | "slide_type": "slide"
146 | }
147 | },
148 | "source": [
149 | "Columnar code like"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "import numpy\n",
159 | "import time\n",
160 | "\n",
161 | "def prepare(height, width):\n",
162 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
163 | " c = x + y*1j\n",
164 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32)\n",
165 | " return c, fractal\n",
166 | "\n",
167 | "def run(c, fractal, maxiterations=20):\n",
168 | " fractal *= 0 # set fractal to maxiterations without replacing it\n",
169 | " fractal += maxiterations\n",
170 | " z = c\n",
171 | " for i in range(maxiterations):\n",
172 | " z = z**2 + c\n",
173 | " diverge = z.real**2 + z.imag**2 > 2**2\n",
174 | " divnow = diverge & (fractal == maxiterations)\n",
175 | " fractal[divnow] = i\n",
176 | " z[diverge] = 2\n",
177 | " return fractal"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "metadata": {},
183 | "source": [
184 | "can use vector units effectively because it's always applying the Same Instruction on Multiple Data (SIMD)."
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {
190 | "slideshow": {
191 | "slide_type": "slide"
192 | }
193 | },
194 | "source": [
195 | "All we need is a library to implement the Numpy functions on a GPU."
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": null,
201 | "metadata": {
202 | "slideshow": {
203 | "slide_type": "fragment"
204 | }
205 | },
206 | "outputs": [],
207 | "source": [
208 | "import cupy"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "slideshow": {
216 | "slide_type": "fragment"
217 | }
218 | },
219 | "outputs": [],
220 | "source": [
221 | "c, fractal = prepare(4000, 6000)\n",
222 | "\n",
223 | "c = cupy.array(c)\n",
224 | "fractal = cupy.array(fractal)\n",
225 | "\n",
226 | "starttime = time.time()\n",
227 | "fractal = run(c, fractal)\n",
228 | "time.time() - starttime"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {
235 | "slideshow": {
236 | "slide_type": "fragment"
237 | }
238 | },
239 | "outputs": [],
240 | "source": [
241 | "c, fractal = prepare(4000, 6000)\n",
242 | "\n",
243 | "starttime = time.time()\n",
244 | "fractal = run(c, fractal)\n",
245 | "time.time() - starttime"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {
251 | "slideshow": {
252 | "slide_type": "fragment"
253 | }
254 | },
255 | "source": [
256 | "Exactly the same code: first with CuPy on the GPU (2.8 sec), then with Numpy on the CPU (7.5 sec)."
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {
262 | "slideshow": {
263 | "slide_type": "slide"
264 | }
265 | },
266 | "source": [
267 | "If you're wondering why I'm working on a reduced problem (4× smaller than previous sessions), it's because I couldn't fit the full one in my GPU's memory!"
268 | ]
269 | },
270 | {
271 | "cell_type": "markdown",
272 | "metadata": {
273 | "slideshow": {
274 | "slide_type": "fragment"
275 | }
276 | },
277 | "source": [
278 | "(There's always a catch!)"
279 | ]
280 | },
281 | {
282 | "cell_type": "markdown",
283 | "metadata": {
284 | "slideshow": {
285 | "slide_type": "slide"
286 | }
287 | },
288 | "source": [
289 | "Also, CuPy's adherence to the Numpy API isn't perfect: I had to write\n",
290 | "\n",
291 | "```python\n",
292 | "z.real**2 + z.imag**2\n",
293 | "```\n",
294 | "\n",
295 | "instead of\n",
296 | "\n",
297 | "```python\n",
298 | "numpy.absolute(z)\n",
299 | "```\n",
300 | "\n",
301 | "because the `absolute` function wasn't supported. This is the error you'd see:"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": null,
307 | "metadata": {},
308 | "outputs": [],
309 | "source": [
310 | "try:\n",
311 | " numpy.absolute(cupy.array([1.1, 2.2, 3.3]))\n",
312 | "except ValueError as err:\n",
313 | " print(err)"
314 | ]
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {
319 | "slideshow": {
320 | "slide_type": "fragment"
321 | }
322 | },
323 | "source": [
324 | "Nevertheless, we can expect CuPy to become more complete as people use it and report missing features."
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {
330 | "slideshow": {
331 | "slide_type": "slide"
332 | }
333 | },
334 | "source": [
335 | "**GPU method #2:** Use Numba! (You have to install a \"cudatoolkit\" library with it.)"
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": null,
341 | "metadata": {},
342 | "outputs": [],
343 | "source": [
344 | "import numba.cuda\n",
345 | "import math\n",
346 | "\n",
347 | "@numba.cuda.jit\n",
348 | "def as_cuda(c, fractal, maxiterations):\n",
349 | " x, y = numba.cuda.grid(2) # 2 dimensional CUDA grid\n",
350 | " z = c[x, y]\n",
351 | "    fractal[x, y] = maxiterations\n",
352 | " for i in range(maxiterations):\n",
353 | " z = z**2 + c[x, y]\n",
354 | " if abs(z) > 2:\n",
355 | " fractal[x, y] = i\n",
356 | " break # not optimal: threads that leave the loop still have to wait\n",
357 | "\n",
358 | "def run_numba(height, width, maxiterations=20):\n",
359 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
360 | " c = x + y*1j\n",
361 | " fractal = numba.cuda.device_array(c.shape, dtype=numpy.int32)\n",
362 | " as_cuda[(math.ceil(height / 32), math.ceil(width / 32)), (32, 32)](c, fractal, maxiterations)\n",
363 | " return fractal"
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": null,
369 | "metadata": {},
370 | "outputs": [],
371 | "source": [
372 | "starttime = time.time()\n",
373 | "fractal = run_numba(4000, 6000)\n",
374 | "time.time() - starttime"
375 | ]
376 | },
377 | {
378 | "cell_type": "markdown",
379 | "metadata": {
380 | "slideshow": {
381 | "slide_type": "slide"
382 | }
383 | },
384 | "source": [
385 | "On the same sized problem,\n",
386 | "\n",
387 | " * Numpy on the CPU: 7.5 sec\n",
388 | " * CuPy on the GPU: 2.8 sec\n",
389 | " * Numba on the GPU: 0.3 sec\n",
390 | "\n",
391 | "And Numba doesn't suffer from the memory issue because it doesn't make as many intermediate copies."
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": null,
397 | "metadata": {
398 | "slideshow": {
399 | "slide_type": "fragment"
400 | }
401 | },
402 | "outputs": [],
403 | "source": [
404 | "starttime = time.time()\n",
405 | "fractal = run_numba(8000, 12000) # full-sized problem\n",
406 | "time.time() - starttime"
407 | ]
408 | },
409 | {
410 | "cell_type": "markdown",
411 | "metadata": {
412 | "slideshow": {
413 | "slide_type": "slide"
414 | }
415 | },
416 | "source": [
417 | "That full-sized problem used to take us half a minute in Numpy, and (projected) 15 minutes in pure Python. For sanity's sake, we verify that it is, indeed, drawing our fractal."
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": null,
423 | "metadata": {},
424 | "outputs": [],
425 | "source": [
426 | "%matplotlib inline\n",
427 | "import matplotlib.pyplot as plt\n",
428 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
429 | "ax.imshow(fractal)\n",
430 | "# ax.imshow(fractal[-2000:, :3000])"
431 | ]
432 | },
433 | {
434 | "cell_type": "markdown",
435 | "metadata": {
436 | "slideshow": {
437 | "slide_type": "slide"
438 | }
439 | },
440 | "source": [
441 | "**Method #3:** PyCUDA. This library is somewhat older and hard to distribute nowadays (it's not Python 3.7 compliant). However, it is unique in letting you write any CUDA code (e.g. copied from the web) in Python without wrapping it as a library."
442 | ]
443 | },
444 | {
445 | "cell_type": "code",
446 | "execution_count": null,
447 | "metadata": {},
448 | "outputs": [],
449 | "source": [
450 | "import pycuda.autoinit\n",
451 | "import pycuda.driver\n",
452 | "import pycuda.compiler\n",
453 | "\n",
454 | "module = pycuda.compiler.SourceModule(\"\"\"\n",
455 | "__global__ void from_pycuda(double* c, int* fractal, int height, int width, int maxiterations) {\n",
456 | " const int x = threadIdx.x + blockIdx.x*blockDim.x;\n",
457 | " const int y = threadIdx.y + blockIdx.y*blockDim.y;\n",
458 | " double creal = c[2 * (x + height*y)];\n",
459 | " double cimag = c[2 * (x + height*y) + 1];\n",
460 | " double zreal = creal;\n",
461 | " double zimag = cimag;\n",
462 | " fractal[x + height*y] = maxiterations;\n",
463 | " for (int i = 0; i < maxiterations; i++) {\n",
464 | " double zreal2 = zreal*zreal + zimag*zimag + creal;\n",
465 | " double zimag2 = zreal*zreal + zimag*zimag + cimag;\n",
466 | " zreal = zreal2;\n",
467 | " zimag = zimag2;\n",
468 | " if (zreal*zreal + zimag*zimag > 4) {\n",
469 | " fractal[x + height*y] = i;\n",
470 | " break;\n",
471 | " }\n",
472 | " }\n",
473 | "}\n",
474 | "\"\"\")\n",
475 | "from_pycuda = module.get_function(\"from_pycuda\")"
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": null,
481 | "metadata": {
482 | "slideshow": {
483 | "slide_type": "slide"
484 | }
485 | },
486 | "outputs": [],
487 | "source": [
488 | "def run_pycuda(height, width, maxiterations=20):\n",
489 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
490 | " c = x + y*1j\n",
491 | " fractal = numpy.empty(c.shape, dtype=numpy.int32) + maxiterations\n",
492 | " from_pycuda(pycuda.driver.In(c.view(numpy.float64)),\n",
493 | " pycuda.driver.Out(fractal),\n",
494 | " numpy.int32(height),\n",
495 | " numpy.int32(width),\n",
496 | " numpy.int32(maxiterations),\n",
497 | " block=(32, 32, 1),\n",
498 | " grid=(int(math.ceil(height / 32)), int(math.ceil(width / 32))))\n",
499 | " return fractal"
500 | ]
501 | },
502 | {
503 | "cell_type": "code",
504 | "execution_count": null,
505 | "metadata": {},
506 | "outputs": [],
507 | "source": [
508 | "starttime = time.time()\n",
509 | "fractal = run_pycuda(8000, 12000) # full-sized problem\n",
510 | "time.time() - starttime"
511 | ]
512 | },
513 | {
514 | "cell_type": "markdown",
515 | "metadata": {
516 | "slideshow": {
517 | "slide_type": "slide"
518 | }
519 | },
520 | "source": [
521 | "About the same as Numba (10% better), which wouldn't be worth it for having to translate Python into CUDA C++, but would be worth it if you _found_ CUDA C++ and didn't want to translate it into Python!"
522 | ]
523 | },
524 | {
525 | "cell_type": "markdown",
526 | "metadata": {
527 | "slideshow": {
528 | "slide_type": "fragment"
529 | }
530 | },
531 | "source": [
532 | "As before, each library has its own special niche:\n",
533 | "\n",
534 | " * **CuPy:** for directly running Numpy on GPUs, no questions asked\n",
535 | " * **Numba:** for running (a limited subset of) Python code directly on the GPU\n",
536 | " * **PyCUDA:** for running CUDA C++ with the convenience of Numpy input and output."
537 | ]
538 | }
539 | ],
540 | "metadata": {
541 | "celltoolbar": "Slideshow",
542 | "kernelspec": {
543 | "display_name": "Python 3",
544 | "language": "python",
545 | "name": "python3"
546 | },
547 | "language_info": {
548 | "codemirror_mode": {
549 | "name": "ipython",
550 | "version": 3
551 | },
552 | "file_extension": ".py",
553 | "mimetype": "text/x-python",
554 | "name": "python",
555 | "nbconvert_exporter": "python",
556 | "pygments_lexer": "ipython3",
557 | "version": "3.7.0"
558 | }
559 | },
560 | "nbformat": 4,
561 | "nbformat_minor": 2
562 | }
563 |
--------------------------------------------------------------------------------
/8-low-level.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Low-level hackery"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "slideshow": {
18 | "slide_type": "fragment"
19 | }
20 | },
21 | "source": [
22 | "_(nothing to install; just numpy, matplotlib from before)_"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {
28 | "slideshow": {
29 | "slide_type": "slide"
30 | }
31 | },
32 | "source": [
33 | "One function I've used without much comment is `numpy.frombuffer`, which lets us wrap arbitrary regions of memory as Numpy arrays. We can \"peek\" at any memory we want; we can also \"poke\" it, changing values, byte by byte."
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {
39 | "slideshow": {
40 | "slide_type": "slide"
41 | }
42 | },
43 | "source": [
44 | "Consider, for instance, a byte string. These are immutable (cannot be changed) in Python:"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "hello = b\"Hello, world!\""
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "try:\n",
63 | " hello[4:8] = b\"????\"\n",
64 | "except TypeError as err:\n",
65 | " print(\"Nope: \" + str(err))"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "slideshow": {
73 | "slide_type": "fragment"
74 | }
75 | },
76 | "outputs": [],
77 | "source": [
78 | "import numpy\n",
79 | "a = numpy.frombuffer(hello, dtype=numpy.uint8)\n",
80 | "a"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": [
89 | "a.view(\"S1\")"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {
95 | "slideshow": {
96 | "slide_type": "slide"
97 | }
98 | },
99 | "source": [
100 | "By default, Numpy tries to protect you from doing evil things."
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": null,
106 | "metadata": {},
107 | "outputs": [],
108 | "source": [
109 | "try:\n",
110 | " a[4:8] = [69, 86, 73, 76]\n",
111 | "except ValueError as err:\n",
112 | " print(\"Nope: \" + str(err))"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {
118 | "slideshow": {
119 | "slide_type": "fragment"
120 | }
121 | },
122 | "source": [
123 | "But this is Python: we can shoot our feet if we want to."
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {},
130 | "outputs": [],
131 | "source": [
132 | "a.flags.writeable = True"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": null,
138 | "metadata": {},
139 | "outputs": [],
140 | "source": [
141 | "a[4:8] = [69, 86, 73, 76]"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {},
148 | "outputs": [],
149 | "source": [
150 | "hello"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {
156 | "slideshow": {
157 | "slide_type": "slide"
158 | }
159 | },
160 | "source": [
161 | "This messes with Python's internal data model."
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": [
170 | "hello = b\"Hello, world!\"\n",
171 | "a = numpy.frombuffer(hello, dtype=numpy.uint8)\n",
172 | "a.flags.writeable = True\n",
173 | "a[4:8] = [69, 86, 73, 76]\n",
174 | "print(hello == b\"Hello, world!\")"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {
181 | "slideshow": {
182 | "slide_type": "fragment"
183 | }
184 | },
185 | "outputs": [],
186 | "source": [
187 | "exec(\"\"\"\n",
188 | "hello = b\"Hello, world!\"\n",
189 | "a = numpy.frombuffer(hello, dtype=numpy.uint8)\n",
190 | "a.flags.writeable = True\n",
191 | "a[4:8] = [69, 86, 73, 76]\n",
192 | "print(hello == b\"Hello, world!\")\n",
193 | "\"\"\")"
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {
199 | "slideshow": {
200 | "slide_type": "fragment"
201 | }
202 | },
203 | "source": [
204 | "(The second example was interpreted as a `.pyc` script, in which all instances of the literal `b\"Hello, world!\"` were replaced by a single object: modifying that object in line 4 changed it in line 5!)"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {
210 | "slideshow": {
211 | "slide_type": "slide"
212 | }
213 | },
214 | "source": [
215 | "With the help of ctypes, a built-in Python library, Numpy can wrap any address at all. (Some will cause segmentation faults, so be careful!)"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": null,
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "x = 12345"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": null,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "import ctypes\n",
234 | "import sys\n",
235 | "\n",
236 | "ptr = ctypes.cast(id(x), ctypes.POINTER(ctypes.c_uint8))\n",
237 | "a = numpy.ctypeslib.as_array(ptr, (sys.getsizeof(x),))\n",
238 | "a"
239 | ]
240 | },
241 | {
242 | "cell_type": "markdown",
243 | "metadata": {
244 | "slideshow": {
245 | "slide_type": "fragment"
246 | }
247 | },
248 | "source": [
249 | "We're looking at a Python object header, a pointer to the `int` type (also a Python object), and then the number itself: `12345` in little endian bytes is `57, 48, 0, 0`. Do you see it?"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {
256 | "slideshow": {
257 | "slide_type": "fragment"
258 | }
259 | },
260 | "outputs": [],
261 | "source": [
262 | "a[-4:].view(numpy.int32)"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "metadata": {
268 | "slideshow": {
269 | "slide_type": "slide"
270 | }
271 | },
272 | "source": [
273 | "Let's try a string."
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": null,
279 | "metadata": {},
280 | "outputs": [],
281 | "source": [
282 | "y = \"Hey there.\"\n",
283 | "ptr = ctypes.cast(id(y), ctypes.POINTER(ctypes.c_uint8))\n",
284 | "a = numpy.ctypeslib.as_array(ptr, (sys.getsizeof(y),))\n",
285 | "a"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "metadata": {},
292 | "outputs": [],
293 | "source": [
294 | "a[-11:].tostring()"
295 | ]
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {
300 | "slideshow": {
301 | "slide_type": "slide"
302 | }
303 | },
304 | "source": [
305 | "By wrapping a pointer as a Numpy array (and maybe setting `array.flags.writeable = True`), we can do anything."
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {
311 | "slideshow": {
312 | "slide_type": "fragment"
313 | }
314 | },
315 | "source": [
316 | "We can break anything."
317 | ]
318 | },
319 | {
320 | "cell_type": "markdown",
321 | "metadata": {
322 | "slideshow": {
323 | "slide_type": "fragment"
324 | }
325 | },
326 | "source": [
327 | "Seriously, just letting `numpy.ctypeslib.as_array(0, (8,))` repr itself on the screen would cause a segmentation fault. "
328 | ]
329 | },
330 | {
331 | "cell_type": "markdown",
332 | "metadata": {
333 | "slideshow": {
334 | "slide_type": "fragment"
335 | }
336 | },
337 | "source": [
338 | "Is there anything useful we can do with this power?"
339 | ]
340 | },
341 | {
342 | "cell_type": "markdown",
343 | "metadata": {
344 | "slideshow": {
345 | "slide_type": "slide"
346 | }
347 | },
348 | "source": [
349 | "The original purpose of the ctypes library was to run code in compiled C libraries (not C++, that's much more complicated).\n",
350 | "\n",
351 | "For a busy data analyst, the advantage of that is that you don't need to write (or wait for) official bindings to use a C library."
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": null,
357 | "metadata": {},
358 | "outputs": [],
359 | "source": [
360 | "import ctypes\n",
361 | "libc = ctypes.cdll.LoadLibrary(\"/lib/x86_64-linux-gnu/libc.so.6\")"
362 | ]
363 | },
364 | {
365 | "cell_type": "markdown",
366 | "metadata": {
367 | "slideshow": {
368 | "slide_type": "fragment"
369 | }
370 | },
371 | "source": [
372 | "We can run arbitrary functions from `libc.so.6`, but we have to tell Python what its argument types are (that's not stored in the shared object file)."
373 | ]
374 | },
375 | {
376 | "cell_type": "code",
377 | "execution_count": null,
378 | "metadata": {},
379 | "outputs": [],
380 | "source": [
381 | "libc.malloc.argtypes = (ctypes.c_size_t,) # argument types (only one)\n",
382 | "libc.malloc.restype = ctypes.POINTER(ctypes.c_double) # return type"
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "execution_count": null,
388 | "metadata": {
389 | "slideshow": {
390 | "slide_type": "fragment"
391 | }
392 | },
393 | "outputs": [],
394 | "source": [
395 | "ptr = libc.malloc(100 * numpy.dtype(numpy.float64).itemsize) # pass number of bytes\n",
396 | "ptr"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": null,
402 | "metadata": {
403 | "slideshow": {
404 | "slide_type": "slide"
405 | }
406 | },
407 | "outputs": [],
408 | "source": [
409 | "a = numpy.ctypeslib.as_array(ptr, (100,))\n",
410 | "a"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": null,
416 | "metadata": {
417 | "slideshow": {
418 | "slide_type": "fragment"
419 | }
420 | },
421 | "outputs": [],
422 | "source": [
423 | "a[:] = 0.0\n",
424 | "a"
425 | ]
426 | },
427 | {
428 | "cell_type": "code",
429 | "execution_count": null,
430 | "metadata": {
431 | "slideshow": {
432 | "slide_type": "fragment"
433 | }
434 | },
435 | "outputs": [],
436 | "source": [
437 | "a.flags"
438 | ]
439 | },
440 | {
441 | "cell_type": "markdown",
442 | "metadata": {
443 | "slideshow": {
444 | "slide_type": "slide"
445 | }
446 | },
447 | "source": [
448 | "We've just used low-level `libc.malloc` to allocate data for an array. This array doesn't \"own\" its memory, so it doesn't go away when the Python object (`a`) gets garbage collected. We'd have to call `libc.free`, like any C programmer."
449 | ]
450 | },
451 | {
452 | "cell_type": "markdown",
453 | "metadata": {
454 | "slideshow": {
455 | "slide_type": "fragment"
456 | }
457 | },
458 | "source": [
459 | "Why do this? I've used it to allocate arrays on NUMA hardware and Knight's Landing MCDRAM. Specialty memory allocations will probably get more important, not less, as architectures get more heterogeneous."
460 | ]
461 | },
462 | {
463 | "cell_type": "markdown",
464 | "metadata": {
465 | "slideshow": {
466 | "slide_type": "fragment"
467 | }
468 | },
469 | "source": [
470 | "(No reason to write an entire analysis in C just to get special allocators.)"
471 | ]
472 | },
473 | {
474 | "cell_type": "markdown",
475 | "metadata": {
476 | "slideshow": {
477 | "slide_type": "fragment"
478 | }
479 | },
480 | "source": [
481 | "Another possible reason is to access special devices; device drivers are often written in C and distributed with C programmers in mind, but they may be the only thing between you and an important dataset."
482 | ]
483 | },
484 | {
485 | "cell_type": "markdown",
486 | "metadata": {
487 | "slideshow": {
488 | "slide_type": "slide"
489 | }
490 | },
491 | "source": [
492 | "Another example: you can wrap structs. (The snake eats its own tail again.)"
493 | ]
494 | },
495 | {
496 | "cell_type": "code",
497 | "execution_count": null,
498 | "metadata": {},
499 | "outputs": [],
500 | "source": [
501 | "class PyObject(ctypes.Structure): pass\n",
502 | "PyObject._fields_ = [(\"ob_refcnt\", ctypes.c_size_t),\n",
503 | " (\"ob_type\", ctypes.POINTER(PyObject))]"
504 | ]
505 | },
506 | {
507 | "cell_type": "markdown",
508 | "metadata": {
509 | "slideshow": {
510 | "slide_type": "fragment"
511 | }
512 | },
513 | "source": [
514 | "which is equivalent to\n",
515 | "\n",
516 | "```c\n",
517 | "struct PyObject {\n",
518 | " size_t ob_refcnt;\n",
519 | " PyObject* ob_type;\n",
520 | " // the rest depends on the type of object\n",
521 | "}\n",
522 | "```"
523 | ]
524 | },
525 | {
526 | "cell_type": "code",
527 | "execution_count": null,
528 | "metadata": {
529 | "slideshow": {
530 | "slide_type": "fragment"
531 | }
532 | },
533 | "outputs": [],
534 | "source": [
535 | "hello = b\"Hello, world!\""
536 | ]
537 | },
538 | {
539 | "cell_type": "code",
540 | "execution_count": null,
541 | "metadata": {},
542 | "outputs": [],
543 | "source": [
544 | "ptr = PyObject.from_address(id(hello))\n",
545 | "ptr"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": null,
551 | "metadata": {
552 | "slideshow": {
553 | "slide_type": "slide"
554 | }
555 | },
556 | "outputs": [],
557 | "source": [
558 | "ptr.ob_refcnt"
559 | ]
560 | },
561 | {
562 | "cell_type": "markdown",
563 | "metadata": {
564 | "slideshow": {
565 | "slide_type": "fragment"
566 | }
567 | },
568 | "source": [
569 | "This `ob_refcnt` is the number of Python references to a given object. There's a way to do it with a `sys` call:"
570 | ]
571 | },
572 | {
573 | "cell_type": "code",
574 | "execution_count": null,
575 | "metadata": {},
576 | "outputs": [],
577 | "source": [
578 | "sys.getrefcount(hello)"
579 | ]
580 | },
581 | {
582 | "cell_type": "markdown",
583 | "metadata": {
584 | "slideshow": {
585 | "slide_type": "fragment"
586 | }
587 | },
588 | "source": [
589 | "but it's always one too high because you create a reference to pass it to that function!"
590 | ]
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": null,
595 | "metadata": {},
596 | "outputs": [],
597 | "source": [
598 | "biglist = [hello] * 1000"
599 | ]
600 | },
601 | {
602 | "cell_type": "code",
603 | "execution_count": null,
604 | "metadata": {},
605 | "outputs": [],
606 | "source": [
607 | "ptr.ob_refcnt"
608 | ]
609 | },
610 | {
611 | "cell_type": "code",
612 | "execution_count": null,
613 | "metadata": {},
614 | "outputs": [],
615 | "source": [
616 | "sys.getrefcount(hello)"
617 | ]
618 | },
619 | {
620 | "cell_type": "markdown",
621 | "metadata": {
622 | "slideshow": {
623 | "slide_type": "slide"
624 | }
625 | },
626 | "source": [
627 | "The ctypes library can wrap numbers, strings, pointers, arrays, and structs, which is just about everything you might encounter in C. The reason C++ isn't supported is because C shoehorns its much larger type system into shared object files by \"name mangling.\"\n",
628 | "\n",
629 | "Even if you reverse the name mangling with the `c++filt` program, those names cannot be uniquely identified without interpreting the C++ headers. By that point, you might as well use Cython or pybind11."
630 | ]
631 | },
632 | {
633 | "cell_type": "markdown",
634 | "metadata": {
635 | "slideshow": {
636 | "slide_type": "slide"
637 | }
638 | },
639 | "source": [
640 | "**One last example:** the quickest, dirtiest way possible to call out to compiled code:"
641 | ]
642 | },
643 | {
644 | "cell_type": "code",
645 | "execution_count": null,
646 | "metadata": {},
647 | "outputs": [],
648 | "source": [
649 | "import os\n",
650 | "with open(\"tmp.cpp\", \"w\") as cfile:\n",
651 | " cfile.write(\"\"\"\n",
652 | "#include \n",
653 | "extern \"C\" { \n",
654 | " void quick(int height, int width, int maxiterations, double* c, int* fractal) {\n",
655 | " for (int h = 0; h < height; h++) {\n",
656 | " for (int w = 0; w < width; w++) {\n",
657 | " double creal = c[2 * (h + height*w)];\n",
658 | " double cimag = c[2 * (h + height*w) + 1];\n",
659 | " std::complex ci = std::complex(creal, cimag);\n",
660 | " std::complex z = ci;\n",
661 | " for (int i = 0; i < maxiterations; i++) {\n",
662 | " z = z * z + ci;\n",
663 | " if (std::abs(z) > 2) {\n",
664 | " fractal[h + height*w] = i;\n",
665 | " break;\n",
666 | " }\n",
667 | " }\n",
668 | " }\n",
669 | " }\n",
670 | " }\n",
671 | "}\n",
672 | "\"\"\")\n",
673 | "assert os.system(\"gcc -O3 -fPIC -shared tmp.cpp -o libtmp.so\") == 0"
674 | ]
675 | },
676 | {
677 | "cell_type": "code",
678 | "execution_count": null,
679 | "metadata": {
680 | "slideshow": {
681 | "slide_type": "slide"
682 | }
683 | },
684 | "outputs": [],
685 | "source": [
686 | "libtmp = ctypes.cdll.LoadLibrary(os.path.join(os.getcwd(), \"libtmp.so\"))\n",
687 | "libtmp.quick.argtypes = (ctypes.c_int, ctypes.c_int, ctypes.c_int,\n",
688 | " ctypes.POINTER(ctypes.c_double),\n",
689 | " ctypes.POINTER(ctypes.c_int))\n",
690 | "libtmp.quick.restype = None"
691 | ]
692 | },
693 | {
694 | "cell_type": "code",
695 | "execution_count": null,
696 | "metadata": {},
697 | "outputs": [],
698 | "source": [
699 | "def run_dirty(height, width, maxiterations=20):\n",
700 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
701 | " c = x + y*1j\n",
702 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
703 | " libtmp.quick(height, width, maxiterations,\n",
704 | " ctypes.cast(c.ctypes.data, ctypes.POINTER(ctypes.c_double)),\n",
705 | " ctypes.cast(fractal.ctypes.data, ctypes.POINTER(ctypes.c_int)))\n",
706 | " return fractal"
707 | ]
708 | },
709 | {
710 | "cell_type": "code",
711 | "execution_count": null,
712 | "metadata": {
713 | "slideshow": {
714 | "slide_type": "slide"
715 | }
716 | },
717 | "outputs": [],
718 | "source": [
719 | "import time\n",
720 | "starttime = time.time()\n",
721 | "fractal = run_dirty(8000, 12000)\n",
722 | "time.time() - starttime"
723 | ]
724 | },
725 | {
726 | "cell_type": "code",
727 | "execution_count": null,
728 | "metadata": {
729 | "slideshow": {
730 | "slide_type": "fragment"
731 | }
732 | },
733 | "outputs": [],
734 | "source": [
735 | "%matplotlib inline\n",
736 | "import matplotlib.pyplot as plt\n",
737 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
738 | "ax.imshow(fractal)\n",
739 | "# ax.imshow(fractal[-200:, :300])"
740 | ]
741 | },
742 | {
743 | "cell_type": "markdown",
744 | "metadata": {
745 | "slideshow": {
746 | "slide_type": "slide"
747 | }
748 | },
749 | "source": [
750 | "Not very good time, but the right answer."
751 | ]
752 | },
753 | {
754 | "cell_type": "markdown",
755 | "metadata": {
756 | "slideshow": {
757 | "slide_type": "fragment"
758 | }
759 | },
760 | "source": [
761 | "With that monstrosity, I'll end the course.\n",
762 | "\n",
763 | "Cheers!"
764 | ]
765 | }
766 | ],
767 | "metadata": {
768 | "celltoolbar": "Slideshow",
769 | "kernelspec": {
770 | "display_name": "Python 3",
771 | "language": "python",
772 | "name": "python3"
773 | },
774 | "language_info": {
775 | "codemirror_mode": {
776 | "name": "ipython",
777 | "version": 3
778 | },
779 | "file_extension": ".py",
780 | "mimetype": "text/x-python",
781 | "name": "python",
782 | "nbconvert_exporter": "python",
783 | "pygments_lexer": "ipython3",
784 | "version": "3.7.0"
785 | }
786 | },
787 | "nbformat": 4,
788 | "nbformat_minor": 2
789 | }
790 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2018, Jim Pivarski
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python/Numpy for High-Performance Numerical Processing
2 |
3 | Notebooks and slides used for the [Numpy mini-course at Princeton](https://researchcomputing.princeton.edu/events/pythonnumpy-high-performance-numerical-processing) held November 15, 2018.
4 |
5 | ### Abstract
6 |
7 | Python is a notoriously slow language, so why is it widely used by scientists and machine learning experts? In a numerically heavy task, an interpreted, dynamically typed environment can be thousands of times slower than a compiled, statically typed one, which can make the difference between minutes and days or between coarse models on small datasets and fine-grained models on large datasets. The trick is to drive compiled functions from the interpreted commandline, like R, and to frame your problem in array programming primitives, like Matlab, but in a general-purpose programming language with hundreds of thousands of extensions to glue to every conceivable interface.
8 |
9 | In this workshop, we will examine the numerical processing ecosystem that has grown up around Python. The key library in this ecosystem is Numpy, which enables fast array programming, and Pandas, a convenient wrapper for organizing data. We will visualize data in and out of JupyterLab, a notebook front-end for exploratory analysis. We'll also work through examples of binding ~~from C++ to Python with pybind11 and~~ from Python to C++ with Cython, which have different strengths and use-cases. We'll also natively compile Python (C++ speeds without C++) using Numba and run code on GPUs with Numba (Python-like), CuPy (Numpy-like), and PyCUDA/PyOpenCL (raw CUDA/OpenCL).
10 |
11 | Participants will be encouraged to bring a laptop or log into their favorite cluster to install the software we discuss here for later use. We will use conda and pip-in-conda, so superuser ("sudo") permissions are not required.
12 |
13 | Jim Pivarski received his Ph.D. in high-energy particle physics from Cornell in 2006. He helped to commission the CMS experiment at the LHC and later switched to data science as a Big Data consultant. He is now back in physics, integrating computing techniques learned from industry into high-energy physics analysis.
14 |
15 | ### Plan for the day
16 |
17 | General intention; we'll vary from this if there's good reason to do so.
18 |
19 | 
20 |
21 | ### How to prepare
22 |
23 | (1) Check out this repository:
24 |
25 | ```
26 | git clone https://github.com/jpivarski/python-numpy-mini-course.git
27 | ```
28 |
29 | (2) Install Anaconda or [Miniconda for Python 3](https://conda.io/miniconda.html). Using that, install Jupyter (Lab or Notebook; I prefer Lab):
30 |
31 | ```
32 | conda install jupyterlab
33 | ```
34 |
35 | Change directories into the repository and start Jupyter Lab or Notebook:
36 |
37 | ```
38 | cd python-numpy-mini-course
39 | jupyter lab # or notebook
40 | ```
41 |
42 | Installations for the sessions are given at the top of each notebook, but if you want to install everything at once, instructions are collected below. Most of these are already bundled in the full Anaconda distribution.
43 |
44 | ```
45 | conda install numpy # 2-just-numpy
46 | conda install pandas matplotlib # 4-pandas
47 | conda install dask distrubted -c conda-forge # 5-dask
48 | conda install numba cython # 6-compilers
49 | ```
50 |
51 | Don't bother installing software for the GPU session if you don't have an NVidia GPU with the CUDA development kit installed.
52 |
53 | ```
54 | conda install cupy cudatoolkit # 7-gpu
55 | export CFLAGS=-fpermissive
56 | pip install --no-cache-dir pycuda
57 | ```
58 |
59 | (3) General Python programming skills will be assumed (ability to read or write a page-long script without difficulty). Walk through an online tutorial if you need to brush up before the course.
60 |
61 | Knowedge of the libraries presented here _will not_ be assumed. Come and learn!
62 |
63 | ### Pre-evaluated notebooks
64 |
65 | You'll get the most out of the course if you follow along in the blank notebooks in the master branch of this repository. However, if you're returning to look up a result, pre-evaluated copies of all the notebooks can be found on the [evaluated branch](https://github.com/jpivarski/python-numpy-mini-course/tree/evaluated) of this repository.
66 |
--------------------------------------------------------------------------------
/data/nasa-exoplanets-details.txt:
--------------------------------------------------------------------------------
1 | # This file was produced by the NASA Exoplanet Archive http://exoplanetarchive.ipac.caltech.edu
2 | # Fri Nov 9 17:35:38 2018
3 | #
4 | # COLUMN pl_hostname: Host Name
5 | # COLUMN pl_letter: Planet Letter
6 | # COLUMN pl_name: Planet Name
7 | # COLUMN pl_discmethod: Discovery Method
8 | # COLUMN pl_pnum: Number of Planets in System
9 | # COLUMN pl_orbper: Orbital Period [days]
10 | # COLUMN pl_orbsmax: Orbit Semi-Major Axis [AU])
11 | # COLUMN pl_orbeccen: Eccentricity
12 | # COLUMN pl_orbincl: Inclination [deg]
13 | # COLUMN pl_bmassj: Planet Mass or M*sin(i) [Jupiter mass]
14 | # COLUMN pl_bmassprov: Planet Mass or M*sin(i) Provenance
15 | # COLUMN pl_radj: Planet Radius [Jupiter radii]
16 | # COLUMN pl_dens: Planet Density [g/cm**3]
17 | # COLUMN pl_ttvflag: TTV Flag
18 | # COLUMN pl_kepflag: Kepler Field Flag
19 | # COLUMN pl_k2flag: K2 Mission Flag
20 | # COLUMN pl_nnotes: Number of Notes
21 | # COLUMN ra_str: RA [sexagesimal]
22 | # COLUMN ra: RA [decimal degrees]
23 | # COLUMN dec_str: Dec [sexagesimal]
24 | # COLUMN dec: Dec [decimal degrees]
25 | # COLUMN st_dist: Distance [pc]
26 | # COLUMN st_optmag: Optical Magnitude [mag]
27 | # COLUMN st_optband: Optical Magnitude Band
28 | # COLUMN gaia_gmag: G-band (Gaia) [mag]
29 | # COLUMN st_teff: Effective Temperature [K]
30 | # COLUMN st_mass: Stellar Mass [Solar mass]
31 | # COLUMN st_rad: Stellar Radius [Solar radii]
32 | # COLUMN rowupdate: Date of Last Update
33 | # COLUMN pl_tranflag: Planet Transit Flag
34 | # COLUMN pl_rvflag: Planet RV Flag
35 | # COLUMN pl_imgflag: Planet Imaging Flag
36 | # COLUMN pl_astflag: Planet Astrometry Flag
37 | # COLUMN pl_omflag: Planet Orbital Modulation Flag
38 | # COLUMN pl_cbflag: Planet Circumbinary Flag
39 | # COLUMN pl_angsep: Calculated Angular Separation [mas]
40 | # COLUMN pl_orbtper: Time of Periastron [days]
41 | # COLUMN pl_orblper: Long. of Periastron [deg]
42 | # COLUMN pl_rvamp: Radial Velocity Amplitude [m/s]
43 | # COLUMN pl_eqt: Equilibrium Temperature [K]
44 | # COLUMN pl_insol: Insolation Flux [Earth flux]
45 | # COLUMN pl_massj: Planet Mass [Jupiter mass]
46 | # COLUMN pl_msinij: Planet M*sin(i) [Jupiter mass]
47 | # COLUMN pl_masse: Planet Mass [Earth mass]
48 | # COLUMN pl_msinie: Planet M*sin(i) [Earth mass]
49 | # COLUMN pl_bmasse: Planet Mass or M*sin(i) [Earth mass]
50 | # COLUMN pl_rade: Planet Radius [Earth radii]
51 | # COLUMN pl_rads: Planet Radius [Solar radii]
52 | # COLUMN pl_trandep: Transit Depth [percent]
53 | # COLUMN pl_trandur: Transit Duration [days]
54 | # COLUMN pl_tranmid: Transit Midpoint [days]
55 | # COLUMN pl_tsystemref: Time System Reference
56 | # COLUMN pl_imppar: Impact Parameter
57 | # COLUMN pl_occdep: Occultation Depth [percentage]
58 | # COLUMN pl_ratdor: Ratio of Distance to Stellar Radius
59 | # COLUMN pl_ratror: Ratio of Planet to Stellar Radius
60 | # COLUMN pl_def_reflink: Default Reference
61 | # COLUMN pl_disc: Year of Discovery
62 | # COLUMN pl_disc_reflink: Discovery Reference
63 | # COLUMN pl_locale: Discovery Locale
64 | # COLUMN pl_facility: Discovery Facility
65 | # COLUMN pl_telescope: Discovery Telescope
66 | # COLUMN pl_instrument: Discovery Instrument
67 | # COLUMN pl_status: Status
68 | # COLUMN pl_mnum: Number of Moons in System
69 | # COLUMN pl_st_npar: Number of Stellar and Planet Parameters
70 | # COLUMN pl_st_nref: Number of Stellar and Planet References
71 | # COLUMN pl_pelink: Link to Exoplanet Encyclopaedia
72 | # COLUMN pl_edelink: Link to Exoplanet Data Explorer
73 | # COLUMN pl_publ_date: Publication Date
74 | # COLUMN hd_name: HD Name
75 | # COLUMN hip_name: HIP Name
76 | # COLUMN st_rah: RA [hrs]
77 | # COLUMN st_glon: Galactic Longitude [deg]
78 | # COLUMN st_glat: Galactic Latitude [deg]
79 | # COLUMN st_elon: Ecliptic Longitude [deg]
80 | # COLUMN st_elat: Ecliptic Latitude [deg]
81 | # COLUMN st_plx: Parallax [mas]
82 | # COLUMN gaia_plx: Gaia Parallax [mas]
83 | # COLUMN gaia_dist: Gaia Distance [pc]
84 | # COLUMN st_pmra: Proper Motion (RA) [mas/yr]
85 | # COLUMN st_pmdec: Proper Motion (Dec) [mas/yr]
86 | # COLUMN st_pm: Total Proper Motion [mas/yr]
87 | # COLUMN gaia_pmra: Gaia Proper Motion (RA) [mas/yr]
88 | # COLUMN gaia_pmdec: Gaia Proper Motion (Dec) [mas/yr]
89 | # COLUMN gaia_pm: Gaia Total Proper Motion [mas/yr]
90 | # COLUMN st_radv: Radial Velocity [km/s]
91 | # COLUMN st_sp: Spectral Type
92 | # COLUMN st_spstr: Spectral Type
93 | # COLUMN st_logg: Stellar Surface Gravity [log10(cm/s**2)]
94 | # COLUMN st_lum: Stellar Luminosity [log(Solar)]
95 | # COLUMN st_dens: Stellar Density [g/cm**3]
96 | # COLUMN st_metfe: Stellar Metallicity [dex]
97 | # COLUMN st_metratio: Metallicity Ratio
98 | # COLUMN st_age: Stellar Age [Gyr]
99 | # COLUMN st_vsini: Rot. Velocity V*sin(i) [km/s]
100 | # COLUMN st_acts: Stellar Activity S-index
101 | # COLUMN st_actr: Stellar Activity log(R'HK)
102 | # COLUMN st_actlx: X-ray Activity log(Lx)
103 | # COLUMN swasp_id: SWASP Identifier
104 | # COLUMN st_nts: Number of Time Series
105 | # COLUMN st_nplc: Number of Planet Transit Light Curves
106 | # COLUMN st_nglc: Number of General Light Curves
107 | # COLUMN st_nrvc: Number of Radial Velocity Time Series
108 | # COLUMN st_naxa: Number of Amateur Light Curves
109 | # COLUMN st_nimg: Number of Images
110 | # COLUMN st_nspec: Number of Spectra
111 | # COLUMN st_uj: U-band (Johnson) [mag]
112 | # COLUMN st_vj: V-band (Johnson) [mag]
113 | # COLUMN st_bj: B-band (Johnson) [mag]
114 | # COLUMN st_rc: R-band (Cousins) [mag]
115 | # COLUMN st_ic: I-band (Cousins) [mag]
116 | # COLUMN st_j: J-band (2MASS) [mag]
117 | # COLUMN st_h: H-band (2MASS) [mag]
118 | # COLUMN st_k: Ks-band (2MASS) [mag]
119 | # COLUMN st_wise1: WISE 3.4um [mag]
120 | # COLUMN st_wise2: WISE 4.6um [mag]
121 | # COLUMN st_wise3: WISE 12um [mag]
122 | # COLUMN st_wise4: WISE 22um [mag]
123 | # COLUMN st_irac1: IRAC 3.6um [mag]
124 | # COLUMN st_irac2: IRAC 4.5um [mag]
125 | # COLUMN st_irac3: IRAC 5.8um [mag]
126 | # COLUMN st_irac4: IRAC 8.0um [mag]
127 | # COLUMN st_mips1: MIPS 24um [mag]
128 | # COLUMN st_mips2: MIPS 70um [mag]
129 | # COLUMN st_mips3: MIPS 160um [mag]
130 | # COLUMN st_iras1: IRAS 12um Flux [Jy]
131 | # COLUMN st_iras2: IRAS 25um Flux [Jy]
132 | # COLUMN st_iras3: IRAS 60um Flux [Jy]
133 | # COLUMN st_iras4: IRAS 100um Flux [Jy]
134 | # COLUMN st_photn: Number of Photometry Measurements
135 | # COLUMN st_umbj: U-B (Johnson) [mag]
136 | # COLUMN st_bmvj: B-V (Johnson) [mag]
137 | # COLUMN st_vjmic: V-I (Johnson-Cousins) [mag]
138 | # COLUMN st_vjmrc: V-R (Johnson-Cousins) [mag]
139 | # COLUMN st_jmh2: J-H (2MASS) [mag]
140 | # COLUMN st_hmk2: H-Ks (2MASS) [mag]
141 | # COLUMN st_jmk2: J-Ks (2MASS) [mag]
142 | # COLUMN st_bmy: b-y (Stromgren) [mag]
143 | # COLUMN st_m1: m1 (Stromgren) [mag]
144 | # COLUMN st_c1: c1 (Stromgren) [mag]
145 | # COLUMN st_colorn: Number of Color Measurements
146 |
--------------------------------------------------------------------------------
/img/cards-chance-deck-19060.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/cards-chance-deck-19060.jpg
--------------------------------------------------------------------------------
/img/clock-rate.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/clock-rate.jpg
--------------------------------------------------------------------------------
/img/plan-for-the-day.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/plan-for-the-day.png
--------------------------------------------------------------------------------
/img/png-spec-chunks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/png-spec-chunks.png
--------------------------------------------------------------------------------
/img/png-spec-scanline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/png-spec-scanline.png
--------------------------------------------------------------------------------
/img/vectorization-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/vectorization-example.png
--------------------------------------------------------------------------------
/img/vectorization-example.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
494 |
--------------------------------------------------------------------------------
/notes.md:
--------------------------------------------------------------------------------
1 | Topics to cover
2 | ===============
3 |
4 | Intro talk
5 | ----------
6 |
7 | Just Numpy
8 | ----------
9 |
10 | Numpy ecosystem talk
11 | --------------------
12 |
13 | Pandas
14 | ------
15 |
16 | Dask & multiprocessing
17 | ----------------------
18 |
19 | Numba, Cython, pybind11
20 | -----------------------
21 |
22 | CuPy, Numba-GPU, PyCUDA
23 | -----------------------
24 |
25 | ctypes & low-level hackery
26 | --------------------------
27 |
--------------------------------------------------------------------------------
/tex/1-intro.tex:
--------------------------------------------------------------------------------
1 | \pdfminorversion=4
2 | \documentclass[aspectratio=169]{beamer}
3 |
4 | \mode
5 | {
6 | \usetheme{default}
7 | \usecolortheme{default}
8 | \usefonttheme{default}
9 | \setbeamertemplate{navigation symbols}{}
10 | \setbeamertemplate{caption}[numbered]
11 | \setbeamertemplate{footline}[frame number] % or "page number"
12 | \setbeamercolor{frametitle}{fg=white}
13 | \setbeamercolor{footline}{fg=black}
14 | }
15 |
16 | \usepackage[english]{babel}
17 | \usepackage[utf8x]{inputenc}
18 | \usepackage{tikz}
19 | \usepackage{courier}
20 | \usepackage{array}
21 | \usepackage{bold-extra}
22 | \usepackage{minted}
23 | \usepackage[thicklines]{cancel}
24 | \usepackage{fancyvrb}
25 | \usepackage{tabto}
26 |
27 | \xdefinecolor{dianablue}{rgb}{0.18,0.24,0.31}
28 | \xdefinecolor{darkblue}{rgb}{0.1,0.1,0.7}
29 | \xdefinecolor{darkgreen}{rgb}{0,0.5,0}
30 | \xdefinecolor{darkgrey}{rgb}{0.35,0.35,0.35}
31 | \xdefinecolor{darkorange}{rgb}{0.8,0.5,0}
32 | \xdefinecolor{darkred}{rgb}{0.7,0,0}
33 | \definecolor{darkgreen}{rgb}{0,0.6,0}
34 | \definecolor{mauve}{rgb}{0.58,0,0.82}
35 |
36 | \title[01-intro]{Python/Numpy for High-Performance Numerical Processing}
37 | \author{Jim Pivarski}
38 | \institute{Princeton University}
39 | \date{November 15, 2018}
40 |
41 | \usetikzlibrary{shapes.callouts}
42 |
43 | \begin{document}
44 |
45 | \logo{\pgfputat{\pgfxy(0.11, 7.4)}{\pgfbox[right,base]{\tikz{\filldraw[fill=dianablue, draw=none] (0 cm, 0 cm) rectangle (50 cm, 1 cm);}\mbox{\hspace{-8 cm}\includegraphics[height=1 cm]{princeton-logo-long.png}\mbox{\hspace{0.25 cm}}}}}}
46 |
47 | \begin{frame}
48 | \titlepage
49 | \end{frame}
50 |
51 | \logo{\pgfputat{\pgfxy(0.11, 7.4)}{\pgfbox[right,base]{\tikz{\filldraw[fill=dianablue, draw=none] (0 cm, 0 cm) rectangle (50 cm, 1 cm);}\mbox{\hspace{-8 cm}\includegraphics[height=1 cm]{princeton-logo.png}\mbox{\hspace{0.25 cm}}}}}}
52 |
53 | % Uncomment these lines for an automatically generated outline.
54 | %\begin{frame}{Outline}
55 | % \tableofcontents
56 | %\end{frame}
57 |
58 | % START START START START START START START START START START START START START
59 |
60 | \begin{frame}{Why Python?}
61 | \vspace{0.25 cm}
62 | \begin{center}
63 | \includegraphics[width=0.8\linewidth]{pypl-popularity.png}
64 |
65 | \textcolor{blue}{\scriptsize\url{http://pypl.github.io/PYPL.html}}
66 | \end{center}
67 | \end{frame}
68 |
69 | \begin{frame}{Why Python in science?}
70 | \vspace{0.5 cm}
71 | \includegraphics[width=\linewidth]{python-r-cpp-googletrends-data.png}
72 |
73 | \vspace{1 cm}
74 | \includegraphics[width=\linewidth]{python-r-cpp-googletrends-dataset.png}
75 | \end{frame}
76 |
77 | \begin{frame}{Why Python in science?}
78 | \vspace{0.5 cm}
79 | \includegraphics[width=\linewidth]{python-r-cpp-googletrends-datascience.png}
80 |
81 | \vspace{1 cm}
82 | \includegraphics[width=\linewidth]{python-r-cpp-googletrends-machinelearning.png}
83 | \end{frame}
84 |
85 | \begin{frame}{Why Python in science?}
86 | \vspace{0.5 cm}
87 | \includegraphics[width=\linewidth]{root-spark-pandas-google-trends.png}
88 | \end{frame}
89 |
90 | \begin{frame}{Why Python in science?}
91 | \large
92 | \vspace{0.4 cm}
93 | All of the machine learning libraries I could find either have a Python interface or are primarily/exclusively Python.
94 |
95 | \vspace{0.6 cm}
96 | \mbox{ } \includegraphics[height=0.8 cm]{sklearn-logo.png}
97 | \hfill \includegraphics[height=0.8 cm]{pytorch-logo.png}
98 | \hfill \includegraphics[height=0.8 cm]{keras-logo.png}
99 | \hfill \includegraphics[height=1 cm]{tensorflow-logo.png}
100 | \hfill \includegraphics[height=0.8 cm]{caffe2-logo.png}
101 | \hfill \includegraphics[height=0.8 cm]{gluon-logo.png} \mbox{ }
102 |
103 | \vspace{0.15 cm}
104 | \mbox{ } \includegraphics[height=0.8 cm]{chainer-logo.png}
105 | \hfill \includegraphics[height=0.8 cm]{cntk-logo.png}
106 | \hfill \includegraphics[height=0.8 cm]{lasagne-logo.png}
107 | \hfill \includegraphics[height=0.8 cm]{onnx-logo.png}
108 | \hfill \includegraphics[height=0.8 cm]{cesium-logo.png}
109 | \hfill \includegraphics[height=0.8 cm]{xgboost-logo.png} \mbox{ }
110 | \end{frame}
111 |
112 | \begin{frame}{Why Python in science?}
113 | \vspace{0.25 cm}
114 | \begin{center}
115 | \includegraphics[width=0.7\linewidth]{mentions-of-programming-languages.png}
116 | \end{center}
117 | \end{frame}
118 |
119 | \begin{frame}{Why Python in science?}
120 | \vspace{0.3 cm}
121 | \begin{columns}[b]
122 | \column{0.59\linewidth}
123 | \includegraphics[width=\linewidth]{lsst-notebook.png}
124 | \end{columns}
125 | \end{frame}
126 |
127 | \begin{frame}{Stealing from Jake VanderPlas's {\it Unexpected Effectiveness} talk}
128 | \vspace{0.25 cm}
129 | \begin{columns}[b]
130 | \column{0.75\linewidth}
131 | \only<1>{\includegraphics[height=7.8 cm]{shells-1.png}}
132 | \only<2>{\includegraphics[height=7.8 cm]{shells-2.png}}
133 | \only<3>{\includegraphics[height=7.8 cm]{shells-3.png}}
134 | \only<4>{\includegraphics[height=7.8 cm]{shells-4.png}}
135 | \only<5-6>{\includegraphics[height=7.8 cm]{shells-5.png}\vspace{0.5 cm}}
136 |
137 | \column{0.25\linewidth}
138 | \includegraphics[width=\linewidth]{unreasonable-effectiveness.png}
139 |
140 | \vspace{0.5 cm}
141 | \uncover<6>{If you're used to writing your own code, searching for tools is eye-opening: you learn what's unique about what you do and what isn't.}
142 |
143 | \vspace{-7\baselineskip}
144 | \vspace{4.8 cm}
145 | \end{columns}
146 | \end{frame}
147 |
148 | \begin{frame}{Stealing again from Jake VanderPlas}
149 | \vspace{0.27 cm}
150 | \begin{columns}
151 | \column{0.74\linewidth}
152 | \includegraphics[width=\linewidth]{commute-by-plane.png}
153 | \end{columns}
154 | \end{frame}
155 |
156 | \begin{frame}{Why not indeed?}
157 | \large
158 | \begin{center}
159 | In science, we often have to scale up analyses to large datasets.
160 |
161 | \vspace{1 cm}
162 | \uncover<2->{10\% faster doesn't mean much, but the difference between \\ ``five minutes'' and ``overnight'' is life-changing.}
163 |
164 | \vspace{1 cm}
165 | \uncover<3->{That's the scale we're talking about between C and Python.}
166 |
167 | \vspace{1 cm}
168 | \uncover<4->{But we also need the interactivity of a dynamic language to {\it develop} the analysis. (``If we knew what we were doing, it wouldn't be called research.'')}
169 | \end{center}
170 | \end{frame}
171 |
172 | \begin{frame}{Metaphor time!}
173 | \Large
174 | \vspace{0.25 cm}
175 | \begin{center}
176 | \textcolor{darkblue}{\underline{Drive to the airport by car, then take a plane.}}
177 | \end{center}
178 |
179 | \vspace{0.5 cm}
180 | \begin{columns}
181 | \column{0.4\linewidth}
182 | \begin{center}
183 | Small-scale {\it project organization} in Python, ignoring performance entirely.
184 | \end{center}
185 |
186 | \column{0.4\linewidth}
187 | \begin{center}
188 | Run over {\it big data} in compiled code, tuning performance until it no longer matters.
189 | \end{center}
190 |
191 | \end{columns}
192 | \end{frame}
193 |
194 | \begin{frame}{Python is a good glue language: my thesis workflow in 2006}
195 | \vspace{0.5 cm}
196 | \begin{columns}
197 | \column{1.1\linewidth}
198 | \includegraphics[width=\linewidth]{thesis-code-flow.pdf}
199 | \end{columns}
200 | \end{frame}
201 |
202 | \begin{frame}{Which got me involved in open source (PyMinuit is now ``iminuit'')}
203 | \vspace{0.5 cm}
204 | \includegraphics[width=\linewidth]{pyminuit.png}
205 | \end{frame}
206 |
207 | \begin{frame}{The key to ecosystem development was a common array library}
208 | \large
209 | \vspace{0.1 cm}
210 |
211 | \renewcommand{\arraystretch}{1.15}
212 | \mbox{\hspace{-0.5 cm}\begin{tabular}{c p{0.95\linewidth}}
213 | 1994 & \textcolor{darkorange}{\bf Python} 1.0 released. \\
214 | 1995 & First array package: \textcolor{darkorange}{\bf Numeric} \textcolor{gray}{(a.k.a.\ Numerical, Numerical Python, NumPy).} \\
215 | 2001 & Diverse scientific codebases merged into \textcolor{darkorange}{\bf SciPy}. \\
216 | 2003 & \textcolor{darkorange}{\bf Matplotlib} \\
217 | 2003 & Numeric was limited; \textcolor{darkorange}{\bf numarray} appeared as a competitor with more \mbox{features} \textcolor{gray}{(memory-mapped files, alignment, record arrays)}. \\
218 | 2005 & Two packages were incompatible; could not integrate numarray-based code into SciPy. Travis Oliphant merged the codebases as \textcolor{darkorange}{\bf Numpy}. \\
219 | 2008 & \textcolor{darkorange}{\bf Pandas} \\
220 | 2010 & \textcolor{darkorange}{\bf Scikit-Learn} \\
221 | 2011 & \textcolor{darkorange}{\bf AstroPy} \\
222 | 2012 & \textcolor{darkorange}{\bf Anaconda} \\
223 | 2014 & \textcolor{darkorange}{\bf Jupyter} \\
224 | 2015 & \textcolor{darkorange}{\bf Keras} \\
225 | \end{tabular}}
226 |
227 | \begin{uncoverenv}<2->
228 | \vspace{-3 cm}
229 | \hfill \fbox{\begin{minipage}{7 cm}
230 | \vspace{0.2 cm}
231 | \begin{center}
232 | \begin{minipage}{6.25 cm}
233 | The scientific Python ecosystem could have failed before it started if the Numeric/numarray split hadn't been resolved!
234 | \end{minipage}
235 | \vspace{0.2 cm}
236 | \end{center}
237 | \end{minipage}}
238 | \end{uncoverenv}
239 | \end{frame}
240 |
241 | \begin{frame}[fragile]{Numpy is high-level, array-at-a-time math}
242 | \vspace{0.5 cm}
243 | \hfill \includegraphics[height=1.5 cm]{numpy-logo.png}
244 |
245 | \scriptsize
246 | \vspace{-1.6 cm}
247 | \begin{minted}{python}
248 | >>> import numpy
249 | >>> a = numpy.arange(12)
250 | >>> a
251 | array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
252 | >>> a.shape = (3, 4)
253 | >>> a
254 | array([[ 0, 1, 2, 3],
255 | [ 4, 5, 6, 7],
256 | [ 8, 9, 10, 11]])
257 | >>> a.sum(axis=0)
258 | array([12, 15, 18, 21])
259 | >>> a.min(axis=1)
260 | array([0, 4, 8])
261 | >>> a**2
262 | array([[ 0, 1, 4, 9],
263 | [ 16, 25, 36, 49],
264 | [ 64, 81, 100, 121]])
265 | >>> numpy.sqrt(a)
266 | array([[0. , 1. , 1.41421356, 1.73205081],
267 | [2. , 2.23606798, 2.44948974, 2.64575131],
268 | [2.82842712, 3. , 3.16227766, 3.31662479]])
269 | \end{minted}
270 | \end{frame}
271 |
272 | \begin{frame}[fragile]{The Numpythonic mindset}
273 | \large
274 | \vspace{0.5 cm}
275 | Although you can write Python {\tt\normalsize for} loops over Numpy arrays, you don't reap the benefit unless you express your calculation in Numpy universal functions (ufuncs).
276 |
277 | \vspace{\baselineskip}
278 | \begin{columns}[t]
279 | \column{0.45\linewidth}
280 | \vspace{-\baselineskip}
281 | \scriptsize
282 | \begin{minted}{python}
283 | pz = numpy.empty(len(pt))
284 | for i in range(len(pt)):
285 | pz[i] = pt[i]*numpy.sinh(eta[i])
286 | \end{minted}
287 |
288 | \vspace{0.5 cm}
289 | $\mathcal{O}(N)$ Python bytecode instructions, type-checks, interpreter locks.
290 |
291 | \column{0.45\linewidth}
292 | \mbox{\hspace{-0.85 cm}\textcolor{darkblue}{vs}}
293 | \vspace{-\baselineskip}
294 | \scriptsize
295 | \begin{minted}{python}
296 | pz = pt * numpy.sinh(eta)
297 | \end{minted}
298 | \vspace{2\baselineskip}
299 |
300 | \vspace{0.5 cm}
301 | $\mathcal{O}(1)$ Python bytecode instructions, type-checks, interpreter locks.
302 |
303 | \vspace{0.1 cm}
304 | $\mathcal{O}(N)$ statically typed, probably vectorized native bytecode operations on contiguous memory.
305 | \end{columns}
306 |
307 | \large
308 | \vspace{0.75 cm}
309 | \uncover<2->{\textcolor{darkblue}{In other words, a \underline{S}ingle (Python) \underline{I}nstruction on \underline{M}ultiple \underline{D}ata.}}
310 |
311 | \vspace{0.1 cm}
312 | \uncover<2->{\textcolor{darkblue}{Conceptually similar to SIMD, the program flow of GPUs.}}
313 | \end{frame}
314 |
315 | \begin{frame}{This is not new}
316 | \Large
317 | \vspace{0.5 cm}
318 | \textcolor{darkorange}{\bf APL}, ``A Programming Language'' introduced the idea of single commands having sweeping effects across large arrays.
319 |
320 | \begin{center}
321 | \includegraphics[width=0.75\linewidth]{apl-timeline.pdf}
322 | \end{center}
323 |
324 | \normalsize
325 | \textcolor{gray}{All members of the APL family are intended for interactive data analysis.}
326 |
327 | \textcolor{gray}{Numpy, however, is a library in a general-purpose language, not a language in itself.}
328 | \end{frame}
329 |
330 | \begin{frame}{APL}
331 | \Large
332 | \vspace{0.5 cm}
333 | \hfill \mbox{\includegraphics[height=3 cm]{tshirt.jpg}\hspace{-0.25 cm}}
334 |
335 | \vspace{-2.75 cm}
336 | APL pioneered conciseness;
337 |
338 | discovered the mistake of being too concise.
339 |
340 | \large
341 | \vspace{1.25 cm}
342 | Conway's Game of Life was one line of code:
343 |
344 | \vspace{-0.3 cm}
345 | \[ \mbox{\tt life} \leftarrow \{\uparrow 1\quad\omega \vee.\wedge 3\quad 4=+/,^{^-} 1\quad0\quad1\circ.\Theta^{^-} 1\quad0\quad1\circ.\Phi\subset\omega\} \]
346 |
347 | \vspace{0.5 cm}
348 | ``Map'' was implicit, ``reduce'' was a slash, functions were symbols. For example:
349 |
350 | \begin{center}
351 | \renewcommand{\arraystretch}{1.2}
352 | \begin{tabular}{c c c}
353 | APL & \mbox{\hspace{0.5 cm}} & Numpy \\\hline
354 | $\displaystyle \mbox{\tt m} \leftarrow +/(3+\iota 4)$ & & {\tt\normalsize m = (numpy.arange(4) + 3).sum()}
355 | \end{tabular}
356 | \end{center}
357 | \end{frame}
358 |
359 | \begin{frame}{Numpythonic mindset: GPU and vectorization}
360 | \Large
361 | \vspace{0.5 cm}
362 | \begin{center}
363 | As an array abstraction, Numpy presents a high-level way \\ for users to think about vectorization.
364 |
365 | \vspace{1 cm}
366 | Vectorization is key to using GPUs and modern CPUs efficiently.
367 | \end{center}
368 | \end{frame}
369 |
370 | \begin{frame}{Numpythonic mindset: GPU and vectorization}
371 | \vspace{0.35 cm}
372 | \includegraphics[width=\linewidth]{cupy.png}
373 | \end{frame}
374 |
375 | \begin{frame}{Numpythonic mindset: GPU and vectorization}
376 | \vspace{0.35 cm}
377 | \includegraphics[width=\linewidth]{quantstack.png}
378 | \end{frame}
379 |
380 | \begin{frame}{Plan for the day}
381 | \large
382 | \begin{columns}
383 | \column{0.68\linewidth}
384 | \includegraphics[width=\linewidth]{../img/plan-for-the-day.png}
385 |
386 | \column{0.3\linewidth}
387 | Skills-based Numpy tutorial with a couple of exercises in the morning: how to think in SIMD.
388 |
389 | \vspace{1 cm}
390 | Overview of libraries in the afternoon: where to look for solutions to your problems.
391 | \end{columns}
392 | \end{frame}
393 |
394 | \end{document}
395 |
--------------------------------------------------------------------------------
/tex/3-ecosystem.tex:
--------------------------------------------------------------------------------
1 | \pdfminorversion=4
2 | \documentclass[aspectratio=169]{beamer}
3 |
4 | \mode
5 | {
6 | \usetheme{default}
7 | \usecolortheme{default}
8 | \usefonttheme{default}
9 | \setbeamertemplate{navigation symbols}{}
10 | \setbeamertemplate{caption}[numbered]
11 | \setbeamertemplate{footline}[frame number] % or "page number"
12 | \setbeamercolor{frametitle}{fg=white}
13 | \setbeamercolor{footline}{fg=black}
14 | }
15 |
16 | \usepackage[english]{babel}
17 | \usepackage[utf8x]{inputenc}
18 | \usepackage{tikz}
19 | \usepackage{courier}
20 | \usepackage{array}
21 | \usepackage{bold-extra}
22 | \usepackage{minted}
23 | \usepackage[thicklines]{cancel}
24 | \usepackage{fancyvrb}
25 | \usepackage{tabto}
26 |
27 | \xdefinecolor{dianablue}{rgb}{0.18,0.24,0.31}
28 | \xdefinecolor{darkblue}{rgb}{0.1,0.1,0.7}
29 | \xdefinecolor{darkgreen}{rgb}{0,0.5,0}
30 | \xdefinecolor{darkgrey}{rgb}{0.35,0.35,0.35}
31 | \xdefinecolor{darkorange}{rgb}{0.8,0.5,0}
32 | \xdefinecolor{darkred}{rgb}{0.7,0,0}
33 | \definecolor{darkgreen}{rgb}{0,0.6,0}
34 | \definecolor{mauve}{rgb}{0.58,0,0.82}
35 |
36 | \title[03-ecosystem]{The Numpy Ecosystem}
37 | \author{Jim Pivarski}
38 | \institute{Princeton University}
39 | \date{November 15, 2018}
40 |
41 | \usetikzlibrary{shapes.callouts}
42 |
43 | \begin{document}
44 |
45 | \logo{\pgfputat{\pgfxy(0.11, 7.4)}{\pgfbox[right,base]{\tikz{\filldraw[fill=dianablue, draw=none] (0 cm, 0 cm) rectangle (50 cm, 1 cm);}\mbox{\hspace{-8 cm}\includegraphics[height=1 cm]{princeton-logo-long.png}\mbox{\hspace{0.25 cm}}}}}}
46 |
47 | \begin{frame}
48 | \titlepage
49 | \end{frame}
50 |
51 | \logo{\pgfputat{\pgfxy(0.11, 7.4)}{\pgfbox[right,base]{\tikz{\filldraw[fill=dianablue, draw=none] (0 cm, 0 cm) rectangle (50 cm, 1 cm);}\mbox{\hspace{-8 cm}\includegraphics[height=1 cm]{princeton-logo.png}\mbox{\hspace{0.25 cm}}}}}}
52 |
53 | % Uncomment these lines for an automatically generated outline.
54 | %\begin{frame}{Outline}
55 | % \tableofcontents
56 | %\end{frame}
57 |
58 | % START START START START START START START START START START START START START
59 |
60 | \begin{frame}{This afternoon}
61 | \large
62 | \vspace{0.5 cm}
63 | This morning, we focused on just one library--- Numpy--- and worked on putting its slicing interfaces together to achieve things you'd normally need for loops for.
64 |
65 | \vspace{1 cm}
66 | \uncover<2->{\Large This afternoon, we switch to\ldots}
67 | \end{frame}
68 |
69 | \begin{frame}{Everything else}
70 | \vspace{0.16 cm}
71 | \begin{columns}
72 | \column{1.14\linewidth}
73 | \vspace{-4 cm}
74 | \includegraphics[width=\linewidth]{shells-5.png}
75 | \end{columns}
76 | \end{frame}
77 |
78 | \begin{frame}{Specific topics}
79 | \vspace{0.3 cm}
80 | \begin{block}{Statistics tools}
81 | \begin{itemize}
82 | \item {\bf Pandas:} a central component, becoming as important as Numpy itself.
83 | \end{itemize}
84 |
85 | \uncover<2->{Other than that, you're on your own. Statistical software is as varied as your domains.}
86 | \end{block}
87 |
88 | \vspace{0.4 cm}
89 | \begin{uncoverenv}<3->
90 | \begin{block}{Speeding up code}
91 | \begin{itemize}
92 | \item {\bf Dask:} parallel processing; \underline{M}ultiple \underline{I}nstructions on \underline{M}ultiple \underline{D}ata (MIMD).
93 | \item {\bf Numba:} compile a limited subset of Python, as-is, to C-like speeds.
94 | \item {\bf Cython:} compile any Python code, but you have to modify it to make it fast.
95 | \item {\bf CuPy:} run any Numpy operations on a GPU.
96 | \item {\bf Numba-GPU:} compile limited Python for the GPU.
97 | \item {\bf PyCUDA:} interface with raw CUDA through Numpy arrays.
98 | \item {\bf ctypes:} cast pointers as Numpy arrays and run code in shared library ({\tt\small *.so}) files.
99 | \end{itemize}
100 | \end{block}
101 | \end{uncoverenv}
102 | \end{frame}
103 |
104 | \begin{frame}{Speeding up code}
105 | \vspace{0.5 cm}
106 | Fast software is not like a fast runner, who has some superior intrinsic ability. \\ All run at the same rate, but some have more hurdles on the track than others.
107 |
108 | \vspace{0.25 cm}
109 | \begin{center}
110 | \includegraphics[width=0.7\linewidth]{hurdle9.jpg}
111 | \end{center}
112 | \end{frame}
113 |
114 | \begin{frame}{Hurdles, from smallest to largest}
115 | \large
116 | \begin{columns}[t]
117 | \column{0.5\linewidth}
118 | \begin{enumerate}\setlength{\itemsep}{0.35 cm}
119 | \item Unnecessary or repeated arithmetic
120 | \item Arithmetic in separate instructions that could be in the same instruction (vectorization)
121 | \item Transcendental functions or division
122 | \item Unnecessary or nonsequential memory access; cache swapping
123 | \end{enumerate}
124 |
125 | \column{0.5\linewidth}
126 | \begin{enumerate}\setlength{\itemsep}{0.35 cm}\setcounter{enumi}{4}
127 | \item Virtual machine indirection
128 | \item Boxing numbers as objects
129 | \item Type checking at runtime
130 | \item Unnecessary or nonsequential disk/network access
131 | \item Wacky stuff
132 | \end{enumerate}
133 | \end{columns}
134 |
135 | \vspace{0.5 cm}
136 | \uncover<2->{Compilation optimizes away most of \textcolor{darkblue}{\#1}, \textcolor{darkblue}{\#2}, and \textcolor{darkblue}{\#4}.}
137 |
138 | \vspace{0.2 cm}
139 | \uncover<3->{GPUs focus on \textcolor{darkblue}{\#2} and \textcolor{darkblue}{\#4} (by putting memory close to processing).}
140 |
141 | \vspace{0.2 cm}
142 | \uncover<4->{Python is guilty of \textcolor{darkblue}{\#4}, \textcolor{darkblue}{\#5}, \textcolor{darkblue}{\#6}, and \textcolor{darkblue}{\#7} (Java only \textcolor{darkblue}{\#4}, \textcolor{darkblue}{\#5}, and half of \textcolor{darkblue}{\#6}).}
143 | \end{frame}
144 |
145 | \begin{frame}{Optimization is about trade-offs}
146 | \large
147 | \vspace{0.5 cm}
148 | We're here because we like the productivity Python gives us in exchange for \\ \textcolor{darkblue}{\#4}, \textcolor{darkblue}{\#5}, \textcolor{darkblue}{\#6}, and \textcolor{darkblue}{\#7}.
149 |
150 | \vspace{0.5 cm}
151 | \begin{uncoverenv}<2->
152 | Ideally, we'd like a library that makes Python code fast without modification.
153 | \begin{itemize}
154 | \item I don't know how much speedup I'll get until I apply it; but that costs effort.
155 | \item If I've applied it and I don't like it, I want to easily remove it.
156 | \end{itemize}
157 | \end{uncoverenv}
158 |
159 | \vspace{0.5 cm}
160 | \uncover<3->{If we had such a thing, though, when would we ever {\it not} use it?}
161 |
162 | \vspace{0.2 cm}
163 | \uncover<4->{\textcolor{darkgray}{Example: PyPy, a reimplementation of Python with just-in-time (JIT) compilation. If it works, we'd only use that. It doesn't yet work with all extension modules, though.}}
164 | \end{frame}
165 |
166 | \begin{frame}{Horizontal and vertical scaling}
167 | \Large
168 | \vspace{0.5 cm}
169 | \begin{description}
170 | \item[\bf Horizontal:] split up task and distribute among parallel workers.
171 | \end{description}
172 |
173 | \large
174 | \uncover<2->{\textcolor{darkgray}{Oddly, this speedup is rarely proportional to the number of workers, even when work is independent, due to bookkeeping overhead and shipping data.}}
175 |
176 | \vspace{1 cm}
177 | \Large
178 | \begin{description}
179 | \item[\bf Vertical:] use hardware more effectively by removing hurdles.
180 | \end{description}
181 |
182 | \large
183 | \uncover<3->{\textcolor{darkgray}{Plateaus as you get close to optimum. More effort yields diminishing returns.}}
184 | \end{frame}
185 |
186 | \begin{frame}{Why cover Pandas in an afternoon about performance?}
187 | \large
188 | \begin{center}
189 | \includegraphics[width=0.5\linewidth]{pandas-logo.png}
190 | \end{center}
191 |
192 | \vspace{0.25 cm}
193 | Pandas is about simplifying data analysis, and it does so by translating the array programming style from Numpy to domain concepts: timestamps, categorical data, relational data, etc.
194 |
195 | \vspace{0.5 cm}
196 | \uncover<2->{It's like a spreadsheet that uses Numpy arrays instead of graphical cells.}
197 |
198 | \vspace{0.5 cm}
199 | \uncover<3->{It's not as fast as Numpy or the other accelerators I'll show, but it benefits from the conciseness of the same Numpythonic mindset.}
200 | \end{frame}
201 |
202 | \begin{frame}{}
203 | \LARGE
204 | \vspace{1.5 cm}
205 | \begin{center}
206 | So without further ado\ldots
207 | \end{center}
208 | \end{frame}
209 |
210 |
211 | %% \begin{frame}{Speeding up code}
212 | %% \large
213 | %% \vspace{0.5 cm}
214 | %% There is a mantra regarding performance tuning:
215 | %% \begin{center}
216 | %% \it Premature optimization is the root of all evil.
217 | %% \end{center}
218 |
219 | %% \normalsize
220 | %% \vspace{0.5 cm}
221 | %% \uncover<2->{\textcolor{darkblue}{It's mostly correct.} Machinations to increase speed or reduce memory can muddle the intent of the code and even be counterproductive. Your processor, operating system, compiler, and maybe framework are all trying to optimize it for you--- doing weird things can confuse these systems.}
222 |
223 | %% \vspace{0.5 cm}
224 | %% \uncover<3->{\textcolor{darkblue}{It's not always correct.} Sometimes, you have to think about performance up front to design a sensible workflow, and sometimes factors of 1000's are at stake.}
225 | %% \end{frame}
226 |
227 | %% \begin{frame}{Speeding up code}
228 | %% \large
229 | %% \vspace{0.35 cm}
230 | %% \begin{columns}
231 | %% \column{0.8\linewidth}
232 | %% \begin{center}
233 | %% An ideal code optimization library would be transparent: \\ same code, just faster.
234 |
235 | %% \vspace{0.25 cm}
236 | %% \uncover<2->{You never know how much it will help until you try it, so you want the barrier to entry to be as small as possible. You also want an easy way to back out if you find you don't want it.}
237 |
238 | %% \vspace{0.25 cm}
239 | %% \uncover<3->{Ideally, it would also be general: apply to all of your code \\ so that you don't have to pick out hotspots.}
240 |
241 | %% \vspace{0.25 cm}
242 | %% \uncover<4->{But if we had a completely general, transparent optimizer, \\ we would just use that exclusively.}
243 |
244 | %% \vspace{0.25 cm}
245 | %% \uncover<5->{\textcolor{darkblue}{PyPy} aims to be fully general and transparent, but it doesn't support all the compiled modules built for standard Python.}
246 | %% \end{center}
247 | %% \end{columns}
248 | %% \end{frame}
249 |
250 | \end{document}
251 |
--------------------------------------------------------------------------------
/tex/apl-timeline.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/apl-timeline.pdf
--------------------------------------------------------------------------------
/tex/caffe2-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/caffe2-logo.png
--------------------------------------------------------------------------------
/tex/cesium-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/cesium-logo.png
--------------------------------------------------------------------------------
/tex/chainer-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/chainer-logo.png
--------------------------------------------------------------------------------
/tex/cntk-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/cntk-logo.png
--------------------------------------------------------------------------------
/tex/commute-by-plane.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/commute-by-plane.png
--------------------------------------------------------------------------------
/tex/cupy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/cupy.png
--------------------------------------------------------------------------------
/tex/gluon-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/gluon-logo.png
--------------------------------------------------------------------------------
/tex/hurdle9.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/hurdle9.jpg
--------------------------------------------------------------------------------
/tex/keras-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/keras-logo.png
--------------------------------------------------------------------------------
/tex/lasagne-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/lasagne-logo.png
--------------------------------------------------------------------------------
/tex/lsst-notebook.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/lsst-notebook.png
--------------------------------------------------------------------------------
/tex/mentions-of-programming-languages.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/mentions-of-programming-languages.png
--------------------------------------------------------------------------------
/tex/numpy-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/numpy-logo.png
--------------------------------------------------------------------------------
/tex/onnx-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/onnx-logo.png
--------------------------------------------------------------------------------
/tex/pandas-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/pandas-logo.png
--------------------------------------------------------------------------------
/tex/princeton-logo-long.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/princeton-logo-long.png
--------------------------------------------------------------------------------
/tex/princeton-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/princeton-logo.png
--------------------------------------------------------------------------------
/tex/pyminuit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/pyminuit.png
--------------------------------------------------------------------------------
/tex/pypl-popularity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/pypl-popularity.png
--------------------------------------------------------------------------------
/tex/python-r-cpp-googletrends-data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/python-r-cpp-googletrends-data.png
--------------------------------------------------------------------------------
/tex/python-r-cpp-googletrends-datascience.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/python-r-cpp-googletrends-datascience.png
--------------------------------------------------------------------------------
/tex/python-r-cpp-googletrends-dataset.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/python-r-cpp-googletrends-dataset.png
--------------------------------------------------------------------------------
/tex/python-r-cpp-googletrends-machinelearning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/python-r-cpp-googletrends-machinelearning.png
--------------------------------------------------------------------------------
/tex/pytorch-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/pytorch-logo.png
--------------------------------------------------------------------------------
/tex/quantstack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/quantstack.png
--------------------------------------------------------------------------------
/tex/root-spark-pandas-google-trends.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/root-spark-pandas-google-trends.png
--------------------------------------------------------------------------------
/tex/shells-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/shells-1.png
--------------------------------------------------------------------------------
/tex/shells-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/shells-2.png
--------------------------------------------------------------------------------
/tex/shells-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/shells-3.png
--------------------------------------------------------------------------------
/tex/shells-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/shells-4.png
--------------------------------------------------------------------------------
/tex/shells-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/shells-5.png
--------------------------------------------------------------------------------
/tex/sklearn-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/sklearn-logo.png
--------------------------------------------------------------------------------
/tex/tensorflow-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/tensorflow-logo.png
--------------------------------------------------------------------------------
/tex/thesis-code-flow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/thesis-code-flow.pdf
--------------------------------------------------------------------------------
/tex/tshirt.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/tshirt.jpg
--------------------------------------------------------------------------------
/tex/unreasonable-effectiveness.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/unreasonable-effectiveness.png
--------------------------------------------------------------------------------
/tex/xgboost-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/xgboost-logo.png
--------------------------------------------------------------------------------