├── .gitignore
├── 1-intro.pdf
├── 2-just-numpy.ipynb
├── 3-ecosystem.pdf
├── 4-pandas.ipynb
├── 5-dask.ipynb
├── 6-compilers.ipynb
├── 7-gpu.ipynb
├── 8-low-level.ipynb
├── LICENSE
├── README.md
├── data
├── nasa-exoplanets-details.txt
├── nasa-exoplanets.csv
├── newark-days-ago.txt
├── newark-temperature-avg.txt
├── newark-temperature-max.txt
├── newark-temperature-min.txt
└── newark-temperature.csv
├── img
├── cards-chance-deck-19060.jpg
├── clock-rate.jpg
├── plan-for-the-day.png
├── plan-for-the-day.svg
├── png-spec-chunks.png
├── png-spec-scanline.png
├── vectorization-example.png
└── vectorization-example.svg
├── notes.md
└── tex
├── 1-intro.tex
├── 3-ecosystem.tex
├── apl-timeline.pdf
├── caffe2-logo.png
├── cesium-logo.png
├── chainer-logo.png
├── cntk-logo.png
├── commute-by-plane.png
├── cupy.png
├── gluon-logo.png
├── hurdle9.jpg
├── keras-logo.png
├── lasagne-logo.png
├── lsst-notebook.png
├── mentions-of-programming-languages.png
├── numpy-logo.png
├── onnx-logo.png
├── pandas-logo.png
├── princeton-logo-long.png
├── princeton-logo-long.svg
├── princeton-logo.png
├── princeton-logo.svg
├── pyminuit.png
├── pypl-popularity.png
├── python-r-cpp-googletrends-data.png
├── python-r-cpp-googletrends-datascience.png
├── python-r-cpp-googletrends-dataset.png
├── python-r-cpp-googletrends-machinelearning.png
├── pytorch-logo.png
├── quantstack.png
├── root-spark-pandas-google-trends.png
├── shells-1.png
├── shells-2.png
├── shells-3.png
├── shells-4.png
├── shells-5.png
├── sklearn-logo.png
├── tensorflow-logo.png
├── thesis-code-flow.pdf
├── tshirt.jpg
├── unreasonable-effectiveness.png
└── xgboost-logo.png
/.gitignore:
--------------------------------------------------------------------------------
1 | tex/*.aux
2 | tex/*.nav
3 | tex/*.out
4 | tex/*.snm
5 | tex/*.toc
 6 | tex/1-intro.pdf
 7 | tex/3-ecosystem.pdf
8 |
9 | # Byte-compiled / optimized / DLL files
10 | __pycache__/
11 | *.py[cod]
12 | *$py.class
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | .hypothesis/
56 | .pytest_cache/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # pyenv
84 | .python-version
85 |
86 | # celery beat schedule file
87 | celerybeat-schedule
88 |
89 | # SageMath parsed files
90 | *.sage.py
91 |
92 | # Environments
93 | .env
94 | .venv
95 | env/
96 | venv/
97 | ENV/
98 | env.bak/
99 | venv.bak/
100 |
101 | # Spyder project settings
102 | .spyderproject
103 | .spyproject
104 |
105 | # Rope project settings
106 | .ropeproject
107 |
108 | # mkdocs documentation
109 | /site
110 |
111 | # mypy
112 | .mypy_cache/
113 |
--------------------------------------------------------------------------------
/1-intro.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/1-intro.pdf
--------------------------------------------------------------------------------
/3-ecosystem.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/3-ecosystem.pdf
--------------------------------------------------------------------------------
/4-pandas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Pandas"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "slideshow": {
18 | "slide_type": "fragment"
19 | }
20 | },
21 | "source": [
22 | "```\n",
23 | "conda install pandas matplotlib\n",
24 | "```\n",
25 | "\n",
26 | "_(and numpy from before)_"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "slideshow": {
33 | "slide_type": "slide"
34 | }
35 | },
36 | "source": [
37 | "Let's go back to the very first problem from this morning. We have an incomplete record of Newark temperatures since 1893."
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": null,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "import numpy\n",
47 | "temperatures = numpy.loadtxt(\"data/newark-temperature-avg.txt\")\n",
48 | "temperatures"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {
54 | "slideshow": {
55 | "slide_type": "fragment"
56 | }
57 | },
58 | "source": [
59 | "But instead of analyzing it with raw Numpy, let's use Pandas."
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {
66 | "scrolled": true
67 | },
68 | "outputs": [],
69 | "source": [
70 | "import pandas\n",
71 | "temperatures = pandas.Series(temperatures)\n",
72 | "temperatures"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {
78 | "slideshow": {
79 | "slide_type": "slide"
80 | }
81 | },
82 | "source": [
83 | "Numpy was designed to do fast calculations with minimal dependencies.\n",
84 | "\n",
85 | "Pandas was designed to make a data analyst's life easier."
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "%matplotlib inline"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "temperatures.plot()"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {
109 | "slideshow": {
110 | "slide_type": "fragment"
111 | }
112 | },
113 | "source": [
114 | "Behold the turning of the seasons!"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {
120 | "slideshow": {
121 | "slide_type": "slide"
122 | }
123 | },
124 | "source": [
125 | "You can think of a Pandas Series as a Numpy array with bells and whistles, but it's more than that."
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {
131 | "slideshow": {
132 | "slide_type": "fragment"
133 | }
134 | },
135 | "source": [
136 | "It is an _indexed_ Numpy array with bells and whistles."
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "metadata": {
143 | "slideshow": {
144 | "slide_type": "slide"
145 | }
146 | },
147 | "outputs": [],
148 | "source": [
149 | "temperatures.index"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {
156 | "slideshow": {
157 | "slide_type": "fragment"
158 | }
159 | },
160 | "outputs": [],
161 | "source": [
162 | "temperatures.index = numpy.loadtxt(\"data/newark-days-ago.txt\")\n",
163 | "temperatures.index.name = \"days ago\""
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {},
170 | "outputs": [],
171 | "source": [
172 | "temperatures.index"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "metadata": {
179 | "slideshow": {
180 | "slide_type": "fragment"
181 | }
182 | },
183 | "outputs": [],
184 | "source": [
185 | "temperatures.plot()"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {
192 | "slideshow": {
193 | "slide_type": "slide"
194 | }
195 | },
196 | "outputs": [],
197 | "source": [
198 | "temperatures.index = pandas.to_datetime(temperatures.index, unit=\"D\", origin=pandas.Timestamp(\"2018-11-04\"))\n",
199 | "temperatures.index.name = \"date\"\n",
200 | "temperatures.index"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "metadata": {},
207 | "outputs": [],
208 | "source": [
209 | "temperatures.plot()"
210 | ]
211 | },
212 | {
213 | "cell_type": "markdown",
214 | "metadata": {
215 | "slideshow": {
216 | "slide_type": "slide"
217 | }
218 | },
219 | "source": [
220 | "Now let's return to the problem of imputing the missing temperature data."
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": null,
226 | "metadata": {},
227 | "outputs": [],
228 | "source": [
229 | "min_temperatures = pandas.Series(numpy.loadtxt(\"data/newark-temperature-min.txt\"))\n",
230 | "max_temperatures = pandas.Series(numpy.loadtxt(\"data/newark-temperature-max.txt\"))\n",
231 | "min_temperatures.index = temperatures.index\n",
232 | "max_temperatures.index = temperatures.index"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": null,
238 | "metadata": {},
239 | "outputs": [],
240 | "source": [
241 | "min_temperatures.plot()"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": null,
247 | "metadata": {},
248 | "outputs": [],
249 | "source": [
250 | "max_temperatures.plot()"
251 | ]
252 | },
253 | {
254 | "cell_type": "markdown",
255 | "metadata": {
256 | "slideshow": {
257 | "slide_type": "slide"
258 | }
259 | },
260 | "source": [
261 | "It would be more convenient if these were in the same object. A DataFrame is several Series glued together with a common index."
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "metadata": {
268 | "scrolled": true
269 | },
270 | "outputs": [],
271 | "source": [
272 | "df = pandas.concat([temperatures, min_temperatures, max_temperatures], axis=\"columns\")\n",
273 | "df.columns = [\"avg\", \"min\", \"max\"] # name them!\n",
274 | "df"
275 | ]
276 | },
277 | {
278 | "cell_type": "markdown",
279 | "metadata": {
280 | "slideshow": {
281 | "slide_type": "slide"
282 | }
283 | },
284 | "source": [
285 | "Performing calculations on columns of a DataFrame is very much like Numpy: all of the elementwise operations, masking, and fancy indexing apply. In fact, Pandas usually just passes off this work on Numpy. However, it _organizes_ that work."
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "metadata": {
292 | "scrolled": true
293 | },
294 | "outputs": [],
295 | "source": [
296 | "df[\"min-max avg\"] = (df[\"min\"] + df[\"max\"]) / 2\n",
297 | "df"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "metadata": {
303 | "slideshow": {
304 | "slide_type": "slide"
305 | }
306 | },
307 | "source": [
308 | "Although we could have repeated the Numpy method of masking by `numpy.isnan(df[\"avg\"])`, Pandas has functions for dealing with missing data. (A lot of them, actually.)"
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "execution_count": null,
314 | "metadata": {
315 | "scrolled": true
316 | },
317 | "outputs": [],
318 | "source": [
319 | "df[\"imputed\"] = df[\"avg\"].fillna(df[\"min-max avg\"])\n",
320 | "df"
321 | ]
322 | },
323 | {
324 | "cell_type": "markdown",
325 | "metadata": {
326 | "slideshow": {
327 | "slide_type": "slide"
328 | }
329 | },
330 | "source": [
331 | "We can select columns by strings in square brackets (like Numpy's record arrays, a feature I didn't show you), but rows cannot be selected by integer index."
332 | ]
333 | },
334 | {
335 | "cell_type": "markdown",
336 | "metadata": {
337 | "slideshow": {
338 | "slide_type": "fragment"
339 | }
340 | },
341 | "source": [
342 | "Whereas an array of length `N` is a function `[0, N) → V`, a DataFrame is a function `K → V1×V2×V3`, where `K` is the index and `V1×V2×V3` are the columns. Integer indexing won't work unless the index has integer type:"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": null,
348 | "metadata": {
349 | "slideshow": {
350 | "slide_type": "fragment"
351 | }
352 | },
353 | "outputs": [],
354 | "source": [
355 | "try:\n",
356 | " df[-1]\n",
357 | "except KeyError:\n",
358 | " print(\"Nope, -1 is not a valid index for this DataFrame.\")"
359 | ]
360 | },
361 | {
362 | "cell_type": "markdown",
363 | "metadata": {
364 | "slideshow": {
365 | "slide_type": "slide"
366 | }
367 | },
368 | "source": [
369 | "But unlike an SQL table, a DataFrame index has an ordering. We should be able to ask for the last element, so there's a special accessor for that: `iloc`."
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": null,
375 | "metadata": {},
376 | "outputs": [],
377 | "source": [
378 | "df.iloc[-1]"
379 | ]
380 | },
381 | {
382 | "cell_type": "markdown",
383 | "metadata": {
384 | "slideshow": {
385 | "slide_type": "fragment"
386 | }
387 | },
388 | "source": [
389 | "But generally, the index type should be more meaningful than the positions, so we'd want to access rows by index elements. That's `loc`."
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": null,
395 | "metadata": {},
396 | "outputs": [],
397 | "source": [
398 | "df.loc[pandas.Timestamp(\"2018-11-04\")]"
399 | ]
400 | },
401 | {
402 | "cell_type": "markdown",
403 | "metadata": {
404 | "slideshow": {
405 | "slide_type": "slide"
406 | }
407 | },
408 | "source": [
409 | "We can slice regions of time just as we slice integer indexes."
410 | ]
411 | },
412 | {
413 | "cell_type": "code",
414 | "execution_count": null,
415 | "metadata": {},
416 | "outputs": [],
417 | "source": [
418 | "df.loc[pandas.Timestamp(\"2017-11-04\"):].plot()"
419 | ]
420 | },
421 | {
422 | "cell_type": "markdown",
423 | "metadata": {
424 | "slideshow": {
425 | "slide_type": "fragment"
426 | }
427 | },
428 | "source": [
429 | "And that lets us zoom into interesting regions in the data space."
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": null,
435 | "metadata": {},
436 | "outputs": [],
437 | "source": [
438 | "df[[\"imputed\", \"avg\"]].loc[pandas.Timestamp(\"2011-01-01\"):pandas.Timestamp(\"2015-01-01\")].plot()"
439 | ]
440 | },
441 | {
442 | "cell_type": "markdown",
443 | "metadata": {
444 | "slideshow": {
445 | "slide_type": "slide"
446 | }
447 | },
448 | "source": [
449 | "Pandas has oodles of statistical functions. Whenever I want to do something, I do a web search to find out what it's named— _somebody's_ asked about it on StackOverflow."
450 | ]
451 | },
452 | {
453 | "cell_type": "markdown",
454 | "metadata": {
455 | "slideshow": {
456 | "slide_type": "fragment"
457 | }
458 | },
459 | "source": [
460 | "Let's look for local warming! (Not global; this is just Newark.)"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": null,
466 | "metadata": {},
467 | "outputs": [],
468 | "source": [
469 | "df[\"imputed\"].plot()"
470 | ]
471 | },
472 | {
473 | "cell_type": "markdown",
474 | "metadata": {
475 | "slideshow": {
476 | "slide_type": "fragment"
477 | }
478 | },
479 | "source": [
480 | "Day-to-day variations are drowning out any effect. Can we smooth these data?"
481 | ]
482 | },
483 | {
484 | "cell_type": "markdown",
485 | "metadata": {
486 | "slideshow": {
487 | "slide_type": "slide"
488 | }
489 | },
490 | "source": [
491 | "Yes. It's called a rolling mean."
492 | ]
493 | },
494 | {
495 | "cell_type": "code",
496 | "execution_count": null,
497 | "metadata": {},
498 | "outputs": [],
499 | "source": [
500 | "df[\"imputed\"].rolling(3*365).mean().plot()"
501 | ]
502 | },
503 | {
504 | "cell_type": "markdown",
505 | "metadata": {
506 | "slideshow": {
507 | "slide_type": "slide"
508 | }
509 | },
510 | "source": [
511 | "The input files we've been working with are artificial (one number per line as text). The original file that came from the NCDC was a CSV with redundant information."
512 | ]
513 | },
514 | {
515 | "cell_type": "code",
516 | "execution_count": null,
517 | "metadata": {
518 | "scrolled": true
519 | },
520 | "outputs": [],
521 | "source": [
522 | "df2 = pandas.read_csv(\"data/newark-temperature.csv\")\n",
523 | "df2"
524 | ]
525 | },
526 | {
527 | "cell_type": "markdown",
528 | "metadata": {
529 | "slideshow": {
530 | "slide_type": "slide"
531 | }
532 | },
533 | "source": [
534 | "The first two columns have only one value because this was a database selection. Pandas's `read_csv` function recognized that the temperature columns are numbers, but not that the date is a date."
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "execution_count": null,
540 | "metadata": {
541 | "slideshow": {
542 | "slide_type": "fragment"
543 | }
544 | },
545 | "outputs": [],
546 | "source": [
547 | "df2[\"DATE\"].dtype"
548 | ]
549 | },
550 | {
551 | "cell_type": "markdown",
552 | "metadata": {
553 | "slideshow": {
554 | "slide_type": "fragment"
555 | }
556 | },
557 | "source": [
558 | "The `'O'` type (object) is for cases when Pandas doesn't recognize the type of a field. In principle, it could be any Python object, but from a CSV file, it's a string.\n",
559 | "\n",
560 | "We can try to read it again, this time providing a hint that this column is supposed to be a date."
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": null,
566 | "metadata": {
567 | "slideshow": {
568 | "slide_type": "slide"
569 | }
570 | },
571 | "outputs": [],
572 | "source": [
573 | "df2 = pandas.read_csv(\"data/newark-temperature.csv\", parse_dates=[\"DATE\"])\n",
574 | "df2.columns"
575 | ]
576 | },
577 | {
578 | "cell_type": "markdown",
579 | "metadata": {
580 | "slideshow": {
581 | "slide_type": "fragment"
582 | }
583 | },
584 | "source": [
585 | "It has the same columns, but now the date is a date."
586 | ]
587 | },
588 | {
589 | "cell_type": "code",
590 | "execution_count": null,
591 | "metadata": {},
592 | "outputs": [],
593 | "source": [
594 | "df2[\"DATE\"].dtype"
595 | ]
596 | },
597 | {
598 | "cell_type": "markdown",
599 | "metadata": {
600 | "slideshow": {
601 | "slide_type": "slide"
602 | }
603 | },
604 | "source": [
605 | "We'd like this date column to be the index of the whole table, so we say so."
606 | ]
607 | },
608 | {
609 | "cell_type": "code",
610 | "execution_count": null,
611 | "metadata": {},
612 | "outputs": [],
613 | "source": [
614 | "df2.index = df2[\"DATE\"]"
615 | ]
616 | },
617 | {
618 | "cell_type": "markdown",
619 | "metadata": {
620 | "slideshow": {
621 | "slide_type": "fragment"
622 | }
623 | },
624 | "source": [
625 | "Now we can do the same analysis we did before, but directly on the DataFrame from the CSV file, not by gluing together Series derived from Numpy arrays."
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": null,
631 | "metadata": {},
632 | "outputs": [],
633 | "source": [
634 | "df2[\"imputed\"] = df2[\"TAVG\"].fillna((df2[\"TMIN\"] + df2[\"TMAX\"]) / 2)\n",
635 | "df2[\"imputed\"].rolling(3*365).mean().plot()"
636 | ]
637 | },
638 | {
639 | "cell_type": "markdown",
640 | "metadata": {
641 | "slideshow": {
642 | "slide_type": "slide"
643 | }
644 | },
645 | "source": [
646 | "Temperature data are fairly simple: a one-dimensional time series. Let's consider something with a bit more structure— exoplanets.\n",
647 | "\n",
648 | "Each star may have a different number of planets, which complicates the indexing."
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": null,
654 | "metadata": {
655 | "scrolled": true
656 | },
657 | "outputs": [],
658 | "source": [
659 | "planets = pandas.read_csv(\"data/nasa-exoplanets.csv\")\n",
660 | "planets"
661 | ]
662 | },
663 | {
664 | "cell_type": "markdown",
665 | "metadata": {
666 | "slideshow": {
667 | "slide_type": "slide"
668 | }
669 | },
670 | "source": [
671 | "Not knowing much about this dataset, let's get a quick summary of the columns."
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "execution_count": null,
677 | "metadata": {
678 | "scrolled": true
679 | },
680 | "outputs": [],
681 | "source": [
682 | "planets.describe().transpose()"
683 | ]
684 | },
685 | {
686 | "cell_type": "markdown",
687 | "metadata": {
688 | "slideshow": {
689 | "slide_type": "slide"
690 | }
691 | },
692 | "source": [
693 | "Fortunately, NASA gave us longer explanations of the column names."
694 | ]
695 | },
696 | {
697 | "cell_type": "code",
698 | "execution_count": null,
699 | "metadata": {},
700 | "outputs": [],
701 | "source": [
702 | "!cat data/nasa-exoplanets-details.txt"
703 | ]
704 | },
705 | {
706 | "cell_type": "markdown",
707 | "metadata": {
708 | "slideshow": {
709 | "slide_type": "fragment"
710 | }
711 | },
712 | "source": [
713 | "Number of moons? (\"`pl_mnum`: Number of Moons in System\") Awesome!"
714 | ]
715 | },
716 | {
717 | "cell_type": "code",
718 | "execution_count": null,
719 | "metadata": {
720 | "slideshow": {
721 | "slide_type": "slide"
722 | }
723 | },
724 | "outputs": [],
725 | "source": [
726 | "planets[planets[\"pl_mnum\"] > 0]"
727 | ]
728 | },
729 | {
730 | "cell_type": "markdown",
731 | "metadata": {
732 | "slideshow": {
733 | "slide_type": "fragment"
734 | }
735 | },
736 | "source": [
737 | "I guess not yet. They're just being hopeful."
738 | ]
739 | },
740 | {
741 | "cell_type": "markdown",
742 | "metadata": {
743 | "slideshow": {
744 | "slide_type": "slide"
745 | }
746 | },
747 | "source": [
748 | "In this dataset, one row is one planet. Thus, star data for stars with multiple known planets are duplicated (which effectively weights star data by their number of planets in `planets.describe()`).\n",
749 | "\n",
750 | "To get a table of stars only, we'd have to do a group-by. We expect the star data to be the same for each planet associated with a star, so the mean is an appropriate summary."
751 | ]
752 | },
753 | {
754 | "cell_type": "code",
755 | "execution_count": null,
756 | "metadata": {
757 | "scrolled": true
758 | },
759 | "outputs": [],
760 | "source": [
761 | "planets.groupby(\"pl_hostname\").mean()"
762 | ]
763 | },
764 | {
765 | "cell_type": "markdown",
766 | "metadata": {
767 | "slideshow": {
768 | "slide_type": "slide"
769 | }
770 | },
771 | "source": [
772 | "**Exercise:** This reduction averages all values per star, which is not meaningful for planet variables (which start with `\"pl_\"`). After all, what does it mean to average their \"semi-major axes?\" Or their \"discovery methods?\"\n",
773 | "\n",
774 | "To avoid confusion, repeat the group-by with only the columns that start with `\"st_\"`."
775 | ]
776 | },
777 | {
778 | "cell_type": "code",
779 | "execution_count": null,
780 | "metadata": {
781 | "scrolled": true
782 | },
783 | "outputs": [],
784 | "source": [
785 | "???"
786 | ]
787 | },
788 | {
789 | "cell_type": "markdown",
790 | "metadata": {
791 | "slideshow": {
792 | "slide_type": "slide"
793 | }
794 | },
795 | "source": [
796 | "Wouldn't it be great if the index encapsulated the hierarchical relationship between stars and planets?\n",
797 | "\n",
798 | "Pandas has a `MultiIndex`, which allows sub-indexes to be nested within outer indexes."
799 | ]
800 | },
801 | {
802 | "cell_type": "code",
803 | "execution_count": null,
804 | "metadata": {
805 | "scrolled": true
806 | },
807 | "outputs": [],
808 | "source": [
809 | "planets.index = pandas.MultiIndex.from_arrays([planets[\"pl_hostname\"], planets[\"pl_letter\"]])\n",
810 | "planets"
811 | ]
812 | },
813 | {
814 | "cell_type": "markdown",
815 | "metadata": {
816 | "slideshow": {
817 | "slide_type": "slide"
818 | }
819 | },
820 | "source": [
821 | "The transpose of this multiindexed table is a table with two levels of columns."
822 | ]
823 | },
824 | {
825 | "cell_type": "code",
826 | "execution_count": null,
827 | "metadata": {
828 | "scrolled": true
829 | },
830 | "outputs": [],
831 | "source": [
832 | "planets.transpose()"
833 | ]
834 | },
835 | {
836 | "cell_type": "markdown",
837 | "metadata": {
838 | "slideshow": {
839 | "slide_type": "slide"
840 | }
841 | },
842 | "source": [
843 | "To simplify this table, let's reduce it to the following fields:\n",
844 | "\n",
845 | " * `pl_discmethod`: Discovery Method\n",
846 | " * `pl_orbper`: Orbital Period [days]\n",
847 |     " * `pl_orbsmax`: Orbit Semi-Major Axis [AU]\n",
848 | " * `pl_orbeccen`: Eccentricity\n",
849 | " * `pl_orbincl`: Inclination [deg]\n",
850 | " * `pl_bmassj`: Planet Mass or M*sin(i) [Jupiter mass]\n",
851 | " * `pl_bmassprov`: Planet Mass or M*sin(i) Provenance\n",
852 | " * `pl_disc`: Year of Discovery\n",
853 | " * `pl_telescope`: Discovery Telescope"
854 | ]
855 | },
856 | {
857 | "cell_type": "code",
858 | "execution_count": null,
859 | "metadata": {
860 | "scrolled": true
861 | },
862 | "outputs": [],
863 | "source": [
864 | "simple = planets[[\"pl_discmethod\", \"pl_orbper\", \"pl_orbsmax\", \"pl_orbeccen\", \"pl_bmassj\", \"pl_bmassprov\", \"pl_disc\", \"pl_telescope\"]]\n",
865 | "simple"
866 | ]
867 | },
868 | {
869 | "cell_type": "markdown",
870 | "metadata": {
871 | "slideshow": {
872 | "slide_type": "slide"
873 | }
874 | },
875 | "source": [
876 | "The syntax for selecting rows by star name now selects subtables of all planets associated with a star."
877 | ]
878 | },
879 | {
880 | "cell_type": "code",
881 | "execution_count": null,
882 | "metadata": {},
883 | "outputs": [],
884 | "source": [
885 | "simple.loc[\"tau Boo\"]"
886 | ]
887 | },
888 | {
889 | "cell_type": "code",
890 | "execution_count": null,
891 | "metadata": {},
892 | "outputs": [],
893 | "source": [
894 | "simple.loc[\"tau Cet\"]"
895 | ]
896 | },
897 | {
898 | "cell_type": "markdown",
899 | "metadata": {
900 | "slideshow": {
901 | "slide_type": "slide"
902 | }
903 | },
904 | "source": [
905 | "How about the opposite selection? To get all planets labeled `\"b\"`— this is one planet per star. The method for that is `xs` (for \"cross-section\")."
906 | ]
907 | },
908 | {
909 | "cell_type": "code",
910 | "execution_count": null,
911 | "metadata": {
912 | "scrolled": true
913 | },
914 | "outputs": [],
915 | "source": [
916 | "simple.xs(\"b\", level=\"pl_letter\")"
917 | ]
918 | },
919 | {
920 | "cell_type": "markdown",
921 | "metadata": {
922 | "slideshow": {
923 | "slide_type": "slide"
924 | }
925 | },
926 | "source": [
927 | "In a sense, the multiindexed table represents three dimensional information: (1) star, (2) planet letter, (3) columns. Some combinations of star and planet letter do not exist: these two dimensions are not completely filled. Some stars have only one planet, while others have as many as..."
928 | ]
929 | },
930 | {
931 | "cell_type": "code",
932 | "execution_count": null,
933 | "metadata": {
934 | "slideshow": {
935 | "slide_type": "fragment"
936 | }
937 | },
938 | "outputs": [],
939 | "source": [
940 | "planets[\"pl_letter\"].groupby(\"pl_hostname\").count().max()"
941 | ]
942 | },
943 | {
944 | "cell_type": "markdown",
945 | "metadata": {},
946 | "source": [
947 | "...8 planets. Most have only one. I would call this dimension \"jagged\" or \"ragged.\" Pandas represents this _sparsely,_ by enumerating only the combinations that do exist."
948 | ]
949 | },
950 | {
951 | "cell_type": "markdown",
952 | "metadata": {
953 | "slideshow": {
954 | "slide_type": "fragment"
955 | }
956 | },
957 | "source": [
958 | "Pandas used to have a type to handle 3+ dimensional data (Series is 1D, DataFrame is 2D, ...), but this was dropped in favor of multiindexes."
959 | ]
960 | },
961 | {
962 | "cell_type": "markdown",
963 | "metadata": {
964 | "slideshow": {
965 | "slide_type": "fragment"
966 | }
967 | },
968 | "source": [
969 | "There's another library called xarray to fill this niche, but generally, I'd rather deal with 3+ dimensions sparsely than densely."
970 | ]
971 | },
972 | {
973 | "cell_type": "markdown",
974 | "metadata": {
975 | "slideshow": {
976 | "slide_type": "slide"
977 | }
978 | },
979 | "source": [
980 | "If we want to pass these data to a machine learning model or something, we'll have to flatten the star-planet structure.\n",
981 | "\n",
982 | "A lossless way to do that is to pivot planet letter keys into columns. (Note: the `stack` and `unstack` functions do a similar thing.)"
983 | ]
984 | },
985 | {
986 | "cell_type": "code",
987 | "execution_count": null,
988 | "metadata": {},
989 | "outputs": [],
990 | "source": [
991 | "simple.pivot_table(index=\"pl_hostname\", columns=\"pl_letter\")"
992 | ]
993 | },
994 | {
995 | "cell_type": "markdown",
996 | "metadata": {
997 | "slideshow": {
998 | "slide_type": "fragment"
999 | }
1000 | },
1001 | "source": [
1002 | "There's now a column for each letter-column combination. Most of them are empty because there was one star with 8 planets but most have 1 planet."
1003 | ]
1004 | },
1005 | {
1006 | "cell_type": "markdown",
1007 | "metadata": {
1008 | "slideshow": {
1009 | "slide_type": "slide"
1010 | }
1011 | },
1012 | "source": [
1013 | "If you want to simplify this table, you'll have to make choices because anything you do from here on loses information.\n",
1014 | "\n",
1015 | " * Do you fill in NaN with `fillna`?\n",
1016 | " * Do you pick a few of the most common planet letters, like `[\"b\", \"c\", \"d\"]`?\n",
1017 | " * Do you average over all planets?\n",
1018 | " * Do you turn the original into two tables, one for each star and another for each planet, with identifiers linking them?"
1019 | ]
1020 | },
1021 | {
1022 | "cell_type": "markdown",
1023 | "metadata": {
1024 | "slideshow": {
1025 | "slide_type": "slide"
1026 | }
1027 | },
1028 | "source": [
1029 | "**One last topic:** This dataset has a lot of strings. When we read it from the CSV file, Pandas left them as Python strings. This can be inefficient for very large tables (in storage and comparison speed). A common technique is to find unique strings and replace each value with an integer. This is exactly what we did with the Gettysburg Address this morning (\"dictionary encoding\"). Pandas has an automated way to do it."
1030 | ]
1031 | },
1032 | {
1033 | "cell_type": "code",
1034 | "execution_count": null,
1035 | "metadata": {
1036 | "slideshow": {
1037 | "slide_type": "fragment"
1038 | }
1039 | },
1040 | "outputs": [],
1041 | "source": [
1042 | "simple[\"pl_discmethod\"].dtype"
1043 | ]
1044 | },
1045 | {
1046 | "cell_type": "markdown",
1047 | "metadata": {
1048 | "slideshow": {
1049 | "slide_type": "fragment"
1050 | }
1051 | },
1052 | "source": [
1053 | "Type `'O'` is \"object,\" for arbitrary Python objects (usually strings). We want it to be a \"category\" type. Let's convert it (`astype`) and assign it."
1054 | ]
1055 | },
1056 | {
1057 | "cell_type": "code",
1058 | "execution_count": null,
1059 | "metadata": {},
1060 | "outputs": [],
1061 | "source": [
1062 | "simple[\"pl_discmethod\"] = simple[\"pl_discmethod\"].astype(\"category\")"
1063 | ]
1064 | },
1065 | {
1066 | "cell_type": "markdown",
1067 | "metadata": {
1068 | "slideshow": {
1069 | "slide_type": "fragment"
1070 | }
1071 | },
1072 | "source": [
1073 | "Uh oh... what's that warning?"
1074 | ]
1075 | },
1076 | {
1077 | "cell_type": "markdown",
1078 | "metadata": {
1079 | "slideshow": {
1080 | "slide_type": "slide"
1081 | }
1082 | },
1083 | "source": [
1084 | "It's one of the most common you get with Pandas ([here's a whole blog on it](https://www.dataquest.io/blog/settingwithcopywarning/)). Remember the \"view vs copy\" discussion this morning? This is the same thing in a Pandas, rather than Numpy, context."
1085 | ]
1086 | },
1087 | {
1088 | "cell_type": "markdown",
1089 | "metadata": {
1090 | "slideshow": {
1091 | "slide_type": "fragment"
1092 | }
1093 | },
1094 | "source": [
1095 | "Unlike Numpy, Pandas gives you a warning. We really should address that warning."
1096 | ]
1097 | },
1098 | {
1099 | "cell_type": "markdown",
1100 | "metadata": {
1101 | "slideshow": {
1102 | "slide_type": "fragment"
1103 | }
1104 | },
1105 | "source": [
1106 | "The issue was that `simple` is a view of `planets`, so modifying a column in `simple` modifies `planets`. That could lead to surprising results.\n",
1107 | "\n",
1108 | "The solution? Turn `simple` into a copy."
1109 | ]
1110 | },
1111 | {
1112 | "cell_type": "code",
1113 | "execution_count": null,
1114 | "metadata": {},
1115 | "outputs": [],
1116 | "source": [
1117 | "simple = simple.copy()"
1118 | ]
1119 | },
1120 | {
1121 | "cell_type": "markdown",
1122 | "metadata": {
1123 | "slideshow": {
1124 | "slide_type": "slide"
1125 | }
1126 | },
1127 | "source": [
1128 | "Now there's no warning because everything's fine."
1129 | ]
1130 | },
1131 | {
1132 | "cell_type": "code",
1133 | "execution_count": null,
1134 | "metadata": {},
1135 | "outputs": [],
1136 | "source": [
1137 | "simple[\"pl_discmethod\"] = simple[\"pl_discmethod\"].astype(\"category\")"
1138 | ]
1139 | },
1140 | {
1141 | "cell_type": "code",
1142 | "execution_count": null,
1143 | "metadata": {},
1144 | "outputs": [],
1145 | "source": [
1146 | "simple[\"pl_discmethod\"].dtype"
1147 | ]
1148 | },
1149 | {
1150 | "cell_type": "markdown",
1151 | "metadata": {
1152 | "slideshow": {
1153 | "slide_type": "fragment"
1154 | }
1155 | },
1156 | "source": [
1157 | "Now when we do analysis on `\"pl_discmethod\"`, it's number-crunching, rather than string-crunching."
1158 | ]
1159 | },
1160 | {
1161 | "cell_type": "code",
1162 | "execution_count": null,
1163 | "metadata": {},
1164 | "outputs": [],
1165 | "source": [
1166 | "simple[\"pl_discmethod\"].value_counts().plot.bar(logy=True)"
1167 | ]
1168 | },
1169 | {
1170 | "cell_type": "markdown",
1171 | "metadata": {
1172 | "slideshow": {
1173 | "slide_type": "slide"
1174 | }
1175 | },
1176 | "source": [
1177 | "Just poking around now... discovery method by date?"
1178 | ]
1179 | },
1180 | {
1181 | "cell_type": "code",
1182 | "execution_count": null,
1183 | "metadata": {},
1184 | "outputs": [],
1185 | "source": [
1186 | "simple.reset_index().groupby([\"pl_disc\", \"pl_discmethod\"]).count()[\"pl_letter\"].unstack(\"pl_discmethod\").fillna(0).plot()"
1187 | ]
1188 | },
1189 | {
1190 | "cell_type": "markdown",
1191 | "metadata": {
1192 | "slideshow": {
1193 | "slide_type": "fragment"
1194 | }
1195 | },
1196 | "source": [
1197 | "Wow— a lot of transits! Why is that?"
1198 | ]
1199 | },
1200 | {
1201 | "cell_type": "code",
1202 | "execution_count": null,
1203 | "metadata": {
1204 | "slideshow": {
1205 | "slide_type": "slide"
1206 | }
1207 | },
1208 | "outputs": [],
1209 | "source": [
1210 | "planets[planets[\"pl_discmethod\"] == \"Transit\"].groupby([\"pl_disc\", \"pl_telescope\"]).count()[\"pl_letter\"].unstack(\"pl_telescope\").fillna(0).plot(figsize=(10, 7))"
1211 | ]
1212 | },
1213 | {
1214 | "cell_type": "markdown",
1215 | "metadata": {
1216 | "slideshow": {
1217 | "slide_type": "fragment"
1218 | }
1219 | },
1220 | "source": [
1221 | "Oh. Kepler. The first peak corresponds to the Kepler telescope's first mission, which ended early due to an instrument failure. The second mission, K2, worked around the failure to get results until earlier this year."
1222 | ]
1223 | },
1224 | {
1225 | "cell_type": "markdown",
1226 | "metadata": {
1227 | "slideshow": {
1228 | "slide_type": "slide"
1229 | }
1230 | },
1231 | "source": [
1232 | "Other than transits, what are the most popular methods?"
1233 | ]
1234 | },
1235 | {
1236 | "cell_type": "code",
1237 | "execution_count": null,
1238 | "metadata": {
1239 | "slideshow": {
1240 | "slide_type": "-"
1241 | }
1242 | },
1243 | "outputs": [],
1244 | "source": [
1245 | "simple[simple[\"pl_discmethod\"] != \"Transit\"].reset_index().groupby([\"pl_disc\", \"pl_discmethod\"]).count()[\"pl_letter\"].unstack(\"pl_discmethod\").fillna(0).plot(figsize=(10, 7))"
1246 | ]
1247 | },
1248 | {
1249 | "cell_type": "markdown",
1250 | "metadata": {
1251 | "slideshow": {
1252 | "slide_type": "fragment"
1253 | }
1254 | },
1255 | "source": [
1256 | "Pulsar timing, apparently."
1257 | ]
1258 | },
1259 | {
1260 | "cell_type": "markdown",
1261 | "metadata": {
1262 | "slideshow": {
1263 | "slide_type": "slide"
1264 | }
1265 | },
1266 | "source": [
1267 | "On to Dask!"
1268 | ]
1269 | }
1270 | ],
1271 | "metadata": {
1272 | "celltoolbar": "Slideshow",
1273 | "kernelspec": {
1274 | "display_name": "Python 3",
1275 | "language": "python",
1276 | "name": "python3"
1277 | },
1278 | "language_info": {
1279 | "codemirror_mode": {
1280 | "name": "ipython",
1281 | "version": 3
1282 | },
1283 | "file_extension": ".py",
1284 | "mimetype": "text/x-python",
1285 | "name": "python",
1286 | "nbconvert_exporter": "python",
1287 | "pygments_lexer": "ipython3",
1288 | "version": "3.7.0"
1289 | }
1290 | },
1291 | "nbformat": 4,
1292 | "nbformat_minor": 2
1293 | }
1294 |
--------------------------------------------------------------------------------
/5-dask.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Dask & multiprocessing"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "slideshow": {
18 | "slide_type": "fragment"
19 | }
20 | },
21 | "source": [
22 | "```\n",
23 | "conda install dask distributed -c conda-forge\n",
24 | "```\n",
25 | "\n",
26 | "_(and numpy, matplotlib from before)_"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "slideshow": {
33 | "slide_type": "slide"
34 | }
35 | },
36 | "source": [
37 | "Computers aren't getting any _faster._\n",
38 | "\n",
39 | "![clock rate](img/clock-rate.jpg)\n\n*Computer Architecture: A Quantitative Approach, David A. Patterson and John L. Hennessy*"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {
45 | "slideshow": {
46 | "slide_type": "slide"
47 | }
48 | },
49 | "source": [
50 | "But Moore's Law is still in effect: the number of transistors per square inch continues to grow exponentially (for now). In the 21st century, however, those extra transistors are used to make more execution units, not to increase the rate through smaller pipelines."
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {
56 | "slideshow": {
57 | "slide_type": "fragment"
58 | }
59 | },
60 | "source": [
61 | "(The scaling of clock rates— Dennard scaling— ended because power dissipation scales with clock rate squared: anything faster than 3 GHz _cooks_ the chip!)"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {
67 | "slideshow": {
68 | "slide_type": "fragment"
69 | }
70 | },
71 | "source": [
72 | "Most programming languages, Python among them, cannot be transparently parallelized. You'll have to change your programs to use the extra processors."
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {
78 | "slideshow": {
79 | "slide_type": "slide"
80 | }
81 | },
82 | "source": [
83 | "Let's take a moderately complex problem as an example."
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": null,
89 | "metadata": {},
90 | "outputs": [],
91 | "source": [
92 | "%matplotlib inline\n",
93 | "import matplotlib.pyplot as plt\n",
94 | "import numpy\n",
95 | "import time\n",
96 | "\n",
97 | "def prepare(height, width):\n",
98 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
99 | " c = x + y*1j\n",
100 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32)\n",
101 | " return c, fractal\n",
102 | "\n",
103 | "def run(c, fractal, maxiterations=20):\n",
104 | " fractal *= 0 # set fractal to maxiterations without replacing it\n",
105 | " fractal += maxiterations\n",
106 | " z = c\n",
107 | " for i in range(maxiterations):\n",
108 | " z = z**2 + c\n",
109 | " diverge = numpy.absolute(z) > 2\n",
110 | " divnow = diverge & (fractal == maxiterations)\n",
111 | " fractal[divnow] = i\n",
112 | " z[diverge] = 2\n",
113 | " return fractal"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {
120 | "slideshow": {
121 | "slide_type": "slide"
122 | }
123 | },
124 | "outputs": [],
125 | "source": [
126 | "c, fractal = prepare(8000, 12000)\n",
127 | "\n",
128 | "starttime = time.time()\n",
129 | "fractal = run(c, fractal)\n",
130 | "time.time() - starttime"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
140 | "ax.imshow(fractal)\n",
141 | "# ax.imshow(fractal[-200:, :300])"
142 | ]
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "metadata": {
147 | "slideshow": {
148 | "slide_type": "slide"
149 | }
150 | },
151 | "source": [
152 | "Python has built-in libraries for parallel processing:\n",
153 | "\n",
154 | " * **threading:** lets you launch individual threads; you manage coordination.\n",
155 | " * **multiprocessing:** same interface but it launches processes. Pro: can't make common mistakes due to shared memory. Con: memory isn't shared; have to ship data to and from workers.\n",
156 | " * **concurrent.futures:** higher-level interface: Python manages workers; you send work."
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {
162 | "slideshow": {
163 | "slide_type": "slide"
164 | }
165 | },
166 | "source": [
167 | "Here's an illustration of the threading interface. Since memory is shared, we don't have to send data to the workers or send results back— they can all see and modify the same array."
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": [
176 | "import threading\n",
177 | "\n",
178 | "class Worker(threading.Thread):\n",
179 | " def __init__(self, c, fractal, i):\n",
180 | " super(Worker, self).__init__()\n",
181 | " self.c, self.fractal, self.i = c, fractal, i\n",
182 | " def run(self):\n",
183 | " run(self.c[10*self.i : 10*(self.i + 1), :], self.fractal[10*self.i : 10*(self.i + 1), :])\n",
184 | "\n",
185 | "c, fractal = prepare(8000, 12000)\n",
186 | "workers = []\n",
187 | "for i in range(800):\n",
188 | " workers.append(Worker(c, fractal, i))\n",
189 | "\n",
190 | "starttime = time.time()\n",
191 | "\n",
192 | "for worker in workers:\n",
193 | " worker.start()\n",
194 | "for worker in workers:\n",
195 | " worker.join()\n",
196 | "\n",
197 | "time.time() - starttime"
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {
203 | "slideshow": {
204 | "slide_type": "slide"
205 | }
206 | },
207 | "source": [
208 | "Now we have to check the result because it's easy to screw this up. (I did many times, preparing this talk.)"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "scrolled": false
216 | },
217 | "outputs": [],
218 | "source": [
219 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
220 | "ax.imshow(fractal)"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {
226 | "slideshow": {
227 | "slide_type": "slide"
228 | }
229 | },
230 | "source": [
231 | "1 thread took 35 seconds to complete.\n",
232 | "\n",
233 | "8 threads took 12 seconds to complete."
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": null,
239 | "metadata": {
240 | "slideshow": {
241 | "slide_type": "fragment"
242 | }
243 | },
244 | "outputs": [],
245 | "source": [
246 | "35 / 12"
247 | ]
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "metadata": {
252 | "slideshow": {
253 | "slide_type": "fragment"
254 | }
255 | },
256 | "source": [
257 | "3 ≠ 8.\n",
258 | "\n",
259 | "It's often difficult to get \"perfect scaling,\" N times more work from N threads, in real situations. Even though this problem is \"embarrassingly parallel\" (none of the workers need to know other workers' results), there can be scheduling overhead, contention for memory, or slow-downs due to Python's [Global Interpreter Lock](https://realpython.com/python-gil/)."
260 | ]
261 | },
262 | {
263 | "cell_type": "markdown",
264 | "metadata": {
265 | "slideshow": {
266 | "slide_type": "slide"
267 | }
268 | },
269 | "source": [
270 | "One way to avoid the global interpreter lock is to send work to separate processes. Python interpreters in separate processes do not share memory and therefore do not need to coordinate."
271 | ]
272 | },
273 | {
274 | "cell_type": "markdown",
275 | "metadata": {
276 | "slideshow": {
277 | "slide_type": "fragment"
278 | }
279 | },
280 | "source": [
281 | "However, that means that we can't send data by simply sharing variables. We have to send it through a `multiprocessing.Queue` (which serializes— pickles— the data so that it can go through a pipe)."
282 | ]
283 | },
284 | {
285 | "cell_type": "markdown",
286 | "metadata": {
287 | "slideshow": {
288 | "slide_type": "slide"
289 | }
290 | },
291 | "source": [
292 | "...usually. There's an exception to this: you can share arrays among processes if you declare them as shared memory before launching the subprocesses. Python has a special type for this:"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": null,
298 | "metadata": {},
299 | "outputs": [],
300 | "source": [
301 | "import multiprocessing\n",
302 | "import ctypes\n",
303 | "\n",
304 | "sharedarray = multiprocessing.RawArray(ctypes.c_double, 100)\n",
305 | "sharedarray"
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {
311 | "slideshow": {
312 | "slide_type": "fragment"
313 | }
314 | },
315 | "source": [
316 | "This is not a Numpy array, but it can be cast as a Numpy array (in the forked process) like this:"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": null,
322 | "metadata": {},
323 | "outputs": [],
324 | "source": [
325 | "numpy.frombuffer(sharedarray, dtype=numpy.float64)"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {
331 | "slideshow": {
332 | "slide_type": "slide"
333 | }
334 | },
335 | "source": [
336 | "If the forked processes are not careful to write to different parts of the array, they can seriously garble the data by writing to the same element at the same time.\n",
337 | "\n",
338 | "\n",
339 | "It's not for the faint of heart, but it can be the fastest way to communicate between processes, and separate processes are the only way to fully escape synchronization delays due to Python's global interpreter lock."
340 | ]
341 | },
342 | {
343 | "cell_type": "markdown",
344 | "metadata": {
345 | "slideshow": {
346 | "slide_type": "fragment"
347 | }
348 | },
349 | "source": [
350 | "By now, you may be wondering if there's a more \"high level\" approach."
351 | ]
352 | },
353 | {
354 | "cell_type": "markdown",
355 | "metadata": {
356 | "slideshow": {
357 | "slide_type": "slide"
358 | }
359 | },
360 | "source": [
361 | "Python 3 introduced an \"executor\" interface that manages workers for you. Instead of creating threads or processes with a `run` method, you create an executor and send work to it."
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": null,
367 | "metadata": {
368 | "slideshow": {
369 | "slide_type": "-"
370 | }
371 | },
372 | "outputs": [],
373 | "source": [
374 | "import concurrent.futures\n",
375 | "executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": null,
381 | "metadata": {},
382 | "outputs": [],
383 | "source": [
384 | "c, fractal = prepare(8000, 12000)\n",
385 | "# fractal = numpy.asfortranarray(fractal)\n",
386 | "\n",
387 | "def piece(i):\n",
388 | " ci = c[10*i : 10*(i + 1), :]\n",
389 | " fi = fractal[10*i : 10*(i + 1), :]\n",
390 | " run(ci, fi)\n",
391 | "\n",
392 | "starttime = time.time()\n",
393 | "\n",
394 | "futures = executor.map(piece, range(800))\n",
395 | "for future in futures: # iterating over them waits for the results\n",
396 | " pass\n",
397 | "\n",
398 | "time.time() - starttime"
399 | ]
400 | },
401 | {
402 | "cell_type": "markdown",
403 | "metadata": {
404 | "slideshow": {
405 | "slide_type": "fragment"
406 | }
407 | },
408 | "source": [
409 | "Yay! A tiny bit better! What happens when we change to Fortran order? Why?"
410 | ]
411 | },
412 | {
413 | "cell_type": "markdown",
414 | "metadata": {
415 | "slideshow": {
416 | "slide_type": "slide"
417 | }
418 | },
419 | "source": [
420 | "Always make sure we haven't screwed things up."
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": null,
426 | "metadata": {
427 | "slideshow": {
428 | "slide_type": "-"
429 | }
430 | },
431 | "outputs": [],
432 | "source": [
433 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
434 | "ax.imshow(fractal)"
435 | ]
436 | },
437 | {
438 | "cell_type": "markdown",
439 | "metadata": {
440 | "slideshow": {
441 | "slide_type": "slide"
442 | }
443 | },
444 | "source": [
445 | "Still, there needs to be a better way. Our array slices in `piece` are fragile: an indexing error can ruin the result. Can't the problem of scattering work be generalized?"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": null,
451 | "metadata": {
452 | "slideshow": {
453 | "slide_type": "slide"
454 | }
455 | },
456 | "outputs": [],
457 | "source": [
458 | "import dask.array"
459 | ]
460 | },
461 | {
462 | "cell_type": "code",
463 | "execution_count": null,
464 | "metadata": {},
465 | "outputs": [],
466 | "source": [
467 | "c, fractal = prepare(8000, 12000)\n",
468 | "\n",
469 | "c = dask.array.from_array(c, chunks=(10, 12000))\n",
470 | "fractal = dask.array.from_array(fractal, chunks=(10, 12000))\n",
471 | "\n",
472 | "starttime = time.time()\n",
473 | "fractal = run(c, fractal)\n",
474 | "time.time() - starttime"
475 | ]
476 | },
477 | {
478 | "cell_type": "markdown",
479 | "metadata": {
480 | "slideshow": {
481 | "slide_type": "fragment"
482 | }
483 | },
484 | "source": [
485 | "That was too fast: too good to be true."
486 | ]
487 | },
488 | {
489 | "cell_type": "code",
490 | "execution_count": null,
491 | "metadata": {},
492 | "outputs": [],
493 | "source": [
494 | "fractal"
495 | ]
496 | },
497 | {
498 | "cell_type": "markdown",
499 | "metadata": {
500 | "slideshow": {
501 | "slide_type": "fragment"
502 | }
503 | },
504 | "source": [
505 | "This is not an array: it is a description of how to make an array. Dask has stepped through our procedure and built an execution graph, encoding all the dependencies so that it can correctly apply it to individual chunks. When we execute this graph, Dask will send a chunk to each processor in the computer and combine results."
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": null,
511 | "metadata": {
512 | "slideshow": {
513 | "slide_type": "slide"
514 | }
515 | },
516 | "outputs": [],
517 | "source": [
518 | "starttime = time.time()\n",
519 | "fractal = fractal.compute() # replace `fractal` the execution graph with `fractal` the array result\n",
520 | "time.time() - starttime"
521 | ]
522 | },
523 | {
524 | "cell_type": "markdown",
525 | "metadata": {
526 | "slideshow": {
527 | "slide_type": "fragment"
528 | }
529 | },
530 | "source": [
531 | "Now this check is a formality: Dask has managed the chunking, so we won't accidentally miss a slice."
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "execution_count": null,
537 | "metadata": {},
538 | "outputs": [],
539 | "source": [
540 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
541 | "ax.imshow(fractal)"
542 | ]
543 | },
544 | {
545 | "cell_type": "markdown",
546 | "metadata": {
547 | "slideshow": {
548 | "slide_type": "slide"
549 | }
550 | },
551 | "source": [
552 | "We seem to have paid for this simplicity: it took twice as long as the carefully sliced `piece` chunks in the executor."
553 | ]
554 | },
555 | {
556 | "cell_type": "markdown",
557 | "metadata": {
558 | "slideshow": {
559 | "slide_type": "fragment"
560 | }
561 | },
562 | "source": [
563 | "The reason is that our code is not as simple as it looks. It has masking and piecemeal assignments, which in principle could introduce complex dependencies. _We_ know that everything will be fine if you just chop up the array in independent sections— and thus we implemented our thread and executor-based solutions that way."
564 | ]
565 | },
566 | {
567 | "cell_type": "markdown",
568 | "metadata": {
569 | "slideshow": {
570 | "slide_type": "slide"
571 | }
572 | },
573 | "source": [
574 | "Let me show you what Dask has to do for a 1×1 chunking of our problem."
575 | ]
576 | },
577 | {
578 | "cell_type": "code",
579 | "execution_count": null,
580 | "metadata": {
581 | "scrolled": true
582 | },
583 | "outputs": [],
584 | "source": [
585 | "c, fractal = prepare(1, 1) # try 2, 2\n",
586 | "c = dask.array.from_array(c, chunks=(1, 1))\n",
587 | "fractal = dask.array.from_array(fractal, chunks=(1, 1))\n",
588 | "fractal = run(c, fractal, maxiterations=1) # try more iterations\n",
589 | "fractal.visualize()"
590 | ]
591 | },
592 | {
593 | "cell_type": "markdown",
594 | "metadata": {
595 | "slideshow": {
596 | "slide_type": "slide"
597 | }
598 | },
599 | "source": [
600 | "If that were all, I'd probably stick to chopping up the grid by hand (when possible). However, _exactly the same interface_ that distributes work across cores in my laptop can distribute work around the world, just by pointing it to a remote scheduler.\n",
601 | "\n",
602 | "This is truly the ~~lazy~~ busy researcher approach!"
603 | ]
604 | },
605 | {
606 | "cell_type": "markdown",
607 | "metadata": {
608 | "slideshow": {
609 | "slide_type": "fragment"
610 | }
611 | },
612 | "source": [
613 | "Note to self: launch\n",
614 | "\n",
615 | "`dask-scheduler &`\n",
616 | "\n",
617 | "and\n",
618 | "\n",
619 | "`dask-worker --nthreads 8 127.0.0.1:8786 &`\n",
620 | "\n",
621 | "in a terminal now."
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": null,
627 | "metadata": {
628 | "slideshow": {
629 | "slide_type": "slide"
630 | }
631 | },
632 | "outputs": [],
633 | "source": [
634 | "import dask.distributed\n",
635 | "client = dask.distributed.Client(\"127.0.0.1:8786\")\n",
636 | "client"
637 | ]
638 | },
639 | {
640 | "cell_type": "code",
641 | "execution_count": null,
642 | "metadata": {},
643 | "outputs": [],
644 | "source": [
645 | "c, fractal = prepare(8000, 12000)\n",
646 | "\n",
647 | "c = dask.array.from_array(c, chunks=(100, 12000))\n",
648 | "fractal = dask.array.from_array(fractal, chunks=(100, 12000))\n",
649 | "fractal = run(c, fractal)\n",
650 | "\n",
651 | "starttime = time.time()\n",
652 | "fractal = client.compute(fractal, sync=True)\n",
653 | "time.time() - starttime"
654 | ]
655 | },
656 | {
657 | "cell_type": "markdown",
658 | "metadata": {
659 | "slideshow": {
660 | "slide_type": "fragment"
661 | }
662 | },
663 | "source": [
664 | "Well, that was exciting!\n",
665 | "\n",
666 | "In the end, this example took longer than the single-core version, but it illustrates how array operations _can be_ distributed in a simple way."
667 | ]
668 | },
669 | {
670 | "cell_type": "markdown",
671 | "metadata": {
672 | "slideshow": {
673 | "slide_type": "slide"
674 | }
675 | },
676 | "source": [
677 | "I haven't shown very much of what Dask can do. It's a general toolkit for delayed and distributed evaluation. As such, it provides a nice way to work on Pandas-like DataFrames that are too large for memory:"
678 | ]
679 | },
680 | {
681 | "cell_type": "code",
682 | "execution_count": null,
683 | "metadata": {},
684 | "outputs": [],
685 | "source": [
686 | "import dask.dataframe\n",
687 | "\n",
688 | "df = dask.dataframe.read_csv(\"data/nasa-exoplanets.csv\")\n",
689 | "df"
690 | ]
691 | },
692 | {
693 | "cell_type": "markdown",
694 | "metadata": {
695 | "slideshow": {
696 | "slide_type": "fragment"
697 | }
698 | },
699 | "source": [
700 | "We don't see the data because they haven't been loaded. But we can get them if we need them."
701 | ]
702 | },
703 | {
704 | "cell_type": "code",
705 | "execution_count": null,
706 | "metadata": {
707 | "scrolled": true
708 | },
709 | "outputs": [],
710 | "source": [
711 | "df[[\"pl_hostname\", \"pl_pnum\"]].compute()"
712 | ]
713 | },
714 | {
715 | "cell_type": "markdown",
716 | "metadata": {
717 | "slideshow": {
718 | "slide_type": "slide"
719 | }
720 | },
721 | "source": [
722 | "Additionally, Dask isn't the only project filling this need. There's also:\n",
723 | "\n",
724 | " * **Joblib:** annotate functions to execute remotely with decorators.\n",
725 | " * **Parsl:** same, but work with conventional schedulers (Condor, Slurm, GRID); an academic project.\n",
726 | " * **PySpark:** Spark is a big, scalable project, though its Python interface has performance issues.\n",
727 | "\n",
728 | "and many smaller projects.\n",
729 | "\n",
730 | "(Distributed computing hasn't been fully figured out yet.)"
731 | ]
732 | }
733 | ],
734 | "metadata": {
735 | "celltoolbar": "Slideshow",
736 | "kernelspec": {
737 | "display_name": "Python 3",
738 | "language": "python",
739 | "name": "python3"
740 | },
741 | "language_info": {
742 | "codemirror_mode": {
743 | "name": "ipython",
744 | "version": 3
745 | },
746 | "file_extension": ".py",
747 | "mimetype": "text/x-python",
748 | "name": "python",
749 | "nbconvert_exporter": "python",
750 | "pygments_lexer": "ipython3",
751 | "version": "3.7.0"
752 | }
753 | },
754 | "nbformat": 4,
755 | "nbformat_minor": 2
756 | }
757 |
--------------------------------------------------------------------------------
/6-compilers.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Compilers: Numba, Cython, ~~pybind11~~"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "slideshow": {
18 | "slide_type": "fragment"
19 | }
20 | },
21 | "source": [
22 | "```\n",
23 | "conda install numba cython\n",
24 | "```\n",
25 | "\n",
26 | "_(and numpy, matplotlib, dask from before)_"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "slideshow": {
33 | "slide_type": "slide"
34 | }
35 | },
36 | "source": [
37 | "Speeding things up through parallel processing is called \"horizontal scaling.\" Often, analysis code can also be accelerated on a single thread, known as \"vertical scaling.\"\n",
38 | "\n",
39 | "Horizontal and vertical scaling are complementary."
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {
45 | "slideshow": {
46 | "slide_type": "slide"
47 | }
48 | },
49 | "source": [
50 | "Let's illustrate this with the fractal example from last time."
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "%matplotlib inline\n",
60 | "import matplotlib.pyplot as plt\n",
61 | "import numpy\n",
62 | "import time\n",
63 | "\n",
64 | "def run_numpy(height, width, maxiterations=20):\n",
65 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j] # ask Numpy to make an x, y grid for us\n",
66 | " c = x + y*1j # c is a constant: a grid of complex coordinates\n",
67 | " z = c\n",
68 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations # the fractal image starts as \"20\" everywhere\n",
69 | " for i in range(maxiterations):\n",
70 | " z = z**2 + c # iteratively apply z -> z**2 + c\n",
71 | " diverge = numpy.absolute(z) > 2 # define \"divergence\" by |z| > 2\n",
72 | " divnow = diverge & (fractal == maxiterations) # the pixels that are diverging in this iteration\n",
73 | " fractal[divnow] = i # the fractal image is a plot of the iteration number\n",
74 | " z[diverge] = 2 # clamp to 2 so they don't diverge too much\n",
75 | " return fractal"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {
81 | "slideshow": {
82 | "slide_type": "fragment"
83 | }
84 | },
85 | "source": [
86 | "Stare at this code: it performs operations across the whole grid, identifies pixels that have diverged, and repeats everything 20 times, even though the parts that have already diverged are \"done.\""
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {
92 | "slideshow": {
93 | "slide_type": "slide"
94 | }
95 | },
96 | "source": [
97 | "More significant than the unnecessary work, though, is the memory movement. Each `z**2 + c` creates new intermediate arrays, moving a lot of memory, flushing CPU caches. Nowadays, mathematical operations are much faster than memory movement."
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {
103 | "slideshow": {
104 | "slide_type": "slide"
105 | }
106 | },
107 | "source": [
108 | "As a reminder, this took 35 seconds to run."
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "starttime = time.time()\n",
118 | "fractal = run_numpy(8000, 12000)\n",
119 | "time.time() - starttime"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {
125 | "slideshow": {
126 | "slide_type": "slide"
127 | }
128 | },
129 | "source": [
130 | "If we weren't using Numpy, we'd write the algorithm differently: we'd deal with one pixel at a time. Once the pixel has diverged, we'd move on to the next, saving some work. But more importantly, we make only one pass over the image, avoiding repeated memory access."
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "def run_python(height, width, maxiterations=20):\n",
140 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
141 | " c = x + y*1j\n",
142 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
143 | " for h in range(height):\n",
144 | " for w in range(width):\n",
145 | " z = c[h, w]\n",
146 | " for i in range(maxiterations):\n",
147 | " z = z**2 + c[h, w]\n",
148 | " if abs(z) > 2:\n",
149 | " fractal[h, w] = i\n",
150 | " break\n",
151 | " return fractal"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {
157 | "slideshow": {
158 | "slide_type": "slide"
159 | }
160 | },
161 | "source": [
162 | "Before I run this, I'm going to drop the number of pixels from 8000×12000 to 800×1200, a factor of 100.\n",
163 | "\n",
164 | "We don't want to wait 900 seconds (15 minutes)!"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": null,
170 | "metadata": {},
171 | "outputs": [],
172 | "source": [
173 | "starttime = time.time()\n",
174 | "fractal = run_python(800, 1200)\n",
175 | "time.time() - starttime"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {
182 | "slideshow": {
183 | "slide_type": "slide"
184 | }
185 | },
186 | "outputs": [],
187 | "source": [
188 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
189 | "ax.imshow(fractal)\n",
190 | "# ax.imshow(fractal[-200:, :300])"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {
196 | "slideshow": {
197 | "slide_type": "fragment"
198 | }
199 | },
200 | "source": [
201 | "It works, but it's _super slow!_ This is how an efficient algorithm would go, but stepping through each pixel in Python code kills performance due to all the type-checking, numeric boxing, and virtualization that Python does."
202 | ]
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {
207 | "slideshow": {
208 | "slide_type": "slide"
209 | }
210 | },
211 | "source": [
212 | "At this point, we'd normally start thinking about compiled code. And we should: compilation is exactly how to avoid all the aforementioned issues."
213 | ]
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {
218 | "slideshow": {
219 | "slide_type": "fragment"
220 | }
221 | },
222 | "source": [
223 | "However, we _don't_ need to rewrite our code in another language."
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "metadata": {
230 | "slideshow": {
231 | "slide_type": "slide"
232 | }
233 | },
234 | "outputs": [],
235 | "source": [
236 | "import numba\n",
237 | "\n",
238 | "@numba.jit\n",
239 | "def run_numba_loop(height, width, maxiterations, c, fractal):\n",
240 | " for h in range(height):\n",
241 | " for w in range(width):\n",
242 | " z = c[h, w]\n",
243 | " for i in range(maxiterations):\n",
244 | " z = z**2 + c[h, w]\n",
245 | " if abs(z) > 2:\n",
246 | " fractal[h, w] = i\n",
247 | " break\n",
248 | " return fractal\n",
249 | "\n",
250 | "def run_numba(height, width, maxiterations=20):\n",
251 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
252 | " c = x + y*1j\n",
253 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
254 | " return run_numba_loop(height, width, maxiterations, c, fractal)"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": null,
260 | "metadata": {
261 | "slideshow": {
262 | "slide_type": "fragment"
263 | }
264 | },
265 | "outputs": [],
266 | "source": [
267 | "starttime = time.time()\n",
268 | "fractal = run_numba(8000, 12000, maxiterations=20)\n",
269 | "time.time() - starttime"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {
275 | "slideshow": {
276 | "slide_type": "slide"
277 | }
278 | },
279 | "source": [
280 | "Numba is a \"just in time\" compiler (JIT) for numeric Python. That is, it compiles the Python code as soon as it knows the data types of the inputs, just before execution. (Remember that the compilation time is included in the measurement— it's small compared to 10 seconds, though.)"
281 | ]
282 | },
283 | {
284 | "cell_type": "markdown",
285 | "metadata": {
286 | "slideshow": {
287 | "slide_type": "fragment"
288 | }
289 | },
290 | "source": [
291 | "Numba knew to compile the `run_numba_loop` function because it was preceded by the decorator `@numba.jit`. It is now a wrapped function."
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": null,
297 | "metadata": {},
298 | "outputs": [],
299 | "source": [
300 | "run_numba_loop"
301 | ]
302 | },
303 | {
304 | "cell_type": "markdown",
305 | "metadata": {
306 | "slideshow": {
307 | "slide_type": "slide"
308 | }
309 | },
310 | "source": [
311 | "Its \"overloads\" are the saved, compiled functions for each signature. There's only one so far: `int, int, int, array(complex), array(int)`."
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": null,
317 | "metadata": {
318 | "scrolled": false
319 | },
320 | "outputs": [],
321 | "source": [
322 | "run_numba_loop.overloads"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {
328 | "slideshow": {
329 | "slide_type": "slide"
330 | }
331 | },
332 | "source": [
333 | "Notice that we split the process into `run_numba`, a plain function, and `run_numba_loop`, a JIT-compiled function. Not all Python can be compiled, or we'd be doing it all the time! Python is a highly dynamic language (did you know you can change an object's class after it's created?), so there will always be things Python can do that a compiled language can't do. There will always be data types Numba doesn't recognize."
334 | ]
335 | },
336 | {
337 | "cell_type": "markdown",
338 | "metadata": {
339 | "slideshow": {
340 | "slide_type": "fragment"
341 | }
342 | },
343 | "source": [
344 | "Numba lists the [Python language features](https://numba.pydata.org/numba-doc/latest/reference/pysupported.html) and [Numpy types and functions](https://numba.pydata.org/numba-doc/latest/reference/numpysupported.html) that it recognizes on its website. This is a growing list, but it will never converge to the entirety of Python and all its libraries."
345 | ]
346 | },
347 | {
348 | "cell_type": "markdown",
349 | "metadata": {
350 | "slideshow": {
351 | "slide_type": "fragment"
352 | }
353 | },
354 | "source": [
355 | "For the most part, Numba recognizes numbers and arrays, and even if it can handle a given language feature (iterators, classes), it will perform best on simple loops and straightforward code. Generally, you only want to wrap the most arithmetically intense part of your calculation.\n",
356 | "\n",
357 | "In the above example, I didn't include the array-creation steps because the first one was an unsupported function (`numpy.ogrid`)."
358 | ]
359 | },
360 | {
361 | "cell_type": "markdown",
362 | "metadata": {
363 | "slideshow": {
364 | "slide_type": "slide"
365 | }
366 | },
367 | "source": [
368 | "One of the early reasons for Numba's existence was to write new Numpy universal functions (\"ufuncs\")."
369 | ]
370 | },
371 | {
372 | "cell_type": "code",
373 | "execution_count": null,
374 | "metadata": {},
375 | "outputs": [],
376 | "source": [
377 | "@numba.vectorize\n",
378 | "def as_ufunc(c, maxiterations):\n",
379 | " z = c\n",
380 | " for i in range(maxiterations):\n",
381 | " z = z**2 + c\n",
382 | " if abs(z) > 2:\n",
383 | " return i\n",
384 | " return maxiterations\n",
385 | "\n",
386 | "def run_numba_2(height, width, maxiterations=20):\n",
387 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
388 | " c = x + y*1j\n",
389 | " return as_ufunc(c, maxiterations)"
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": null,
395 | "metadata": {
396 | "slideshow": {
397 | "slide_type": "fragment"
398 | }
399 | },
400 | "outputs": [],
401 | "source": [
402 | "starttime = time.time()\n",
403 | "fractal = run_numba_2(8000, 12000, maxiterations=20)\n",
404 | "time.time() - starttime"
405 | ]
406 | },
407 | {
408 | "cell_type": "code",
409 | "execution_count": null,
410 | "metadata": {
411 | "slideshow": {
412 | "slide_type": "fragment"
413 | }
414 | },
415 | "outputs": [],
416 | "source": [
417 | "type(as_ufunc)"
418 | ]
419 | },
420 | {
421 | "cell_type": "markdown",
422 | "metadata": {
423 | "slideshow": {
424 | "slide_type": "slide"
425 | }
426 | },
427 | "source": [
428 | "This is only possible if the process we want to apply is elementwise— we do an independent thing to each element, and the output shape is the same as the input shape— because that's what a ufunc does. The function definition is much simpler since the input argument `c` is now a (complex) number, rather than an array. We don't need to write the for loops."
429 | ]
430 | },
431 | {
432 | "cell_type": "markdown",
433 | "metadata": {
434 | "slideshow": {
435 | "slide_type": "slide"
436 | }
437 | },
438 | "source": [
439 | "It even has the funky ufunc methods, like `.at` and `.reduce`:"
440 | ]
441 | },
442 | {
443 | "cell_type": "code",
444 | "execution_count": null,
445 | "metadata": {},
446 | "outputs": [],
447 | "source": [
448 | "a = numpy.arange(0, 2, 0.1) * 1j\n",
449 | "a"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": null,
455 | "metadata": {},
456 | "outputs": [],
457 | "source": [
458 | "as_ufunc.at(a, [0, 2, 4, 6, 8, 10, 12, 14], 20)\n",
459 | "a"
460 | ]
461 | },
462 | {
463 | "cell_type": "markdown",
464 | "metadata": {
465 | "slideshow": {
466 | "slide_type": "slide"
467 | }
468 | },
469 | "source": [
470 | "Remember when I said horizontal and vertical scaling are complementary? I didn't say that they're multiplicative because it's sometimes much better than that. Let's put this Numba-compiled ufunc into Dask:"
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": null,
476 | "metadata": {},
477 | "outputs": [],
478 | "source": [
479 | "import dask.array\n",
480 | "\n",
481 | "def run_dask(height, width, maxiterations=20):\n",
482 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
483 | " c = dask.array.from_array(x + y*1j, chunks=(100, 12000))\n",
484 | " return as_ufunc(c, maxiterations)"
485 | ]
486 | },
487 | {
488 | "cell_type": "code",
489 | "execution_count": null,
490 | "metadata": {
491 | "slideshow": {
492 | "slide_type": "fragment"
493 | }
494 | },
495 | "outputs": [],
496 | "source": [
497 | "starttime = time.time()\n",
498 | "fractal = run_dask(8000, 12000, maxiterations=20).compute()\n",
499 | "time.time() - starttime"
500 | ]
501 | },
502 | {
503 | "cell_type": "markdown",
504 | "metadata": {
505 | "slideshow": {
506 | "slide_type": "slide"
507 | }
508 | },
509 | "source": [
510 | "It took\n",
511 | "\n",
512 | " * 35 seconds to run in Numpy on 1 core.\n",
513 | " * 21 seconds to run in Numpy on 12 cores with Dask.\n",
514 | " * 10 seconds to run as a Numba-compiled ufunc on 1 core.\n",
515 | " * 3.7 seconds to run as a Numba-compiled ufunc on 12 cores with Dask.\n",
516 | "\n",
517 | "Dask multiprocessing scales better with the Numba-compiled ufunc because it's a much simpler computation graph. Dask can't see inside `as_ufunc` to worry about interdependencies."
518 | ]
519 | },
520 | {
521 | "cell_type": "code",
522 | "execution_count": null,
523 | "metadata": {
524 | "slideshow": {
525 | "slide_type": "slide"
526 | }
527 | },
528 | "outputs": [],
529 | "source": [
530 | "c = dask.array.from_array(numpy.array([[0j, 1j], [0j, 1j]]), chunks=(1, 1))\n",
531 | "as_ufunc(c, 20).visualize()"
532 | ]
533 | },
534 | {
535 | "cell_type": "markdown",
536 | "metadata": {
537 | "slideshow": {
538 | "slide_type": "fragment"
539 | }
540 | },
541 | "source": [
542 | "Remember how it used to look? How its complexity scaled with the number of iterations? Now all of the complexity of our algorithm is internal to `as_ufunc`."
543 | ]
544 | },
545 | {
546 | "cell_type": "markdown",
547 | "metadata": {
548 | "slideshow": {
549 | "slide_type": "slide"
550 | }
551 | },
552 | "source": [
553 | "(Incidentally, the reason it's not scaling beyond 3 cores is likely memory bandwidth: the above example was fetching memory at 1.5 GB/sec, which I've found to be an approximate limit on all systems I've encountered except for Knight's Landing's MCDRAM.)"
554 | ]
555 | },
556 | {
557 | "cell_type": "markdown",
558 | "metadata": {
559 | "slideshow": {
560 | "slide_type": "fragment"
561 | }
562 | },
563 | "source": [
564 | "I presented Numba first because it involves the least change to your code— the orthodox mantra is to get your code working first, profile it to find the slowest parts, and only accelerate those parts. Numba lets you do that with the least effort."
565 | ]
566 | },
567 | {
568 | "cell_type": "markdown",
569 | "metadata": {
570 | "slideshow": {
571 | "slide_type": "fragment"
572 | }
573 | },
574 | "source": [
575 | "But sometimes you need something more: features that are only available in C++, for instance. Python is unable to express some concepts related to performance tuning (deliberately: to keep the language simple) and compilers aren't magical— Numba can't always guess what you mean."
576 | ]
577 | },
578 | {
579 | "cell_type": "markdown",
580 | "metadata": {
581 | "slideshow": {
582 | "slide_type": "slide"
583 | }
584 | },
585 | "source": [
586 | "Cython is a halfway language, part Python and part C/C++. It translates to C or C++ and uses a conventional compiler to turn into a Python extension module. They also have a Jupyter extension, which I'll use for this demo."
587 | ]
588 | },
589 | {
590 | "cell_type": "code",
591 | "execution_count": null,
592 | "metadata": {},
593 | "outputs": [],
594 | "source": [
595 | "%load_ext Cython"
596 | ]
597 | },
598 | {
599 | "cell_type": "markdown",
600 | "metadata": {
601 | "slideshow": {
602 | "slide_type": "slide"
603 | }
604 | },
605 | "source": [
606 | "The following cell creates C++ code from Python, compiles it, and loads the resulting Python module."
607 | ]
608 | },
609 | {
610 | "cell_type": "code",
611 | "execution_count": null,
612 | "metadata": {},
613 | "outputs": [],
614 | "source": [
615 | "%%cython --cplus\n",
616 | "import numpy\n",
617 | "\n",
618 | "def run_cython(height, width, maxiterations=20):\n",
619 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
620 | " c = x + y*1j\n",
621 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
622 | " for h in range(height):\n",
623 | " for w in range(width):\n",
624 | " z = c[h, w]\n",
625 | " for i in range(maxiterations):\n",
626 | " z = z**2 + c[h, w]\n",
627 | " if abs(z) > 2:\n",
628 | " fractal[h, w] = i\n",
629 | " break\n",
630 | " return fractal"
631 | ]
632 | },
633 | {
634 | "cell_type": "code",
635 | "execution_count": null,
636 | "metadata": {
637 | "slideshow": {
638 | "slide_type": "slide"
639 | }
640 | },
641 | "outputs": [],
642 | "source": [
643 | "starttime = time.time()\n",
644 | "fractal = run_cython(800, 1200) # note: small grid; 100× smaller problem\n",
645 | "time.time() - starttime"
646 | ]
647 | },
648 | {
649 | "cell_type": "markdown",
650 | "metadata": {
651 | "slideshow": {
652 | "slide_type": "fragment"
653 | }
654 | },
655 | "source": [
656 | "But the resulting _compiled_ module runs almost as slowly as Python itself: 7 sec vs 9.5 sec (note: we're using the smaller grid again, so this is hundreds of times slower than Numpy or Numba). Why is that? Isn't this compiled?"
657 | ]
658 | },
659 | {
660 | "cell_type": "markdown",
661 | "metadata": {
662 | "slideshow": {
663 | "slide_type": "fragment"
664 | }
665 | },
666 | "source": [
667 | "The issue is that Cython does nothing about all the runtime type-checking of Python objects. Numba replaced Python objects with raw numbers, where possible, which makes the real difference."
668 | ]
669 | },
670 | {
671 | "cell_type": "markdown",
672 | "metadata": {
673 | "slideshow": {
674 | "slide_type": "fragment"
675 | }
676 | },
677 | "source": [
678 | "Cython chose to cover the entire Python language and make naive translations by default. Numba chose to make optimized translations by default but not cover the entire Python language."
679 | ]
680 | },
681 | {
682 | "cell_type": "markdown",
683 | "metadata": {
684 | "slideshow": {
685 | "slide_type": "slide"
686 | }
687 | },
688 | "source": [
689 | "To get optimizations, we have to introduce C++ by hand."
690 | ]
691 | },
692 | {
693 | "cell_type": "code",
694 | "execution_count": null,
695 | "metadata": {},
696 | "outputs": [],
697 | "source": [
698 | "%%cython --cplus --annotate\n",
699 | "import cython\n",
700 | "import numpy # load Python interface to Numpy\n",
701 | "cimport numpy # load C++ interface to Numpy (types end in _t)\n",
702 | "\n",
703 | "@cython.boundscheck(False) # turn off bounds-checking\n",
704 | "@cython.wraparound(False) # turn off negative index wrapping (e.g. -1 for last element)\n",
705 | "def run_cython(int height, int width, int maxiterations=20):\n",
706 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
707 | " c = x + y*1j\n",
708 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
709 | "\n",
710 | " cdef numpy.ndarray[numpy.complex128_t, ndim=2, mode=\"c\"] c_array = c\n",
711 | " cdef numpy.ndarray[numpy.int32_t, ndim=2, mode=\"c\"] fractal_array = fractal\n",
712 | " cdef numpy.complex128_t z\n",
713 | " \n",
714 | " for h in range(height):\n",
715 | " for w in range(width):\n",
716 | " z = c_array[h, w]\n",
717 | " for i in range(maxiterations):\n",
718 | " z = z**2 + c_array[h, w]\n",
719 | " if abs(z) > 2:\n",
720 | " fractal_array[h, w] = i\n",
721 | " break\n",
722 | " return fractal"
723 | ]
724 | },
725 | {
726 | "cell_type": "markdown",
727 | "metadata": {
728 | "slideshow": {
729 | "slide_type": "slide"
730 | }
731 | },
732 | "source": [
733 | "(Still using the small grid; still unable to scale to native speeds.)"
734 | ]
735 | },
736 | {
737 | "cell_type": "code",
738 | "execution_count": null,
739 | "metadata": {},
740 | "outputs": [],
741 | "source": [
742 | "starttime = time.time()\n",
743 | "fractal = run_cython(800, 1200) # note: small grid; 100× smaller problem\n",
744 | "time.time() - starttime"
745 | ]
746 | },
747 | {
748 | "cell_type": "markdown",
749 | "metadata": {
750 | "slideshow": {
751 | "slide_type": "slide"
752 | }
753 | },
754 | "source": [
755 | "In addition to importing Python libraries, Cython can include C++ headers. A hidden feature in the `cdef extern` syntax for including C++ allows you to write literal C++ in your Cython."
756 | ]
757 | },
758 | {
759 | "cell_type": "code",
760 | "execution_count": null,
761 | "metadata": {},
762 | "outputs": [],
763 | "source": [
764 | "%%cython --cplus -c-O3\n",
765 | "import numpy\n",
766 | "\n",
767 | "cdef extern from *:\n",
768 | " \"\"\"\n",
769 | "    #include <complex>\n",
770 | " void quick(int height, int width, int maxiterations, double* c, int* fractal) {\n",
771 | " for (int h = 0; h < height; h++) {\n",
772 | " for (int w = 0; w < width; w++) {\n",
773 | " double creal = c[2 * (h + height*w)];\n",
774 | " double cimag = c[2 * (h + height*w) + 1];\n",
775 | "            std::complex<double> ci = std::complex<double>(creal, cimag);\n",
776 | "            std::complex<double> z = ci;\n",
777 | " for (int i = 0; i < maxiterations; i++) {\n",
778 | " z = z * z + ci;\n",
779 | " if (std::abs(z) > 2) {\n",
780 | " fractal[h + height*w] = i;\n",
781 | " break;\n",
782 | " }\n",
783 | " }\n",
784 | " }\n",
785 | " }\n",
786 | " }\n",
787 | " \"\"\"\n",
788 | " void quick(int height, int width, int maxiterations, double* c, int* fractal)\n",
789 | "\n",
790 | "def run_cython(int height, int width, int maxiterations=20):\n",
791 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
792 | " c = x + y*1j\n",
793 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
794 | "    quick(height, width, maxiterations, <double*> (c.ctypes.data), <int*> (fractal.ctypes.data))\n",
795 | " return fractal"
796 | ]
797 | },
798 | {
799 | "cell_type": "markdown",
800 | "metadata": {
801 | "slideshow": {
802 | "slide_type": "slide"
803 | }
804 | },
805 | "source": [
806 | "Now we can use the full grid. This is\n",
807 | "\n",
808 | " * 2× slower than Numba,\n",
809 | " * 2× faster than Numpy, and\n",
810 | " * 45× faster than Python."
811 | ]
812 | },
813 | {
814 | "cell_type": "code",
815 | "execution_count": null,
816 | "metadata": {},
817 | "outputs": [],
818 | "source": [
819 | "starttime = time.time()\n",
820 | "fractal = run_cython(8000, 12000)\n",
821 | "time.time() - starttime"
822 | ]
823 | },
824 | {
825 | "cell_type": "markdown",
826 | "metadata": {
827 | "slideshow": {
828 | "slide_type": "slide"
829 | }
830 | },
831 | "source": [
832 | "Although Cython was originally intended as a code optimizer (you can see that it's difficult to use it that way!), it has come to be used to _bind_ C++ libraries as Python extensions, since it can speak both languages."
833 | ]
834 | },
835 | {
836 | "cell_type": "markdown",
837 | "metadata": {
838 | "slideshow": {
839 | "slide_type": "fragment"
840 | }
841 | },
842 | "source": [
843 | "Today, there's another alternative: pybind11 is a C++ header for binding to Python (coming from the other direction, from C++ to Python)."
844 | ]
845 | },
846 | {
847 | "cell_type": "markdown",
848 | "metadata": {
849 | "slideshow": {
850 | "slide_type": "slide"
851 | }
852 | },
853 | "source": [
854 | "I would recommend:\n",
855 | "\n",
856 | " * **Numba** for accelerating small bits of numerical code.\n",
857 | " * **Cython** for mixing C++ into a mostly Python script: for instance, to access C++ only libraries.\n",
858 | " * **pybind11** for wrapping C++ cleanly as Python modules: for instance, you're distributing a C++ library for use in Python."
859 | ]
860 | },
861 | {
862 | "cell_type": "markdown",
863 | "metadata": {
864 | "slideshow": {
865 | "slide_type": "fragment"
866 | }
867 | },
868 | "source": [
869 | "(I've dropped pybind11 content because I'm sure we'd run out of time.)"
870 | ]
871 | },
872 | {
873 | "cell_type": "markdown",
874 | "metadata": {
875 | "slideshow": {
876 | "slide_type": "slide"
877 | }
878 | },
879 | "source": [
880 | "Last minute addition: since you've installed Pandas, you have NumExpr. This is a very easy way to accelerate \"one in, one out\" formulae. Our fractal is more complex than that, but many formulas aren't."
881 | ]
882 | },
883 | {
884 | "cell_type": "code",
885 | "execution_count": null,
886 | "metadata": {},
887 | "outputs": [],
888 | "source": [
889 | "import numexpr"
890 | ]
891 | },
892 | {
893 | "cell_type": "code",
894 | "execution_count": null,
895 | "metadata": {},
896 | "outputs": [],
897 | "source": [
898 | "a = numpy.arange(1e6)\n",
899 | "b = numpy.arange(1e6)\n",
900 | "numexpr.evaluate(\"sin(a) + arcsinh(b)\")"
901 | ]
902 | }
903 | ],
904 | "metadata": {
905 | "celltoolbar": "Slideshow",
906 | "kernelspec": {
907 | "display_name": "Python 3",
908 | "language": "python",
909 | "name": "python3"
910 | },
911 | "language_info": {
912 | "codemirror_mode": {
913 | "name": "ipython",
914 | "version": 3
915 | },
916 | "file_extension": ".py",
917 | "mimetype": "text/x-python",
918 | "name": "python",
919 | "nbconvert_exporter": "python",
920 | "pygments_lexer": "ipython3",
921 | "version": "3.7.0"
922 | }
923 | },
924 | "nbformat": 4,
925 | "nbformat_minor": 2
926 | }
927 |
--------------------------------------------------------------------------------
/7-gpu.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# GPU: CuPy, Numba-GPU, PyCUDA"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "slideshow": {
18 | "slide_type": "fragment"
19 | }
20 | },
21 | "source": [
22 | "```\n",
23 | "conda install cupy cudatoolkit\n",
24 | "```\n",
25 | "\n",
26 | "```\n",
27 | "export CFLAGS=-fpermissive\n",
28 | "pip install --no-cache-dir pycuda # I have more luck with this one in pip\n",
29 | "```\n",
30 | "\n",
31 | "_(and numpy, matplotlib, numba from before)_"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "slideshow": {
38 | "slide_type": "slide"
39 | }
40 | },
41 | "source": [
42 | "If you can get better memory efficiency using rowwise code (e.g. compiled for loops), why would you ever write columnar code (e.g. Numpy)?"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {
48 | "slideshow": {
49 | "slide_type": "fragment"
50 | }
51 | },
52 | "source": [
53 | "**Answer:** vectorization!"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {
59 | "slideshow": {
60 | "slide_type": "fragment"
61 | }
62 | },
63 | "source": [
64 | "Vectorization is a vertical scaling technique that uses a single CPU core or a GPU more effectively. You can compute N operations at the same time _if they are all the same operation._\n",
65 | "\n",
66 | "<img src=\"img/vectorization-example.png\" width=\"100%\">"
67 | ]
68 | },
69 | {
70 | "cell_type": "markdown",
71 | "metadata": {
72 | "slideshow": {
73 | "slide_type": "slide"
74 | }
75 | },
76 | "source": [
77 | "If you don't fully utilize all cores, that's okay; someone else's work can fill the gaps.\n",
78 | "\n",
79 | "If you don't fully utilize the core's vector unit, no one else can use them."
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {
85 | "slideshow": {
86 | "slide_type": "fragment"
87 | }
88 | },
89 | "source": [
90 | "A GPU is a computational device designed around vector units."
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {
96 | "slideshow": {
97 | "slide_type": "slide"
98 | }
99 | },
100 | "source": [
101 | "Like parallel processing, this is another computing detail that is visible to you as a data analyst.\n",
102 | "\n",
103 | "Rowwise code like"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": null,
109 | "metadata": {
110 | "slideshow": {
111 | "slide_type": "-"
112 | }
113 | },
114 | "outputs": [],
115 | "source": [
116 | "import numba\n",
117 | "@numba.jit\n",
118 | "def run_numba_loop(height, width, maxiterations, c, fractal):\n",
119 | " for h in range(height):\n",
120 | " for w in range(width):\n",
121 | " z = c[h, w]\n",
122 | " for i in range(maxiterations):\n",
123 | " z = z**2 + c[h, w]\n",
124 | " if abs(z) > 2:\n",
125 | " fractal[h, w] = i\n",
126 | " break\n",
127 | " return fractal"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {
133 | "slideshow": {
134 | "slide_type": "-"
135 | }
136 | },
137 | "source": [
138 | "does not use vector units effectively because each array element may be in a different stage of processing— some may have diverged before others."
139 | ]
140 | },
141 | {
142 | "cell_type": "markdown",
143 | "metadata": {
144 | "slideshow": {
145 | "slide_type": "slide"
146 | }
147 | },
148 | "source": [
149 | "Columnar code like"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "import numpy\n",
159 | "import time\n",
160 | "\n",
161 | "def prepare(height, width):\n",
162 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
163 | " c = x + y*1j\n",
164 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32)\n",
165 | " return c, fractal\n",
166 | "\n",
167 | "def run(c, fractal, maxiterations=20):\n",
168 | " fractal *= 0 # set fractal to maxiterations without replacing it\n",
169 | " fractal += maxiterations\n",
170 | " z = c\n",
171 | " for i in range(maxiterations):\n",
172 | " z = z**2 + c\n",
173 | " diverge = z.real**2 + z.imag**2 > 2**2\n",
174 | " divnow = diverge & (fractal == maxiterations)\n",
175 | " fractal[divnow] = i\n",
176 | " z[diverge] = 2\n",
177 | " return fractal"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "metadata": {},
183 | "source": [
184 | "can use vector units effectively because it's always applying the Same Instruction on Multiple Data (SIMD)."
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {
190 | "slideshow": {
191 | "slide_type": "slide"
192 | }
193 | },
194 | "source": [
195 | "All we need is a library to implement the Numpy functions on a GPU."
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": null,
201 | "metadata": {
202 | "slideshow": {
203 | "slide_type": "fragment"
204 | }
205 | },
206 | "outputs": [],
207 | "source": [
208 | "import cupy"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "slideshow": {
216 | "slide_type": "fragment"
217 | }
218 | },
219 | "outputs": [],
220 | "source": [
221 | "c, fractal = prepare(4000, 6000)\n",
222 | "\n",
223 | "c = cupy.array(c)\n",
224 | "fractal = cupy.array(fractal)\n",
225 | "\n",
226 | "starttime = time.time()\n",
227 | "fractal = run(c, fractal)\n",
228 | "time.time() - starttime"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {
235 | "slideshow": {
236 | "slide_type": "fragment"
237 | }
238 | },
239 | "outputs": [],
240 | "source": [
241 | "c, fractal = prepare(4000, 6000)\n",
242 | "\n",
243 | "starttime = time.time()\n",
244 | "fractal = run(c, fractal)\n",
245 | "time.time() - starttime"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {
251 | "slideshow": {
252 | "slide_type": "fragment"
253 | }
254 | },
255 | "source": [
256 | "Exactly the same code: first with CuPy on the GPU (2.8 sec), then with Numpy on the CPU (7.5 sec)."
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {
262 | "slideshow": {
263 | "slide_type": "slide"
264 | }
265 | },
266 | "source": [
267 | "If you're wondering why I'm working on a reduced problem (4× smaller than previous sessions), it's because I couldn't fit the full one in my GPU's memory!"
268 | ]
269 | },
270 | {
271 | "cell_type": "markdown",
272 | "metadata": {
273 | "slideshow": {
274 | "slide_type": "fragment"
275 | }
276 | },
277 | "source": [
278 | "(There's always a catch!)"
279 | ]
280 | },
281 | {
282 | "cell_type": "markdown",
283 | "metadata": {
284 | "slideshow": {
285 | "slide_type": "slide"
286 | }
287 | },
288 | "source": [
289 | "Also, CuPy's adherence to the Numpy API isn't perfect: I had to write\n",
290 | "\n",
291 | "```python\n",
292 | "z.real**2 + z.imag**2\n",
293 | "```\n",
294 | "\n",
295 | "instead of\n",
296 | "\n",
297 | "```python\n",
298 | "numpy.absolute(z)\n",
299 | "```\n",
300 | "\n",
301 | "because the `absolute` function wasn't supported. This is the error you'd see:"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": null,
307 | "metadata": {},
308 | "outputs": [],
309 | "source": [
310 | "try:\n",
311 | " numpy.absolute(cupy.array([1.1, 2.2, 3.3]))\n",
312 | "except ValueError as err:\n",
313 | " print(err)"
314 | ]
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {
319 | "slideshow": {
320 | "slide_type": "fragment"
321 | }
322 | },
323 | "source": [
324 | "Nevertheless, we can expect CuPy to become more complete as people use it and report missing features."
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {
330 | "slideshow": {
331 | "slide_type": "slide"
332 | }
333 | },
334 | "source": [
335 | "**GPU method #2:** Use Numba! (You have to install a \"cudatoolkit\" library with it.)"
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "execution_count": null,
341 | "metadata": {},
342 | "outputs": [],
343 | "source": [
344 | "import numba.cuda\n",
345 | "import math\n",
346 | "\n",
347 | "@numba.cuda.jit\n",
348 | "def as_cuda(c, fractal, maxiterations):\n",
349 | " x, y = numba.cuda.grid(2) # 2 dimensional CUDA grid\n",
350 | " z = c[x, y]\n",
351 | "    fractal[x, y] = maxiterations\n",
352 | " for i in range(maxiterations):\n",
353 | " z = z**2 + c[x, y]\n",
354 | " if abs(z) > 2:\n",
355 | " fractal[x, y] = i\n",
356 | " break # not optimal: threads that leave the loop still have to wait\n",
357 | "\n",
358 | "def run_numba(height, width, maxiterations=20):\n",
359 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
360 | " c = x + y*1j\n",
361 | " fractal = numba.cuda.device_array(c.shape, dtype=numpy.int32)\n",
362 | " as_cuda[(math.ceil(height / 32), math.ceil(width / 32)), (32, 32)](c, fractal, maxiterations)\n",
363 | " return fractal"
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": null,
369 | "metadata": {},
370 | "outputs": [],
371 | "source": [
372 | "starttime = time.time()\n",
373 | "fractal = run_numba(4000, 6000)\n",
374 | "time.time() - starttime"
375 | ]
376 | },
377 | {
378 | "cell_type": "markdown",
379 | "metadata": {
380 | "slideshow": {
381 | "slide_type": "slide"
382 | }
383 | },
384 | "source": [
385 | "On the same sized problem,\n",
386 | "\n",
387 | " * Numpy on the CPU: 7.5 sec\n",
388 | " * CuPy on the GPU: 2.8 sec\n",
389 | " * Numba on the GPU: 0.3 sec\n",
390 | "\n",
391 | "And Numba doesn't suffer from the memory issue because it doesn't make as many intermediate copies."
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": null,
397 | "metadata": {
398 | "slideshow": {
399 | "slide_type": "fragment"
400 | }
401 | },
402 | "outputs": [],
403 | "source": [
404 | "starttime = time.time()\n",
405 | "fractal = run_numba(8000, 12000) # full-sized problem\n",
406 | "time.time() - starttime"
407 | ]
408 | },
409 | {
410 | "cell_type": "markdown",
411 | "metadata": {
412 | "slideshow": {
413 | "slide_type": "slide"
414 | }
415 | },
416 | "source": [
417 | "That full-sized problem used to take us half a minute in Numpy, and (projected) 15 minutes in pure Python. For sanity's sake, we verify that it is, indeed, drawing our fractal."
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": null,
423 | "metadata": {},
424 | "outputs": [],
425 | "source": [
426 | "%matplotlib inline\n",
427 | "import matplotlib.pyplot as plt\n",
428 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
429 | "ax.imshow(fractal)\n",
430 | "# ax.imshow(fractal[-2000:, :3000])"
431 | ]
432 | },
433 | {
434 | "cell_type": "markdown",
435 | "metadata": {
436 | "slideshow": {
437 | "slide_type": "slide"
438 | }
439 | },
440 | "source": [
441 | "**Method #3:** PyCUDA. This library is somewhat older and hard to distribute nowadays (it's not Python 3.7 compliant). However, it is unique in letting you write any CUDA code (e.g. copied from the web) in Python without wrapping it as a library."
442 | ]
443 | },
444 | {
445 | "cell_type": "code",
446 | "execution_count": null,
447 | "metadata": {},
448 | "outputs": [],
449 | "source": [
450 | "import pycuda.autoinit\n",
451 | "import pycuda.driver\n",
452 | "import pycuda.compiler\n",
453 | "\n",
454 | "module = pycuda.compiler.SourceModule(\"\"\"\n",
455 | "__global__ void from_pycuda(double* c, int* fractal, int height, int width, int maxiterations) {\n",
456 | " const int x = threadIdx.x + blockIdx.x*blockDim.x;\n",
457 | " const int y = threadIdx.y + blockIdx.y*blockDim.y;\n",
458 | " double creal = c[2 * (x + height*y)];\n",
459 | " double cimag = c[2 * (x + height*y) + 1];\n",
460 | " double zreal = creal;\n",
461 | " double zimag = cimag;\n",
462 | " fractal[x + height*y] = maxiterations;\n",
463 | " for (int i = 0; i < maxiterations; i++) {\n",
464 | " double zreal2 = zreal*zreal + zimag*zimag + creal;\n",
465 | " double zimag2 = zreal*zreal + zimag*zimag + cimag;\n",
466 | " zreal = zreal2;\n",
467 | " zimag = zimag2;\n",
468 | " if (zreal*zreal + zimag*zimag > 4) {\n",
469 | " fractal[x + height*y] = i;\n",
470 | " break;\n",
471 | " }\n",
472 | " }\n",
473 | "}\n",
474 | "\"\"\")\n",
475 | "from_pycuda = module.get_function(\"from_pycuda\")"
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": null,
481 | "metadata": {
482 | "slideshow": {
483 | "slide_type": "slide"
484 | }
485 | },
486 | "outputs": [],
487 | "source": [
488 | "def run_pycuda(height, width, maxiterations=20):\n",
489 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
490 | " c = x + y*1j\n",
491 | " fractal = numpy.empty(c.shape, dtype=numpy.int32) + maxiterations\n",
492 | " from_pycuda(pycuda.driver.In(c.view(numpy.float64)),\n",
493 | " pycuda.driver.Out(fractal),\n",
494 | " numpy.int32(height),\n",
495 | " numpy.int32(width),\n",
496 | " numpy.int32(maxiterations),\n",
497 | " block=(32, 32, 1),\n",
498 | " grid=(int(math.ceil(height / 32)), int(math.ceil(width / 32))))\n",
499 | " return fractal"
500 | ]
501 | },
502 | {
503 | "cell_type": "code",
504 | "execution_count": null,
505 | "metadata": {},
506 | "outputs": [],
507 | "source": [
508 | "starttime = time.time()\n",
509 | "fractal = run_pycuda(8000, 12000) # full-sized problem\n",
510 | "time.time() - starttime"
511 | ]
512 | },
513 | {
514 | "cell_type": "markdown",
515 | "metadata": {
516 | "slideshow": {
517 | "slide_type": "slide"
518 | }
519 | },
520 | "source": [
521 | "About the same as Numba (10% better), which wouldn't be worth it for having to translate Python into CUDA C++, but would be worth it if you _found_ CUDA C++ and didn't want to translate it into Python!"
522 | ]
523 | },
524 | {
525 | "cell_type": "markdown",
526 | "metadata": {
527 | "slideshow": {
528 | "slide_type": "fragment"
529 | }
530 | },
531 | "source": [
532 | "As before, each library has its own special niche:\n",
533 | "\n",
534 | " * **CuPy:** for directly running Numpy on GPUs, no questions asked\n",
535 | " * **Numba:** for running (a limited subset of) Python code directly on the GPU\n",
536 | " * **PyCUDA:** for running CUDA C++ with the convenience of Numpy input and output."
537 | ]
538 | }
539 | ],
540 | "metadata": {
541 | "celltoolbar": "Slideshow",
542 | "kernelspec": {
543 | "display_name": "Python 3",
544 | "language": "python",
545 | "name": "python3"
546 | },
547 | "language_info": {
548 | "codemirror_mode": {
549 | "name": "ipython",
550 | "version": 3
551 | },
552 | "file_extension": ".py",
553 | "mimetype": "text/x-python",
554 | "name": "python",
555 | "nbconvert_exporter": "python",
556 | "pygments_lexer": "ipython3",
557 | "version": "3.7.0"
558 | }
559 | },
560 | "nbformat": 4,
561 | "nbformat_minor": 2
562 | }
563 |
--------------------------------------------------------------------------------
/8-low-level.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "slideshow": {
7 | "slide_type": "slide"
8 | }
9 | },
10 | "source": [
11 | "# Low-level hackery"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "slideshow": {
18 | "slide_type": "fragment"
19 | }
20 | },
21 | "source": [
22 | "_(nothing to install; just numpy, matplotlib from before)_"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {
28 | "slideshow": {
29 | "slide_type": "slide"
30 | }
31 | },
32 | "source": [
33 | "One function I've used without much comment is `numpy.frombuffer`, which lets us wrap arbitrary regions of memory as Numpy arrays. We can \"peek\" at any memory we want; we can also \"poke\" it, changing values, byte by byte."
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {
39 | "slideshow": {
40 | "slide_type": "slide"
41 | }
42 | },
43 | "source": [
44 | "Consider, for instance, a byte string. These are immutable (cannot be changed) in Python:"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "hello = b\"Hello, world!\""
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "try:\n",
63 | " hello[4:8] = b\"????\"\n",
64 | "except TypeError as err:\n",
65 | " print(\"Nope: \" + str(err))"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "slideshow": {
73 | "slide_type": "fragment"
74 | }
75 | },
76 | "outputs": [],
77 | "source": [
78 | "import numpy\n",
79 | "a = numpy.frombuffer(hello, dtype=numpy.uint8)\n",
80 | "a"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": [
89 | "a.view(\"S1\")"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {
95 | "slideshow": {
96 | "slide_type": "slide"
97 | }
98 | },
99 | "source": [
100 | "By default, Numpy tries to protect you from doing evil things."
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": null,
106 | "metadata": {},
107 | "outputs": [],
108 | "source": [
109 | "try:\n",
110 | " a[4:8] = [69, 86, 73, 76]\n",
111 | "except ValueError as err:\n",
112 | " print(\"Nope: \" + str(err))"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {
118 | "slideshow": {
119 | "slide_type": "fragment"
120 | }
121 | },
122 | "source": [
123 | "But this is Python: we can shoot our feet if we want to."
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "metadata": {},
130 | "outputs": [],
131 | "source": [
132 | "a.flags.writeable = True"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": null,
138 | "metadata": {},
139 | "outputs": [],
140 | "source": [
141 | "a[4:8] = [69, 86, 73, 76]"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {},
148 | "outputs": [],
149 | "source": [
150 | "hello"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {
156 | "slideshow": {
157 | "slide_type": "slide"
158 | }
159 | },
160 | "source": [
161 | "This messes with Python's internal data model."
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": null,
167 | "metadata": {},
168 | "outputs": [],
169 | "source": [
170 | "hello = b\"Hello, world!\"\n",
171 | "a = numpy.frombuffer(hello, dtype=numpy.uint8)\n",
172 | "a.flags.writeable = True\n",
173 | "a[4:8] = [69, 86, 73, 76]\n",
174 | "print(hello == b\"Hello, world!\")"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {
181 | "slideshow": {
182 | "slide_type": "fragment"
183 | }
184 | },
185 | "outputs": [],
186 | "source": [
187 | "exec(\"\"\"\n",
188 | "hello = b\"Hello, world!\"\n",
189 | "a = numpy.frombuffer(hello, dtype=numpy.uint8)\n",
190 | "a.flags.writeable = True\n",
191 | "a[4:8] = [69, 86, 73, 76]\n",
192 | "print(hello == b\"Hello, world!\")\n",
193 | "\"\"\")"
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {
199 | "slideshow": {
200 | "slide_type": "fragment"
201 | }
202 | },
203 | "source": [
204 | "(The second example was interpreted as a `.pyc` script, in which all instances of the literal `b\"Hello, world!\"` were replaced by a single object: modifying that object in line 4 changed it in line 5!)"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {
210 | "slideshow": {
211 | "slide_type": "slide"
212 | }
213 | },
214 | "source": [
215 | "With the help of ctypes, a built-in Python library, Numpy can wrap any address at all. (Some will cause segmentation faults, so be careful!)"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": null,
221 | "metadata": {},
222 | "outputs": [],
223 | "source": [
224 | "x = 12345"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": null,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "import ctypes\n",
234 | "import sys\n",
235 | "\n",
236 | "ptr = ctypes.cast(id(x), ctypes.POINTER(ctypes.c_uint8))\n",
237 | "a = numpy.ctypeslib.as_array(ptr, (sys.getsizeof(x),))\n",
238 | "a"
239 | ]
240 | },
241 | {
242 | "cell_type": "markdown",
243 | "metadata": {
244 | "slideshow": {
245 | "slide_type": "fragment"
246 | }
247 | },
248 | "source": [
249 | "We're looking at a Python object header, a pointer to the `int` type (also a Python object), and then the number itself: `12345` in little endian bytes is `57, 48, 0, 0`. Do you see it?"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": null,
255 | "metadata": {
256 | "slideshow": {
257 | "slide_type": "fragment"
258 | }
259 | },
260 | "outputs": [],
261 | "source": [
262 | "a[-4:].view(numpy.int32)"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "metadata": {
268 | "slideshow": {
269 | "slide_type": "slide"
270 | }
271 | },
272 | "source": [
273 | "Let's try a string."
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": null,
279 | "metadata": {},
280 | "outputs": [],
281 | "source": [
282 | "y = \"Hey there.\"\n",
283 | "ptr = ctypes.cast(id(y), ctypes.POINTER(ctypes.c_uint8))\n",
284 | "a = numpy.ctypeslib.as_array(ptr, (sys.getsizeof(y),))\n",
285 | "a"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "metadata": {},
292 | "outputs": [],
293 | "source": [
294 | "a[-11:].tostring()"
295 | ]
296 | },
297 | {
298 | "cell_type": "markdown",
299 | "metadata": {
300 | "slideshow": {
301 | "slide_type": "slide"
302 | }
303 | },
304 | "source": [
305 | "By wrapping a pointer as a Numpy array (and maybe setting `array.flags.writeable = True`), we can do anything."
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {
311 | "slideshow": {
312 | "slide_type": "fragment"
313 | }
314 | },
315 | "source": [
316 | "We can break anything."
317 | ]
318 | },
319 | {
320 | "cell_type": "markdown",
321 | "metadata": {
322 | "slideshow": {
323 | "slide_type": "fragment"
324 | }
325 | },
326 | "source": [
327 | "Seriously, just letting `numpy.ctypeslib.as_array(0, (8,))` repr itself on the screen would cause a segmentation fault. "
328 | ]
329 | },
330 | {
331 | "cell_type": "markdown",
332 | "metadata": {
333 | "slideshow": {
334 | "slide_type": "fragment"
335 | }
336 | },
337 | "source": [
338 | "Is there anything useful we can do with this power?"
339 | ]
340 | },
341 | {
342 | "cell_type": "markdown",
343 | "metadata": {
344 | "slideshow": {
345 | "slide_type": "slide"
346 | }
347 | },
348 | "source": [
349 | "The original purpose of the ctypes library was to run code in compiled C libraries (not C++, that's much more complicated).\n",
350 | "\n",
351 | "For a busy data analyst, the advantage of that is that you don't need to write (or wait for) official bindings to use a C library."
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": null,
357 | "metadata": {},
358 | "outputs": [],
359 | "source": [
360 | "import ctypes\n",
361 | "libc = ctypes.cdll.LoadLibrary(\"/lib/x86_64-linux-gnu/libc.so.6\")"
362 | ]
363 | },
364 | {
365 | "cell_type": "markdown",
366 | "metadata": {
367 | "slideshow": {
368 | "slide_type": "fragment"
369 | }
370 | },
371 | "source": [
372 | "We can run arbitrary functions from `libc.so.6`, but we have to tell Python what its argument types are (that's not stored in the shared object file)."
373 | ]
374 | },
375 | {
376 | "cell_type": "code",
377 | "execution_count": null,
378 | "metadata": {},
379 | "outputs": [],
380 | "source": [
381 | "libc.malloc.argtypes = (ctypes.c_size_t,) # argument types (only one)\n",
382 | "libc.malloc.restype = ctypes.POINTER(ctypes.c_double) # return type"
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "execution_count": null,
388 | "metadata": {
389 | "slideshow": {
390 | "slide_type": "fragment"
391 | }
392 | },
393 | "outputs": [],
394 | "source": [
395 | "ptr = libc.malloc(100 * numpy.dtype(numpy.float64).itemsize) # pass number of bytes\n",
396 | "ptr"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": null,
402 | "metadata": {
403 | "slideshow": {
404 | "slide_type": "slide"
405 | }
406 | },
407 | "outputs": [],
408 | "source": [
409 | "a = numpy.ctypeslib.as_array(ptr, (100,))\n",
410 | "a"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": null,
416 | "metadata": {
417 | "slideshow": {
418 | "slide_type": "fragment"
419 | }
420 | },
421 | "outputs": [],
422 | "source": [
423 | "a[:] = 0.0\n",
424 | "a"
425 | ]
426 | },
427 | {
428 | "cell_type": "code",
429 | "execution_count": null,
430 | "metadata": {
431 | "slideshow": {
432 | "slide_type": "fragment"
433 | }
434 | },
435 | "outputs": [],
436 | "source": [
437 | "a.flags"
438 | ]
439 | },
440 | {
441 | "cell_type": "markdown",
442 | "metadata": {
443 | "slideshow": {
444 | "slide_type": "slide"
445 | }
446 | },
447 | "source": [
448 | "We've just used low-level `libc.malloc` to allocate data for an array. This array doesn't \"own\" its memory, so it doesn't go away when the Python object (`a`) gets garbage collected. We'd have to call `libc.free`, like any C programmer."
449 | ]
450 | },
451 | {
452 | "cell_type": "markdown",
453 | "metadata": {
454 | "slideshow": {
455 | "slide_type": "fragment"
456 | }
457 | },
458 | "source": [
459 | "Why do this? I've used it to allocate arrays on NUMA hardware and Knight's Landing MCDRAM. Specialty memory allocations will probably get more important, not less, as architectures get more heterogeneous."
460 | ]
461 | },
462 | {
463 | "cell_type": "markdown",
464 | "metadata": {
465 | "slideshow": {
466 | "slide_type": "fragment"
467 | }
468 | },
469 | "source": [
470 | "(No reason to write an entire analysis in C just to get special allocators.)"
471 | ]
472 | },
473 | {
474 | "cell_type": "markdown",
475 | "metadata": {
476 | "slideshow": {
477 | "slide_type": "fragment"
478 | }
479 | },
480 | "source": [
481 | "Another possible reason is to access special devices; device drivers are often written in C and distributed with C programmers in mind, but they may be the only thing between you and an important dataset."
482 | ]
483 | },
484 | {
485 | "cell_type": "markdown",
486 | "metadata": {
487 | "slideshow": {
488 | "slide_type": "slide"
489 | }
490 | },
491 | "source": [
492 | "Another example: you can wrap structs. (The snake eats its own tail again.)"
493 | ]
494 | },
495 | {
496 | "cell_type": "code",
497 | "execution_count": null,
498 | "metadata": {},
499 | "outputs": [],
500 | "source": [
501 | "class PyObject(ctypes.Structure): pass\n",
502 | "PyObject._fields_ = [(\"ob_refcnt\", ctypes.c_size_t),\n",
503 | " (\"ob_type\", ctypes.POINTER(PyObject))]"
504 | ]
505 | },
506 | {
507 | "cell_type": "markdown",
508 | "metadata": {
509 | "slideshow": {
510 | "slide_type": "fragment"
511 | }
512 | },
513 | "source": [
514 | "which is equivalent to\n",
515 | "\n",
516 | "```c\n",
517 | "struct PyObject {\n",
518 | " size_t ob_refcnt;\n",
519 | " PyObject* ob_type;\n",
520 | " // the rest depends on the type of object\n",
521 | "}\n",
522 | "```"
523 | ]
524 | },
525 | {
526 | "cell_type": "code",
527 | "execution_count": null,
528 | "metadata": {
529 | "slideshow": {
530 | "slide_type": "fragment"
531 | }
532 | },
533 | "outputs": [],
534 | "source": [
535 | "hello = b\"Hello, world!\""
536 | ]
537 | },
538 | {
539 | "cell_type": "code",
540 | "execution_count": null,
541 | "metadata": {},
542 | "outputs": [],
543 | "source": [
544 | "ptr = PyObject.from_address(id(hello))\n",
545 | "ptr"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": null,
551 | "metadata": {
552 | "slideshow": {
553 | "slide_type": "slide"
554 | }
555 | },
556 | "outputs": [],
557 | "source": [
558 | "ptr.ob_refcnt"
559 | ]
560 | },
561 | {
562 | "cell_type": "markdown",
563 | "metadata": {
564 | "slideshow": {
565 | "slide_type": "fragment"
566 | }
567 | },
568 | "source": [
569 | "This `ob_refcnt` is the number of Python references to a given object. There's a way to do it with a `sys` call:"
570 | ]
571 | },
572 | {
573 | "cell_type": "code",
574 | "execution_count": null,
575 | "metadata": {},
576 | "outputs": [],
577 | "source": [
578 | "sys.getrefcount(hello)"
579 | ]
580 | },
581 | {
582 | "cell_type": "markdown",
583 | "metadata": {
584 | "slideshow": {
585 | "slide_type": "fragment"
586 | }
587 | },
588 | "source": [
589 | "but it's always one too high because you create a reference to pass it to that function!"
590 | ]
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": null,
595 | "metadata": {},
596 | "outputs": [],
597 | "source": [
598 | "biglist = [hello] * 1000"
599 | ]
600 | },
601 | {
602 | "cell_type": "code",
603 | "execution_count": null,
604 | "metadata": {},
605 | "outputs": [],
606 | "source": [
607 | "ptr.ob_refcnt"
608 | ]
609 | },
610 | {
611 | "cell_type": "code",
612 | "execution_count": null,
613 | "metadata": {},
614 | "outputs": [],
615 | "source": [
616 | "sys.getrefcount(hello)"
617 | ]
618 | },
619 | {
620 | "cell_type": "markdown",
621 | "metadata": {
622 | "slideshow": {
623 | "slide_type": "slide"
624 | }
625 | },
626 | "source": [
627 | "The ctypes library can wrap numbers, strings, pointers, arrays, and structs, which is just about everything you might encounter in C. The reason C++ isn't supported is because C shoehorns its much larger type system into shared object files by \"name mangling.\"\n",
628 | "\n",
629 | "Even if you reverse the name mangling with the `c++filt` program, those names cannot be uniquely identified without interpreting the C++ headers. By that point, you might as well use Cython or pybind11."
630 | ]
631 | },
632 | {
633 | "cell_type": "markdown",
634 | "metadata": {
635 | "slideshow": {
636 | "slide_type": "slide"
637 | }
638 | },
639 | "source": [
640 | "**One last example:** the quickest, dirtiest way possible to call out to compiled code:"
641 | ]
642 | },
643 | {
644 | "cell_type": "code",
645 | "execution_count": null,
646 | "metadata": {},
647 | "outputs": [],
648 | "source": [
649 | "import os\n",
650 | "with open(\"tmp.cpp\", \"w\") as cfile:\n",
651 | " cfile.write(\"\"\"\n",
652 | "#include \n",
653 | "extern \"C\" { \n",
654 | " void quick(int height, int width, int maxiterations, double* c, int* fractal) {\n",
655 | " for (int h = 0; h < height; h++) {\n",
656 | " for (int w = 0; w < width; w++) {\n",
657 | " double creal = c[2 * (h + height*w)];\n",
658 | " double cimag = c[2 * (h + height*w) + 1];\n",
659 | " std::complex ci = std::complex(creal, cimag);\n",
660 | " std::complex z = ci;\n",
661 | " for (int i = 0; i < maxiterations; i++) {\n",
662 | " z = z * z + ci;\n",
663 | " if (std::abs(z) > 2) {\n",
664 | " fractal[h + height*w] = i;\n",
665 | " break;\n",
666 | " }\n",
667 | " }\n",
668 | " }\n",
669 | " }\n",
670 | " }\n",
671 | "}\n",
672 | "\"\"\")\n",
673 | "assert os.system(\"gcc -O3 -fPIC -shared tmp.cpp -o libtmp.so\") == 0"
674 | ]
675 | },
676 | {
677 | "cell_type": "code",
678 | "execution_count": null,
679 | "metadata": {
680 | "slideshow": {
681 | "slide_type": "slide"
682 | }
683 | },
684 | "outputs": [],
685 | "source": [
686 | "libtmp = ctypes.cdll.LoadLibrary(os.path.join(os.getcwd(), \"libtmp.so\"))\n",
687 | "libtmp.quick.argtypes = (ctypes.c_int, ctypes.c_int, ctypes.c_int,\n",
688 | " ctypes.POINTER(ctypes.c_double),\n",
689 | " ctypes.POINTER(ctypes.c_int))\n",
690 | "libtmp.quick.restype = None"
691 | ]
692 | },
693 | {
694 | "cell_type": "code",
695 | "execution_count": null,
696 | "metadata": {},
697 | "outputs": [],
698 | "source": [
699 | "def run_dirty(height, width, maxiterations=20):\n",
700 | " y, x = numpy.ogrid[-1:0:height*1j, -1.5:0:width*1j]\n",
701 | " c = x + y*1j\n",
702 | " fractal = numpy.zeros(c.shape, dtype=numpy.int32) + maxiterations\n",
703 | " libtmp.quick(height, width, maxiterations,\n",
704 | " ctypes.cast(c.ctypes.data, ctypes.POINTER(ctypes.c_double)),\n",
705 | " ctypes.cast(fractal.ctypes.data, ctypes.POINTER(ctypes.c_int)))\n",
706 | " return fractal"
707 | ]
708 | },
709 | {
710 | "cell_type": "code",
711 | "execution_count": null,
712 | "metadata": {
713 | "slideshow": {
714 | "slide_type": "slide"
715 | }
716 | },
717 | "outputs": [],
718 | "source": [
719 | "import time\n",
720 | "starttime = time.time()\n",
721 | "fractal = run_dirty(8000, 12000)\n",
722 | "time.time() - starttime"
723 | ]
724 | },
725 | {
726 | "cell_type": "code",
727 | "execution_count": null,
728 | "metadata": {
729 | "slideshow": {
730 | "slide_type": "fragment"
731 | }
732 | },
733 | "outputs": [],
734 | "source": [
735 | "%matplotlib inline\n",
736 | "import matplotlib.pyplot as plt\n",
737 | "fig, ax = plt.subplots(figsize=(12, 8))\n",
738 | "ax.imshow(fractal)\n",
739 | "# ax.imshow(fractal[-200:, :300])"
740 | ]
741 | },
742 | {
743 | "cell_type": "markdown",
744 | "metadata": {
745 | "slideshow": {
746 | "slide_type": "slide"
747 | }
748 | },
749 | "source": [
750 | "Not very good time, but the right answer."
751 | ]
752 | },
753 | {
754 | "cell_type": "markdown",
755 | "metadata": {
756 | "slideshow": {
757 | "slide_type": "fragment"
758 | }
759 | },
760 | "source": [
761 | "With that monstrosity, I'll end the course.\n",
762 | "\n",
763 | "Cheers!"
764 | ]
765 | }
766 | ],
767 | "metadata": {
768 | "celltoolbar": "Slideshow",
769 | "kernelspec": {
770 | "display_name": "Python 3",
771 | "language": "python",
772 | "name": "python3"
773 | },
774 | "language_info": {
775 | "codemirror_mode": {
776 | "name": "ipython",
777 | "version": 3
778 | },
779 | "file_extension": ".py",
780 | "mimetype": "text/x-python",
781 | "name": "python",
782 | "nbconvert_exporter": "python",
783 | "pygments_lexer": "ipython3",
784 | "version": "3.7.0"
785 | }
786 | },
787 | "nbformat": 4,
788 | "nbformat_minor": 2
789 | }
790 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2018, Jim Pivarski
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python/Numpy for High-Performance Numerical Processing
2 |
3 | Notebooks and slides used for the [Numpy mini-course at Princeton](https://researchcomputing.princeton.edu/events/pythonnumpy-high-performance-numerical-processing) held November 15, 2018.
4 |
5 | ### Abstract
6 |
7 | Python is a notoriously slow language, so why is it widely used by scientists and machine learning experts? In a numerically heavy task, an interpreted, dynamically typed environment can be thousands of times slower than a compiled, statically typed one, which can make the difference between minutes and days or between coarse models on small datasets and fine-grained models on large datasets. The trick is to drive compiled functions from the interpreted commandline, like R, and to frame your problem in array programming primitives, like Matlab, but in a general-purpose programming language with hundreds of thousands of extensions to glue to every conceivable interface.
8 |
9 | In this workshop, we will examine the numerical processing ecosystem that has grown up around Python. The key library in this ecosystem is Numpy, which enables fast array programming, and Pandas, a convenient wrapper for organizing data. We will visualize data in and out of JupyterLab, a notebook front-end for exploratory analysis. We'll also work through examples of binding ~~from C++ to Python with pybind11 and~~ from Python to C++ with Cython, which have different strengths and use-cases. We'll also natively compile Python (C++ speeds without C++) using Numba and run code on GPUs with Numba (Python-like), CuPy (Numpy-like), and PyCUDA/PyOpenCL (raw CUDA/OpenCL).
10 |
11 | Participants will be encouraged to bring a laptop or log into their favorite cluster to install the software we discuss here for later use. We will use conda and pip-in-conda, so superuser ("sudo") permissions are not required.
12 |
13 | Jim Pivarski received his Ph.D. in high-energy particle physics from Cornell in 2006. He helped to commission the CMS experiment at the LHC and later switched to data science as a Big Data consultant. He is now back in physics, integrating computing techniques learned from industry into high-energy physics analysis.
14 |
15 | ### Plan for the day
16 |
17 | General intention; we'll vary from this if there's good reason to do so.
18 |
19 | 
20 |
21 | ### How to prepare
22 |
23 | (1) Check out this repository:
24 |
25 | ```
26 | git clone https://github.com/jpivarski/python-numpy-mini-course.git
27 | ```
28 |
29 | (2) Install Anaconda or [Miniconda for Python 3](https://conda.io/miniconda.html). Using that, install Jupyter (Lab or Notebook; I prefer Lab):
30 |
31 | ```
32 | conda install jupyterlab
33 | ```
34 |
35 | Change directories into the repository and start Jupyter Lab or Notebook:
36 |
37 | ```
38 | cd python-numpy-mini-course
39 | jupyter lab # or notebook
40 | ```
41 |
42 | Installations for the sessions are given at the top of each notebook, but if you want to install everything at once, instructions are collected below. Most of these are already bundled in the full Anaconda distribution.
43 |
44 | ```
45 | conda install numpy # 2-just-numpy
46 | conda install pandas matplotlib # 4-pandas
47 | conda install dask distrubted -c conda-forge # 5-dask
48 | conda install numba cython # 6-compilers
49 | ```
50 |
51 | Don't bother installing software for the GPU session if you don't have an NVidia GPU with the CUDA development kit installed.
52 |
53 | ```
54 | conda install cupy cudatoolkit # 7-gpu
55 | export CFLAGS=-fpermissive
56 | pip install --no-cache-dir pycuda
57 | ```
58 |
59 | (3) General Python programming skills will be assumed (ability to read or write a page-long script without difficulty). Walk through an online tutorial if you need to brush up before the course.
60 |
61 | Knowedge of the libraries presented here _will not_ be assumed. Come and learn!
62 |
63 | ### Pre-evaluated notebooks
64 |
65 | You'll get the most out of the course if you follow along in the blank notebooks in the master branch of this repository. However, if you're returning to look up a result, pre-evaluated copies of all the notebooks can be found on the [evaluated branch](https://github.com/jpivarski/python-numpy-mini-course/tree/evaluated) of this repository.
66 |
--------------------------------------------------------------------------------
/data/nasa-exoplanets-details.txt:
--------------------------------------------------------------------------------
1 | # This file was produced by the NASA Exoplanet Archive http://exoplanetarchive.ipac.caltech.edu
2 | # Fri Nov 9 17:35:38 2018
3 | #
4 | # COLUMN pl_hostname: Host Name
5 | # COLUMN pl_letter: Planet Letter
6 | # COLUMN pl_name: Planet Name
7 | # COLUMN pl_discmethod: Discovery Method
8 | # COLUMN pl_pnum: Number of Planets in System
9 | # COLUMN pl_orbper: Orbital Period [days]
10 | # COLUMN pl_orbsmax: Orbit Semi-Major Axis [AU])
11 | # COLUMN pl_orbeccen: Eccentricity
12 | # COLUMN pl_orbincl: Inclination [deg]
13 | # COLUMN pl_bmassj: Planet Mass or M*sin(i) [Jupiter mass]
14 | # COLUMN pl_bmassprov: Planet Mass or M*sin(i) Provenance
15 | # COLUMN pl_radj: Planet Radius [Jupiter radii]
16 | # COLUMN pl_dens: Planet Density [g/cm**3]
17 | # COLUMN pl_ttvflag: TTV Flag
18 | # COLUMN pl_kepflag: Kepler Field Flag
19 | # COLUMN pl_k2flag: K2 Mission Flag
20 | # COLUMN pl_nnotes: Number of Notes
21 | # COLUMN ra_str: RA [sexagesimal]
22 | # COLUMN ra: RA [decimal degrees]
23 | # COLUMN dec_str: Dec [sexagesimal]
24 | # COLUMN dec: Dec [decimal degrees]
25 | # COLUMN st_dist: Distance [pc]
26 | # COLUMN st_optmag: Optical Magnitude [mag]
27 | # COLUMN st_optband: Optical Magnitude Band
28 | # COLUMN gaia_gmag: G-band (Gaia) [mag]
29 | # COLUMN st_teff: Effective Temperature [K]
30 | # COLUMN st_mass: Stellar Mass [Solar mass]
31 | # COLUMN st_rad: Stellar Radius [Solar radii]
32 | # COLUMN rowupdate: Date of Last Update
33 | # COLUMN pl_tranflag: Planet Transit Flag
34 | # COLUMN pl_rvflag: Planet RV Flag
35 | # COLUMN pl_imgflag: Planet Imaging Flag
36 | # COLUMN pl_astflag: Planet Astrometry Flag
37 | # COLUMN pl_omflag: Planet Orbital Modulation Flag
38 | # COLUMN pl_cbflag: Planet Circumbinary Flag
39 | # COLUMN pl_angsep: Calculated Angular Separation [mas]
40 | # COLUMN pl_orbtper: Time of Periastron [days]
41 | # COLUMN pl_orblper: Long. of Periastron [deg]
42 | # COLUMN pl_rvamp: Radial Velocity Amplitude [m/s]
43 | # COLUMN pl_eqt: Equilibrium Temperature [K]
44 | # COLUMN pl_insol: Insolation Flux [Earth flux]
45 | # COLUMN pl_massj: Planet Mass [Jupiter mass]
46 | # COLUMN pl_msinij: Planet M*sin(i) [Jupiter mass]
47 | # COLUMN pl_masse: Planet Mass [Earth mass]
48 | # COLUMN pl_msinie: Planet M*sin(i) [Earth mass]
49 | # COLUMN pl_bmasse: Planet Mass or M*sin(i) [Earth mass]
50 | # COLUMN pl_rade: Planet Radius [Earth radii]
51 | # COLUMN pl_rads: Planet Radius [Solar radii]
52 | # COLUMN pl_trandep: Transit Depth [percent]
53 | # COLUMN pl_trandur: Transit Duration [days]
54 | # COLUMN pl_tranmid: Transit Midpoint [days]
55 | # COLUMN pl_tsystemref: Time System Reference
56 | # COLUMN pl_imppar: Impact Parameter
57 | # COLUMN pl_occdep: Occultation Depth [percentage]
58 | # COLUMN pl_ratdor: Ratio of Distance to Stellar Radius
59 | # COLUMN pl_ratror: Ratio of Planet to Stellar Radius
60 | # COLUMN pl_def_reflink: Default Reference
61 | # COLUMN pl_disc: Year of Discovery
62 | # COLUMN pl_disc_reflink: Discovery Reference
63 | # COLUMN pl_locale: Discovery Locale
64 | # COLUMN pl_facility: Discovery Facility
65 | # COLUMN pl_telescope: Discovery Telescope
66 | # COLUMN pl_instrument: Discovery Instrument
67 | # COLUMN pl_status: Status
68 | # COLUMN pl_mnum: Number of Moons in System
69 | # COLUMN pl_st_npar: Number of Stellar and Planet Parameters
70 | # COLUMN pl_st_nref: Number of Stellar and Planet References
71 | # COLUMN pl_pelink: Link to Exoplanet Encyclopaedia
72 | # COLUMN pl_edelink: Link to Exoplanet Data Explorer
73 | # COLUMN pl_publ_date: Publication Date
74 | # COLUMN hd_name: HD Name
75 | # COLUMN hip_name: HIP Name
76 | # COLUMN st_rah: RA [hrs]
77 | # COLUMN st_glon: Galactic Longitude [deg]
78 | # COLUMN st_glat: Galactic Latitude [deg]
79 | # COLUMN st_elon: Ecliptic Longitude [deg]
80 | # COLUMN st_elat: Ecliptic Latitude [deg]
81 | # COLUMN st_plx: Parallax [mas]
82 | # COLUMN gaia_plx: Gaia Parallax [mas]
83 | # COLUMN gaia_dist: Gaia Distance [pc]
84 | # COLUMN st_pmra: Proper Motion (RA) [mas/yr]
85 | # COLUMN st_pmdec: Proper Motion (Dec) [mas/yr]
86 | # COLUMN st_pm: Total Proper Motion [mas/yr]
87 | # COLUMN gaia_pmra: Gaia Proper Motion (RA) [mas/yr]
88 | # COLUMN gaia_pmdec: Gaia Proper Motion (Dec) [mas/yr]
89 | # COLUMN gaia_pm: Gaia Total Proper Motion [mas/yr]
90 | # COLUMN st_radv: Radial Velocity [km/s]
91 | # COLUMN st_sp: Spectral Type
92 | # COLUMN st_spstr: Spectral Type
93 | # COLUMN st_logg: Stellar Surface Gravity [log10(cm/s**2)]
94 | # COLUMN st_lum: Stellar Luminosity [log(Solar)]
95 | # COLUMN st_dens: Stellar Density [g/cm**3]
96 | # COLUMN st_metfe: Stellar Metallicity [dex]
97 | # COLUMN st_metratio: Metallicity Ratio
98 | # COLUMN st_age: Stellar Age [Gyr]
99 | # COLUMN st_vsini: Rot. Velocity V*sin(i) [km/s]
100 | # COLUMN st_acts: Stellar Activity S-index
101 | # COLUMN st_actr: Stellar Activity log(R'HK)
102 | # COLUMN st_actlx: X-ray Activity log(Lx)
103 | # COLUMN swasp_id: SWASP Identifier
104 | # COLUMN st_nts: Number of Time Series
105 | # COLUMN st_nplc: Number of Planet Transit Light Curves
106 | # COLUMN st_nglc: Number of General Light Curves
107 | # COLUMN st_nrvc: Number of Radial Velocity Time Series
108 | # COLUMN st_naxa: Number of Amateur Light Curves
109 | # COLUMN st_nimg: Number of Images
110 | # COLUMN st_nspec: Number of Spectra
111 | # COLUMN st_uj: U-band (Johnson) [mag]
112 | # COLUMN st_vj: V-band (Johnson) [mag]
113 | # COLUMN st_bj: B-band (Johnson) [mag]
114 | # COLUMN st_rc: R-band (Cousins) [mag]
115 | # COLUMN st_ic: I-band (Cousins) [mag]
116 | # COLUMN st_j: J-band (2MASS) [mag]
117 | # COLUMN st_h: H-band (2MASS) [mag]
118 | # COLUMN st_k: Ks-band (2MASS) [mag]
119 | # COLUMN st_wise1: WISE 3.4um [mag]
120 | # COLUMN st_wise2: WISE 4.6um [mag]
121 | # COLUMN st_wise3: WISE 12um [mag]
122 | # COLUMN st_wise4: WISE 22um [mag]
123 | # COLUMN st_irac1: IRAC 3.6um [mag]
124 | # COLUMN st_irac2: IRAC 4.5um [mag]
125 | # COLUMN st_irac3: IRAC 5.8um [mag]
126 | # COLUMN st_irac4: IRAC 8.0um [mag]
127 | # COLUMN st_mips1: MIPS 24um [mag]
128 | # COLUMN st_mips2: MIPS 70um [mag]
129 | # COLUMN st_mips3: MIPS 160um [mag]
130 | # COLUMN st_iras1: IRAS 12um Flux [Jy]
131 | # COLUMN st_iras2: IRAS 25um Flux [Jy]
132 | # COLUMN st_iras3: IRAS 60um Flux [Jy]
133 | # COLUMN st_iras4: IRAS 100um Flux [Jy]
134 | # COLUMN st_photn: Number of Photometry Measurements
135 | # COLUMN st_umbj: U-B (Johnson) [mag]
136 | # COLUMN st_bmvj: B-V (Johnson) [mag]
137 | # COLUMN st_vjmic: V-I (Johnson-Cousins) [mag]
138 | # COLUMN st_vjmrc: V-R (Johnson-Cousins) [mag]
139 | # COLUMN st_jmh2: J-H (2MASS) [mag]
140 | # COLUMN st_hmk2: H-Ks (2MASS) [mag]
141 | # COLUMN st_jmk2: J-Ks (2MASS) [mag]
142 | # COLUMN st_bmy: b-y (Stromgren) [mag]
143 | # COLUMN st_m1: m1 (Stromgren) [mag]
144 | # COLUMN st_c1: c1 (Stromgren) [mag]
145 | # COLUMN st_colorn: Number of Color Measurements
146 |
--------------------------------------------------------------------------------
/img/cards-chance-deck-19060.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/cards-chance-deck-19060.jpg
--------------------------------------------------------------------------------
/img/clock-rate.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/clock-rate.jpg
--------------------------------------------------------------------------------
/img/plan-for-the-day.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/plan-for-the-day.png
--------------------------------------------------------------------------------
/img/png-spec-chunks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/png-spec-chunks.png
--------------------------------------------------------------------------------
/img/png-spec-scanline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/png-spec-scanline.png
--------------------------------------------------------------------------------
/img/vectorization-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/img/vectorization-example.png
--------------------------------------------------------------------------------
/img/vectorization-example.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
494 |
--------------------------------------------------------------------------------
/notes.md:
--------------------------------------------------------------------------------
1 | Topics to cover
2 | ===============
3 |
4 | Intro talk
5 | ----------
6 |
7 | Just Numpy
8 | ----------
9 |
10 | Numpy ecosystem talk
11 | --------------------
12 |
13 | Pandas
14 | ------
15 |
16 | Dask & multiprocessing
17 | ----------------------
18 |
19 | Numba, Cython, pybind11
20 | -----------------------
21 |
22 | CuPy, Numba-GPU, PyCUDA
23 | -----------------------
24 |
25 | ctypes & low-level hackery
26 | --------------------------
27 |
--------------------------------------------------------------------------------
/tex/1-intro.tex:
--------------------------------------------------------------------------------
1 | \pdfminorversion=4
2 | \documentclass[aspectratio=169]{beamer}
3 |
4 | \mode
5 | {
6 | \usetheme{default}
7 | \usecolortheme{default}
8 | \usefonttheme{default}
9 | \setbeamertemplate{navigation symbols}{}
10 | \setbeamertemplate{caption}[numbered]
11 | \setbeamertemplate{footline}[frame number] % or "page number"
12 | \setbeamercolor{frametitle}{fg=white}
13 | \setbeamercolor{footline}{fg=black}
14 | }
15 |
16 | \usepackage[english]{babel}
17 | \usepackage[utf8x]{inputenc}
18 | \usepackage{tikz}
19 | \usepackage{courier}
20 | \usepackage{array}
21 | \usepackage{bold-extra}
22 | \usepackage{minted}
23 | \usepackage[thicklines]{cancel}
24 | \usepackage{fancyvrb}
25 | \usepackage{tabto}
26 |
27 | \xdefinecolor{dianablue}{rgb}{0.18,0.24,0.31}
28 | \xdefinecolor{darkblue}{rgb}{0.1,0.1,0.7}
29 | \xdefinecolor{darkgreen}{rgb}{0,0.5,0}
30 | \xdefinecolor{darkgrey}{rgb}{0.35,0.35,0.35}
31 | \xdefinecolor{darkorange}{rgb}{0.8,0.5,0}
32 | \xdefinecolor{darkred}{rgb}{0.7,0,0}
33 | \definecolor{darkgreen}{rgb}{0,0.6,0}
34 | \definecolor{mauve}{rgb}{0.58,0,0.82}
35 |
36 | \title[01-intro]{Python/Numpy for High-Performance Numerical Processing}
37 | \author{Jim Pivarski}
38 | \institute{Princeton University}
39 | \date{November 15, 2018}
40 |
41 | \usetikzlibrary{shapes.callouts}
42 |
43 | \begin{document}
44 |
45 | \logo{\pgfputat{\pgfxy(0.11, 7.4)}{\pgfbox[right,base]{\tikz{\filldraw[fill=dianablue, draw=none] (0 cm, 0 cm) rectangle (50 cm, 1 cm);}\mbox{\hspace{-8 cm}\includegraphics[height=1 cm]{princeton-logo-long.png}\mbox{\hspace{0.25 cm}}}}}}
46 |
47 | \begin{frame}
48 | \titlepage
49 | \end{frame}
50 |
51 | \logo{\pgfputat{\pgfxy(0.11, 7.4)}{\pgfbox[right,base]{\tikz{\filldraw[fill=dianablue, draw=none] (0 cm, 0 cm) rectangle (50 cm, 1 cm);}\mbox{\hspace{-8 cm}\includegraphics[height=1 cm]{princeton-logo.png}\mbox{\hspace{0.25 cm}}}}}}
52 |
53 | % Uncomment these lines for an automatically generated outline.
54 | %\begin{frame}{Outline}
55 | % \tableofcontents
56 | %\end{frame}
57 |
58 | % START START START START START START START START START START START START START
59 |
60 | \begin{frame}{Why Python?}
61 | \vspace{0.25 cm}
62 | \begin{center}
63 | \includegraphics[width=0.8\linewidth]{pypl-popularity.png}
64 |
65 | \textcolor{blue}{\scriptsize\url{http://pypl.github.io/PYPL.html}}
66 | \end{center}
67 | \end{frame}
68 |
69 | \begin{frame}{Why Python in science?}
70 | \vspace{0.5 cm}
71 | \includegraphics[width=\linewidth]{python-r-cpp-googletrends-data.png}
72 |
73 | \vspace{1 cm}
74 | \includegraphics[width=\linewidth]{python-r-cpp-googletrends-dataset.png}
75 | \end{frame}
76 |
77 | \begin{frame}{Why Python in science?}
78 | \vspace{0.5 cm}
79 | \includegraphics[width=\linewidth]{python-r-cpp-googletrends-datascience.png}
80 |
81 | \vspace{1 cm}
82 | \includegraphics[width=\linewidth]{python-r-cpp-googletrends-machinelearning.png}
83 | \end{frame}
84 |
85 | \begin{frame}{Why Python in science?}
86 | \vspace{0.5 cm}
87 | \includegraphics[width=\linewidth]{root-spark-pandas-google-trends.png}
88 | \end{frame}
89 |
90 | \begin{frame}{Why Python in science?}
91 | \large
92 | \vspace{0.4 cm}
93 | All of the machine learning libraries I could find either have a Python interface or are primarily/exclusively Python.
94 |
95 | \vspace{0.6 cm}
96 | \mbox{ } \includegraphics[height=0.8 cm]{sklearn-logo.png}
97 | \hfill \includegraphics[height=0.8 cm]{pytorch-logo.png}
98 | \hfill \includegraphics[height=0.8 cm]{keras-logo.png}
99 | \hfill \includegraphics[height=1 cm]{tensorflow-logo.png}
100 | \hfill \includegraphics[height=0.8 cm]{caffe2-logo.png}
101 | \hfill \includegraphics[height=0.8 cm]{gluon-logo.png} \mbox{ }
102 |
103 | \vspace{0.15 cm}
104 | \mbox{ } \includegraphics[height=0.8 cm]{chainer-logo.png}
105 | \hfill \includegraphics[height=0.8 cm]{cntk-logo.png}
106 | \hfill \includegraphics[height=0.8 cm]{lasagne-logo.png}
107 | \hfill \includegraphics[height=0.8 cm]{onnx-logo.png}
108 | \hfill \includegraphics[height=0.8 cm]{cesium-logo.png}
109 | \hfill \includegraphics[height=0.8 cm]{xgboost-logo.png} \mbox{ }
110 | \end{frame}
111 |
112 | \begin{frame}{Why Python in science?}
113 | \vspace{0.25 cm}
114 | \begin{center}
115 | \includegraphics[width=0.7\linewidth]{mentions-of-programming-languages.png}
116 | \end{center}
117 | \end{frame}
118 |
119 | \begin{frame}{Why Python in science?}
120 | \vspace{0.3 cm}
121 | \begin{columns}[b]
122 | \column{0.59\linewidth}
123 | \includegraphics[width=\linewidth]{lsst-notebook.png}
124 | \end{columns}
125 | \end{frame}
126 |
127 | \begin{frame}{Stealing from Jake VanderPlas's {\it Unexpected Effectiveness} talk}
128 | \vspace{0.25 cm}
129 | \begin{columns}[b]
130 | \column{0.75\linewidth}
131 | \only<1>{\includegraphics[height=7.8 cm]{shells-1.png}}
132 | \only<2>{\includegraphics[height=7.8 cm]{shells-2.png}}
133 | \only<3>{\includegraphics[height=7.8 cm]{shells-3.png}}
134 | \only<4>{\includegraphics[height=7.8 cm]{shells-4.png}}
135 | \only<5-6>{\includegraphics[height=7.8 cm]{shells-5.png}\vspace{0.5 cm}}
136 |
137 | \column{0.25\linewidth}
138 | \includegraphics[width=\linewidth]{unreasonable-effectiveness.png}
139 |
140 | \vspace{0.5 cm}
141 | \uncover<6>{If you're used to writing your own code, searching for tools is eye-opening: you learn what's unique about what you do and what isn't.}
142 |
143 | \vspace{-7\baselineskip}
144 | \vspace{4.8 cm}
145 | \end{columns}
146 | \end{frame}
147 |
148 | \begin{frame}{Stealing again from Jake VanderPlas}
149 | \vspace{0.27 cm}
150 | \begin{columns}
151 | \column{0.74\linewidth}
152 | \includegraphics[width=\linewidth]{commute-by-plane.png}
153 | \end{columns}
154 | \end{frame}
155 |
156 | \begin{frame}{Why not indeed?}
157 | \large
158 | \begin{center}
159 | In science, we often have to scale up analyses to large datasets.
160 |
161 | \vspace{1 cm}
162 | \uncover<2->{10\% faster doesn't mean much, but the difference between \\ ``five minutes'' and ``overnight'' is life-changing.}
163 |
164 | \vspace{1 cm}
165 | \uncover<3->{That's the scale we're talking about between C and Python.}
166 |
167 | \vspace{1 cm}
168 | \uncover<4->{But we also need the interactivity of a dynamic language to {\it develop} the analysis. (``If we knew what we were doing, it wouldn't be called research.'')}
169 | \end{center}
170 | \end{frame}
171 |
172 | \begin{frame}{Metaphor time!}
173 | \Large
174 | \vspace{0.25 cm}
175 | \begin{center}
176 | \textcolor{darkblue}{\underline{Drive to the airport by car, then take a plane.}}
177 | \end{center}
178 |
179 | \vspace{0.5 cm}
180 | \begin{columns}
181 | \column{0.4\linewidth}
182 | \begin{center}
183 | Small-scale {\it project organization} in Python, ignoring performance entirely.
184 | \end{center}
185 |
186 | \column{0.4\linewidth}
187 | \begin{center}
188 | Run over {\it big data} in compiled code, tuning performance until it no longer matters.
189 | \end{center}
190 |
191 | \end{columns}
192 | \end{frame}
193 |
194 | \begin{frame}{Python is a good glue language: my thesis workflow in 2006}
195 | \vspace{0.5 cm}
196 | \begin{columns}
197 | \column{1.1\linewidth}
198 | \includegraphics[width=\linewidth]{thesis-code-flow.pdf}
199 | \end{columns}
200 | \end{frame}
201 |
202 | \begin{frame}{Which got me involved in open source (PyMinuit is now ``iminuit'')}
203 | \vspace{0.5 cm}
204 | \includegraphics[width=\linewidth]{pyminuit.png}
205 | \end{frame}
206 |
207 | \begin{frame}{The key to ecosystem development was a common array library}
208 | \large
209 | \vspace{0.1 cm}
210 |
211 | \renewcommand{\arraystretch}{1.15}
212 | \mbox{\hspace{-0.5 cm}\begin{tabular}{c p{0.95\linewidth}}
213 | 1994 & \textcolor{darkorange}{\bf Python} 1.0 released. \\
214 | 1995 & First array package: \textcolor{darkorange}{\bf Numeric} \textcolor{gray}{(a.k.a.\ Numerical, Numerical Python, NumPy).} \\
215 | 2001 & Diverse scientific codebases merged into \textcolor{darkorange}{\bf SciPy}. \\
216 | 2003 & \textcolor{darkorange}{\bf Matplotlib} \\
217 | 2003 & Numeric was limited; \textcolor{darkorange}{\bf numarray} appeared as a competitor with more \mbox{features} \textcolor{gray}{(memory-mapped files, alignment, record arrays)}. \\
218 | 2005 & Two packages were incompatible; could not integrate numarray-based code into SciPy. Travis Oliphant merged the codebases as \textcolor{darkorange}{\bf Numpy}. \\
219 | 2008 & \textcolor{darkorange}{\bf Pandas} \\
220 | 2010 & \textcolor{darkorange}{\bf Scikit-Learn} \\
221 | 2011 & \textcolor{darkorange}{\bf AstroPy} \\
222 | 2012 & \textcolor{darkorange}{\bf Anaconda} \\
223 | 2014 & \textcolor{darkorange}{\bf Jupyter} \\
224 | 2015 & \textcolor{darkorange}{\bf Keras} \\
225 | \end{tabular}}
226 |
227 | \begin{uncoverenv}<2->
228 | \vspace{-3 cm}
229 | \hfill \fbox{\begin{minipage}{7 cm}
230 | \vspace{0.2 cm}
231 | \begin{center}
232 | \begin{minipage}{6.25 cm}
233 | The scientific Python ecosystem could have failed before it started if the Numeric/numarray split hadn't been resolved!
234 | \end{minipage}
235 | \vspace{0.2 cm}
236 | \end{center}
237 | \end{minipage}}
238 | \end{uncoverenv}
239 | \end{frame}
240 |
241 | \begin{frame}[fragile]{Numpy is high-level, array-at-a-time math}
242 | \vspace{0.5 cm}
243 | \hfill \includegraphics[height=1.5 cm]{numpy-logo.png}
244 |
245 | \scriptsize
246 | \vspace{-1.6 cm}
247 | \begin{minted}{python}
248 | >>> import numpy
249 | >>> a = numpy.arange(12)
250 | >>> a
251 | array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
252 | >>> a.shape = (3, 4)
253 | >>> a
254 | array([[ 0, 1, 2, 3],
255 | [ 4, 5, 6, 7],
256 | [ 8, 9, 10, 11]])
257 | >>> a.sum(axis=0)
258 | array([12, 15, 18, 21])
259 | >>> a.min(axis=1)
260 | array([0, 4, 8])
261 | >>> a**2
262 | array([[ 0, 1, 4, 9],
263 | [ 16, 25, 36, 49],
264 | [ 64, 81, 100, 121]])
265 | >>> numpy.sqrt(a)
266 | array([[0. , 1. , 1.41421356, 1.73205081],
267 | [2. , 2.23606798, 2.44948974, 2.64575131],
268 | [2.82842712, 3. , 3.16227766, 3.31662479]])
269 | \end{minted}
270 | \end{frame}
271 |
272 | \begin{frame}[fragile]{The Numpythonic mindset}
273 | \large
274 | \vspace{0.5 cm}
275 | Although you can write Python {\tt\normalsize for} loops over Numpy arrays, you don't reap the benefit unless you express your calculation in Numpy universal functions (ufuncs).
276 |
277 | \vspace{\baselineskip}
278 | \begin{columns}[t]
279 | \column{0.45\linewidth}
280 | \vspace{-\baselineskip}
281 | \scriptsize
282 | \begin{minted}{python}
283 | pz = numpy.empty(len(pt))
284 | for i in range(len(pt)):
285 | pz[i] = pt[i]*numpy.sinh(eta[i])
286 | \end{minted}
287 |
288 | \vspace{0.5 cm}
289 | $\mathcal{O}(N)$ Python bytecode instructions, type-checks, interpreter locks.
290 |
291 | \column{0.45\linewidth}
292 | \mbox{\hspace{-0.85 cm}\textcolor{darkblue}{vs}}
293 | \vspace{-\baselineskip}
294 | \scriptsize
295 | \begin{minted}{python}
296 | pz = pt * numpy.sinh(eta)
297 | \end{minted}
298 | \vspace{2\baselineskip}
299 |
300 | \vspace{0.5 cm}
301 | $\mathcal{O}(1)$ Python bytecode instructions, type-checks, interpreter locks.
302 |
303 | \vspace{0.1 cm}
304 | $\mathcal{O}(N)$ statically typed, probably vectorized native bytecode operations on contiguous memory.
305 | \end{columns}
306 |
307 | \large
308 | \vspace{0.75 cm}
309 | \uncover<2->{\textcolor{darkblue}{In other words, a \underline{S}ingle (Python) \underline{I}nstruction on \underline{M}ultiple \underline{D}ata.}}
310 |
311 | \vspace{0.1 cm}
312 | \uncover<2->{\textcolor{darkblue}{Conceptually similar to SIMD, the program flow of GPUs.}}
313 | \end{frame}
314 |
315 | \begin{frame}{This is not new}
316 | \Large
317 | \vspace{0.5 cm}
318 | \textcolor{darkorange}{\bf APL}, ``A Programming Language'' introduced the idea of single commands having sweeping effects across large arrays.
319 |
320 | \begin{center}
321 | \includegraphics[width=0.75\linewidth]{apl-timeline.pdf}
322 | \end{center}
323 |
324 | \normalsize
325 | \textcolor{gray}{All members of the APL family are intended for interactive data analysis.}
326 |
327 | \textcolor{gray}{Numpy, however, is a library in a general-purpose language, not a language in itself.}
328 | \end{frame}
329 |
330 | \begin{frame}{APL}
331 | \Large
332 | \vspace{0.5 cm}
333 | \hfill \mbox{\includegraphics[height=3 cm]{tshirt.jpg}\hspace{-0.25 cm}}
334 |
335 | \vspace{-2.75 cm}
336 | APL pioneered conciseness;
337 |
338 | discovered the mistake of being too concise.
339 |
340 | \large
341 | \vspace{1.25 cm}
342 | Conway's Game of Life was one line of code:
343 |
344 | \vspace{-0.3 cm}
345 | \[ \mbox{\tt life} \leftarrow \{\uparrow 1\quad\omega \vee.\wedge 3\quad 4=+/,^{^-} 1\quad0\quad1\circ.\Theta^{^-} 1\quad0\quad1\circ.\Phi\subset\omega\} \]
346 |
347 | \vspace{0.5 cm}
348 | ``Map'' was implicit, ``reduce'' was a slash, functions were symbols. For example:
349 |
350 | \begin{center}
351 | \renewcommand{\arraystretch}{1.2}
352 | \begin{tabular}{c c c}
353 | APL & \mbox{\hspace{0.5 cm}} & Numpy \\\hline
354 | $\displaystyle \mbox{\tt m} \leftarrow +/(3+\iota 4)$ & & {\tt\normalsize m = (numpy.arange(4) + 3).sum()}
355 | \end{tabular}
356 | \end{center}
357 | \end{frame}
358 |
359 | \begin{frame}{Numpythonic mindset: GPU and vectorization}
360 | \Large
361 | \vspace{0.5 cm}
362 | \begin{center}
363 | As an array abstraction, Numpy presents a high-level way \\ for users to think about vectorization.
364 |
365 | \vspace{1 cm}
366 | Vectorization is key to using GPUs and modern CPUs efficiently.
367 | \end{center}
368 | \end{frame}
369 |
370 | \begin{frame}{Numpythonic mindset: GPU and vectorization}
371 | \vspace{0.35 cm}
372 | \includegraphics[width=\linewidth]{cupy.png}
373 | \end{frame}
374 |
375 | \begin{frame}{Numpythonic mindset: GPU and vectorization}
376 | \vspace{0.35 cm}
377 | \includegraphics[width=\linewidth]{quantstack.png}
378 | \end{frame}
379 |
380 | \begin{frame}{Plan for the day}
381 | \large
382 | \begin{columns}
383 | \column{0.68\linewidth}
384 | \includegraphics[width=\linewidth]{../img/plan-for-the-day.png}
385 |
386 | \column{0.3\linewidth}
387 | Skills-based Numpy tutorial with a couple of exercises in the morning: how to think in SIMD.
388 |
389 | \vspace{1 cm}
390 | Overview of libraries in the afternoon: where to look for solutions to your problems.
391 | \end{columns}
392 | \end{frame}
393 |
394 | \end{document}
395 |
--------------------------------------------------------------------------------
/tex/3-ecosystem.tex:
--------------------------------------------------------------------------------
1 | \pdfminorversion=4
2 | \documentclass[aspectratio=169]{beamer}
3 |
4 | \mode
5 | {
6 | \usetheme{default}
7 | \usecolortheme{default}
8 | \usefonttheme{default}
9 | \setbeamertemplate{navigation symbols}{}
10 | \setbeamertemplate{caption}[numbered]
11 | \setbeamertemplate{footline}[frame number] % or "page number"
12 | \setbeamercolor{frametitle}{fg=white}
13 | \setbeamercolor{footline}{fg=black}
14 | }
15 |
16 | \usepackage[english]{babel}
17 | \usepackage[utf8x]{inputenc}
18 | \usepackage{tikz}
19 | \usepackage{courier}
20 | \usepackage{array}
21 | \usepackage{bold-extra}
22 | \usepackage{minted}
23 | \usepackage[thicklines]{cancel}
24 | \usepackage{fancyvrb}
25 | \usepackage{tabto}
26 |
27 | \xdefinecolor{dianablue}{rgb}{0.18,0.24,0.31}
28 | \xdefinecolor{darkblue}{rgb}{0.1,0.1,0.7}
29 | \xdefinecolor{darkgreen}{rgb}{0,0.5,0}
30 | \xdefinecolor{darkgrey}{rgb}{0.35,0.35,0.35}
31 | \xdefinecolor{darkorange}{rgb}{0.8,0.5,0}
32 | \xdefinecolor{darkred}{rgb}{0.7,0,0}
33 | \definecolor{darkgreen}{rgb}{0,0.6,0}
34 | \definecolor{mauve}{rgb}{0.58,0,0.82}
35 |
36 | \title[03-ecosystem]{The Numpy Ecosystem}
37 | \author{Jim Pivarski}
38 | \institute{Princeton University}
39 | \date{November 15, 2018}
40 |
41 | \usetikzlibrary{shapes.callouts}
42 |
43 | \begin{document}
44 |
45 | \logo{\pgfputat{\pgfxy(0.11, 7.4)}{\pgfbox[right,base]{\tikz{\filldraw[fill=dianablue, draw=none] (0 cm, 0 cm) rectangle (50 cm, 1 cm);}\mbox{\hspace{-8 cm}\includegraphics[height=1 cm]{princeton-logo-long.png}\mbox{\hspace{0.25 cm}}}}}}
46 |
47 | \begin{frame}
48 | \titlepage
49 | \end{frame}
50 |
51 | \logo{\pgfputat{\pgfxy(0.11, 7.4)}{\pgfbox[right,base]{\tikz{\filldraw[fill=dianablue, draw=none] (0 cm, 0 cm) rectangle (50 cm, 1 cm);}\mbox{\hspace{-8 cm}\includegraphics[height=1 cm]{princeton-logo.png}\mbox{\hspace{0.25 cm}}}}}}
52 |
53 | % Uncomment these lines for an automatically generated outline.
54 | %\begin{frame}{Outline}
55 | % \tableofcontents
56 | %\end{frame}
57 |
58 | % START START START START START START START START START START START START START
59 |
60 | \begin{frame}{This afternoon}
61 | \large
62 | \vspace{0.5 cm}
63 | This morning, we focused on just one library--- Numpy--- and worked on putting its slicing interfaces together to achieve things you'd normally need for loops for.
64 |
65 | \vspace{1 cm}
66 | \uncover<2->{\Large This afternoon, we switch to\ldots}
67 | \end{frame}
68 |
69 | \begin{frame}{Everything else}
70 | \vspace{0.16 cm}
71 | \begin{columns}
72 | \column{1.14\linewidth}
73 | \vspace{-4 cm}
74 | \includegraphics[width=\linewidth]{shells-5.png}
75 | \end{columns}
76 | \end{frame}
77 |
78 | \begin{frame}{Specific topics}
79 | \vspace{0.3 cm}
80 | \begin{block}{Statistics tools}
81 | \begin{itemize}
82 | \item {\bf Pandas:} a central component, becoming as important as Numpy itself.
83 | \end{itemize}
84 |
85 | \uncover<2->{Other than that, you're on your own. Statistical software is as varied as your domains.}
86 | \end{block}
87 |
88 | \vspace{0.4 cm}
89 | \begin{uncoverenv}<3->
90 | \begin{block}{Speeding up code}
91 | \begin{itemize}
92 | \item {\bf Dask:} parallel processing; \underline{M}ultiple \underline{I}nstructions on \underline{M}ultiple \underline{D}ata (MIMD).
93 | \item {\bf Numba:} compile a limited subset of Python, as-is, to C-like speeds.
94 | \item {\bf Cython:} compile any Python code, but you have to modify it to make it fast.
95 | \item {\bf CuPy:} run any Numpy operations on a GPU.
96 | \item {\bf Numba-GPU:} compile limited Python for the GPU.
97 | \item {\bf PyCUDA:} interface with raw CUDA through Numpy arrays.
98 | \item {\bf ctypes:} cast pointers as Numpy arrays and run code in shared library ({\tt\small *.so}) files.
99 | \end{itemize}
100 | \end{block}
101 | \end{uncoverenv}
102 | \end{frame}
103 |
104 | \begin{frame}{Speeding up code}
105 | \vspace{0.5 cm}
106 | Fast software is not like a fast runner, who has some superior intrinsic ability. \\ All run at the same rate, but some have more hurdles on the track than others.
107 |
108 | \vspace{0.25 cm}
109 | \begin{center}
110 | \includegraphics[width=0.7\linewidth]{hurdle9.jpg}
111 | \end{center}
112 | \end{frame}
113 |
114 | \begin{frame}{Hurdles, from smallest to largest}
115 | \large
116 | \begin{columns}[t]
117 | \column{0.5\linewidth}
118 | \begin{enumerate}\setlength{\itemsep}{0.35 cm}
119 | \item Unnecessary or repeated arithmetic
120 | \item Arithmetic in separate instructions that could be in the same instruction (vectorization)
121 | \item Transcendental functions or division
122 | \item Unnecessary or nonsequential memory access; cache swapping
123 | \end{enumerate}
124 |
125 | \column{0.5\linewidth}
126 | \begin{enumerate}\setlength{\itemsep}{0.35 cm}\setcounter{enumi}{4}
127 | \item Virtual machine indirection
128 | \item Boxing numbers as objects
129 | \item Type checking at runtime
130 | \item Unnecessary or nonsequential disk/network access
131 | \item Wacky stuff
132 | \end{enumerate}
133 | \end{columns}
134 |
135 | \vspace{0.5 cm}
136 | \uncover<2->{Compilation optimizes away most of \textcolor{darkblue}{\#1}, \textcolor{darkblue}{\#2}, and \textcolor{darkblue}{\#4}.}
137 |
138 | \vspace{0.2 cm}
139 | \uncover<3->{GPUs focus on \textcolor{darkblue}{\#2} and \textcolor{darkblue}{\#4} (by putting memory close to processing).}
140 |
141 | \vspace{0.2 cm}
142 | \uncover<4->{Python is guilty of \textcolor{darkblue}{\#4}, \textcolor{darkblue}{\#5}, \textcolor{darkblue}{\#6}, and \textcolor{darkblue}{\#7} (Java only \textcolor{darkblue}{\#4}, \textcolor{darkblue}{\#5}, and half of \textcolor{darkblue}{\#6}).}
143 | \end{frame}
144 |
145 | \begin{frame}{Optimization is about trade-offs}
146 | \large
147 | \vspace{0.5 cm}
148 | We're here because we like the productivity Python gives us in exchange for \\ \textcolor{darkblue}{\#4}, \textcolor{darkblue}{\#5}, \textcolor{darkblue}{\#6}, and \textcolor{darkblue}{\#7}.
149 |
150 | \vspace{0.5 cm}
151 | \begin{uncoverenv}<2->
152 | Ideally, we'd like a library that makes Python code fast without modification.
153 | \begin{itemize}
154 | \item I don't know how much speedup I'll get until I apply it; but that costs effort.
155 | \item If I've applied it and I don't like it, I want to easily remove it.
156 | \end{itemize}
157 | \end{uncoverenv}
158 |
159 | \vspace{0.5 cm}
160 | \uncover<3->{If we had such a thing, though, when would we ever {\it not} use it?}
161 |
162 | \vspace{0.2 cm}
163 | \uncover<4->{\textcolor{darkgray}{Example: PyPy, a reimplementation of Python with just-in-time (JIT) compilation. If it works, we'd only use that. It doesn't yet work with all extension modules, though.}}
164 | \end{frame}
165 |
166 | \begin{frame}{Horizontal and vertical scaling}
167 | \Large
168 | \vspace{0.5 cm}
169 | \begin{description}
170 | \item[\bf Horizontal:] split up task and distribute among parallel workers.
171 | \end{description}
172 |
173 | \large
174 | \uncover<2->{\textcolor{darkgray}{Oddly, this speedup is rarely proportional to the number of workers, even when work is independent, due to bookkeeping overhead and shipping data.}}
175 |
176 | \vspace{1 cm}
177 | \Large
178 | \begin{description}
179 | \item[\bf Vertical:] use hardware more effectively by removing hurdles.
180 | \end{description}
181 |
182 | \large
183 | \uncover<3->{\textcolor{darkgray}{Plateaus as you get close to optimum. More effort yields diminishing returns.}}
184 | \end{frame}
185 |
186 | \begin{frame}{Why cover Pandas in an afternoon about performance?}
187 | \large
188 | \begin{center}
189 | \includegraphics[width=0.5\linewidth]{pandas-logo.png}
190 | \end{center}
191 |
192 | \vspace{0.25 cm}
193 | Pandas is about simplifying data analysis, and it does so by translating the array programming style from Numpy to domain concepts: timestamps, categorical data, relational data, etc.
194 |
195 | \vspace{0.5 cm}
196 | \uncover<2->{It's like a spreadsheet that uses Numpy arrays instead of graphical cells.}
197 |
198 | \vspace{0.5 cm}
199 | \uncover<3->{It's not as fast as Numpy or the other accelerators I'll show, but it benefits from the conciseness of the same Numpythonic mindset.}
200 | \end{frame}
201 |
202 | \begin{frame}{}
203 | \LARGE
204 | \vspace{1.5 cm}
205 | \begin{center}
206 | So without further ado\ldots
207 | \end{center}
208 | \end{frame}
209 |
210 |
211 | %% \begin{frame}{Speeding up code}
212 | %% \large
213 | %% \vspace{0.5 cm}
214 | %% There is a mantra regarding performance tuning:
215 | %% \begin{center}
216 | %% \it Premature optimization is the root of all evil.
217 | %% \end{center}
218 |
219 | %% \normalsize
220 | %% \vspace{0.5 cm}
221 | %% \uncover<2->{\textcolor{darkblue}{It's mostly correct.} Machinations to increase speed or reduce memory can muddle the intent of the code and even be counterproductive. Your processor, operating system, compiler, and maybe framework are all trying to optimize it for you--- doing weird things can confuse these systems.}
222 |
223 | %% \vspace{0.5 cm}
224 | %% \uncover<3->{\textcolor{darkblue}{It's not always correct.} Sometimes, you have to think about performance up front to design a sensible workflow, and sometimes factors of 1000's are at stake.}
225 | %% \end{frame}
226 |
227 | %% \begin{frame}{Speeding up code}
228 | %% \large
229 | %% \vspace{0.35 cm}
230 | %% \begin{columns}
231 | %% \column{0.8\linewidth}
232 | %% \begin{center}
233 | %% An ideal code optimization library would be transparent: \\ same code, just faster.
234 |
235 | %% \vspace{0.25 cm}
236 | %% \uncover<2->{You never know how much it will help until you try it, so you want the barrier to entry to be as small as possible. You also want an easy way to back out if you find you don't want it.}
237 |
238 | %% \vspace{0.25 cm}
239 | %% \uncover<3->{Ideally, it would also be general: apply to all of your code \\ so that you don't have to pick out hotspots.}
240 |
241 | %% \vspace{0.25 cm}
242 | %% \uncover<4->{But if we had a completely general, transparent optimizer, \\ we would just use that exclusively.}
243 |
244 | %% \vspace{0.25 cm}
245 | %% \uncover<5->{\textcolor{darkblue}{PyPy} aims to be fully general and transparent, but it doesn't support all the compiled modules built for standard Python.}
246 | %% \end{center}
247 | %% \end{columns}
248 | %% \end{frame}
249 |
250 | \end{document}
251 |
--------------------------------------------------------------------------------
/tex/apl-timeline.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/apl-timeline.pdf
--------------------------------------------------------------------------------
/tex/caffe2-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/caffe2-logo.png
--------------------------------------------------------------------------------
/tex/cesium-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/cesium-logo.png
--------------------------------------------------------------------------------
/tex/chainer-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/chainer-logo.png
--------------------------------------------------------------------------------
/tex/cntk-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/cntk-logo.png
--------------------------------------------------------------------------------
/tex/commute-by-plane.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/commute-by-plane.png
--------------------------------------------------------------------------------
/tex/cupy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/cupy.png
--------------------------------------------------------------------------------
/tex/gluon-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/gluon-logo.png
--------------------------------------------------------------------------------
/tex/hurdle9.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/hurdle9.jpg
--------------------------------------------------------------------------------
/tex/keras-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/keras-logo.png
--------------------------------------------------------------------------------
/tex/lasagne-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/lasagne-logo.png
--------------------------------------------------------------------------------
/tex/lsst-notebook.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/lsst-notebook.png
--------------------------------------------------------------------------------
/tex/mentions-of-programming-languages.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/mentions-of-programming-languages.png
--------------------------------------------------------------------------------
/tex/numpy-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/numpy-logo.png
--------------------------------------------------------------------------------
/tex/onnx-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/onnx-logo.png
--------------------------------------------------------------------------------
/tex/pandas-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/pandas-logo.png
--------------------------------------------------------------------------------
/tex/princeton-logo-long.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/princeton-logo-long.png
--------------------------------------------------------------------------------
/tex/princeton-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/princeton-logo.png
--------------------------------------------------------------------------------
/tex/pyminuit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/pyminuit.png
--------------------------------------------------------------------------------
/tex/pypl-popularity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/pypl-popularity.png
--------------------------------------------------------------------------------
/tex/python-r-cpp-googletrends-data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/python-r-cpp-googletrends-data.png
--------------------------------------------------------------------------------
/tex/python-r-cpp-googletrends-datascience.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/python-r-cpp-googletrends-datascience.png
--------------------------------------------------------------------------------
/tex/python-r-cpp-googletrends-dataset.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/python-r-cpp-googletrends-dataset.png
--------------------------------------------------------------------------------
/tex/python-r-cpp-googletrends-machinelearning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/python-r-cpp-googletrends-machinelearning.png
--------------------------------------------------------------------------------
/tex/pytorch-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/pytorch-logo.png
--------------------------------------------------------------------------------
/tex/quantstack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/quantstack.png
--------------------------------------------------------------------------------
/tex/root-spark-pandas-google-trends.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/root-spark-pandas-google-trends.png
--------------------------------------------------------------------------------
/tex/shells-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/shells-1.png
--------------------------------------------------------------------------------
/tex/shells-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/shells-2.png
--------------------------------------------------------------------------------
/tex/shells-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/shells-3.png
--------------------------------------------------------------------------------
/tex/shells-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/shells-4.png
--------------------------------------------------------------------------------
/tex/shells-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/shells-5.png
--------------------------------------------------------------------------------
/tex/sklearn-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/sklearn-logo.png
--------------------------------------------------------------------------------
/tex/tensorflow-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/tensorflow-logo.png
--------------------------------------------------------------------------------
/tex/thesis-code-flow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/thesis-code-flow.pdf
--------------------------------------------------------------------------------
/tex/tshirt.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/tshirt.jpg
--------------------------------------------------------------------------------
/tex/unreasonable-effectiveness.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/unreasonable-effectiveness.png
--------------------------------------------------------------------------------
/tex/xgboost-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jpivarski-talks/python-numpy-mini-course/e1271f3092dac18d63cb858fbca4226893043604/tex/xgboost-logo.png
--------------------------------------------------------------------------------