├── .gitignore
├── .gitmodules
├── 01-intro
    ├── 0-slides.pdf
    ├── 1-intro-via-image-processing.ipynb
    ├── 2-1-Python-Types.ipynb
    ├── 2-2-Python-Names and Values.ipynb
    ├── 2-3-Python-Indexing.ipynb
    ├── 2-4-Python-Control flow.ipynb
    ├── 2-5-Python-Functions.ipynb
    ├── 2-6-Python-Objects.ipynb
    ├── 2-7-Python-A few more things.ipynb
    ├── 3-1-numpy-Introduction.ipynb
    ├── 3-2-numpy-Indexing.ipynb
    ├── 3-3-numpy-Broadcasting.ipynb
    ├── 3-4-numpy-Tools.ipynb
    ├── 3-5-numpy-Data Storage.ipynb
    ├── 4-practice-ordering-tree.ipynb
    ├── README.rst
    └── cat.jpeg
├── 02-languages
    ├── 01-expression-trees.ipynb
    ├── 02-traversing-trees.ipynb
    ├── 03-defining-custom-node-types.ipynb
    ├── 04-accessing-python-syntax-trees.ipynb
    ├── 05-common-operations.ipynb
    ├── 06-interoperating-with-sympy.ipynb
    ├── 07-internal-representations.ipynb
    ├── 08-practice.ipynb
    ├── README.rst
    └── gvmagic.py
├── 03-opencl
    ├── 0-slides.pdf
    ├── 1-1-hello-pyopencl.ipynb
    ├── 1-2-pyopencl-arrays.ipynb
    ├── 1-3-exercise.ipynb
    ├── 1-4-ipython-magic.ipynb
    ├── 2-1-elementwise.ipynb
    ├── 2-2-reduction.ipynb
    ├── 2-2a-monte-carlo.ipynb
    ├── 2-3-scan.ipynb
    ├── 3-practice-expression-kernel.ipynb
    ├── 3-practice-hermite-monte-carlo.ipynb
    ├── 3-practice-thinking-with-scans.ipynb
    └── README.rst
├── 04-case-studies
    ├── 01-indexing-and-broadcasting.ipynb
    ├── 02-einsum.ipynb
    ├── 03-ufl.ipynb
    └── README.rst
├── 05-generating-c
    ├── 01-substitution.ipynb
    ├── 02-templating.ipynb
    ├── 03-asts.ipynb
    ├── 04-practice.ipynb
    └── README.rst
├── 06-loopy
    ├── 0-slides.pdf
    ├── 01-rank-one.ipynb
    ├── 02-data-layout.ipynb
    ├── 03-reduction.ipynb
    ├── 04-intermediate-results.ipynb
    ├── 05-pde-to-code.ipynb
    ├── 05a-image-processing-language.ipynb
    ├── 06-operation-counting.ipynb
    ├── 07-practice-einsum.ipynb
    ├── 07-practice-image-processing.ipynb
    ├── 07-practice-matrix-products.ipynb
    ├── 08-monte-carlo.ipynb
    ├── README.rst
    └── cat.jpeg
├── LICENSE
├── README.rst
├── assemble.sh
├── aux
    ├── index.md
    ├── ipython_config.py
    ├── material-email.txt
    ├── pystuff-requirements.txt
    ├── sudoers
    ├── time-planning.ods
    ├── tut-pack.run
    ├── upload.sh
    ├── video-script.txt
    └── vm-requirements.txt
├── prepare-all-notebooks.sh
└── slides
    ├── .gitignore
    ├── 01-intro.tex
    ├── 03-opencl.tex
    ├── 06-loopy.tex
    ├── beamercolorthemeuiuc.sty
    ├── code
        ├── loopy-variants.py
        ├── transpose.cl
        └── transpose.cu
    ├── kloeckislides.sty
    ├── latexmkrc
    ├── media
        ├── amd-logo.pdf
        ├── apple-logo.pdf
        ├── c870.png
        ├── cl-programs-and-kernels-v2.tex
        ├── context.jpeg
        ├── cpu.jpeg
        ├── general-dep-graph.tex
        ├── glass-dollar.jpeg
        ├── intel-logo.pdf
        ├── loopy-crop.pdf
        ├── memory.png
        ├── nvidia.pdf
        ├── onion.jpeg
        ├── opencl-11.pdf
        ├── opencl-logo.png
        ├── opencl-overview.pdf
        ├── parallel-field.jpeg
        ├── python-logo-no-shadow.png
        ├── question-mark.png
        ├── queue.jpeg
        ├── radar.png
        └── tree.jpeg
    ├── settings.tex
    ├── slides
        ├── barrier.tex
        ├── cl-buffer-objects-v4.tex
        ├── cl-command-queue.tex
        ├── cl-command-queues.tex
        ├── cl-compute-dag-v2.tex
        ├── cl-computing-as-a-service.tex
        ├── cl-context-v2.tex
        ├── cl-device.tex
        ├── cl-platform.tex
        ├── cl-prog-model-hardware.tex
        ├── cuda-cl-dictionary.tex
        ├── gpu-cl-execution-model.tex
        ├── memory-fence.tex
        ├── what-is-opencl-v2.tex
        └── why-gpu-scripting-v3.tex
    └── update-slides.sh


/.gitignore:
--------------------------------------------------------------------------------
1 | .*.swp
2 | .ipynb_checkpoints
3 | *~
4 | cleared
5 | upload
6 | dist
7 | __pycache__
8 | *dist.zip
9 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "ipython-demo-tools"]
2 | 	path = ipython-demo-tools
3 | 	url = https://github.com/inducer/ipython-demo-tools.git
4 | 


--------------------------------------------------------------------------------
/01-intro/0-slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/01-intro/0-slides.pdf


--------------------------------------------------------------------------------
/01-intro/2-1-Python-Types.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Python Introduction: Types"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Let's evaluate some simple expressions."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": false
 22 |    },
 23 |    "outputs": [
 24 |     {
 25 |      "data": {
 26 |       "text/plain": [
 27 |        "6"
 28 |       ]
 29 |      },
 30 |      "execution_count": 1,
 31 |      "metadata": {},
 32 |      "output_type": "execute_result"
 33 |     }
 34 |    ],
 35 |    "source": [
 36 |     "#clear\n",
 37 |     "3*2"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 2,
 43 |    "metadata": {
 44 |     "collapsed": false
 45 |    },
 46 |    "outputs": [
 47 |     {
 48 |      "data": {
 49 |       "text/plain": [
 50 |        "11"
 51 |       ]
 52 |      },
 53 |      "execution_count": 2,
 54 |      "metadata": {},
 55 |      "output_type": "execute_result"
 56 |     }
 57 |    ],
 58 |    "source": [
 59 |     "#clear\n",
 60 |     "5+3*2"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "You can use `type()` to find the *type* of an expression."
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 3,
 73 |    "metadata": {
 74 |     "collapsed": false
 75 |    },
 76 |    "outputs": [
 77 |     {
 78 |      "data": {
 79 |       "text/plain": [
 80 |        "int"
 81 |       ]
 82 |      },
 83 |      "execution_count": 3,
 84 |      "metadata": {},
 85 |      "output_type": "execute_result"
 86 |     }
 87 |    ],
 88 |    "source": [
 89 |     "#clear\n",
 90 |     "type(5+3*2)"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "markdown",
 95 |    "metadata": {},
 96 |    "source": [
 97 |     "Now add decimal points."
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 4,
103 |    "metadata": {
104 |     "collapsed": false
105 |    },
106 |    "outputs": [
107 |     {
108 |      "data": {
109 |       "text/plain": [
110 |        "12.0"
111 |       ]
112 |      },
113 |      "execution_count": 4,
114 |      "metadata": {},
115 |      "output_type": "execute_result"
116 |     }
117 |    ],
118 |    "source": [
119 |     "#clear\n",
120 |     "5+3.5*2"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 5,
126 |    "metadata": {
127 |     "collapsed": false
128 |    },
129 |    "outputs": [
130 |     {
131 |      "data": {
132 |       "text/plain": [
133 |        "float"
134 |       ]
135 |      },
136 |      "execution_count": 5,
137 |      "metadata": {},
138 |      "output_type": "execute_result"
139 |     }
140 |    ],
141 |    "source": [
142 |     "#clear\n",
143 |     "type(5+3.0*2)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "markdown",
148 |    "metadata": {},
149 |    "source": [
150 |     "Strings are written with single (``'``) or double (`\"`) quotes."
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 6,
156 |    "metadata": {
157 |     "collapsed": false
158 |    },
159 |    "outputs": [
160 |     {
161 |      "data": {
162 |       "text/plain": [
163 |        "'hello'"
164 |       ]
165 |      },
166 |      "execution_count": 6,
167 |      "metadata": {},
168 |      "output_type": "execute_result"
169 |     }
170 |    ],
171 |    "source": [
172 |     "#clear\n",
173 |     "\"hello\""
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "markdown",
178 |    "metadata": {},
179 |    "source": [
180 |     "Multiplication and addition work on strings, too."
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 2,
186 |    "metadata": {
187 |     "collapsed": false
188 |    },
189 |    "outputs": [
190 |     {
191 |      "data": {
192 |       "text/plain": [
193 |        "'hello hello hello sc15'"
194 |       ]
195 |      },
196 |      "execution_count": 2,
197 |      "metadata": {},
198 |      "output_type": "execute_result"
199 |     }
200 |    ],
201 |    "source": [
202 |     "#clear\n",
203 |     "3 * 'hello ' + \"sc15\""
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "markdown",
208 |    "metadata": {},
209 |    "source": [
210 |     "Lists are written in brackets (`[]`) with commas (`,`)."
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": 8,
216 |    "metadata": {
217 |     "collapsed": false
218 |    },
219 |    "outputs": [
220 |     {
221 |      "data": {
222 |       "text/plain": [
223 |        "[5, 3, 7]"
224 |       ]
225 |      },
226 |      "execution_count": 8,
227 |      "metadata": {},
228 |      "output_type": "execute_result"
229 |     }
230 |    ],
231 |    "source": [
232 |     "#clear\n",
233 |     "[5, 3, 7]"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "markdown",
238 |    "metadata": {},
239 |    "source": [
240 |     "List entries don't have to have the same type."
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": 9,
246 |    "metadata": {
247 |     "collapsed": false
248 |    },
249 |    "outputs": [
250 |     {
251 |      "data": {
252 |       "text/plain": [
253 |        "['hi there', 15, [1, 2, 3]]"
254 |       ]
255 |      },
256 |      "execution_count": 9,
257 |      "metadata": {},
258 |      "output_type": "execute_result"
259 |     }
260 |    ],
261 |    "source": [
262 |     "[\"hi there\", 15, [1,2,3]]"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "markdown",
267 |    "metadata": {},
268 |    "source": [
269 |     "\"Multiplication\" and \"addition\" work on lists, too."
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": 10,
275 |    "metadata": {
276 |     "collapsed": false
277 |    },
278 |    "outputs": [
279 |     {
280 |      "data": {
281 |       "text/plain": [
282 |        "[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 5, 5, 5]"
283 |       ]
284 |      },
285 |      "execution_count": 10,
286 |      "metadata": {},
287 |      "output_type": "execute_result"
288 |     }
289 |    ],
290 |    "source": [
291 |     "#clear\n",
292 |     "[1,2,3] * 4 + [5, 5, 5]"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "markdown",
297 |    "metadata": {},
298 |    "source": [
299 |     "Hmmmmmm. Was that what you expected?"
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "code",
304 |    "execution_count": 1,
305 |    "metadata": {
306 |     "collapsed": false
307 |    },
308 |    "outputs": [
309 |     {
310 |      "data": {
311 |       "text/plain": [
312 |        "array([ 9, 13, 17])"
313 |       ]
314 |      },
315 |      "execution_count": 1,
316 |      "metadata": {},
317 |      "output_type": "execute_result"
318 |     }
319 |    ],
320 |    "source": [
321 |     "#clear\n",
322 |     "import numpy as np\n",
323 |     "\n",
324 |     "np.array([1,2,3]) * 4 + np.array([5,5,5])"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "code",
329 |    "execution_count": null,
330 |    "metadata": {
331 |     "collapsed": false
332 |    },
333 |    "outputs": [],
334 |    "source": []
335 |   }
336 |  ],
337 |  "metadata": {},
338 |  "nbformat": 4,
339 |  "nbformat_minor": 0
340 | }


--------------------------------------------------------------------------------
/01-intro/2-4-Python-Control flow.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Python Introduction: Control Flow"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "`for` loops in Python always iterate over something list-like:"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {
 21 |     "collapsed": false
 22 |    },
 23 |    "outputs": [
 24 |     {
 25 |      "name": "stdout",
 26 |      "output_type": "stream",
 27 |      "text": [
 28 |       "0\n",
 29 |       "1\n",
 30 |       "2\n",
 31 |       "3\n",
 32 |       "4\n",
 33 |       "5\n",
 34 |       "6\n",
 35 |       "7\n",
 36 |       "8\n",
 37 |       "9\n"
 38 |      ]
 39 |     }
 40 |    ],
 41 |    "source": [
 42 |     "#clear\n",
 43 |     "for i in range(10):\n",
 44 |     "\n",
 45 |     "    print(i)"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "**Note** that Python does block-structuring by leading spaces.\n",
 53 |     "\n",
 54 |     "Also note the trailing \"`:`\"."
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "---\n",
 62 |     "`if`/`else` are as you would expect them to be:"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 3,
 68 |    "metadata": {
 69 |     "collapsed": false
 70 |    },
 71 |    "outputs": [
 72 |     {
 73 |      "name": "stdout",
 74 |      "output_type": "stream",
 75 |      "text": [
 76 |       "0 is divisible by 3\n",
 77 |       "1 is not divisible by 3\n",
 78 |       "2 is not divisible by 3\n",
 79 |       "3 is divisible by 3\n",
 80 |       "4 is not divisible by 3\n",
 81 |       "5 is not divisible by 3\n",
 82 |       "6 is divisible by 3\n",
 83 |       "7 is not divisible by 3\n",
 84 |       "8 is not divisible by 3\n",
 85 |       "9 is divisible by 3\n"
 86 |      ]
 87 |     }
 88 |    ],
 89 |    "source": [
 90 |     "for i in range(10):\n",
 91 |     "    if i % 3 == 0:\n",
 92 |     "        print(\"{0} is divisible by 3\".format(i))\n",
 93 |     "    else:\n",
 94 |     "        print(\"{0} is not divisible by 3\".format(i))"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "markdown",
 99 |    "metadata": {},
100 |    "source": [
101 |     "`while` loops exist too:"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 5,
107 |    "metadata": {
108 |     "collapsed": false
109 |    },
110 |    "outputs": [
111 |     {
112 |      "name": "stdout",
113 |      "output_type": "stream",
114 |      "text": [
115 |       "SOLUTION: 15\n"
116 |      ]
117 |     }
118 |    ],
119 |    "source": [
120 |     "i = 0\n",
121 |     "while True:\n",
122 |     "    i += 1\n",
123 |     "    if i**3 + i**2 + i + 1 == 3616:\n",
124 |     "        break\n",
125 |     "\n",
126 |     "print(\"SOLUTION:\", i)"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "markdown",
131 |    "metadata": {},
132 |    "source": [
133 |     "----\n",
134 |     "Building lists by hand can be a little long. For example, build a list of the squares of integers below 50 divisible by 7:"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 6,
140 |    "metadata": {
141 |     "collapsed": false
142 |    },
143 |    "outputs": [],
144 |    "source": [
145 |     "#clear\n",
146 |     "mylist = []\n",
147 |     "\n",
148 |     "for i in range(50):\n",
149 |     "\n",
150 |     "    if i % 7 == 0:\n",
151 |     "\n",
152 |     "        mylist.append(i**2)"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 7,
158 |    "metadata": {
159 |     "collapsed": false
160 |    },
161 |    "outputs": [
162 |     {
163 |      "data": {
164 |       "text/plain": [
165 |        "[0, 49, 196, 441, 784, 1225, 1764, 2401]"
166 |       ]
167 |      },
168 |      "execution_count": 7,
169 |      "metadata": {},
170 |      "output_type": "execute_result"
171 |     }
172 |    ],
173 |    "source": [
174 |     "mylist"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "markdown",
179 |    "metadata": {},
180 |    "source": [
181 |     "Python has something called a *list comprehension*:"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": 8,
187 |    "metadata": {
188 |     "collapsed": false
189 |    },
190 |    "outputs": [],
191 |    "source": [
192 |     "#clear\n",
193 |     "mylist = [i**2 for i in range(50) if i % 7 == 0]"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "code",
198 |    "execution_count": 9,
199 |    "metadata": {
200 |     "collapsed": false
201 |    },
202 |    "outputs": [
203 |     {
204 |      "data": {
205 |       "text/plain": [
206 |        "[0, 49, 196, 441, 784, 1225, 1764, 2401]"
207 |       ]
208 |      },
209 |      "execution_count": 9,
210 |      "metadata": {},
211 |      "output_type": "execute_result"
212 |     }
213 |    ],
214 |    "source": [
215 |     "mylist"
216 |    ]
217 |   }
218 |  ],
219 |  "metadata": {},
220 |  "nbformat": 4,
221 |  "nbformat_minor": 0
222 | }


--------------------------------------------------------------------------------
/01-intro/2-5-Python-Functions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Python Introduction: Functions"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Functions help extract out common code blocks.\n",
 15 |     "\n",
 16 |     "Let's define a function `print_greeting()`."
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 1,
 22 |    "metadata": {
 23 |     "collapsed": false
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "def print_greeting():\n",
 28 |     "    print(\"Hi there, how are you?\")\n",
 29 |     "    print(\"Long time no see.\")"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "And call it:"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 2,
 42 |    "metadata": {
 43 |     "collapsed": false
 44 |    },
 45 |    "outputs": [
 46 |     {
 47 |      "name": "stdout",
 48 |      "output_type": "stream",
 49 |      "text": [
 50 |       "Hi there, how are you?\n",
 51 |       "Long time no see.\n"
 52 |      ]
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "print_greeting()"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "That's a bit impersonal."
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 3,
 69 |    "metadata": {
 70 |     "collapsed": false
 71 |    },
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "#clear\n",
 75 |     "def print_greeting(name):\n",
 76 |     "\n",
 77 |     "    print(\"Hi there, {0}, how are you?\".format(name))\n",
 78 |     "\n",
 79 |     "    print(\"Long time no see.\")"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 4,
 85 |    "metadata": {
 86 |     "collapsed": false
 87 |    },
 88 |    "outputs": [
 89 |     {
 90 |      "name": "stdout",
 91 |      "output_type": "stream",
 92 |      "text": [
 93 |       "Hi there, Andreas, how are you?\n",
 94 |       "Long time no see.\n"
 95 |      ]
 96 |     }
 97 |    ],
 98 |    "source": [
 99 |     "print_greeting(\"Andreas\")"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "But we might not know their name.\n",
107 |     "\n",
108 |     "(And we just changed the interface of `print_greeting`!)"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 6,
114 |    "metadata": {
115 |     "collapsed": false
116 |    },
117 |    "outputs": [],
118 |    "source": [
119 |     "#clear\n",
120 |     "def print_greeting(name=\"my friend\"):\n",
121 |     "\n",
122 |     "    print(\"Hi there, {0}, how are you?\".format(name))\n",
123 |     "\n",
124 |     "    print(\"Long time no see.\")"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 7,
130 |    "metadata": {
131 |     "collapsed": false
132 |    },
133 |    "outputs": [
134 |     {
135 |      "name": "stdout",
136 |      "output_type": "stream",
137 |      "text": [
138 |       "Hi there, Andreas, how are you?\n",
139 |       "Long time no see.\n",
140 |       "Hi there, my friend, how are you?\n",
141 |       "Long time no see.\n"
142 |      ]
143 |     }
144 |    ],
145 |    "source": [
146 |     "print_greeting(\"Andreas\")\n",
147 |     "print_greeting()"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "metadata": {},
153 |    "source": [
154 |     "----"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "markdown",
159 |    "metadata": {},
160 |    "source": [
161 |     "Function parameters work like variables.\n",
162 |     "\n",
163 |     "So what does this do?"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 8,
169 |    "metadata": {
170 |     "collapsed": false
171 |    },
172 |    "outputs": [
173 |     {
174 |      "name": "stdout",
175 |      "output_type": "stream",
176 |      "text": [
177 |       "[1, 2, 3, 5]\n"
178 |      ]
179 |     }
180 |    ],
181 |    "source": [
182 |     "def my_func(my_list):\n",
183 |     "    my_list.append(5)\n",
184 |     "    \n",
185 |     "l = [1,2,3]\n",
186 |     "my_func(l)\n",
187 |     "print(l)"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "metadata": {},
193 |    "source": [
194 |     "Can be very surprising!\n",
195 |     "\n",
196 |     "Define a better function `my_func_2`:"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": 9,
202 |    "metadata": {
203 |     "collapsed": false
204 |    },
205 |    "outputs": [],
206 |    "source": [
207 |     "#clear\n",
208 |     "def my_func_2(my_list):\n",
209 |     "\n",
210 |     "    return my_list + [5]"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": 10,
216 |    "metadata": {
217 |     "collapsed": false
218 |    },
219 |    "outputs": [
220 |     {
221 |      "name": "stdout",
222 |      "output_type": "stream",
223 |      "text": [
224 |       "[1, 2, 3]\n",
225 |       "[1, 2, 3, 5]\n"
226 |      ]
227 |     }
228 |    ],
229 |    "source": [
230 |     "l = [1,2,3]\n",
231 |     "l2 = my_func_2(l)\n",
232 |     "print(l)\n",
233 |     "print(l2)"
234 |    ]
235 |   }
236 |  ],
237 |  "metadata": {},
238 |  "nbformat": 4,
239 |  "nbformat_minor": 0
240 | }


--------------------------------------------------------------------------------
/01-intro/2-6-Python-Objects.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Objects in Python"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Everything in Python is an 'object'.\n",
 15 |     "\n",
 16 |     "Defining custom types of objects is easy:"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 1,
 22 |    "metadata": {
 23 |     "collapsed": false
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "\n",
 28 |     "class Employee:\n",
 29 |     "    def __init__(self, name, salary):\n",
 30 |     "        self.name = name\n",
 31 |     "        self.salary = salary\n",
 32 |     "        \n",
 33 |     "    def fire(self):\n",
 34 |     "        self.salary = 0"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "* Functions within the class (type) definition are called 'methods'.\n",
 42 |     "* They take an explicit `self` parameter, through which the object is passed.\n",
 43 |     "* `__init__` is the 'constructor'\n",
 44 |     "    * Objects are created by 'calling' the type like a function.\n",
 45 |     "    * Arguments in this call are passed to the constructor."
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 11,
 51 |    "metadata": {
 52 |     "collapsed": false
 53 |    },
 54 |    "outputs": [
 55 |     {
 56 |      "data": {
 57 |       "text/plain": [
 58 |        "'Joe'"
 59 |       ]
 60 |      },
 61 |      "execution_count": 11,
 62 |      "metadata": {},
 63 |      "output_type": "execute_result"
 64 |     }
 65 |    ],
 66 |    "source": [
 67 |     "joe = Employee(\"Joe\", 100000)\n",
 68 |     "joe.name"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 7,
 74 |    "metadata": {
 75 |     "collapsed": false
 76 |    },
 77 |    "outputs": [
 78 |     {
 79 |      "data": {
 80 |       "text/plain": [
 81 |        "100000"
 82 |       ]
 83 |      },
 84 |      "execution_count": 7,
 85 |      "metadata": {},
 86 |      "output_type": "execute_result"
 87 |     }
 88 |    ],
 89 |    "source": [
 90 |     "#clear\n",
 91 |     "joe.salary"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "markdown",
 96 |    "metadata": {},
 97 |    "source": [
 98 |     "Let's fire Joe."
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 8,
104 |    "metadata": {
105 |     "collapsed": false
106 |    },
107 |    "outputs": [],
108 |    "source": [
109 |     "#clear\n",
110 |     "joe.fire()"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 9,
116 |    "metadata": {
117 |     "collapsed": false
118 |    },
119 |    "outputs": [
120 |     {
121 |      "data": {
122 |       "text/plain": [
123 |        "0"
124 |       ]
125 |      },
126 |      "execution_count": 9,
127 |      "metadata": {},
128 |      "output_type": "execute_result"
129 |     }
130 |    ],
131 |    "source": [
132 |     "#clear\n",
133 |     "joe.salary"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "markdown",
138 |    "metadata": {},
139 |    "source": [
140 |     "## Inheritance"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "markdown",
145 |    "metadata": {},
146 |    "source": [
147 |     "Types can be based on other types by inheritance:"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 10,
153 |    "metadata": {
154 |     "collapsed": false
155 |    },
156 |    "outputs": [],
157 |    "source": [
158 |     "class Boss(Employee):\n",
159 |     "    def __init__(self, name, salary, supervises):\n",
160 |     "        super(Boss, self).__init__(name, salary)\n",
161 |     "        \n",
162 |     "        self.supervises = supervises\n",
163 |     "        \n",
164 |     "    def fire(self):\n",
165 |     "        for s in self.supervises:\n",
166 |     "            s.fire()\n",
167 |     "            \n",
168 |     "        super(Boss, self).fire()"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 12,
174 |    "metadata": {
175 |     "collapsed": false
176 |    },
177 |    "outputs": [
178 |     {
179 |      "data": {
180 |       "text/plain": [
181 |        "150000"
182 |       ]
183 |      },
184 |      "execution_count": 12,
185 |      "metadata": {},
186 |      "output_type": "execute_result"
187 |     }
188 |    ],
189 |    "source": [
190 |     "joe = Employee(\"Joe\", 100000)\n",
191 |     "jack = Employee(\"Jack\", 100000)\n",
192 |     "mike = Boss(\"Mike\", 150000, [joe, jack])\n",
193 |     "\n",
194 |     "mike.salary"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 13,
200 |    "metadata": {
201 |     "collapsed": false
202 |    },
203 |    "outputs": [
204 |     {
205 |      "data": {
206 |       "text/plain": [
207 |        "100000"
208 |       ]
209 |      },
210 |      "execution_count": 13,
211 |      "metadata": {},
212 |      "output_type": "execute_result"
213 |     }
214 |    ],
215 |    "source": [
216 |     "#clear\n",
217 |     "joe.salary"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "markdown",
222 |    "metadata": {},
223 |    "source": [
224 |     "Now what happens to Joe's salary if Mike gets fired?"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "code",
229 |    "execution_count": 14,
230 |    "metadata": {
231 |     "collapsed": false
232 |    },
233 |    "outputs": [],
234 |    "source": [
235 |     "#clear\n",
236 |     "mike.fire()"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": 15,
242 |    "metadata": {
243 |     "collapsed": false
244 |    },
245 |    "outputs": [
246 |     {
247 |      "data": {
248 |       "text/plain": [
249 |        "0"
250 |       ]
251 |      },
252 |      "execution_count": 15,
253 |      "metadata": {},
254 |      "output_type": "execute_result"
255 |     }
256 |    ],
257 |    "source": [
258 |     "#clear\n",
259 |     "joe.salary"
260 |    ]
261 |   }
262 |  ],
263 |  "metadata": {},
264 |  "nbformat": 4,
265 |  "nbformat_minor": 0
266 | }


--------------------------------------------------------------------------------
/01-intro/2-7-Python-A few more things.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Python Introduction: A few more things"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Getting help:\n",
 15 |     "\n",
 16 |     "1) Use TAB in IPython"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 1,
 22 |    "metadata": {
 23 |     "collapsed": false
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "a = [1,2,3]"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 1,
 33 |    "metadata": {
 34 |     "collapsed": false
 35 |    },
 36 |    "outputs": [],
 37 |    "source": []
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "2) Use `pydoc3` on the command line."
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "3) Online at <http://docs.python.org/>"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "----"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "metadata": {},
 63 |    "source": [
 64 |     "**A few things to look up in a quiet moment**"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "String formatting"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 2,
 77 |    "metadata": {
 78 |     "collapsed": false
 79 |    },
 80 |    "outputs": [
 81 |     {
 82 |      "data": {
 83 |       "text/plain": [
 84 |        "'My name is Andreas and I like hiking'"
 85 |       ]
 86 |      },
 87 |      "execution_count": 2,
 88 |      "metadata": {},
 89 |      "output_type": "execute_result"
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "\"My name is {0} and I like {1}\".format(\"Andreas\", \"hiking\")"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "---\n",
101 |     "Dictionaries"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 3,
107 |    "metadata": {
108 |     "collapsed": false
109 |    },
110 |    "outputs": [
111 |     {
112 |      "data": {
113 |       "text/plain": [
114 |        "5000"
115 |       ]
116 |      },
117 |      "execution_count": 3,
118 |      "metadata": {},
119 |      "output_type": "execute_result"
120 |     }
121 |    ],
122 |    "source": [
123 |     "prices = {\"Tesla K40\": 5000, \"GTX Titan\":1400}\n",
124 |     "prices[\"Tesla K40\"]"
125 |    ]
126 |   }
127 |  ],
128 |  "metadata": {},
129 |  "nbformat": 4,
130 |  "nbformat_minor": 0
131 | }


--------------------------------------------------------------------------------
/01-intro/3-3-numpy-Broadcasting.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# numpy: Broadcasting"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": false
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import numpy as np"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 2,
 24 |    "metadata": {
 25 |     "collapsed": false
 26 |    },
 27 |    "outputs": [
 28 |     {
 29 |      "name": "stdout",
 30 |      "output_type": "stream",
 31 |      "text": [
 32 |       "(3, 3)\n",
 33 |       "[[0 1 2]\n",
 34 |       " [3 4 5]\n",
 35 |       " [6 7 8]]\n",
 36 |       "(3, 3)\n",
 37 |       "[[ 4  5  6]\n",
 38 |       " [ 7  8  9]\n",
 39 |       " [10 11 12]]\n"
 40 |      ]
 41 |     }
 42 |    ],
 43 |    "source": [
 44 |     "a = np.arange(9).reshape(3, 3)\n",
 45 |     "print(a.shape)\n",
 46 |     "print(a)\n",
 47 |     "b = np.arange(4, 4+9).reshape(3, 3)\n",
 48 |     "print(b.shape)\n",
 49 |     "print(b)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 3,
 55 |    "metadata": {
 56 |     "collapsed": false
 57 |    },
 58 |    "outputs": [
 59 |     {
 60 |      "data": {
 61 |       "text/plain": [
 62 |        "array([[ 4,  6,  8],\n",
 63 |        "       [10, 12, 14],\n",
 64 |        "       [16, 18, 20]])"
 65 |       ]
 66 |      },
 67 |      "execution_count": 3,
 68 |      "metadata": {},
 69 |      "output_type": "execute_result"
 70 |     }
 71 |    ],
 72 |    "source": [
 73 |     "#clear\n",
 74 |     "a+b"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "So this is easy and one-to-one.\n"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "metadata": {},
 87 |    "source": [
 88 |     "---\n",
 89 |     "\n",
 90 |     "What if the shapes do not match?"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 4,
 96 |    "metadata": {
 97 |     "collapsed": false
 98 |    },
 99 |    "outputs": [
100 |     {
101 |      "name": "stdout",
102 |      "output_type": "stream",
103 |      "text": [
104 |       "(3, 3)\n",
105 |       "[[0 1 2]\n",
106 |       " [3 4 5]\n",
107 |       " [6 7 8]]\n",
108 |       "(3,)\n",
109 |       "[0 1 2]\n"
110 |      ]
111 |     }
112 |    ],
113 |    "source": [
114 |     "a = np.arange(9).reshape(3, 3)\n",
115 |     "print(a.shape)\n",
116 |     "print(a)\n",
117 |     "b = np.arange(3)\n",
118 |     "print(b.shape)\n",
119 |     "print(b)"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "markdown",
124 |    "metadata": {},
125 |    "source": [
126 |     "What will this do?"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 5,
132 |    "metadata": {
133 |     "collapsed": false
134 |    },
135 |    "outputs": [
136 |     {
137 |      "data": {
138 |       "text/plain": [
139 |        "array([[ 0,  2,  4],\n",
140 |        "       [ 3,  5,  7],\n",
141 |        "       [ 6,  8, 10]])"
142 |       ]
143 |      },
144 |      "execution_count": 5,
145 |      "metadata": {},
146 |      "output_type": "execute_result"
147 |     }
148 |    ],
149 |    "source": [
150 |     "a+b"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "metadata": {},
156 |    "source": [
157 |     "It has *broadcast* along the last axis!"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "markdown",
162 |    "metadata": {},
163 |    "source": [
164 |     "---\n",
165 |     "\n",
166 |     "Can we broadcast along the *first* axis?"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 6,
172 |    "metadata": {
173 |     "collapsed": false
174 |    },
175 |    "outputs": [
176 |     {
177 |      "data": {
178 |       "text/plain": [
179 |        "array([[ 0,  1,  2],\n",
180 |        "       [ 4,  5,  6],\n",
181 |        "       [ 8,  9, 10]])"
182 |       ]
183 |      },
184 |      "execution_count": 6,
185 |      "metadata": {},
186 |      "output_type": "execute_result"
187 |     }
188 |    ],
189 |    "source": [
190 |     "#clear\n",
191 |     "a+b.reshape(3, 1)"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "markdown",
196 |    "metadata": {},
197 |    "source": [
198 |     "Rules:\n",
199 |     "\n",
200 |     "* Shapes are matched axis-by-axis from last to first.\n",
201 |     "* A length-1 axis can be *broadcast* if necessary."
202 |    ]
203 |   }
204 |  ],
205 |  "metadata": {},
206 |  "nbformat": 4,
207 |  "nbformat_minor": 0
208 | }


--------------------------------------------------------------------------------
/01-intro/4-practice-ordering-tree.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Practice: Build a recursive data structure"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "You are given an array of floating point numbers called `numbers`. These numbers lie between 0 and 1.\n",
 15 |     "\n",
 16 |     "Write a function `build_tree(numbers, left, right, max_in_leaf=5)` that builds a \"tree of bins\" data structure, where\n",
 17 |     "\n",
 18 |     "* `left` is a lower bound on *numbers*\n",
 19 |     "* `right` is an upper bound on *numbers*\n",
 20 |     "* `max_in_leaf` is the largest number of numbers allowed in a leaf node of the tree\n",
 21 |     "\n",
 22 |     "Have this function do the following:\n",
 23 |     "\n",
 24 |     "* If there are at most `max_in_leaf` numbers in `numbers`, return `numbers` unmodified as a 'leaf node'.\n",
 25 |     "* Otherwise, return a tuple of the form `(left_child, pivot, right_child)`, where `pivot` is the average of `left` and `right`. `left_child` is the result of processing the part of `numbers` that is less than `pivot` through `build_tree`, and `right_child` is the same for the numbers greater than or equal to `pivot`.\n",
 26 |     "\n",
 27 |     "Hints:\n",
 28 |     "\n",
 29 |     "* look up `len()` to find the length of `numbers`, or use `numbers.shape[0]`"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "metadata": {
 36 |     "collapsed": false
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "import numpy as np\n",
 41 |     "\n",
 42 |     "numbers = np.random.rand(100)"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 3,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "def build_tree(numbers, left, right, max_in_leaf=5):\n",
 54 |     "    # ...\n",
 55 |     "    pass"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 4,
 61 |    "metadata": {
 62 |     "collapsed": false
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "#clear\n",
 67 |     "# Solution\n",
 68 |     "\n",
 69 |     "def build_tree(numbers, left, right, max_in_leaf=5):\n",
 70 |     "    if len(numbers) <= max_in_leaf:\n",
 71 |     "        return numbers\n",
 72 |     "\n",
 73 |     "    pivot = (left + right)/2\n",
 74 |     "    return (build_tree(numbers[numbers < pivot], left, pivot, max_in_leaf),\n",
 75 |     "            pivot,\n",
 76 |     "            build_tree(numbers[numbers >= pivot], pivot, right, max_in_leaf))"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 5,
 82 |    "metadata": {
 83 |     "collapsed": false
 84 |    },
 85 |    "outputs": [
 86 |     {
 87 |      "name": "stdout",
 88 |      "output_type": "stream",
 89 |      "text": [
 90 |       "((((array([ 0.03155442,  0.04969038,  0.00203516,  0.01134467]), 0.0625, array([ 0.08795129,  0.08484712,  0.10400076])), 0.125, ((array([ 0.13288577,  0.1348917 ,  0.13717107,  0.13363111]), 0.15625, array([ 0.18361257,  0.16379185,  0.17935313])), 0.1875, array([ 0.18835925]))), 0.25, ((((array([ 0.25471829,  0.25316833,  0.25368532]), 0.265625, array([ 0.2753575 ,  0.273414  ,  0.27016936])), 0.28125, array([ 0.31082018,  0.29369432,  0.29940896,  0.30908776])), 0.3125, array([ 0.33571279,  0.37308478,  0.33152007,  0.35286179])), 0.375, (array([ 0.42385846,  0.4181284 ,  0.41651459,  0.40505667,  0.39770273]), 0.4375, (array([ 0.45339789,  0.45886606,  0.45242226,  0.46320172]), 0.46875, array([ 0.47478645,  0.47411047]))))), 0.5, (((((array([ 0.50499705,  0.50985016]), 0.515625, array([ 0.52229993,  0.51933766,  0.51886349,  0.52999165,  0.52412507])), 0.53125, array([ 0.55858432,  0.54768638,  0.55832187,  0.5325089 ,  0.55220628])), 0.5625, (array([ 0.57673859,  0.56823789,  0.5813294 ,  0.58937822]), 0.59375, array([ 0.6160641 ,  0.61934406,  0.602265  ,  0.6162048 ]))), 0.625, (array([ 0.65624928,  0.6413943 ,  0.67663038,  0.64642908]), 0.6875, (array([ 0.70809202,  0.68992577,  0.70079716,  0.70850778]), 0.71875, array([ 0.74158749,  0.73977694,  0.73835865])))), 0.75, (((array([ 0.77388481,  0.75894454,  0.75947687,  0.75107273,  0.75294742]), 0.78125, array([ 0.81109003,  0.79674588,  0.78627348,  0.80242775,  0.79621333])), 0.8125, array([ 0.85778992,  0.8153317 ,  0.8164691 ,  0.84316018])), 0.875, ((array([ 0.88278667,  0.88526777,  0.89802129]), 0.90625, array([ 0.92812108,  0.90822701,  0.91261268])), 0.9375, ((array([ 0.94584389,  0.95214282,  0.93878381,  0.94956855]), 0.953125, array([ 0.96443695,  0.96785209])), 0.96875, array([ 0.9805466 ,  0.97100946,  0.99383466]))))))\n"
 91 |      ]
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "tree = build_tree(numbers, 0, 1)\n",
 96 |     "print(tree)"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": null,
102 |    "metadata": {
103 |     "collapsed": false
104 |    },
105 |    "outputs": [],
106 |    "source": []
107 |   }
108 |  ],
109 |  "metadata": {
110 |   "kernelspec": {
111 |    "display_name": "Python 3",
112 |    "language": "python",
113 |    "name": "python3"
114 |   },
115 |   "language_info": {
116 |    "codemirror_mode": {
117 |     "name": "ipython",
118 |     "version": 3
119 |    },
120 |    "file_extension": ".py",
121 |    "mimetype": "text/x-python",
122 |    "name": "python",
123 |    "nbconvert_exporter": "python",
124 |    "pygments_lexer": "ipython3",
125 |    "version": "3.5.1+"
126 |   }
127 |  },
128 |  "nbformat": 4,
129 |  "nbformat_minor": 0
130 | }
131 | 


--------------------------------------------------------------------------------
/01-intro/README.rst:
--------------------------------------------------------------------------------
1 | **Tip:** ``ipynb`` files can be viewed on Github. Just click them.
2 | 


--------------------------------------------------------------------------------
/01-intro/cat.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/01-intro/cat.jpeg


--------------------------------------------------------------------------------
/02-languages/01-expression-trees.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Expression Trees"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "What's an *expression tree*?"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": false
 22 |    },
 23 |    "outputs": [
 24 |     {
 25 |      "data": {
 26 |       "text/plain": [
 27 |        "Variable('x')"
 28 |       ]
 29 |      },
 30 |      "execution_count": 1,
 31 |      "metadata": {},
 32 |      "output_type": "execute_result"
 33 |     }
 34 |    ],
 35 |    "source": [
 36 |     "import pymbolic.primitives as p\n",
 37 |     "x = p.Variable(\"x\")\n",
 38 |     "y = p.Variable(\"y\")\n",
 39 |     "x"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "Let's look at what happens with a simple expression:"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 2,
 52 |    "metadata": {
 53 |     "collapsed": false
 54 |    },
 55 |    "outputs": [
 56 |     {
 57 |      "data": {
 58 |       "text/plain": [
 59 |        "Sum((Variable('x'), 5))"
 60 |       ]
 61 |      },
 62 |      "execution_count": 2,
 63 |      "metadata": {},
 64 |      "output_type": "execute_result"
 65 |     }
 66 |    ],
 67 |    "source": [
 68 |     "#clear\n",
 69 |     "x+5"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {},
 75 |    "source": [
 76 |     "It does not get evaluated.\n",
 77 |     "\n",
 78 |     "---\n",
 79 |     "\n",
 80 |     "Let's look at its type and structure in more detail."
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 3,
 86 |    "metadata": {
 87 |     "collapsed": false
 88 |    },
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "u = x+5"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 4,
 97 |    "metadata": {
 98 |     "collapsed": false
 99 |    },
100 |    "outputs": [
101 |     {
102 |      "data": {
103 |       "text/plain": [
104 |        "pymbolic.primitives.Sum"
105 |       ]
106 |      },
107 |      "execution_count": 4,
108 |      "metadata": {},
109 |      "output_type": "execute_result"
110 |     }
111 |    ],
112 |    "source": [
113 |     "#clear\n",
114 |     "type(u)"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 5,
120 |    "metadata": {
121 |     "collapsed": false
122 |    },
123 |    "outputs": [
124 |     {
125 |      "data": {
126 |       "text/plain": [
127 |        "(Variable('x'), 5)"
128 |       ]
129 |      },
130 |      "execution_count": 5,
131 |      "metadata": {},
132 |      "output_type": "execute_result"
133 |     }
134 |    ],
135 |    "source": [
136 |     "#clear\n",
137 |     "u.children"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "OK, easy. What if we introduce a product?"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 6,
150 |    "metadata": {
151 |     "collapsed": false
152 |    },
153 |    "outputs": [
154 |     {
155 |      "data": {
156 |       "text/plain": [
157 |        "Sum((Variable('x'), Product((4, Variable('y')))))"
158 |       ]
159 |      },
160 |      "execution_count": 6,
161 |      "metadata": {},
162 |      "output_type": "execute_result"
163 |     }
164 |    ],
165 |    "source": [
166 |     "u = x + 4*y\n",
167 |     "u"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 7,
173 |    "metadata": {
174 |     "collapsed": false
175 |    },
176 |    "outputs": [
177 |     {
178 |      "data": {
179 |       "text/plain": [
180 |        "Variable('x')"
181 |       ]
182 |      },
183 |      "execution_count": 7,
184 |      "metadata": {},
185 |      "output_type": "execute_result"
186 |     }
187 |    ],
188 |    "source": [
189 |     "#clear\n",
190 |     "u.children[0]"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": 8,
196 |    "metadata": {
197 |     "collapsed": false
198 |    },
199 |    "outputs": [
200 |     {
201 |      "data": {
202 |       "text/plain": [
203 |        "Product((4, Variable('y')))"
204 |       ]
205 |      },
206 |      "execution_count": 8,
207 |      "metadata": {},
208 |      "output_type": "execute_result"
209 |     }
210 |    ],
211 |    "source": [
212 |     "#clear\n",
213 |     "u.children[1]"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 9,
219 |    "metadata": {
220 |     "collapsed": false
221 |    },
222 |    "outputs": [
223 |     {
224 |      "data": {
225 |       "text/plain": [
226 |        "4"
227 |       ]
228 |      },
229 |      "execution_count": 9,
230 |      "metadata": {},
231 |      "output_type": "execute_result"
232 |     }
233 |    ],
234 |    "source": [
235 |     "#clear\n",
236 |     "u.children[1].children[0]"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": 22,
242 |    "metadata": {
243 |     "collapsed": false
244 |    },
245 |    "outputs": [
246 |     {
247 |      "data": {
248 |       "text/plain": [
249 |        "Variable('y')"
250 |       ]
251 |      },
252 |      "execution_count": 22,
253 |      "metadata": {},
254 |      "output_type": "execute_result"
255 |     }
256 |    ],
257 |    "source": [
258 |     "#clear\n",
259 |     "u.children[1].children[1]"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "markdown",
264 |    "metadata": {},
265 |    "source": [
266 |     "This structure is called a *tree*, because there is a *root* and *branches*."
267 |    ]
268 |   }
269 |  ],
270 |  "metadata": {},
271 |  "nbformat": 4,
272 |  "nbformat_minor": 0
273 | }


--------------------------------------------------------------------------------
/02-languages/03-defining-custom-node-types.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Defining Custom Node Types"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Mathematical expressions are only the first step. Most of the time, in mathematical software, the interesting aspects are special \"things\" that are strung together by expressions.\n",
 15 |     "\n",
 16 |     "So it would be helpful to be able to define our own expression types:"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {
 23 |     "collapsed": false
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "import pymbolic.primitives as p\n",
 28 |     "\n",
 29 |     "x = p.Variable(\"x\")"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 3,
 35 |    "metadata": {
 36 |     "collapsed": false
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "class DerivativeOperator(p.Expression):\n",
 41 |     "    def __init__(self, operand):\n",
 42 |     "        self.operand = operand\n",
 43 |     "\n",
 44 |     "    def __getinitargs__(self):\n",
 45 |     "        return (self.operand,)\n",
 46 |     "\n",
 47 |     "    mapper_method = \"map_derivative_operator\""
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "`__getinitargs__` tells `pymbolic` what the arguments of the constructor were. This is used for printing and comparisons."
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 4,
 60 |    "metadata": {
 61 |     "collapsed": false
 62 |    },
 63 |    "outputs": [
 64 |     {
 65 |      "data": {
 66 |       "text/plain": [
 67 |        "Quotient(Variable('x'), DerivativeOperator(Power(Sum((Variable('x'), 23)), 0.5)))"
 68 |       ]
 69 |      },
 70 |      "execution_count": 4,
 71 |      "metadata": {},
 72 |      "output_type": "execute_result"
 73 |     }
 74 |    ],
 75 |    "source": [
 76 |     "u = x/DerivativeOperator((x + 23)**0.5)\n",
 77 |     "u"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "We can then also define custom mappers (let's call ours `DerivDoubler`) that operate on these node types:"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 5,
 90 |    "metadata": {
 91 |     "collapsed": false
 92 |    },
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "from pymbolic.mapper import IdentityMapper"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 6,
101 |    "metadata": {
102 |     "collapsed": false
103 |    },
104 |    "outputs": [],
105 |    "source": [
106 |     "#clear\n",
107 |     "class DerivDoubler(IdentityMapper):\n",
108 |     "    def map_derivative_operator(self, expr):\n",
109 |     "        return 2*DerivativeOperator(self.rec(expr.operand))"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {},
115 |    "source": [
116 |     "Now apply it:"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 7,
122 |    "metadata": {
123 |     "collapsed": false
124 |    },
125 |    "outputs": [
126 |     {
127 |      "data": {
128 |       "text/plain": [
129 |        "Quotient(Variable('x'), Product((2, DerivativeOperator(Power(Sum((Variable('x'), 23)), 0.5)))))"
130 |       ]
131 |      },
132 |      "execution_count": 7,
133 |      "metadata": {},
134 |      "output_type": "execute_result"
135 |     }
136 |    ],
137 |    "source": [
138 |     "dd = DerivDoubler()\n",
139 |     "\n",
140 |     "dd(u)"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": null,
146 |    "metadata": {
147 |     "collapsed": false
148 |    },
149 |    "outputs": [],
150 |    "source": []
151 |   }
152 |  ],
153 |  "metadata": {
154 |   "kernelspec": {
155 |    "display_name": "Python 3",
156 |    "language": "python",
157 |    "name": "python3"
158 |   },
159 |   "language_info": {
160 |    "codemirror_mode": {
161 |     "name": "ipython",
162 |     "version": 3
163 |    },
164 |    "file_extension": ".py",
165 |    "mimetype": "text/x-python",
166 |    "name": "python",
167 |    "nbconvert_exporter": "python",
168 |    "pygments_lexer": "ipython3",
169 |    "version": "3.5.0+"
170 |   }
171 |  },
172 |  "nbformat": 4,
173 |  "nbformat_minor": 0
174 | }
175 | 


--------------------------------------------------------------------------------
/02-languages/04-accessing-python-syntax-trees.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Accessing Python Syntax Trees"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "It is also possible to access code that is written in Python. This is done using the `ast` module, as follows:"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": false
 22 |    },
 23 |    "outputs": [
 24 |     {
 25 |      "name": "stdout",
 26 |      "output_type": "stream",
 27 |      "text": [
 28 |       "Module(body=[FunctionDef(name='f', args=arguments(args=[arg(arg='x', annotation=None), arg(arg='y', annotation=None)], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=[Return(value=BinOp(left=BinOp(left=BinOp(left=Num(n=2), op=Mult(), right=Name(id='x', ctx=Load())), op=Add(), right=BinOp(left=Name(id='y', ctx=Load()), op=Pow(), right=Num(n=2))), op=Add(), right=Num(n=5)))], decorator_list=[], returns=None)])\n"
 29 |      ]
 30 |     }
 31 |    ],
 32 |    "source": [
 33 |     "SRC = \"\"\"\n",
 34 |     "def f(x, y):\n",
 35 |     "    return 2*x + y**2 + 5\n",
 36 |     "\"\"\"\n",
 37 |     "\n",
 38 |     "import ast\n",
 39 |     "tree = ast.parse(SRC)\n",
 40 |     "\n",
 41 |     "print(ast.dump(tree))"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "It is possible to transcribe the expressions here into the form discussed earlier."
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 2,
 54 |    "metadata": {
 55 |     "collapsed": false
 56 |    },
 57 |    "outputs": [
 58 |     {
 59 |      "name": "stdout",
 60 |      "output_type": "stream",
 61 |      "text": [
 62 |       "2*x + y**2 + 5\n"
 63 |      ]
 64 |     }
 65 |    ],
 66 |    "source": [
 67 |     "from pymbolic.interop.ast import ASTToPymbolic\n",
 68 |     "expr = ASTToPymbolic()(tree.body[0].body[0].value)\n",
 69 |     "print(expr)"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {},
 75 |    "source": [
 76 |     "But beware when defining languages this way. Python has very well-defined semantics, and the user will expect that your way of executing their code is a good match for their mental model of what the code should do. As such, it may be better to start with a \"blank slate\" in terms of language design, so as to not run afoul of already formed expectations."
 77 |    ]
 78 |   }
 79 |  ],
 80 |  "metadata": {
 81 |   "kernelspec": {
 82 |    "display_name": "Python 3",
 83 |    "language": "python",
 84 |    "name": "python3"
 85 |   },
 86 |   "language_info": {
 87 |    "codemirror_mode": {
 88 |     "name": "ipython",
 89 |     "version": 3
 90 |    },
 91 |    "file_extension": ".py",
 92 |    "mimetype": "text/x-python",
 93 |    "name": "python",
 94 |    "nbconvert_exporter": "python",
 95 |    "pygments_lexer": "ipython3",
 96 |    "version": "3.5.0+"
 97 |   }
 98 |  },
 99 |  "nbformat": 4,
100 |  "nbformat_minor": 0
101 | }
102 | 


--------------------------------------------------------------------------------
/02-languages/05-common-operations.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Common Operations"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## What common operations are supported?"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Just normal mappers:\n",
 22 |     "\n",
 23 |     "* Evaluation\n",
 24 |     "* Turning expressions into 'human-readable' strings\n",
 25 |     "* Performing substitution\n",
 26 |     "* Taking derivatives\n",
 27 |     "* Finding variables on which an expression depends\n",
 28 |     "* Code Generation\n",
 29 |     "\n",
 30 |     "Also:\n",
 31 |     "\n",
 32 |     "* Parsing (i.e. turning a string into an expression)"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {},
 38 |    "source": [
 39 |     "## Evaluation"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 1,
 45 |    "metadata": {
 46 |     "collapsed": false
 47 |    },
 48 |    "outputs": [
 49 |     {
 50 |      "data": {
 51 |       "text/plain": [
 52 |        "Power(Sum((Power(Variable('x'), 2), Power(Variable('y'), 2))), 0.5)"
 53 |       ]
 54 |      },
 55 |      "execution_count": 1,
 56 |      "metadata": {},
 57 |      "output_type": "execute_result"
 58 |     }
 59 |    ],
 60 |    "source": [
 61 |     "from pymbolic import parse\n",
 62 |     "from pymbolic.mapper.evaluator import EvaluationMapper\n",
 63 |     "\n",
 64 |     "expr = parse(\"(x**2 + y**2)**0.5\")\n",
 65 |     "expr"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": 3,
 71 |    "metadata": {
 72 |     "collapsed": false
 73 |    },
 74 |    "outputs": [
 75 |     {
 76 |      "name": "stdout",
 77 |      "output_type": "stream",
 78 |      "text": [
 79 |       "(x**2 + y**2)**0.5\n"
 80 |      ]
 81 |     }
 82 |    ],
 83 |    "source": [
 84 |     "#clear\n",
 85 |     "print(expr)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 2,
 91 |    "metadata": {
 92 |     "collapsed": false
 93 |    },
 94 |    "outputs": [
 95 |     {
 96 |      "name": "stdout",
 97 |      "output_type": "stream",
 98 |      "text": [
 99 |       "17.26267650163207\n"
100 |      ]
101 |     }
102 |    ],
103 |    "source": [
104 |     "#clear\n",
105 |     "evm = EvaluationMapper({\"x\": 17, \"y\": 3})\n",
106 |     "\n",
107 |     "print(evm(expr))"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "This is just a normal mapper, so its behavior can be overridden as described before."
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "markdown",
119 |    "metadata": {},
120 |    "source": [
121 |     "## Finding Independent Variables"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 3,
127 |    "metadata": {
128 |     "collapsed": false
129 |    },
130 |    "outputs": [
131 |     {
132 |      "data": {
133 |       "text/plain": [
134 |        "{Variable('x'), Variable('y')}"
135 |       ]
136 |      },
137 |      "execution_count": 3,
138 |      "metadata": {},
139 |      "output_type": "execute_result"
140 |     }
141 |    ],
142 |    "source": [
143 |     "from pymbolic.mapper.dependency import DependencyMapper\n",
144 |     "\n",
145 |     "depmap = DependencyMapper()\n",
146 |     "depmap(expr)"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "markdown",
151 |    "metadata": {},
152 |    "source": [
153 |     "## Code generation"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 4,
159 |    "metadata": {
160 |     "collapsed": false
161 |    },
162 |    "outputs": [
163 |     {
164 |      "data": {
165 |       "text/plain": [
166 |        "'pow(x + 4, 17)'"
167 |       ]
168 |      },
169 |      "execution_count": 4,
170 |      "metadata": {},
171 |      "output_type": "execute_result"
172 |     }
173 |    ],
174 |    "source": [
175 |     "from pymbolic.mapper.c_code import CCodeMapper\n",
176 |     "\n",
177 |     "ccm = CCodeMapper()\n",
178 |     "x = parse(\"x\")\n",
179 |     "ccm((x+4)**17)"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {},
185 |    "source": [
186 |     "(We're using `parse` here just to give us a `Variable(\"x\")` object.)"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "metadata": {},
192 |    "source": [
193 |     "## Common subexpressions"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "markdown",
198 |    "metadata": {},
199 |    "source": [
200 |     "Often, some parts of an expression occur multiple times in a bigger expression."
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "code",
205 |    "execution_count": 5,
206 |    "metadata": {
207 |     "collapsed": false
208 |    },
209 |    "outputs": [
210 |     {
211 |      "data": {
212 |       "text/plain": [
213 |        "'pow(x + 4, 3) + 4 * pow(x + 4, 3) * h * h + 2 * pow(x + 4, 3) * h'"
214 |       ]
215 |      },
216 |      "execution_count": 5,
217 |      "metadata": {},
218 |      "output_type": "execute_result"
219 |     }
220 |    ],
221 |    "source": [
222 |     "u = (x+4)**3\n",
223 |     "\n",
224 |     "h = parse(\"h\")\n",
225 |     "\n",
226 |     "expr = u + 2*u*h + 4*u*h**2\n",
227 |     "ccm(expr)"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "markdown",
232 |    "metadata": {},
233 |    "source": [
234 |     "Obviously, that doesn't lead to great code. In particular, the redundancy is carried through to the code side.\n",
235 |     "\n",
236 |     "There is a mechanism to prevent this redundancy. Individual parts of an expression can be tagged as \"common subexpressions\"."
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": 6,
242 |    "metadata": {
243 |     "collapsed": false
244 |    },
245 |    "outputs": [
246 |     {
247 |      "name": "stdout",
248 |      "output_type": "stream",
249 |      "text": [
250 |       "_cse0 = pow(x + 4, 3)\n",
251 |       "_cse0 + 4 * _cse0 * h * h + 2 * _cse0 * h\n"
252 |      ]
253 |     }
254 |    ],
255 |    "source": [
256 |     "from pymbolic.primitives import CommonSubexpression as CSE\n",
257 |     "\n",
258 |     "u = CSE((x+4)**3)\n",
259 |     "\n",
260 |     "h = parse(\"h\")\n",
261 |     "\n",
262 |     "expr = u + 2*u*h + 4*u*h**2\n",
263 |     "\n",
264 |     "result = ccm(expr)\n",
265 |     "\n",
266 |     "for name, value in ccm.cse_name_list:\n",
267 |     "    print(name, \"=\", value)\n",
268 |     "    \n",
269 |     "print(result)"
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "markdown",
274 |    "metadata": {},
275 |    "source": [
276 |     "(These names can be customized, in case you're wondering.)"
277 |    ]
278 |   }
279 |  ],
280 |  "metadata": {
281 |   "kernelspec": {
282 |    "display_name": "Python 3",
283 |    "language": "python",
284 |    "name": "python3"
285 |   },
286 |   "language_info": {
287 |    "codemirror_mode": {
288 |     "name": "ipython",
289 |     "version": 3
290 |    },
291 |    "file_extension": ".py",
292 |    "mimetype": "text/x-python",
293 |    "name": "python",
294 |    "nbconvert_exporter": "python",
295 |    "pygments_lexer": "ipython3",
296 |    "version": "3.5.0+"
297 |   }
298 |  },
299 |  "nbformat": 4,
300 |  "nbformat_minor": 0
301 | }
302 | 


--------------------------------------------------------------------------------
/02-languages/06-interoperating-with-sympy.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     " # Interacting with `sympy`\n",
  8 |     " \n",
  9 |     "`pymbolic` can help take care of many *structural* transformations on your expression trees with great ease. Its main purpose is to help with program transformation after all, not to be a full computer algebra system. That said, if you need a full computer algebra system for things like calculus and simplification, it's easy to get your expressions converted between `pymbolic` and `sympy`:"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "metadata": {
 16 |     "collapsed": false
 17 |    },
 18 |    "outputs": [
 19 |     {
 20 |      "name": "stderr",
 21 |      "output_type": "stream",
 22 |      "text": [
 23 |       "/usr/lib/python3/dist-packages/sympy/core/function.py:105: DeprecationWarning: inspect.getargspec() is deprecated, use inspect.signature() instead\n",
 24 |       "  evalargspec = inspect.getargspec(cls.eval)\n"
 25 |      ]
 26 |     }
 27 |    ],
 28 |    "source": [
 29 |     "import sympy as sp\n",
 30 |     "from pymbolic import var\n",
 31 |     "\n",
 32 |     "x = var(\"x\")\n",
 33 |     "y = var(\"y\")"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 6,
 39 |    "metadata": {
 40 |     "collapsed": false
 41 |    },
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "(x**2 + 2*x + 1) / (x**2 + x)\n"
 48 |      ]
 49 |     }
 50 |    ],
 51 |    "source": [
 52 |     "expr = (x**2 + 2*x + 1)/(x**2 + x)\n",
 53 |     "print(expr)"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "Let's import pymbolic's sympy interoperability code."
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 7,
 66 |    "metadata": {
 67 |     "collapsed": false
 68 |    },
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "# pymbolic.interop.sympy in newer versions of pymbolic\n",
 72 |     "from pymbolic.sympy_interface import (\n",
 73 |     "    PymbolicToSympyMapper, SympyToPymbolicMapper)\n",
 74 |     "\n",
 75 |     "p2s = PymbolicToSympyMapper()\n",
 76 |     "s2p = SympyToPymbolicMapper()"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 8,
 82 |    "metadata": {
 83 |     "collapsed": false
 84 |    },
 85 |    "outputs": [
 86 |     {
 87 |      "name": "stdout",
 88 |      "output_type": "stream",
 89 |      "text": [
 90 |       "(x**2 + 2*x + 1)/(x**2 + x)\n"
 91 |      ]
 92 |     }
 93 |    ],
 94 |    "source": [
 95 |     "sympy_expr = p2s(expr)\n",
 96 |     "print(sympy_expr)"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 9,
102 |    "metadata": {
103 |     "collapsed": false
104 |    },
105 |    "outputs": [
106 |     {
107 |      "name": "stdout",
108 |      "output_type": "stream",
109 |      "text": [
110 |       "(x + 1)/x\n"
111 |      ]
112 |     }
113 |    ],
114 |    "source": [
115 |     "sympy_result = sp.cancel(sympy_expr)\n",
116 |     "print(sympy_result)"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 10,
122 |    "metadata": {
123 |     "collapsed": false
124 |    },
125 |    "outputs": [
126 |     {
127 |      "name": "stdout",
128 |      "output_type": "stream",
129 |      "text": [
130 |       "x**(-1)*(1 + x)\n"
131 |      ]
132 |     }
133 |    ],
134 |    "source": [
135 |     "result = s2p(sympy_result)\n",
136 |     "print(result)"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "metadata": {},
142 |    "source": [
143 |     "One thing to note is that `PymbolicToSympyMapper` is a regular `pymbolic` mapper, and its behavior can be overridden in case something about the translation to sympy is not quite what you want.\n",
144 |     "\n",
145 |     "`SympyToPymbolicMapper` also behaves very similarly (and can be overridden similarly) although it is not entirely the same kind of mapper."
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": null,
151 |    "metadata": {
152 |     "collapsed": true
153 |    },
154 |    "outputs": [],
155 |    "source": []
156 |   }
157 |  ],
158 |  "metadata": {
159 |   "kernelspec": {
160 |    "display_name": "Python 3",
161 |    "language": "python",
162 |    "name": "python3"
163 |   },
164 |   "language_info": {
165 |    "codemirror_mode": {
166 |     "name": "ipython",
167 |     "version": 3
168 |    },
169 |    "file_extension": ".py",
170 |    "mimetype": "text/x-python",
171 |    "name": "python",
172 |    "nbconvert_exporter": "python",
173 |    "pygments_lexer": "ipython3",
174 |    "version": "3.5.0+"
175 |   }
176 |  },
177 |  "nbformat": 4,
178 |  "nbformat_minor": 0
179 | }
180 | 


--------------------------------------------------------------------------------
/02-languages/08-practice.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Practice: Apply the chain rule"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "1. Define a custom expression node `Derivative(expr, v)` that symbolically represents taking a derivative of an expression `expr` with respect to variable `v`.\n",
 15 |     "1. Now suppose that, in order to take a derivative with respect to a given coordinate `x`, your code actually has to consider the derivative in a *reference coordinate system* with coordinates `r` and `s`, and therefore needs to apply the chain rule identity\n",
 16 |     "\n",
 17 |     "$$ \\frac{d\\text{expr}}{dx} = \\frac{d\\text{expr}}{dr}\\frac{dr}{dx} + \\frac{d\\text{expr}}{ds}\\frac{ds}{dx}$$\n",
 18 |     "\n",
 19 |     "Write a `ChainRuleMapper` that applies this identity."
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 7,
 25 |    "metadata": {
 26 |     "collapsed": false
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "from pymbolic import var\n",
 31 |     "from pymbolic.primitives import Expression\n",
 32 |     "from pymbolic.mapper import IdentityMapper\n",
 33 |     "\n",
 34 |     "x = var(\"x\")\n",
 35 |     "r = var(\"r\")\n",
 36 |     "s = var(\"s\")"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 8,
 42 |    "metadata": {
 43 |     "collapsed": false
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "class Derivative(Expression):\n",
 48 |     "    # ...\n",
 49 |     "    pass"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "metadata": {},
 55 |    "source": [
 56 |     "To avoid conflicts with a `Derivative` node type that's already part of pymbolic, we call our mapper method `map_deriv`."
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 32,
 62 |    "metadata": {
 63 |     "collapsed": false
 64 |    },
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "#clear\n",
 68 |     "# Solution\n",
 69 |     "\n",
 70 |     "class Derivative(Expression):\n",
 71 |     "    def __init__(self, expr, v):\n",
 72 |     "        self.expr = expr\n",
 73 |     "        self.v = v\n",
 74 |     "\n",
 75 |     "    def __getinitargs__(self):\n",
 76 |     "        return (self.expr, self.v)\n",
 77 |     "\n",
 78 |     "    mapper_method = \"map_deriv\""
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": 33,
 84 |    "metadata": {
 85 |     "collapsed": false
 86 |    },
 87 |    "outputs": [
 88 |     {
 89 |      "name": "stdout",
 90 |      "output_type": "stream",
 91 |      "text": [
 92 |       "Call(Variable('sqrt'), (Derivative(Sum((Product((27, Power(Variable('x'), 2))), Call(Variable('exp'), (Variable('x'),)))), Variable('x')),))\n"
 93 |      ]
 94 |     }
 95 |    ],
 96 |    "source": [
 97 |     "expr = var(\"sqrt\")(Derivative(27*x**2+var(\"exp\")(x), x))\n",
 98 |     "print(repr(expr))"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 34,
104 |    "metadata": {
105 |     "collapsed": false
106 |    },
107 |    "outputs": [],
108 |    "source": [
109 |     "class ChainRuleMapper(IdentityMapper):\n",
110 |     "    # ...\n",
111 |     "    pass"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 37,
117 |    "metadata": {
118 |     "collapsed": false
119 |    },
120 |    "outputs": [],
121 |    "source": [
122 |     "#clear\n",
123 |     "# Solution\n",
124 |     "\n",
125 |     "class ChainRuleMapper(IdentityMapper):\n",
126 |     "    def map_deriv(self, expr):\n",
127 |     "        return sum(Derivative(expr, ref_sym)*Derivative(ref_sym, x) for ref_sym in [r,s])"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "markdown",
132 |    "metadata": {},
133 |    "source": [
134 |     "Now let's test this mapper:"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 38,
140 |    "metadata": {
141 |     "collapsed": false
142 |    },
143 |    "outputs": [
144 |     {
145 |      "data": {
146 |       "text/plain": [
147 |        "Call(Variable('sqrt'), (Sum((Product((Derivative(Derivative(Sum((Product((27, Power(Variable('x'), 2))), Call(Variable('exp'), (Variable('x'),)))), Variable('x')), Variable('r')), Derivative(Variable('r'), Variable('x')))), Product((Derivative(Derivative(Sum((Product((27, Power(Variable('x'), 2))), Call(Variable('exp'), (Variable('x'),)))), Variable('x')), Variable('s')), Derivative(Variable('s'), Variable('x')))))),))"
148 |       ]
149 |      },
150 |      "execution_count": 38,
151 |      "metadata": {},
152 |      "output_type": "execute_result"
153 |     }
154 |    ],
155 |    "source": [
156 |     "crm = ChainRuleMapper()\n",
157 |     "crm(expr)"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "markdown",
162 |    "metadata": {},
163 |    "source": [
164 |     "In case you are wondering why we can only use the 'clumsy', parenthesis-heavy form of the printed expression, it's because we haven't told pymbolic how to write out the shorter form. Here's how that can be done:"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 48,
170 |    "metadata": {
171 |     "collapsed": false
172 |    },
173 |    "outputs": [
174 |     {
175 |      "name": "stdout",
176 |      "output_type": "stream",
177 |      "text": [
178 |       "sqrt(d(d((27*x**2 + exp(x)))/dx)/dr*d(r)/dx + d(d((27*x**2 + exp(x)))/dx)/ds*d(s)/dx)\n"
179 |      ]
180 |     }
181 |    ],
182 |    "source": [
183 |     "from pymbolic.mapper.stringifier import StringifyMapper, PREC_PRODUCT\n",
184 |     "\n",
185 |     "class MyStringifyMapper(StringifyMapper):\n",
186 |     "    def map_deriv(self, expr, enclosing_prec):\n",
187 |     "        return \"d(%s)/d%s\" % (\n",
188 |     "            self.rec(expr.expr, PREC_PRODUCT), \n",
189 |     "            self.rec(expr.v, PREC_PRODUCT))\n",
190 |     "    \n",
191 |     "def stringifier(self):\n",
192 |     "    return MyStringifyMapper\n",
193 |     "\n",
194 |     "Derivative.stringifier = stringifier\n",
195 |     "print(crm(expr))"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": null,
201 |    "metadata": {
202 |     "collapsed": false
203 |    },
204 |    "outputs": [],
205 |    "source": []
206 |   }
207 |  ],
208 |  "metadata": {
209 |   "kernelspec": {
210 |    "display_name": "Python 3",
211 |    "language": "python",
212 |    "name": "python3"
213 |   },
214 |   "language_info": {
215 |    "codemirror_mode": {
216 |     "name": "ipython",
217 |     "version": 3
218 |    },
219 |    "file_extension": ".py",
220 |    "mimetype": "text/x-python",
221 |    "name": "python",
222 |    "nbconvert_exporter": "python",
223 |    "pygments_lexer": "ipython3",
224 |    "version": "3.5.0+"
225 |   }
226 |  },
227 |  "nbformat": 4,
228 |  "nbformat_minor": 0
229 | }
230 | 


--------------------------------------------------------------------------------
/02-languages/README.rst:
--------------------------------------------------------------------------------
1 | **Tip:** ``ipynb`` files can be viewed on GitHub. Just click them.
2 | 


--------------------------------------------------------------------------------
/02-languages/gvmagic.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Graphviz IPython magic extensions
  3 | 
  4 | Magic methods:
  5 |     %dot <dot graph>
  6 |     %%dot <dot ...
  7 |     ... graph>
  8 |     %dotstr "<dot graph>"
  9 |     %dotobj obj.to_dot()
 10 |     %dotobjs obj[0].to_dot(), obj[1].to_dot(), ...
 11 | 
 12 |     also: %twopi, %neato, %fdp, %sfdp, and %circo magic families.
 13 | 
 14 | Usage:
 15 |     %load_ext gvmagic
 16 | """
 17 | 
 18 | 
 19 | from logging import info, error
 20 | from subprocess import Popen, PIPE
 21 | 
 22 | from IPython.display import display, SVG
 23 | from IPython.core.magic import Magics
 24 | from IPython.core.magic import line_cell_magic
 25 | from IPython.core.magic import line_magic
 26 | from IPython.core.magic import magics_class
 27 | 
 28 | def show_svg(d):
 29 |     display(SVG(data=d))
 30 | 
 31 | def run_graphviz(s, layout_engine='dot'):
 32 |     """Execute dot with a layout and return a raw SVG image, or None."""
 33 |     cmd = ['dot', '-Tsvg', '-K', layout_engine]
 34 | 
 35 |     dot = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
 36 |     stdoutdata, stderrdata = dot.communicate(s.encode('utf-8'))
 37 |     status = dot.wait()
 38 |     if status == 0:
 39 |         return stdoutdata
 40 |     else:
 41 |         fstr = "dot returned {}\n[==== stderr ====]\n{}"
 42 |         error(fstr.format(status, stderrdata.decode('utf-8')))
 43 |         return None
 44 | 
 45 | 
 46 | @magics_class
 47 | class GraphvizMagics(Magics):
 48 | 
 49 |     @line_cell_magic
 50 |     def dot(self, line, cell=None):
 51 |         self._from_cell(line, cell, 'dot')
 52 | 
 53 |     @line_magic
 54 |     def dotstr(self, line):
 55 |         self._from_str(line, 'dot')
 56 | 
 57 |     @line_magic
 58 |     def dotobj(self, line):
 59 |         self._from_obj(line, 'dot')
 60 | 
 61 |     @line_magic
 62 |     def dotobjs(self, line):
 63 |         self._from_objs(line, 'dot')
 64 | 
 65 |     @line_cell_magic
 66 |     def neato(self, line, cell=None):
 67 |         self._from_cell(line, cell, 'neato')
 68 | 
 69 |     @line_magic
 70 |     def neatostr(self, line):
 71 |         self._from_str(line, 'neato')
 72 | 
 73 |     @line_magic
 74 |     def neatoobj(self, line):
 75 |         self._from_obj(line, 'neato')
 76 | 
 77 |     @line_magic
 78 |     def neatoobjs(self, line):
 79 |         self._from_objs(line, 'neato')
 80 | 
 81 |     @line_cell_magic
 82 |     def sfdp(self, line, cell=None):
 83 |         self._from_cell(line, cell, 'sfdp')
 84 | 
 85 |     @line_magic
 86 |     def sfdpstr(self, line):
 87 |         self._from_str(line, 'sfdp')
 88 | 
 89 |     @line_magic
 90 |     def sfdpobj(self, line):
 91 |         self._from_obj(line, 'sfdp')
 92 | 
 93 |     @line_magic
 94 |     def sfdpobjs(self, line):
 95 |         self._from_objs(line, 'sfdp')
 96 | 
 97 |     @line_cell_magic
 98 |     def fdp(self, line, cell=None):
 99 |         self._from_cell(line, cell, 'fdp')
100 | 
101 |     @line_magic
102 |     def fdpstr(self, line):
103 |         self._from_str(line, 'fdp')
104 | 
105 |     @line_magic
106 |     def fdpobj(self, line):
107 |         self._from_obj(line, 'fdp')
108 | 
109 |     @line_magic
110 |     def fdpobjs(self, line):
111 |         self._from_objs(line, 'fdp')
112 | 
113 |     @line_cell_magic
114 |     def twopi(self, line, cell=None):
115 |         self._from_cell(line, cell, 'twopi')
116 | 
117 |     @line_magic
118 |     def twopistr(self, line):
119 |         self._from_str(line, 'twopi')
120 | 
121 |     @line_magic
122 |     def twopiobj(self, line):
123 |         self._from_obj(line, 'twopi')
124 | 
125 |     @line_magic
126 |     def twopiobjs(self, line):
127 |         self._from_objs(line, 'twopi')
128 | 
129 |     @line_cell_magic
130 |     def circo(self, line, cell=None):
131 |         self._from_cell(line, cell, 'circo')
132 | 
133 |     @line_magic
134 |     def circostr(self, line):
135 |         self._from_str(line, 'circo')
136 | 
137 |     @line_magic
138 |     def circoobj(self, line):
139 |         self._from_obj(line, 'circo')
140 | 
141 |     @line_magic
142 |     def circoobjs(self, line):
143 |         self._from_objs(line, 'circo')
144 | 
145 |     def _from_cell(self, line, cell, layout_engine):
146 |         if cell is None:
147 |             s = line
148 |         else:
149 |             s = line + '\n' + cell
150 |         data = run_graphviz(s, layout_engine)
151 |         if data:
152 |             show_svg(data)
153 | 
154 |     def _from_str(self, line, layout_engine):
155 |         s = self.shell.ev(line)
156 |         data = run_graphviz(s, layout_engine)
157 |         if data:
158 |             show_svg(data)
159 | 
160 |     def _from_obj(self, line, layout_engine):
161 |         obj = self.shell.ev(line)
162 |         try:
163 |             s = obj.to_dot()
164 |         except AttributeError:
165 |             error("expected object to implement 'to_dot()' method")
166 |         except TypeError:
167 |             error("expected to_dot method to be callable w/o args")
168 |         else:
169 |             data = run_graphviz(s, layout_engine)
170 |             if data:
171 |                 show_svg(data)
172 | 
173 |     def _from_objs(self, line, layout_engine):
174 |         """dot objects magic"""
175 |         objs = self.shell.ev(line)
176 |         for i, obj in enumerate(objs):
177 |             try:
178 |                 s = obj.to_dot()
179 |             except AttributeError:
180 |                 error("expected object to implement 'to_dot()' method")
181 |             except TypeError:
182 |                 error("expected to_dot method to be callable w/o args")
183 |             else:
184 |                 data = run_graphviz(s, layout_engine)
185 |                 if data:
186 |                     info("object {}:".format(i))
187 |                     show_svg(data)
188 | 
189 | 
def load_ipython_extension(ipython):
    """Extension entry point: register ``GraphvizMagics`` with ``ipython``."""
    magics_cls = GraphvizMagics
    ipython.register_magics(magics_cls)
193 | 
194 | 
def unload_ipython_extension(ipython):
    """Extension exit point; intentionally a no-op (nothing is unregistered)."""
    return None
198 | 


--------------------------------------------------------------------------------
/03-opencl/0-slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/03-opencl/0-slides.pdf


--------------------------------------------------------------------------------
/03-opencl/1-1-hello-pyopencl.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Hello PyOpenCL"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": false
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import pyopencl as cl\n",
 19 |     "import numpy as np\n",
 20 |     "import numpy.linalg as la\n",
 21 |     "\n",
 22 |     "mf = cl.mem_flags"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "This notebook demonstrates the simplest PyOpenCL workflow that touches all essential pieces:\n",
 30 |     "\n",
 31 |     "* Data transfer\n",
 32 |     "* Kernel compilation\n",
 33 |     "* Execution"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 2,
 39 |    "metadata": {
 40 |     "collapsed": false
 41 |    },
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "a = np.random.rand(50000).astype(np.float32)"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "Now create a context `ctx` and a command queue `queue`:"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 3,
 57 |    "metadata": {
 58 |     "collapsed": false
 59 |    },
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "#clear\n",
 63 |     "ctx = cl.create_some_context()\n",
 64 |     "\n",
 65 |     "queue = cl.CommandQueue(ctx)"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {},
 71 |    "source": [
 72 |     "Now allocate a buffer. `Buffer(context, flags, size=None, hostbuf=None)`"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 4,
 78 |    "metadata": {
 79 |     "collapsed": false
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "#clear\n",
 84 |     "a_buf = cl.Buffer(ctx, mf.READ_WRITE, size=a.nbytes)"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "metadata": {},
 90 |    "source": [
 91 |     "Then transfer data:"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 5,
 97 |    "metadata": {
 98 |     "collapsed": false
 99 |    },
100 |    "outputs": [
101 |     {
102 |      "data": {
103 |       "text/plain": [
104 |        "<pyopencl.cffi_cl.NannyEvent at 0x7f3ba6748ba8>"
105 |       ]
106 |      },
107 |      "execution_count": 5,
108 |      "metadata": {},
109 |      "output_type": "execute_result"
110 |     }
111 |    ],
112 |    "source": [
113 |     "#clear\n",
114 |     "cl.enqueue_copy(queue, a_buf, a)"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "markdown",
119 |    "metadata": {},
120 |    "source": [
121 |     "Here's our kernel source code:"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 6,
127 |    "metadata": {
128 |     "collapsed": false
129 |    },
130 |    "outputs": [],
131 |    "source": [
132 |     "prg = cl.Program(ctx, \"\"\"\n",
133 |     "    __kernel void twice(__global float *a)\n",
134 |     "    {\n",
135 |     "      int gid = get_global_id(0);\n",
136 |     "      a[gid] = 2*a[gid];\n",
137 |     "    }\n",
138 |     "    \"\"\").build()"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "Run the kernel."
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": 7,
151 |    "metadata": {
152 |     "collapsed": false
153 |    },
154 |    "outputs": [
155 |     {
156 |      "data": {
157 |       "text/plain": [
158 |        "<pyopencl.cffi_cl.Event at 0x7f3ba6748ef0>"
159 |       ]
160 |      },
161 |      "execution_count": 7,
162 |      "metadata": {},
163 |      "output_type": "execute_result"
164 |     }
165 |    ],
166 |    "source": [
167 |     "#clear\n",
168 |     "prg.twice(queue, a.shape, None, a_buf)"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "markdown",
173 |    "metadata": {},
174 |    "source": [
175 |     "Copy the data back."
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": 8,
181 |    "metadata": {
182 |     "collapsed": false
183 |    },
184 |    "outputs": [
185 |     {
186 |      "data": {
187 |       "text/plain": [
188 |        "<pyopencl.cffi_cl.NannyEvent at 0x7f3ba4093ba8>"
189 |       ]
190 |      },
191 |      "execution_count": 8,
192 |      "metadata": {},
193 |      "output_type": "execute_result"
194 |     }
195 |    ],
196 |    "source": [
197 |     "#clear\n",
198 |     "result = np.empty_like(a)\n",
199 |     "\n",
200 |     "cl.enqueue_copy(queue, result, a_buf)"
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "markdown",
205 |    "metadata": {},
206 |    "source": [
207 |     "Check the result."
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": 9,
213 |    "metadata": {
214 |     "collapsed": false
215 |    },
216 |    "outputs": [
217 |     {
218 |      "name": "stdout",
219 |      "output_type": "stream",
220 |      "text": [
221 |       "0.0 128.816\n"
222 |      ]
223 |     }
224 |    ],
225 |    "source": [
226 |     "#clear\n",
227 |     "print(la.norm(result - 2*a), la.norm(a))"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": null,
233 |    "metadata": {
234 |     "collapsed": false
235 |    },
236 |    "outputs": [],
237 |    "source": []
238 |   }
239 |  ],
240 |  "metadata": {
241 |   "kernelspec": {
242 |    "display_name": "Python 3",
243 |    "language": "python",
244 |    "name": "python3"
245 |   },
246 |   "language_info": {
247 |    "codemirror_mode": {
248 |     "name": "ipython",
249 |     "version": 3
250 |    },
251 |    "file_extension": ".py",
252 |    "mimetype": "text/x-python",
253 |    "name": "python",
254 |    "nbconvert_exporter": "python",
255 |    "pygments_lexer": "ipython3",
256 |    "version": "3.5.1+"
257 |   }
258 |  },
259 |  "nbformat": 4,
260 |  "nbformat_minor": 0
261 | }
262 | 


--------------------------------------------------------------------------------
/03-opencl/1-3-exercise.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# PyOpenCL: An exercise"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": false
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import pyopencl as cl\n",
 19 |     "import numpy as np\n",
 20 |     "import numpy.linalg as la\n",
 21 |     "import pyopencl.array\n",
 22 |     "import pyopencl.clrandom"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "Change the code below to:\n",
 30 |     "    \n",
 31 |     "* Compute $c_i = a_ib_i$\n",
 32 |     "* Use work groups of $16\\times 16$ items\n",
 33 |     "* Benchmark $1\\times 1$ workgroups against $16\\times 16$ workgroups\n",
 34 |     "\n",
 35 |     "  * Use `time()` from the `time` module. (i.e. `import time`)\n",
 36 |     "  * Use `queue.finish()`."
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 2,
 42 |    "metadata": {
 43 |     "collapsed": false
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "ctx = cl.create_some_context()\n",
 48 |     "queue = cl.CommandQueue(ctx)"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 3,
 54 |    "metadata": {
 55 |     "collapsed": false
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "a = np.random.rand(1024, 1024).astype(np.float32)"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 4,
 65 |    "metadata": {
 66 |     "collapsed": false
 67 |    },
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "prg = cl.Program(ctx, \"\"\"\n",
 71 |     "    __kernel void twice(__global float *a)\n",
 72 |     "    {\n",
 73 |     "      int gid0 = get_global_id(0);\n",
 74 |     "      int gid1 = get_global_id(1);\n",
 75 |     "      int i = gid1 * 1024 + gid0;\n",
 76 |     "      a[i] = 2*a[i];\n",
 77 |     "    }\n",
 78 |     "    \"\"\").build()\n",
 79 |     "twice = prg.twice"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 5,
 85 |    "metadata": {
 86 |     "collapsed": false
 87 |    },
 88 |    "outputs": [
 89 |     {
 90 |      "data": {
 91 |       "text/plain": [
 92 |        "<pyopencl.cffi_cl.Event at 0x7f679a3145f8>"
 93 |       ]
 94 |      },
 95 |      "execution_count": 5,
 96 |      "metadata": {},
 97 |      "output_type": "execute_result"
 98 |     }
 99 |    ],
100 |    "source": [
101 |     "a_dev = cl.array.to_device(queue, a)\n",
102 |     "twice(queue, a_dev.shape, None, a_dev.data)"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 6,
108 |    "metadata": {
109 |     "collapsed": false
110 |    },
111 |    "outputs": [
112 |     {
113 |      "name": "stdout",
114 |      "output_type": "stream",
115 |      "text": [
116 |       "0.0 591.347\n"
117 |      ]
118 |     }
119 |    ],
120 |    "source": [
121 |     "print(la.norm(a_dev.get() - 2*a), la.norm(a))"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": null,
127 |    "metadata": {
128 |     "collapsed": false
129 |    },
130 |    "outputs": [],
131 |    "source": []
132 |   }
133 |  ],
134 |  "metadata": {
135 |   "kernelspec": {
136 |    "display_name": "Python 3",
137 |    "language": "python",
138 |    "name": "python3"
139 |   },
140 |   "language_info": {
141 |    "codemirror_mode": {
142 |     "name": "ipython",
143 |     "version": 3
144 |    },
145 |    "file_extension": ".py",
146 |    "mimetype": "text/x-python",
147 |    "name": "python",
148 |    "nbconvert_exporter": "python",
149 |    "pygments_lexer": "ipython3",
150 |    "version": "3.5.0+"
151 |   }
152 |  },
153 |  "nbformat": 4,
154 |  "nbformat_minor": 0
155 | }
156 | 


--------------------------------------------------------------------------------
/03-opencl/1-4-ipython-magic.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# PyOpenCL: Experimenting in IPython"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {
 14 |     "collapsed": false
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "from __future__ import division\n",
 19 |     "import numpy as np\n",
 20 |     "import pyopencl as cl\n",
 21 |     "import pyopencl.array"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "Load the PyOpenCL IPython extension:"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [
 38 |     {
 39 |      "name": "stderr",
 40 |      "output_type": "stream",
 41 |      "text": [
 42 |       "/usr/lib/python3/dist-packages/IPython/utils/traitlets.py:504: DeprecationWarning: inspect.getargspec() is deprecated, use inspect.signature() instead\n",
 43 |       "  argspec = inspect.getargspec(c)\n"
 44 |      ]
 45 |     }
 46 |    ],
 47 |    "source": [
 48 |     "%load_ext pyopencl.ipython_ext"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "Create an OpenCL context and a command queue:"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 3,
 61 |    "metadata": {
 62 |     "collapsed": false
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "ctx = cl.create_some_context()\n",
 67 |     "queue = cl.CommandQueue(ctx)"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "## Using the kernel 'magic'\n",
 75 |     "\n",
 76 |     "Define an OpenCL kernel using the `%%cl_kernel` magic:"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 4,
 82 |    "metadata": {
 83 |     "collapsed": false
 84 |    },
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "%%cl_kernel\n",
 88 |     "\n",
 89 |     "__kernel void sum_vector(__global const float *a,\n",
 90 |     "__global const float *b, __global float *c)\n",
 91 |     "{\n",
 92 |     "  int gid = get_global_id(0);\n",
 93 |     "  c[gid] = a[gid] + b[gid];\n",
 94 |     "}"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "markdown",
 99 |    "metadata": {},
100 |    "source": [
101 |     "This looks for `cl_ctx` or `ctx` in the user namespace to find a PyOpenCL context.\n",
102 |     "\n",
103 |     "Kernel names are automatically injected into the user namespace, so we can just use `sum_vector` from Python below.\n",
104 |     "\n",
105 |     "Now create some data to work on:"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 5,
111 |    "metadata": {
112 |     "collapsed": false
113 |    },
114 |    "outputs": [],
115 |    "source": [
116 |     "n = 10000\n",
117 |     "\n",
118 |     "a = cl.array.empty(queue, n, dtype=np.float32)\n",
119 |     "a.fill(15)\n",
120 |     "\n",
121 |     "b_host = np.random.randn(n).astype(np.float32)\n",
122 |     "b = cl.array.to_device(queue, b_host)\n",
123 |     "\n",
124 |     "c = cl.array.empty_like(a)"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "Run the kernel:"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 6,
137 |    "metadata": {
138 |     "collapsed": false
139 |    },
140 |    "outputs": [
141 |     {
142 |      "data": {
143 |       "text/plain": [
144 |        "<pyopencl.cffi_cl.Event at 0x7f48c1df4ba8>"
145 |       ]
146 |      },
147 |      "execution_count": 6,
148 |      "metadata": {},
149 |      "output_type": "execute_result"
150 |     }
151 |    ],
152 |    "source": [
153 |     "sum_vector(queue, (n,), None, a.data, b.data, c.data)"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {},
159 |    "source": [
160 |     "Check the result using `numpy` operations:"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 7,
166 |    "metadata": {
167 |     "collapsed": false
168 |    },
169 |    "outputs": [],
170 |    "source": [
171 |     "assert (c.get() == b_host + 15).all()"
172 |    ]
173 |   }
174 |  ],
175 |  "metadata": {
176 |   "kernelspec": {
177 |    "display_name": "Python 3",
178 |    "language": "python",
179 |    "name": "python3"
180 |   },
181 |   "language_info": {
182 |    "codemirror_mode": {
183 |     "name": "ipython",
184 |     "version": 3
185 |    },
186 |    "file_extension": ".py",
187 |    "mimetype": "text/x-python",
188 |    "name": "python",
189 |    "nbconvert_exporter": "python",
190 |    "pygments_lexer": "ipython3",
191 |    "version": "3.5.0+"
192 |   }
193 |  },
194 |  "nbformat": 4,
195 |  "nbformat_minor": 0
196 | }
197 | 


--------------------------------------------------------------------------------
/03-opencl/2-1-elementwise.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# PyOpenCL Parallel Patterns: Map/Elementwise"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Setup code"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": false
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "import pyopencl as cl\n",
 26 |     "import pyopencl.array\n",
 27 |     "import pyopencl.clrandom\n",
 28 |     "import numpy as np"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "ctx = cl.create_some_context()\n",
 40 |     "queue = cl.CommandQueue(ctx)"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 3,
 46 |    "metadata": {
 47 |     "collapsed": false
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "n = 10**7\n",
 52 |     "a = cl.clrandom.rand(queue, n, np.float32)\n",
 53 |     "b = cl.clrandom.rand(queue, n, np.float32)"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "## A simple 'target application'"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "We would like to evaluate this linear combination:"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 4,
 73 |    "metadata": {
 74 |     "collapsed": false
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "c1 = 5*a + 6*b"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "A problem with this is that every single operator (all three of them--and easily more for complicated expressions) corresponds to a kernel call, which can lead to high overhead. Let's try and avoid that by stuffing the entire operation into one kernel, in turn saving lots of memory traffic:"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 5,
 91 |    "metadata": {
 92 |     "collapsed": false
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "from pyopencl.elementwise import ElementwiseKernel"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 6,
102 |    "metadata": {
103 |     "collapsed": false
104 |    },
105 |    "outputs": [],
106 |    "source": [
107 |     "#clear\n",
108 |     "lin_comb = ElementwiseKernel(ctx,\n",
109 |     "\n",
110 |     "        \"float a, float *x, float b, float *y, float *c\",\n",
111 |     "\n",
112 |     "        \"c[i] = a*x[i] + b*y[i]\")"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 7,
118 |    "metadata": {
119 |     "collapsed": false
120 |    },
121 |    "outputs": [
122 |     {
123 |      "data": {
124 |       "text/plain": [
125 |        "<pyopencl.cffi_cl.Event at 0x7f6f3bd72a20>"
126 |       ]
127 |      },
128 |      "execution_count": 7,
129 |      "metadata": {},
130 |      "output_type": "execute_result"
131 |     }
132 |    ],
133 |    "source": [
134 |     "c2 = cl.array.empty_like(a)\n",
135 |     "lin_comb(5, a, 6, b, c2)"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 8,
141 |    "metadata": {
142 |     "collapsed": false
143 |    },
144 |    "outputs": [
145 |     {
146 |      "name": "stdout",
147 |      "output_type": "stream",
148 |      "text": [
149 |       "0.0\n"
150 |      ]
151 |     }
152 |    ],
153 |    "source": [
154 |     "import numpy.linalg as la\n",
155 |     "print(la.norm(c1.get() - c2.get()))"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "markdown",
160 |    "metadata": {},
161 |    "source": [
162 |     "## Timing ElementwiseKernel\n",
163 |     "\n",
164 |     "Did this optimization pay off?"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 9,
170 |    "metadata": {
171 |     "collapsed": false
172 |    },
173 |    "outputs": [
174 |     {
175 |      "name": "stdout",
176 |      "output_type": "stream",
177 |      "text": [
178 |       "elapsed: 5.4626686573028564 s\n"
179 |      ]
180 |     }
181 |    ],
182 |    "source": [
183 |     "from time import time\n",
184 |     "queue.finish()\n",
185 |     "start_time = time()\n",
186 |     "\n",
187 |     "for i in range(10):\n",
188 |     "    c1 = 5*a + 6*b\n",
189 |     "    \n",
190 |     "queue.finish()\n",
191 |     "print(\"elapsed: {0} s\".format(time()-start_time))"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 10,
197 |    "metadata": {
198 |     "collapsed": false
199 |    },
200 |    "outputs": [
201 |     {
202 |      "name": "stdout",
203 |      "output_type": "stream",
204 |      "text": [
205 |       "elapsed: 2.354213237762451 s\n"
206 |      ]
207 |     }
208 |    ],
209 |    "source": [
210 |     "from time import time\n",
211 |     "queue.finish()\n",
212 |     "start_time = time()\n",
213 |     "\n",
214 |     "for i in range(10):\n",
215 |     "    lin_comb(5, a, 6, b, c2)\n",
216 |     "    \n",
217 |     "queue.finish()\n",
218 |     "print(\"elapsed: {0} s\".format(time()-start_time))"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": null,
224 |    "metadata": {
225 |     "collapsed": false
226 |    },
227 |    "outputs": [],
228 |    "source": []
229 |   }
230 |  ],
231 |  "metadata": {
232 |   "kernelspec": {
233 |    "display_name": "Python 3",
234 |    "language": "python",
235 |    "name": "python3"
236 |   },
237 |   "language_info": {
238 |    "codemirror_mode": {
239 |     "name": "ipython",
240 |     "version": 3
241 |    },
242 |    "file_extension": ".py",
243 |    "mimetype": "text/x-python",
244 |    "name": "python",
245 |    "nbconvert_exporter": "python",
246 |    "pygments_lexer": "ipython3",
247 |    "version": "3.5.0+"
248 |   }
249 |  },
250 |  "nbformat": 4,
251 |  "nbformat_minor": 0
252 | }
253 | 


--------------------------------------------------------------------------------
/03-opencl/2-2-reduction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# PyOpenCL Parallel Patterns: Reduction"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Setup Code"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": false
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "import pyopencl as cl\n",
 26 |     "import pyopencl.array\n",
 27 |     "import pyopencl.clrandom\n",
 28 |     "import numpy as np"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "ctx = cl.create_some_context()\n",
 40 |     "queue = cl.CommandQueue(ctx)"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 3,
 46 |    "metadata": {
 47 |     "collapsed": false
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "n = 10**7\n",
 52 |     "x = cl.clrandom.rand(queue, n, np.float64)"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "## Setting up the kernel: Computing a sum of squares\n",
 60 |     "\n",
 61 |     "We want to compute the sum of the squares of all entries in `x`.\n",
 62 |     "\n",
 63 |     "First, using `numpy`, as `result1` (watch out: `.get()`)"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 4,
 69 |    "metadata": {
 70 |     "collapsed": false
 71 |    },
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "#clear\n",
 75 |     "result1 = np.sum(x.get()**2)"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "markdown",
 80 |    "metadata": {},
 81 |    "source": [
 82 |     "Then, using PyOpenCL:"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 5,
 88 |    "metadata": {
 89 |     "collapsed": false
 90 |    },
 91 |    "outputs": [],
 92 |    "source": [
 93 |     "from pyopencl.reduction import ReductionKernel"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "Syntax:\n",
101 |     "\n",
102 |     "ReductionKernel(context, dtype, neutral, reduce_expr, map_expr, arguments)"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 6,
108 |    "metadata": {
109 |     "collapsed": false
110 |    },
111 |    "outputs": [],
112 |    "source": [
113 |     "#clear\n",
114 |     "rknl = ReductionKernel(ctx, np.float64,\n",
115 |     "        neutral=\"0\",\n",
116 |     "        reduce_expr=\"a+b\", map_expr=\"x[i]*x[i]\",\n",
117 |     "        arguments=\"double *x\")"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "markdown",
122 |    "metadata": {},
123 |    "source": [
124 |     "## Testing the result"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 7,
130 |    "metadata": {
131 |     "collapsed": false
132 |    },
133 |    "outputs": [],
134 |    "source": [
135 |     "result2 = rknl(x)"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 8,
141 |    "metadata": {
142 |     "collapsed": false
143 |    },
144 |    "outputs": [
145 |     {
146 |      "data": {
147 |       "text/plain": [
148 |        "pyopencl.array.Array"
149 |       ]
150 |      },
151 |      "execution_count": 8,
152 |      "metadata": {},
153 |      "output_type": "execute_result"
154 |     }
155 |    ],
156 |    "source": [
157 |     "type(result2)"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": 9,
163 |    "metadata": {
164 |     "collapsed": false
165 |    },
166 |    "outputs": [
167 |     {
168 |      "data": {
169 |       "text/plain": [
170 |        "()"
171 |       ]
172 |      },
173 |      "execution_count": 9,
174 |      "metadata": {},
175 |      "output_type": "execute_result"
176 |     }
177 |    ],
178 |    "source": [
179 |     "#clear\n",
180 |     "result2.shape"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "markdown",
185 |    "metadata": {},
186 |    "source": [
187 |     "Now check the result:"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": 10,
193 |    "metadata": {
194 |     "collapsed": false
195 |    },
196 |    "outputs": [
197 |     {
198 |      "name": "stdout",
199 |      "output_type": "stream",
200 |      "text": [
201 |       "9.31322574615e-10\n"
202 |      ]
203 |     }
204 |    ],
205 |    "source": [
206 |     "#clear\n",
207 |     "print(result2.get()-result1)"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "markdown",
212 |    "metadata": {},
213 |    "source": [
214 |     "* Change this to find the maximum.\n",
215 |     "* Works on structured types, too.\n",
216 |     "* What if you wanted to find the maximum *and* its location?"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": null,
222 |    "metadata": {
223 |     "collapsed": true
224 |    },
225 |    "outputs": [],
226 |    "source": []
227 |   }
228 |  ],
229 |  "metadata": {
230 |   "kernelspec": {
231 |    "display_name": "Python 3",
232 |    "language": "python",
233 |    "name": "python3"
234 |   },
235 |   "language_info": {
236 |    "codemirror_mode": {
237 |     "name": "ipython",
238 |     "version": 3
239 |    },
240 |    "file_extension": ".py",
241 |    "mimetype": "text/x-python",
242 |    "name": "python",
243 |    "nbconvert_exporter": "python",
244 |    "pygments_lexer": "ipython3",
245 |    "version": "3.5.1+"
246 |   }
247 |  },
248 |  "nbformat": 4,
249 |  "nbformat_minor": 0
250 | }
251 | 


--------------------------------------------------------------------------------
/03-opencl/2-2a-monte-carlo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Monte Carlo Method\n",
  8 |     "\n",
  9 |     "As a simple example of a Monte Carlo method, we will approximate the value of $\\pi$:"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "metadata": {
 16 |     "collapsed": true
 17 |    },
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import numpy as np\n",
 21 |     "import pyopencl as cl\n",
 22 |     "import pyopencl.array\n",
 23 |     "import pyopencl.clrandom"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 2,
 29 |    "metadata": {
 30 |     "collapsed": true
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "ctx = cl.create_some_context()\n",
 35 |     "queue = cl.CommandQueue(ctx)"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "### Boilerplate for Random Number Generator"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 3,
 48 |    "metadata": {
 49 |     "collapsed": true
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "generator_preamble = \"\"\"\n",
 54 |     "#include <pyopencl-random123/philox.cl>\n",
 55 |     "\n",
 56 |     "typedef union {\n",
 57 |     "    uint4 v;\n",
 58 |     "    philox4x32_ctr_t c;\n",
 59 |     "} philox4x32_ctr_vec_union;\n",
 60 |     "\n",
 61 |     "\n",
 62 |     "uint4 philox4x32_bump(uint4 ctr)\n",
 63 |     "{\n",
 64 |     "    if (++ctr.x == 0)\n",
 65 |     "        if (++ctr.y == 0)\n",
 66 |     "            ++ctr.z;\n",
 67 |     "    return ctr;\n",
 68 |     "}\n",
 69 |     "\n",
 70 |     "uint4 philox4x32_gen(\n",
 71 |     "        uint4 ctr,\n",
 72 |     "        uint2 key,\n",
 73 |     "        uint4 *new_ctr)\n",
 74 |     "{\n",
 75 |     "    philox4x32_ctr_vec_union result;\n",
 76 |     "    result.c = philox4x32(\n",
 77 |     "        *(philox4x32_ctr_t *) &ctr,\n",
 78 |     "        *(philox4x32_key_t *) &key);\n",
 79 |     "    *new_ctr = philox4x32_bump(ctr);\n",
 80 |     "    return result.v;\n",
 81 |     "}\n",
 82 |     "\n",
 83 |     "float4 philox4x32_f32(\n",
 84 |     "        uint4 ctr,\n",
 85 |     "        uint2 key,\n",
 86 |     "        uint4 *new_ctr)\n",
 87 |     "{\n",
 88 |     "    *new_ctr = ctr;\n",
 89 |     "    return\n",
 90 |     "        convert_float4(philox4x32_gen(*new_ctr, key, new_ctr))\n",
 91 |     "        * 2.3283064365386963e-10f;\n",
 92 |     "}\n",
 93 |     "\"\"\""
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "### Reduction Code"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "metadata": {},
106 |    "source": [
107 |     "Complete the sampler code:\n",
108 |     "\n",
109 |     "```\n",
110 |     "mc_preamble_src = \"\"\"\n",
111 |     "\n",
112 |     "#include <pyopencl-complex.h>\n",
113 |     "\n",
114 |     "float compute_sample(int i, unsigned int k1)\n",
115 |     "{\n",
116 |     "    uint4 ctr = { 0, 1, 2, 3 };\n",
117 |     "    uint2 key2 = { i, k1 };\n",
118 |     "    float4 rng_res = philox4x32_f32(ctr, key2, &(ctr));\n",
119 |     "    ...\n",
120 |     "}\n",
121 |     "\"\"\"\n",
122 |     "```"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 27,
128 |    "metadata": {
129 |     "collapsed": true
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "#clear\n",
134 |     "mc_preamble_src = \"\"\"\n",
135 |     "\n",
136 |     "#include <pyopencl-complex.h>\n",
137 |     "\n",
138 |     "float compute_sample(int i, unsigned int k1)\n",
139 |     "{\n",
140 |     "    uint4 ctr = { 0, 1, 2, 3 };\n",
141 |     "    uint2 key2 = { i, k1 };\n",
142 |     "    float4 rng_res = philox4x32_f32(ctr, key2, &(ctr));\n",
143 |     "    \n",
144 |     "    cfloat_t samp0 = cfloat_new(rng_res.s0, rng_res.s1);\n",
145 |     "    cfloat_t samp1 = cfloat_new(rng_res.s2, rng_res.s3);\n",
146 |     "    \n",
147 |     "    float result = 0;\n",
148 |     "    if (cfloat_abs(samp0) <= 1)\n",
149 |     "        result += 1;\n",
150 |     "    if (cfloat_abs(samp1) <= 1)\n",
151 |     "        result += 1;\n",
152 |     "        \n",
153 |     "    return result;\n",
154 |     "}\n",
155 |     "\"\"\""
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 28,
161 |    "metadata": {
162 |     "collapsed": true
163 |    },
164 |    "outputs": [],
165 |    "source": [
166 |     "from pyopencl.reduction import ReductionKernel"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "markdown",
171 |    "metadata": {},
172 |    "source": [
173 |     "Syntax:\n",
174 |     "\n",
175 |     "`ReductionKernel(context, dtype, neutral, reduce_expr, map_expr, arguments)`"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": 29,
181 |    "metadata": {
182 |     "collapsed": false
183 |    },
184 |    "outputs": [],
185 |    "source": [
186 |     "#clear\n",
187 |     "rknl = ReductionKernel(ctx, np.float32,\n",
188 |     "        neutral=\"0\",\n",
189 |     "        reduce_expr=\"a+b\", map_expr=\"compute_sample(i, k1)\",\n",
190 |     "        arguments=\"unsigned int k1\", preamble=generator_preamble+mc_preamble_src)"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": 32,
196 |    "metadata": {
197 |     "collapsed": false
198 |    },
199 |    "outputs": [
200 |     {
201 |      "name": "stdout",
202 |      "output_type": "stream",
203 |      "text": [
204 |       "3.14154656\n"
205 |      ]
206 |     }
207 |    ],
208 |    "source": [
209 |     "n = 100000000\n",
210 |     "\n",
211 |     "nsamples_accepted = rknl(15, range=slice(n), queue=queue).get()\n",
212 |     "nsamples = 2*n\n",
213 |     "approx_pi = 4 * nsamples_accepted/nsamples\n",
214 |     "\n",
215 |     "print(approx_pi)"
216 |    ]
217 |   }
218 |  ],
219 |  "metadata": {
220 |   "kernelspec": {
221 |    "display_name": "Python 3",
222 |    "language": "python",
223 |    "name": "python3"
224 |   },
225 |   "language_info": {
226 |    "codemirror_mode": {
227 |     "name": "ipython",
228 |     "version": 3
229 |    },
230 |    "file_extension": ".py",
231 |    "mimetype": "text/x-python",
232 |    "name": "python",
233 |    "nbconvert_exporter": "python",
234 |    "pygments_lexer": "ipython3",
235 |    "version": "3.5.1+"
236 |   }
237 |  },
238 |  "nbformat": 4,
239 |  "nbformat_minor": 0
240 | }
241 | 


--------------------------------------------------------------------------------
/03-opencl/2-3-scan.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# PyOpenCL Parallel Patterns: Scan/Prefix Sum\n",
  8 |     "\n",
  9 |     "## Setup Code"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "metadata": {
 16 |     "collapsed": false
 17 |    },
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import pyopencl as cl\n",
 21 |     "import pyopencl.array\n",
 22 |     "import pyopencl.clrandom\n",
 23 |     "import numpy as np\n",
 24 |     "import numpy.linalg as la"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 2,
 30 |    "metadata": {
 31 |     "collapsed": false
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "ctx = cl.create_some_context()\n",
 36 |     "queue = cl.CommandQueue(ctx)"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 3,
 42 |    "metadata": {
 43 |     "collapsed": false
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "n = 10**7\n",
 48 |     "x = cl.clrandom.rand(queue, n, np.float64)"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "## Setting up the kernel: Compute the prefix sum of squares\n",
 56 |     "\n",
 57 |     "We want to compute the prefix sum of the squares of all entries in `x`.\n",
 58 |     "\n",
 59 |     "First, using `numpy`, as `result1`:"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 4,
 65 |    "metadata": {
 66 |     "collapsed": false
 67 |    },
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "#clear\n",
 71 |     "result1 = np.cumsum(x.get()**2)"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "markdown",
 76 |    "metadata": {},
 77 |    "source": [
 78 |     "Then, using PyOpenCL:"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": 5,
 84 |    "metadata": {
 85 |     "collapsed": false
 86 |    },
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "from pyopencl.scan import GenericScanKernel"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "markdown",
 94 |    "metadata": {},
 95 |    "source": [
 96 |     "Syntax:\n",
 97 |     "    \n",
 98 |     "GenericScanKernel(context, dtype, arguments, input_expr, scan_expr using `a` and `b`, neutral, output_statement with `item`)"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 7,
104 |    "metadata": {
105 |     "collapsed": false
106 |    },
107 |    "outputs": [],
108 |    "source": [
109 |     "#clear\n",
110 |     "sknl = GenericScanKernel(ctx, np.float64,\n",
111 |     "    arguments=\"double *y, double *x\",\n",
112 |     "    input_expr=\"x[i]*x[i]\",\n",
113 |     "    scan_expr=\"a+b\", neutral=\"0\",\n",
114 |     "    output_statement=\"y[i] = item\")"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 8,
120 |    "metadata": {
121 |     "collapsed": false
122 |    },
123 |    "outputs": [
124 |     {
125 |      "data": {
126 |       "text/plain": [
127 |        "<pyopencl.cffi_cl.Event at 0x7f07c6d8f630>"
128 |       ]
129 |      },
130 |      "execution_count": 8,
131 |      "metadata": {},
132 |      "output_type": "execute_result"
133 |     }
134 |    ],
135 |    "source": [
136 |     "result2 = cl.array.empty_like(x)\n",
137 |     "sknl(result2, x)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "## Testing the outcome"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 9,
150 |    "metadata": {
151 |     "collapsed": false
152 |    },
153 |    "outputs": [
154 |     {
155 |      "name": "stdout",
156 |      "output_type": "stream",
157 |      "text": [
158 |       "0.00019364830171\n"
159 |      ]
160 |     }
161 |    ],
162 |    "source": [
163 |     "print(la.norm(result2.get() - result1))"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "metadata": {},
169 |    "source": [
170 |     "More features:\n",
171 |     "\n",
172 |     "* Segmented Scan\n",
173 |     "* Output stencils\n",
174 |     "* Works on structured types"
175 |    ]
176 |   }
177 |  ],
178 |  "metadata": {
179 |   "kernelspec": {
180 |    "display_name": "Python 3",
181 |    "language": "python",
182 |    "name": "python3"
183 |   },
184 |   "language_info": {
185 |    "codemirror_mode": {
186 |     "name": "ipython",
187 |     "version": 3
188 |    },
189 |    "file_extension": ".py",
190 |    "mimetype": "text/x-python",
191 |    "name": "python",
192 |    "nbconvert_exporter": "python",
193 |    "pygments_lexer": "ipython3",
194 |    "version": "3.5.0+"
195 |   }
196 |  },
197 |  "nbformat": 4,
198 |  "nbformat_minor": 0
199 | }
200 | 


--------------------------------------------------------------------------------
/03-opencl/3-practice-expression-kernel.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Practice: Generating a Simple Kernel\n",
  8 |     "\n",
  9 |     "The purpose of this practice problem is to generate a simple kernel that applies a user-supplied expression to every entry of an array. Implement a class `ExpressionKernel` that can be used as shown in the test at the end of this notebook."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 2,
 15 |    "metadata": {
 16 |     "collapsed": false
 17 |    },
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import numpy as np\n",
 21 |     "import numpy.linalg as la\n",
 22 |     "\n",
 23 |     "import pyopencl as cl\n",
 24 |     "import pyopencl.array\n",
 25 |     "import pyopencl.clmath\n",
 26 |     "import pyopencl.clrandom"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 3,
 32 |    "metadata": {
 33 |     "collapsed": false
 34 |    },
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "\n",
 38 |     "class ExpressionKernel:\n",
 39 |     "    def __init__(self, cl_context, expression):\n",
 40 |     "        # ...\n",
 41 |     "        pass\n",
 42 |     "    \n",
 43 |     "    def __call__(self, queue, ary):\n",
 44 |     "        # ...\n",
 45 |     "        pass"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 4,
 51 |    "metadata": {
 52 |     "collapsed": false
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "#clear\n",
 57 |     "# Solution\n",
 58 |     "\n",
 59 |     "class ExpressionKernel:\n",
 60 |     "    def __init__(self, cl_context, expression):\n",
 61 |     "        src = \"\"\"\n",
 62 |     "            kernel void apply(__global double *out, global double *in)\n",
 63 |     "            {\n",
 64 |     "                int i = get_global_id(0);\n",
 65 |     "                double x = in[i];\n",
 66 |     "                out[i] = RESULT;\n",
 67 |     "            }\n",
 68 |     "            \"\"\"\n",
 69 |     "\n",
 70 |     "        from pymbolic.mapper.c_code import CCodeMapper\n",
 71 |     "        ccm = CCodeMapper()\n",
 72 |     "        src = src.replace(\"RESULT\", ccm(expression))\n",
 73 |     "        self.prg = cl.Program(cl_context, src).build()\n",
 74 |     "        self.knl = self.prg.apply\n",
 75 |     "\n",
 76 |     "    def __call__(self, queue, ary):\n",
 77 |     "        result = cl.array.empty_like(ary)\n",
 78 |     "        self.knl(queue, ary.shape, None, result.data, ary.data)\n",
 79 |     "        return result"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "To test our implementation, we create a context and an array full of random numbers:"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 5,
 92 |    "metadata": {
 93 |     "collapsed": false
 94 |    },
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "cl_context = cl.create_some_context()\n",
 98 |     "queue = cl.CommandQueue(cl_context)\n",
 99 |     "\n",
100 |     "ary = cl.clrandom.rand(queue, 500, dtype=np.float64)"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 6,
106 |    "metadata": {
107 |     "collapsed": false
108 |    },
109 |    "outputs": [
110 |     {
111 |      "name": "stdout",
112 |      "output_type": "stream",
113 |      "text": [
114 |       "9.76586150045e-16\n"
115 |      ]
116 |     }
117 |    ],
118 |    "source": [
119 |     "\n",
120 |     "from pymbolic import var\n",
121 |     "\n",
122 |     "x = var(\"x\")\n",
123 |     "eknl = ExpressionKernel(cl_context, var(\"sqrt\")(1-x**2))\n",
124 |     "\n",
125 |     "result = eknl(queue, ary)\n",
126 |     "\n",
127 |     "diff = result - cl.clmath.sqrt(1-ary**2)\n",
128 |     "print(la.norm(diff.get()))"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {
135 |     "collapsed": true
136 |    },
137 |    "outputs": [],
138 |    "source": []
139 |   }
140 |  ],
141 |  "metadata": {
142 |   "kernelspec": {
143 |    "display_name": "Python 3",
144 |    "language": "python",
145 |    "name": "python3"
146 |   },
147 |   "language_info": {
148 |    "codemirror_mode": {
149 |     "name": "ipython",
150 |     "version": 3
151 |    },
152 |    "file_extension": ".py",
153 |    "mimetype": "text/x-python",
154 |    "name": "python",
155 |    "nbconvert_exporter": "python",
156 |    "pygments_lexer": "ipython3",
157 |    "version": "3.5.1+"
158 |   }
159 |  },
160 |  "nbformat": 4,
161 |  "nbformat_minor": 0
162 | }
163 | 


--------------------------------------------------------------------------------
/03-opencl/3-practice-hermite-monte-carlo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Practice: Orthogonality of Hermite Polynomials\n",
  8 |     "\n",
  9 |     "In this exercise, modify the Monte-Carlo example to demonstrate the orthogonality of the two [Hermite polynomials](https://en.wikipedia.org/wiki/Hermite_polynomials)\n",
 10 |     "\n",
 11 |     "* $1$ and\n",
 12 |     "* $x^2-1$\n",
 13 |     "\n",
 14 |     "with respect to the weight $e^{-\\frac{x^2}2}$, i.e. show (numerically, using a Monte Carlo method) that\n",
 15 |     "\n",
 16 |     "$$\n",
 17 |     "\\int_{-\\infty}^\\infty 1 \\cdot (x^2-1) \\cdot e^{-\\frac{x^2}2}dx = 0\n",
 18 |     "$$\n",
 19 |     "\n",
 20 |     "and that\n",
 21 |     "\n",
 22 |     "$$\n",
 23 |     "\\int_{-\\infty}^\\infty (x^2-1)^2  \\cdot e^{-\\frac{x^2}2}dx = 2\\sqrt{2\\pi}.\n",
 24 |     "$$\n",
 25 |     "\n",
 26 |     "Realize that\n",
 27 |     "$$\n",
 28 |     "\\int_{-\\infty}^\\infty \\dots  \\cdot \\frac{e^{-\\frac{x^2}2}}{\\sqrt{2\\pi}}dx\n",
 29 |     "$$\n",
 30 |     "can be evaluated by Monte-Carlo summation of $\\dots$, i.e. by averaging $\\dots$ over samples $x$ drawn from the standard normal distribution.\n",
 31 |     "\n",
 32 |     "Use the [Box-Muller transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) to obtain normally-distributed random numbers from the uniformly distributed ones returned by PyOpenCL's random number generator.\n",
 33 |     "\n"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "### Initialization"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 3,
 46 |    "metadata": {
 47 |     "collapsed": true
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "import numpy as np\n",
 52 |     "import pyopencl as cl\n",
 53 |     "import pyopencl.array\n",
 54 |     "import pyopencl.clrandom\n",
 55 |     "\n",
 56 |     "ctx = cl.create_some_context()\n",
 57 |     "queue = cl.CommandQueue(ctx)"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "metadata": {},
 63 |    "source": [
 64 |     "### Boilerplate for Random Number Generator"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 4,
 70 |    "metadata": {
 71 |     "collapsed": true
 72 |    },
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "generator_preamble = \"\"\"\n",
 76 |     "#include <pyopencl-random123/philox.cl>\n",
 77 |     "\n",
 78 |     "typedef union {\n",
 79 |     "    uint4 v;\n",
 80 |     "    philox4x32_ctr_t c;\n",
 81 |     "} philox4x32_ctr_vec_union;\n",
 82 |     "\n",
 83 |     "\n",
 84 |     "uint4 philox4x32_bump(uint4 ctr)\n",
 85 |     "{\n",
 86 |     "    if (++ctr.x == 0)\n",
 87 |     "        if (++ctr.y == 0)\n",
 88 |     "            ++ctr.z;\n",
 89 |     "    return ctr;\n",
 90 |     "}\n",
 91 |     "\n",
 92 |     "uint4 philox4x32_gen(\n",
 93 |     "        uint4 ctr,\n",
 94 |     "        uint2 key,\n",
 95 |     "        uint4 *new_ctr)\n",
 96 |     "{\n",
 97 |     "    philox4x32_ctr_vec_union result;\n",
 98 |     "    result.c = philox4x32(\n",
 99 |     "        *(philox4x32_ctr_t *) &ctr,\n",
100 |     "        *(philox4x32_key_t *) &key);\n",
101 |     "    *new_ctr = philox4x32_bump(ctr);\n",
102 |     "    return result.v;\n",
103 |     "}\n",
104 |     "\n",
105 |     "float4 philox4x32_f32(\n",
106 |     "        uint4 ctr,\n",
107 |     "        uint2 key,\n",
108 |     "        uint4 *new_ctr)\n",
109 |     "{\n",
110 |     "    *new_ctr = ctr;\n",
111 |     "    return\n",
112 |     "        convert_float4(philox4x32_gen(*new_ctr, key, new_ctr))\n",
113 |     "        * 2.3283064365386963e-10f;\n",
114 |     "}\n",
115 |     "\"\"\""
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "metadata": {},
121 |    "source": [
122 |     "### Monte-Carlo code"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 36,
128 |    "metadata": {
129 |     "collapsed": false
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "#clear\n",
134 |     "\n",
135 |     "from mako.template import Template\n",
136 |     "\n",
137 |     "mc_preamble_src = Template(\"\"\"\n",
138 |     "\n",
139 |     "#include <pyopencl-complex.h>\n",
140 |     "\n",
141 |     "float compute_sample(int i, unsigned int k1)\n",
142 |     "{\n",
143 |     "    uint4 ctr = { 0, 1, 2, 3 };\n",
144 |     "    uint2 key2 = { i, k1 };\n",
145 |     "    float4 rng_res = philox4x32_f32(ctr, key2, &(ctr));\n",
146 |     "    \n",
147 |     "    float r0 = sqrt(-2*log(rng_res.s0));\n",
148 |     "    float v0 = r0*cos((float) (2*M_PI) * rng_res.s1);\n",
149 |     "    float v1 = r0*sin((float) (2*M_PI) * rng_res.s1);\n",
150 |     "\n",
151 |     "    float r2 = sqrt(-2*log(rng_res.s2));\n",
152 |     "    float v2 = r2*cos((float) (2*M_PI) * rng_res.s3);\n",
153 |     "    float v3 = r2*sin((float) (2*M_PI) * rng_res.s3);\n",
154 |     "    \n",
155 |     "    float result = 0;\n",
156 |     "    \n",
157 |     "    %for x in [\"v0\", \"v1\", \"v2\", \"v3\"]:\n",
158 |     "    {\n",
159 |     "        float x = ${x};\n",
160 |     "        float H2 = x*x - 1;\n",
161 |     "        result += H2;\n",
162 |     "    }\n",
163 |     "    %endfor\n",
164 |     "    \n",
165 |     "    return result;\n",
166 |     "}\n",
167 |     "\"\"\", strict_undefined=True).render()"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 37,
173 |    "metadata": {
174 |     "collapsed": false
175 |    },
176 |    "outputs": [],
177 |    "source": [
178 |     "#clear\n",
179 |     "\n",
180 |     "from pyopencl.reduction import ReductionKernel\n",
181 |     "\n",
182 |     "rknl = ReductionKernel(ctx, np.float32,\n",
183 |     "        neutral=\"0\",\n",
184 |     "        reduce_expr=\"a+b\", map_expr=\"compute_sample(i, k1)\",\n",
185 |     "        arguments=\"unsigned int k1\", preamble=generator_preamble+mc_preamble_src)"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": 38,
191 |    "metadata": {
192 |     "collapsed": false
193 |    },
194 |    "outputs": [
195 |     {
196 |      "name": "stdout",
197 |      "output_type": "stream",
198 |      "text": [
199 |       "0.000107572192383\n"
200 |      ]
201 |     }
202 |    ],
203 |    "source": [
204 |     "#clear\n",
205 |     "n = 10000000\n",
206 |     "\n",
207 |     "nsamples = 4*n\n",
208 |     "result = rknl(15, range=slice(n), queue=queue).get() / nsamples\n",
209 |     "\n",
210 |     "print(result)"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": null,
216 |    "metadata": {
217 |     "collapsed": true
218 |    },
219 |    "outputs": [],
220 |    "source": []
221 |   }
222 |  ],
223 |  "metadata": {
224 |   "kernelspec": {
225 |    "display_name": "Python 3",
226 |    "language": "python",
227 |    "name": "python3"
228 |   },
229 |   "language_info": {
230 |    "codemirror_mode": {
231 |     "name": "ipython",
232 |     "version": 3
233 |    },
234 |    "file_extension": ".py",
235 |    "mimetype": "text/x-python",
236 |    "name": "python",
237 |    "nbconvert_exporter": "python",
238 |    "pygments_lexer": "ipython3",
239 |    "version": "3.5.1+"
240 |   }
241 |  },
242 |  "nbformat": 4,
243 |  "nbformat_minor": 0
244 | }
245 | 


--------------------------------------------------------------------------------
/03-opencl/README.rst:
--------------------------------------------------------------------------------
1 | **Tip:** ``ipynb`` files can be viewed on Github. Just click them.
2 | 


--------------------------------------------------------------------------------
/04-case-studies/01-indexing-and-broadcasting.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Indexing and Broadcasting in Numpy"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Embedding mini-languages in Python has a long tradition. In this section of the tutorial, we will explore some examples of this practice."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "The first example we will consider is so-called *broadcasting* in numpy. It may look shallow at first sight, but it and its associated operations constitute a considerable subset of the array programming language APL."
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 3,
 27 |    "metadata": {
 28 |     "collapsed": false
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import numpy as np"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 4,
 38 |    "metadata": {
 39 |     "collapsed": false
 40 |    },
 41 |    "outputs": [
 42 |     {
 43 |      "name": "stdout",
 44 |      "output_type": "stream",
 45 |      "text": [
 46 |       "[0 1 2 3]\n",
 47 |       "[ 0 10 20]\n"
 48 |      ]
 49 |     }
 50 |    ],
 51 |    "source": [
 52 |     "a = np.arange(4)\n",
 53 |     "b = np.arange(3) * 10\n",
 54 |     "print(a)\n",
 55 |     "print(b)"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 7,
 61 |    "metadata": {
 62 |     "collapsed": false
 63 |    },
 64 |    "outputs": [
 65 |     {
 66 |      "data": {
 67 |       "text/plain": [
 68 |        "(4, 1)"
 69 |       ]
 70 |      },
 71 |      "execution_count": 7,
 72 |      "metadata": {},
 73 |      "output_type": "execute_result"
 74 |     }
 75 |    ],
 76 |    "source": [
 77 |     "a.reshape(-1, 1).shape"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 10,
 83 |    "metadata": {
 84 |     "collapsed": false
 85 |    },
 86 |    "outputs": [
 87 |     {
 88 |      "data": {
 89 |       "text/plain": [
 90 |        "array([[ 0, 10, 20],\n",
 91 |        "       [ 1, 11, 21],\n",
 92 |        "       [ 2, 12, 22],\n",
 93 |        "       [ 3, 13, 23]])"
 94 |       ]
 95 |      },
 96 |      "execution_count": 10,
 97 |      "metadata": {},
 98 |      "output_type": "execute_result"
 99 |     }
100 |    ],
101 |    "source": [
102 |     "x = a.reshape(-1, 1) + b\n",
103 |     "x"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 12,
109 |    "metadata": {
110 |     "collapsed": false
111 |    },
112 |    "outputs": [
113 |     {
114 |      "data": {
115 |       "text/plain": [
116 |        "array([ 6, 46, 86])"
117 |       ]
118 |      },
119 |      "execution_count": 12,
120 |      "metadata": {},
121 |      "output_type": "execute_result"
122 |     }
123 |    ],
124 |    "source": [
125 |     "#clear\n",
126 |     "np.sum(x, axis=0)"
127 |    ]
128 |   }
129 |  ],
130 |  "metadata": {},
131 |  "nbformat": 4,
132 |  "nbformat_minor": 0
133 | }


--------------------------------------------------------------------------------
/04-case-studies/02-einsum.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Einstein summation in numpy"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "It turns out that `numpy` actually has several more mini-languages embedded in it. This next example is borrowed and slightly generalized from mathematics, where it is called Einstein summation."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Recall that matrix-matrix multiplication can be expressed by:\n",
 22 |     "$$\n",
 23 |     "(AB)_{ij} = \\sum_k A_{ik} B_{kj}$$\n",
 24 |     "\n",
 25 |     "Einstein summation is a relatively natural way of generalizing this to arrays with multiple dimensions. The above matrix-matrix multiplication expression, for example, becomes:\n",
 26 |     "\n",
 27 |     "$$ (AB)_{ij} = A_{ik} B_{kj}$$\n",
 28 |     "\n",
 29 |     "Where the implied rule is that repeated indices that are not part of the output will be summed over.\n",
 30 |     "\n",
 31 |     "numpy simply takes this convention and turns it into a way of expressing array operations:"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 2,
 37 |    "metadata": {
 38 |     "collapsed": false
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "import numpy as np"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 3,
 48 |    "metadata": {
 49 |     "collapsed": false
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "#clear\n",
 54 |     "A = np.random.randn(15, 20)\n",
 55 |     "B = np.random.randn(20, 25)\n",
 56 |     "\n",
 57 |     "AB1 = A.dot(B)"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 4,
 63 |    "metadata": {
 64 |     "collapsed": false
 65 |    },
 66 |    "outputs": [
 67 |     {
 68 |      "name": "stdout",
 69 |      "output_type": "stream",
 70 |      "text": [
 71 |       "1.21357255039e-14\n"
 72 |      ]
 73 |     }
 74 |    ],
 75 |    "source": [
 76 |     "#clear\n",
 77 |     "AB2 = np.einsum(\"ik,kj->ij\", A, B)\n",
 78 |     "\n",
 79 |     "print(np.linalg.norm(AB1 - AB2))"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {
 86 |     "collapsed": true
 87 |    },
 88 |    "outputs": [],
 89 |    "source": []
 90 |   }
 91 |  ],
 92 |  "metadata": {
 93 |   "kernelspec": {
 94 |    "display_name": "Python 3",
 95 |    "language": "python",
 96 |    "name": "python3"
 97 |   },
 98 |   "language_info": {
 99 |    "codemirror_mode": {
100 |     "name": "ipython",
101 |     "version": 3
102 |    },
103 |    "file_extension": ".py",
104 |    "mimetype": "text/x-python",
105 |    "name": "python",
106 |    "nbconvert_exporter": "python",
107 |    "pygments_lexer": "ipython3",
108 |    "version": "3.5.0+"
109 |   }
110 |  },
111 |  "nbformat": 4,
112 |  "nbformat_minor": 0
113 | }
114 | 


--------------------------------------------------------------------------------
/04-case-studies/03-ufl.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# UFL, the 'Unified Form Language'"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "UFL is part of FEniCS, where it is used to describe finite element problems that are to be solved using the framework. The following code snippet should look sufficiently familiar, and it should be readily apparent how a language such as this could be taken in and processed using the tools that we have seen:"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "code",
19 |    "execution_count": null,
20 |    "metadata": {
21 |     "collapsed": false
22 |    },
23 |    "outputs": [],
24 |    "source": [
25 |     "#clear\n",
26 |     "from dolfin import *\n",
27 |     "\n",
28 |     "\n",
29 |     "\n",
30 |     "# Create mesh and define function space\n",
31 |     "\n",
32 |     "mesh = UnitSquareMesh(8, 8)\n",
33 |     "\n",
34 |     "V = FunctionSpace(mesh, \"Lagrange\", 1)\n",
35 |     "\n",
36 |     "\n",
37 |     "\n",
38 |     "# Define boundary condition\n",
39 |     "\n",
40 |     "u0 = Function(V)\n",
41 |     "\n",
42 |     "bc = DirichletBC(V, u0, \"x[0] < DOLFIN_EPS || x[0] > 1.0 - DOLFIN_EPS\")\n",
43 |     "\n",
44 |     "\n",
45 |     "\n",
46 |     "# Define variational problem\n",
47 |     "\n",
48 |     "u = TrialFunction(V)\n",
49 |     "\n",
50 |     "v = TestFunction(V)\n",
51 |     "\n",
52 |     "f = Expression(\"10*exp(-(pow(x[0] - 0.5, 2) + pow(x[1] - 0.5, 2)) / 0.02)\",\n",
53 |     "\n",
54 |     "               degree=1)\n",
55 |     "\n",
56 |     "g = Expression(\"sin(5*x[0])\", degree=1)\n",
57 |     "\n",
58 |     "a = inner(grad(u), grad(v))*dx()\n",
59 |     "\n",
60 |     "L = f*v*dx() + g*v*ds()"
61 |    ]
62 |   }
63 |  ],
64 |  "metadata": {},
65 |  "nbformat": 4,
66 |  "nbformat_minor": 0
67 | }


--------------------------------------------------------------------------------
/04-case-studies/README.rst:
--------------------------------------------------------------------------------
1 | **Tip:** ``ipynb`` files can be viewed on Github. Just click them.
2 | 


--------------------------------------------------------------------------------
/05-generating-c/01-substitution.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Generating an OpenCL kernel by Textual Substitution"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "The simplest approach to generating code is to substitute snippets of text into an existing code \"template\". This can be done using the C preprocessor, simple string-based search and replace, or other string-value interpolation functionality present in the language. The example below demonstrates the last of these options:"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "code",
19 |    "execution_count": 1,
20 |    "metadata": {
21 |     "collapsed": false
22 |    },
23 |    "outputs": [],
24 |    "source": [
25 |     "kernel = r\"\"\"\n",
26 |     "    __kernel void {name}({arguments})\n",
27 |     "    {{\n",
28 |     "      int lid = get_local_id(0);\n",
29 |     "      int gsize = get_global_size(0);\n",
30 |     "      int work_group_start = get_local_size(0)*get_group_id(0);\n",
31 |     "      long i;\n",
32 |     "\n",
33 |     "      for (i = work_group_start + lid; i < n; i += gsize)\n",
34 |     "      {{\n",
35 |     "        {operation};\n",
36 |     "      }}\n",
37 |     "    }}\n",
38 |     "\"\"\""
39 |    ]
40 |   },
41 |   {
42 |    "cell_type": "markdown",
43 |    "metadata": {},
44 |    "source": [
45 |     "One slightly unfortunate fact that plays into using Python's `.format()` facility for this purpose is that opening and closing braces must be escaped by doubling them."
46 |    ]
47 |   },
48 |   {
49 |    "cell_type": "code",
50 |    "execution_count": 2,
51 |    "metadata": {
52 |     "collapsed": false
53 |    },
54 |    "outputs": [
55 |     {
56 |      "name": "stdout",
57 |      "output_type": "stream",
58 |      "text": [
59 |       "\n",
60 |       "    __kernel void scale(float *y, float a, float *x)\n",
61 |       "    {\n",
62 |       "      int lid = get_local_id(0);\n",
63 |       "      int gsize = get_global_size(0);\n",
64 |       "      int work_group_start = get_local_size(0)*get_group_id(0);\n",
65 |       "      long i;\n",
66 |       "\n",
67 |       "      for (i = work_group_start + lid; i < n; i += gsize)\n",
68 |       "      {\n",
69 |       "        y[i] = a*x[i];\n",
70 |       "      }\n",
71 |       "    }\n",
72 |       "\n"
73 |      ]
74 |     }
75 |    ],
76 |    "source": [
77 |     "print(kernel.format(\n",
78 |     "    name=\"scale\",\n",
79 |     "    arguments=\"float *y, float a, float *x\",\n",
80 |     "    operation=\"y[i] = a*x[i]\"\n",
81 |     "))"
82 |    ]
83 |   },
84 |   {
85 |    "cell_type": "code",
86 |    "execution_count": null,
87 |    "metadata": {
88 |     "collapsed": false
89 |    },
90 |    "outputs": [],
91 |    "source": []
92 |   }
93 |  ],
94 |  "metadata": {},
95 |  "nbformat": 4,
96 |  "nbformat_minor": 0
97 | }


--------------------------------------------------------------------------------
/05-generating-c/02-templating.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Generating an OpenCL Kernel using Textual Templating"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "A more advanced, but also less lightweight, alternative is the use of a so-called templating engine, of the kind commonly used to generate web pages.\n",
 15 |     "\n",
 16 |     "This offers tremendous flexibility in generation, including the possibility for full flow control, allowing applications such as loop unrolling.\n",
 17 |     "\n",
 18 |     "In the example below, we use a templating engine called 'Mako':"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 1,
 24 |    "metadata": {
 25 |     "collapsed": false
 26 |    },
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "from mako.template import Template"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "metadata": {
 36 |     "collapsed": false
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "tpl = Template(r\"\"\"\n",
 41 |     "    __kernel void ${name}(${arguments})\n",
 42 |     "    {\n",
 43 |     "      int lid = get_local_id(0);\n",
 44 |     "      int gsize = get_global_size(0);\n",
 45 |     "      int work_group_start = get_local_size(0)*get_group_id(0);\n",
 46 |     "      long i;\n",
 47 |     "\n",
 48 |     "      for (i = work_group_start + lid; i < n; i += gsize)\n",
 49 |     "      {\n",
 50 |     "        %for i_unroll in range(n_unroll):\n",
 51 |     "            ${operation};\n",
 52 |     "            %if i_unroll + 1 < n_unroll:\n",
 53 |     "                i += gsize;\n",
 54 |     "            %endif\n",
 55 |     "        %endfor\n",
 56 |     "      }\n",
 57 |     "    }\n",
 58 |     "\"\"\", strict_undefined=True)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 3,
 64 |    "metadata": {
 65 |     "collapsed": false
 66 |    },
 67 |    "outputs": [
 68 |     {
 69 |      "name": "stdout",
 70 |      "output_type": "stream",
 71 |      "text": [
 72 |       "\n",
 73 |       "    __kernel void scale(float *y, float a, float *x)\n",
 74 |       "    {\n",
 75 |       "      int lid = get_local_id(0);\n",
 76 |       "      int gsize = get_global_size(0);\n",
 77 |       "      int work_group_start = get_local_size(0)*get_group_id(0);\n",
 78 |       "      long i;\n",
 79 |       "\n",
 80 |       "      for (i = work_group_start + lid; i < n; i += gsize)\n",
 81 |       "      {\n",
 82 |       "            y[i] = a*x[i];\n",
 83 |       "                i += gsize;\n",
 84 |       "            y[i] = a*x[i];\n",
 85 |       "      }\n",
 86 |       "    }\n",
 87 |       "\n"
 88 |      ]
 89 |     }
 90 |    ],
 91 |    "source": [
 92 |     "print(tpl.render(\n",
 93 |     "    name=\"scale\",\n",
 94 |     "    arguments=\"float *y, float a, float *x\",\n",
 95 |     "    operation=\"y[i] = a*x[i]\",\n",
 96 |     "    n_unroll=2,\n",
 97 |     "))"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {
104 |     "collapsed": false
105 |    },
106 |    "outputs": [],
107 |    "source": []
108 |   }
109 |  ],
110 |  "metadata": {},
111 |  "nbformat": 4,
112 |  "nbformat_minor": 0
113 | }


--------------------------------------------------------------------------------
/05-generating-c/03-asts.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Generating Code by building a Syntax Tree"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "The last, most structured alternative for generating code is to construct the tree data structure representing the syntax, and then to transform it back into source code form.\n",
15 |     "\n",
16 |     "This approach to code generation is perhaps best suited to the programmatic generation of code, and less so to code written directly by a user."
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "code",
21 |    "execution_count": 1,
22 |    "metadata": {
23 |     "collapsed": false
24 |    },
25 |    "outputs": [],
26 |    "source": [
27 |     "from cgen import *"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "code",
32 |    "execution_count": 2,
33 |    "metadata": {
34 |     "collapsed": false
35 |    },
36 |    "outputs": [],
37 |    "source": [
38 |     "func = FunctionBody(\n",
39 |     "    FunctionDeclaration(Const(Pointer(Value(\"char\", \"greet\"))), []),\n",
40 |     "    Block([Statement('return \"hello world\"')])\n",
41 |     "    )"
42 |    ]
43 |   },
44 |   {
45 |    "cell_type": "code",
46 |    "execution_count": 3,
47 |    "metadata": {
48 |     "collapsed": false
49 |    },
50 |    "outputs": [
51 |     {
52 |      "name": "stdout",
53 |      "output_type": "stream",
54 |      "text": [
55 |       "char const *greet()\n",
56 |       "{\n",
57 |       "  return \"hello world\";\n",
58 |       "}\n"
59 |      ]
60 |     }
61 |    ],
62 |    "source": [
63 |     "print(func)"
64 |    ]
65 |   },
66 |   {
67 |    "cell_type": "code",
68 |    "execution_count": null,
69 |    "metadata": {
70 |     "collapsed": false
71 |    },
72 |    "outputs": [],
73 |    "source": []
74 |   }
75 |  ],
76 |  "metadata": {},
77 |  "nbformat": 4,
78 |  "nbformat_minor": 0
79 | }


--------------------------------------------------------------------------------
/05-generating-c/04-practice.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Practice problem: Dimension-Independent Finite Difference Kernel\n",
  8 |     "\n",
  9 |     "A particular type of problem that is often tricky to address with a single codebase is handling varying dimensionality, for example handling 1D, 2D, and 3D cases in a single code. In this problem, we will practice that for a simple finite difference code that applies a second-order centered finite difference operator:\n",
 10 |     "\n",
 11 |     "$$\n",
 12 |     "f''(x) \\approx  \\frac{f(x+h) - 2 f(x) + f(x-h)}{h^{2}}\n",
 13 |     "$$\n",
 14 |     "along each axis, summing the results. This implements an $n$-dimensional Laplacian ($\\triangle$) or div-grad operator.\n",
 15 |     "\n",
 16 |     "To keep things simple, we will not worry about boundary conditions. Also, to keep things simple, we will assume that we have exactly 20 data points in each direction, and we will assume the grid spacing $h$ is 1."
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 26,
 22 |    "metadata": {
 23 |     "collapsed": false
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "from mako.template import Template\n",
 28 |     "import pyopencl as cl\n",
 29 |     "import pyopencl.array\n",
 30 |     "import pyopencl.clrandom\n",
 31 |     "import numpy as np"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 27,
 37 |    "metadata": {
 38 |     "collapsed": false
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "tpl = Template(\"\"\"\n",
 43 |     "    kernel void fdiff(global float *out, global float * ary)\n",
 44 |     "    {\n",
 45 |     "        out[...] = ...\n",
 46 |     "    }\n",
 47 |     "    \"\"\", strict_undefined=True)"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 36,
 53 |    "metadata": {
 54 |     "collapsed": false
 55 |    },
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "#clear\n",
 59 |     "# Solution\n",
 60 |     "\n",
 61 |     "\n",
 62 |     "\n",
 63 |     "tpl = Template(\"\"\"\n",
 64 |     "\n",
 65 |     "    kernel void fdiff(global float *out, global float * ary)\n",
 66 |     "\n",
 67 |     "    {\n",
 68 |     "\n",
 69 |     "        int ibase = \n",
 70 |     "\n",
 71 |     "            <% stride = 1 %>\n",
 72 |     "\n",
 73 |     "            %for iax in range(dim):  \n",
 74 |     "\n",
 75 |     "                + (get_global_id(${iax}) + 1)*${stride}\n",
 76 |     "\n",
 77 |     "                <% stride *= 20 %>\n",
 78 |     "\n",
 79 |     "            %endfor\n",
 80 |     "\n",
 81 |     "            ; \n",
 82 |     "\n",
 83 |     "        \n",
 84 |     "\n",
 85 |     "        out[ibase] = -2*${dim}*ary[ibase]\n",
 86 |     "\n",
 87 |     "            <% stride = 1 %>\n",
 88 |     "\n",
 89 |     "            %for iax in range(dim):  \n",
 90 |     "\n",
 91 |     "                + ary[ibase - ${stride}] + ary[ibase + ${stride}]\n",
 92 |     "\n",
 93 |     "                <% stride *= 20 %>\n",
 94 |     "\n",
 95 |     "            %endfor\n",
 96 |     "\n",
 97 |     "            ;\n",
 98 |     "\n",
 99 |     "    }\n",
100 |     "\n",
101 |     "    \"\"\", strict_undefined=True)"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 37,
107 |    "metadata": {
108 |     "collapsed": false
109 |    },
110 |    "outputs": [
111 |     {
112 |      "name": "stdout",
113 |      "output_type": "stream",
114 |      "text": [
115 |       "\n",
116 |       "    kernel void fdiff(global float *out, global float * ary)\n",
117 |       "    {\n",
118 |       "        int ibase = \n",
119 |       "            \n",
120 |       "                + (get_global_id(0) + 1)*1\n",
121 |       "                \n",
122 |       "                + (get_global_id(1) + 1)*20\n",
123 |       "                \n",
124 |       "                + (get_global_id(2) + 1)*400\n",
125 |       "                \n",
126 |       "            ; \n",
127 |       "        \n",
128 |       "        out[ibase] = -2*3*ary[ibase]\n",
129 |       "            \n",
130 |       "                + ary[ibase - 1] + ary[ibase + 1]\n",
131 |       "                \n",
132 |       "                + ary[ibase - 20] + ary[ibase + 20]\n",
133 |       "                \n",
134 |       "                + ary[ibase - 400] + ary[ibase + 400]\n",
135 |       "                \n",
136 |       "            ;\n",
137 |       "    }\n",
138 |       "    \n"
139 |      ]
140 |     }
141 |    ],
142 |    "source": [
143 |     "dim = 3\n",
144 |     "code = tpl.render(dim=dim)\n",
145 |     "print(code)\n",
146 |     "\n",
147 |     "cl_context = cl.create_some_context()\n",
148 |     "queue = cl.CommandQueue(cl_context)\n",
149 |     "\n",
150 |     "prg = cl.Program(cl_context, code).build()\n",
151 |     "knl = prg.fdiff\n",
152 |     "\n",
153 |     "a = cl.clrandom.rand(queue, (20,)*dim, dtype=np.float32)\n",
154 |     "out = cl.array.empty_like(a)\n",
155 |     "\n",
156 |     "knl(queue, (18,)*dim, None, out.data, a.data)\n",
157 |     "queue.finish()"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": null,
163 |    "metadata": {
164 |     "collapsed": false
165 |    },
166 |    "outputs": [],
167 |    "source": []
168 |   }
169 |  ],
170 |  "metadata": {},
171 |  "nbformat": 4,
172 |  "nbformat_minor": 0
173 | }


--------------------------------------------------------------------------------
/05-generating-c/README.rst:
--------------------------------------------------------------------------------
1 | **Tip:** ``ipynb`` files can be viewed on Github. Just click them.
2 | 


--------------------------------------------------------------------------------
/06-loopy/0-slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/06-loopy/0-slides.pdf


--------------------------------------------------------------------------------
/06-loopy/03-reduction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Loopy: Reductions\n",
  8 |     "\n",
  9 |     "## Setup code"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 6,
 15 |    "metadata": {
 16 |     "collapsed": false,
 17 |     "jupyter": {
 18 |      "outputs_hidden": false
 19 |     }
 20 |    },
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "import numpy as np\n",
 24 |     "import pyopencl as cl\n",
 25 |     "import pyopencl.array\n",
 26 |     "import pyopencl.clrandom\n",
 27 |     "import loopy as lp\n",
 28 |     "\n",
 29 |     "from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 7,
 35 |    "metadata": {
 36 |     "collapsed": false,
 37 |     "jupyter": {
 38 |      "outputs_hidden": false
 39 |     }
 40 |    },
 41 |    "outputs": [
 42 |     {
 43 |      "name": "stdout",
 44 |      "output_type": "stream",
 45 |      "text": [
 46 |       "Choose platform:\n",
 47 |       "[0] <pyopencl.Platform 'Portable Computing Language' at 0x7ff8d5ac06e8>\n",
 48 |       "[1] <pyopencl.Platform 'Intel(R) OpenCL' at 0x36a3e38>\n"
 49 |      ]
 50 |     },
 51 |     {
 52 |      "name": "stdin",
 53 |      "output_type": "stream",
 54 |      "text": [
 55 |       "Choice [0]: \n"
 56 |      ]
 57 |     },
 58 |     {
 59 |      "name": "stdout",
 60 |      "output_type": "stream",
 61 |      "text": [
 62 |       "Set the environment variable PYOPENCL_CTX='' to avoid being asked again.\n"
 63 |      ]
 64 |     }
 65 |    ],
 66 |    "source": [
 67 |     "ctx = cl.create_some_context(interactive=True)\n",
 68 |     "queue = cl.CommandQueue(ctx)"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 8,
 74 |    "metadata": {
 75 |     "collapsed": false,
 76 |     "jupyter": {
 77 |      "outputs_hidden": false
 78 |     }
 79 |    },
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "n = 1024\n",
 83 |     "a = cl.clrandom.rand(queue, (n, n), dtype=np.float32)\n",
 84 |     "x = cl.clrandom.rand(queue, (n,), dtype=np.float32)"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "metadata": {},
 90 |    "source": [
 91 |     "## Capturing matrix-vector multiplication"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 9,
 97 |    "metadata": {
 98 |     "collapsed": false,
 99 |     "jupyter": {
100 |      "outputs_hidden": false
101 |     }
102 |    },
103 |    "outputs": [],
104 |    "source": [
105 |     "knl = lp.make_kernel(\n",
106 |     "    \"{[i,k]: 0<=i,k<n}\",\n",
107 |     "    \"b[i] = sum(k, a[i, k]*x[k])\"\n",
108 |     "    )"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 11,
114 |    "metadata": {
115 |     "collapsed": false,
116 |     "jupyter": {
117 |      "outputs_hidden": false
118 |     }
119 |    },
120 |    "outputs": [
121 |     {
122 |      "name": "stdout",
123 |      "output_type": "stream",
124 |      "text": [
125 |       "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine lid(N) ((int) get_local_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
126 |       "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine gid(N) ((int) get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n",
127 |       "\n",
128 |       "__kernel \u001b[36mvoid\u001b[39;49;00m \u001b[32m__attribute__\u001b[39;49;00m ((reqd_work_group_size(\u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(__global \u001b[36mfloat\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ a, __global \u001b[36mfloat\u001b[39;49;00m *__restrict__ b, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m n, __global \u001b[36mfloat\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ x)\n",
129 |       "{\n",
130 |       "  \u001b[36mfloat\u001b[39;49;00m acc_k;\n",
131 |       "\n",
132 |       "  \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m i = \u001b[34m0\u001b[39;49;00m; i <= -\u001b[34m1\u001b[39;49;00m + n; ++i)\n",
133 |       "  {\n",
134 |       "    acc_k = \u001b[34m0.0f\u001b[39;49;00m;\n",
135 |       "    \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m k = \u001b[34m0\u001b[39;49;00m; k <= -\u001b[34m1\u001b[39;49;00m + n; ++k)\n",
136 |       "      acc_k = acc_k + a[n * i + k] * x[k];\n",
137 |       "    b[i] = acc_k;\n",
138 |       "  }\n",
139 |       "}\n",
140 |       "\n"
141 |      ]
142 |     }
143 |    ],
144 |    "source": [
145 |     "knl = lp.set_options(knl, write_cl=True)\n",
146 |     "evt, _ = knl(queue, a=a, x=x)"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": null,
152 |    "metadata": {},
153 |    "outputs": [],
154 |    "source": []
155 |   }
156 |  ],
157 |  "metadata": {
158 |   "kernelspec": {
159 |    "display_name": "Python 3",
160 |    "language": "python",
161 |    "name": "python3"
162 |   },
163 |   "language_info": {
164 |    "codemirror_mode": {
165 |     "name": "ipython",
166 |     "version": 3
167 |    },
168 |    "file_extension": ".py",
169 |    "mimetype": "text/x-python",
170 |    "name": "python",
171 |    "nbconvert_exporter": "python",
172 |    "pygments_lexer": "ipython3",
173 |    "version": "3.8.4"
174 |   }
175 |  },
176 |  "nbformat": 4,
177 |  "nbformat_minor": 4
178 | }
179 | 


--------------------------------------------------------------------------------
/06-loopy/07-practice-image-processing.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Exercise: DSLs and Code generation with Loopy (open-ended)\n",
 8 |     "\n",
 9 |     "1. Define a small domain-specific language for image processing. Operations you could support include:\n",
10 |     "    * `blur(n, img)`: To keep things simple, take an average of the neighboring $n\\times n$ pixels.\n",
11 |     "    * `mask(x, y, width, height)`. (`loopy` supports `if(condition, then, else)`, which maps to an `If(cond, then, else)` node in pymbolic.)\n",
12 |     "    * Blending and nonlinear filtering using arithmetic\n",
13 |     "2. Using your language, implement a filter that blurs the top left quarter of an image\n",
14 |     "3. Implement your language \"conventionally\", using functions and loopy kernels\n",
15 |     "4. Implement your language by capturing an expression tree and executing that.\n",
16 |     "5. Think of and try to implement (at least) one optimization you could apply to the expression tree. You may use [Halide](http://halide-lang.org/) as inspiration if you like."
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "code",
21 |    "execution_count": null,
22 |    "metadata": {
23 |     "collapsed": false,
24 |     "jupyter": {
25 |      "outputs_hidden": false
26 |     }
27 |    },
28 |    "outputs": [],
29 |    "source": []
30 |   }
31 |  ],
32 |  "metadata": {
33 |   "kernelspec": {
34 |    "display_name": "Python 3",
35 |    "language": "python",
36 |    "name": "python3"
37 |   },
38 |   "language_info": {
39 |    "codemirror_mode": {
40 |     "name": "ipython",
41 |     "version": 3
42 |    },
43 |    "file_extension": ".py",
44 |    "mimetype": "text/x-python",
45 |    "name": "python",
46 |    "nbconvert_exporter": "python",
47 |    "pygments_lexer": "ipython3",
48 |    "version": "3.8.4"
49 |   }
50 |  },
51 |  "nbformat": 4,
52 |  "nbformat_minor": 4
53 | }
54 | 


--------------------------------------------------------------------------------
/06-loopy/README.rst:
--------------------------------------------------------------------------------
1 | **Tip:** ``ipynb`` files can be viewed on Github. Just click them.
2 | 


--------------------------------------------------------------------------------
/06-loopy/cat.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/06-loopy/cat.jpeg


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Domain-specific languages to Manycore and GPU: Building High-Performance Tools with Python
  2 | ==========================================================================================
  3 | 
  4 | A tutorial on Domain-Specific Languages
  5 | ---------------------------------------
  6 | 
  7 | This tutorial teaches you:
  8 | 
  9 | * how to **define** mathematically-oriented **domain-specific languages** ("DSLs") in
 10 |   Python
 11 | 
 12 | * how to build **transformations** for your DSLs to take them from **abstraction** to
 13 |   **implementation**
 14 | 
 15 | * how to generate highly efficient code from your domain-specific language
 16 | 
 17 | * how to use **just-in-time compilation** with OpenCL from Python to **execute
 18 |   generated code**
 19 | 
 20 | * a few existing **design studies** and **use cases** for domain-specific languages
 21 | 
 22 | * how to use loopy to generate **highly efficient code** to work with **array data**
 23 |   targeting **heterogeneous processor architectures** (CPUs/GPUs)
 24 | 
 25 | The tutorial also includes a brief introductory section to familiarize you with
 26 | the Python and numpy syntax.
 27 | 
 28 | This material is an updated version of a
 29 | `tutorial <http://sc15.supercomputing.org/schedule/event_detail-evid=tut174.html>`_ I
 30 | presented at `Supercomputing '15 <http://sc15.supercomputing.org>`_ in Austin.
 31 | 
 32 | Virtual machine image
 33 | ---------------------
 34 | 
 35 | A virtual machine image is available that has all the necessary tools
 36 | installed, to allow for easy experimentation. Follow these instructions
 37 | to get started:
 38 | 
 39 | 1. Download a version of VirtualBox suitable for your system and install it:
 40 | 
 41 |    https://www.virtualbox.org/wiki/Downloads
 42 | 
 43 | 2. Download the machine image itself:
 44 | 
 45 |    http://andreask.cs.illinois.edu/tmp/dsl-tutorial.ova
 46 | 
 47 | 3. (Optionally) Check whether the image downloaded correctly using the
 48 |    md5sum command line tool (Linux/OS X). On Windows, use this
 49 |    tool:
 50 | 
 51 |    http://www.pc-tools.net/win32/md5sums/
 52 | 
 53 |    Compare the computed checksum with the following value:
 54 |    6aa97e046293f8811d1749ab046f7f61
 55 | 
 56 |    Only proceed once the two match. If they don't, delete the file and
 57 |    retry the download.
 58 | 
 59 | 4. Open VirtualBox, click "File > Import Appliance", select the
 60 |    downloaded image and just click "Next" a few times.  Once imported,
 61 |    double-click on the virtual machine to make sure it starts. After a
 62 |    little while, you should see a simple desktop environment.
 63 | 
 64 | 5. Once all these steps complete successfully, congratulations! You are
 65 |    good to go. I'm looking forward to seeing you at the tutorial.
 66 | 
 67 | 6. Double-click the "Terminal" symbol on the desktop and enter::
 68 | 
 69 |       curl -L https://bit.ly/sc15-dsl | bash
 70 | 
 71 |    This will download these materials onto the virtual machine and put them
 72 |    into a subdirectory called ``sc15-tutorial-materials``. Next, type::
 73 | 
 74 |        ipython3 notebook
 75 | 
 76 |    to launch a browser-based interface and get started.
 77 | 
 78 | Software tools
 79 | --------------
 80 | 
 81 | The tutorial demonstrates the use of the following pieces of software:
 82 | 
 83 | Core packages:
 84 | 
 85 | *   Python: https://www.python.org
 86 | *   numpy: https://www.numpy.org
 87 | *   pymbolic: https://github.com/inducer/pymbolic
 88 | *   PyOpenCL: https://github.com/pyopencl/pyopencl
 89 | *   loopy: https://github.com/inducer/loopy
 90 | 
 91 | Supporting packages:
 92 | 
 93 | *   matplotlib: http://www.matplotlib.org
 94 | *   mako: http://www.makotemplates.org
 95 | *   cgen: https://github.com/inducer/cgen
 96 | 
 97 | All open-source under MIT/BSD licenses.
 98 | 
 99 | License
100 | -------
101 | 
102 | Copyright 2015 Andreas Kloeckner
103 | 
104 | Materials are available for use under a Creative Commons CC-BY license.  See
105 | included file ``LICENSE`` for details.  (I.e. by and large: retain authorship
106 | information, and otherwise do what you want)
107 | 


--------------------------------------------------------------------------------
/assemble.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/zsh
 2 | # Build the distributable tutorial materials from the numbered notebook directories.
 3 | set -e
 4 | setopt -o EXTENDED_GLOB  # the globs below use ~ exclusions and (#q...) qualifiers
 5 | 
 6 | unset PYTHONWARNINGS
 7 | # Prefix of the output names: $TUT_ID-tutorial-materials[-dist.zip]
 8 | TUT_ID=dsl
 9 | # Set to 1 to enable the corresponding nbconvert output format.
10 | PDF_OUTPUT=0
11 | HTML_OUTPUT=1
12 | 
13 | mkdir -p dist
14 | rm -Rf cleared  # dist/ is kept across runs (see the -nt checks); cleared/ is rebuilt
15 | 
16 | ME=$(readlink -f "$0")
17 | DIR=$(dirname "$ME")
18 | MYDIR=$(cd "$DIR" && pwd)  # absolute directory containing this script
19 | 
20 | function with_echo()
21 | {
22 |   echo "$@"
23 |   "$@"
24 | }
25 | 
26 | for nb in [0-9]*/**/*ipynb; do  # every notebook under the numbered directories
27 |   echo "PROCESSING $nb"
28 |   DIR="$(dirname "$nb")"
29 |   TRUNK="$(basename "$nb")"
30 |   TRUNK="${TRUNK%.ipynb}"
31 |   # dist/: copy produced by prepare-ipynb remove-marks, then optionally rendered by nbconvert.
32 |   CONV_DIR="dist/$DIR"
33 |   mkdir -p "$CONV_DIR"
34 |   CONV_BASE="dist/${nb%.ipynb}"
35 |   CONV_PY="${CONV_BASE}.py"
36 |   CONV_HTML="${CONV_BASE}.html"
37 |   CONV_PDF="${CONV_BASE}.pdf"
38 |   # Renderings are regenerated only when missing or older than the source notebook.
39 |   PROCESSED_IPYNB="${CONV_BASE}.ipynb"
40 |   "$MYDIR/ipython-demo-tools/prepare-ipynb" remove-marks "$nb" "$PROCESSED_IPYNB"
41 |   # if ! test -f "$CONV_PY" || test "$nb" -nt "$CONV_PY"; then
42 |   #   jupyter-nbconvert "$PROCESSED_IPYNB" --to=python "--output=${CONV_BASE}"
43 |   # fi
44 |   if [[ "$HTML_OUTPUT" = "1" ]]  && (! test -f "$CONV_HTML" || test "$nb" -nt "$CONV_HTML"); then
45 |     with_echo python $(which jupyter-nbconvert) "$PROCESSED_IPYNB" --to=html
46 |   fi
47 |   if [[ "$PDF_OUTPUT" = "1" ]] && (! test -f "$CONV_PDF" || test "$nb" -nt "$CONV_PDF"); then
48 |     with_echo python $(which jupyter-nbconvert) "$PROCESSED_IPYNB" --to=pdf
49 |   fi
50 |   # cleared/: copy produced by prepare-ipynb's clear-output and clear-marked-inputs steps.
51 |   CONV_DIR="cleared/$DIR"
52 |   with_echo mkdir -p "$CONV_DIR"
53 |   CONV_IPYNB="cleared/$nb"
54 |   with_echo "$MYDIR/ipython-demo-tools/prepare-ipynb" clear-output clear-marked-inputs "$nb" "$CONV_IPYNB"
55 | done
56 | # Copy file $1 to path $2, creating the destination directory first.
57 | function mkdir_and_cp()
58 | {
59 |   local dn="$(dirname "$2")"  # local: do not leak into the caller's scope
60 |   with_echo mkdir -p "$dn"
61 |   with_echo cp "$1" "$2"
62 | }
63 | # Copy every regular file that is not a notebook, a .pyc or an editor backup (*~); (#qN) allows zero matches.
64 | for i in [0-9]*/**/*~*ipynb~*.pyc~*\~(#q.)(#qN); do
65 |   mkdir_and_cp $i dist/$i
66 |   mkdir_and_cp $i cleared/$i
67 | done
68 | for i in slides/out/[0-9]*pdf; do  # slides/out/<name>.pdf -> dist/<name>/0-slides.pdf
69 |   bn=$(basename "$i")
70 |   mkdir_and_cp $i dist/${bn%.pdf}/0-slides.pdf
71 | done
72 | # Bundle dist/ and cleared/ into $TUT_ID-tutorial-materials-dist.zip via a temporary directory.
73 | with_echo cp -R --reflink dist $TUT_ID-tutorial-materials
74 | with_echo cp -R --reflink cleared $TUT_ID-tutorial-materials
75 | with_echo rm -f $TUT_ID-tutorial-materials-dist.zip
76 | with_echo zip -r $TUT_ID-tutorial-materials-dist.zip $TUT_ID-tutorial-materials
77 | with_echo rm -Rf $TUT_ID-tutorial-materials
78 | 
79 | 


--------------------------------------------------------------------------------
/aux/index.md:
--------------------------------------------------------------------------------
 1 | # Tutorial Instructions
 2 | 
 3 | ## Material
 4 | 
 5 | *   [Browse here](BROWSE_PATH)
 6 | *   [Browse on GitHub](GITHUB_URL)
 7 | *   [Download as a zip file](ZIP_NAME)
 8 | 
 9 | ## Getting started with the Virtual Machine
10 | 
11 | *   Install [VirtualBox](http://virtualbox.org)
12 | *   Obtain the machine image.
13 | 
14 |     If I have announced that it is available online, you can [download it](tutorial.ova) from here,
15 |     otherwise grab it from one of the USB sticks being passed around.
16 | 
17 | *   Open VirtualBox, click "File > Import Appliance", select the downloaded image and just click "Next" a few times.
18 | *   Once imported, double-click on the virtual machine to start using it.
19 | *   After a little while, a graphical desktop environment should appear.
20 | *   Double-click the "Terminal" icon
21 | *   Run the following command:
22 | 
23 |     ```
24 |     curl -L http://bit.ly/ak-tut-pack | bash
25 |     ```
26 | *   Follow the directions on the screen.
27 | 


--------------------------------------------------------------------------------
/aux/ipython_config.py:
--------------------------------------------------------------------------------
1 | c = get_config()  # get_config is not defined in this file; presumably supplied by IPython when it loads the config
2 | c.InteractiveShellApp.matplotlib = "inline"  # select the "inline" matplotlib setting for the shell
3 | 
4 | 


--------------------------------------------------------------------------------
/aux/material-email.txt:
--------------------------------------------------------------------------------
 1 | Dear participant,
 2 | 
 3 | It is my great pleasure to welcome you to the tutorial "Domain-specific
 4 | languages to high performance: Code generation and transformation in Python".
 5 | 
 6 | At the tutorial, we will be learning how to use code generation from the
 7 | comfortable and powerful scripting language Python to make writing
 8 | high-performance, parallel code for CPUs and GPUs easier, and, perhaps,
 9 | even fun.
10 | 
11 | To make sure you have a good experience and ample opportunity for
12 | experimentation at the tutorial, I would like to ask that you download
13 | and install a virtual machine image that I have prepared specifically
14 | for the tutorial. Follow these instructions to get started:
15 | 
16 | 1. Download a version of VirtualBox suitable for your system and install it:
17 | 
18 |    https://www.virtualbox.org/wiki/Downloads
19 | 
20 | 2. Download the machine image itself:
21 | 
22 |    http://andreask.cs.illinois.edu/tmp/dsl-tutorial.ova
23 | 
24 | 3. (Optionally) Check whether the image downloaded correctly using the
25 |    md5sum command line tool (Linux/OS X). On Windows, use this
26 |    tool:
27 | 
28 |    http://www.pc-tools.net/win32/md5sums/
29 | 
30 |    Compare the computed checksum with the following value:
31 |    a49989f216970d4b8842eba566a392a6
32 | 
33 |    Only proceed once the two match. If they don't, delete the file and
34 |    retry the download.
35 | 
36 | 4. Open VirtualBox, click "File > Import Appliance", select the
37 |    downloaded image and just click "Next" a few times.  Once imported,
38 |    double-click on the virtual machine to make sure it starts. After a
39 |    little while, you should see a simple desktop environment.
40 | 
41 | 5. Once all these steps complete successfully, congratulations! You are
42 |    good to go. I'm looking forward to seeing you at the tutorial.
43 | 
44 | See you soon,
45 | Andreas
46 | 
47 | 


--------------------------------------------------------------------------------
/aux/pystuff-requirements.txt:
--------------------------------------------------------------------------------
 1 | mako
 2 | git+https://github.com/inducer/pytools
 3 | git+https://github.com/inducer/pymbolic
 4 | git+https://github.com/inducer/cgen
 5 | git+https://github.com/inducer/genpy
 6 | sympy
 7 | git+https://github.com/pyopencl/pyopencl
 8 | git+https://github.com/inducer/islpy
 9 | git+https://github.com/inducer/loopy
10 | # NOTE(review): Bitbucket no longer hosts Mercurial repositories; confirm a current source for f2py.
11 | hg+https://bitbucket.org/inducer/f2py
12 | git+https://github.com/inducer/ply
13 | 
14 | 


--------------------------------------------------------------------------------
/aux/sudoers:
--------------------------------------------------------------------------------
1 | Defaults        exempt_group=sudo
2 | 
3 | 


--------------------------------------------------------------------------------
/aux/time-planning.ods:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/aux/time-planning.ods


--------------------------------------------------------------------------------
/aux/tut-pack.run:
--------------------------------------------------------------------------------
 1 | TUT_ID=MY_TUT_ID
 2 | set -e  # abort on the first failing command
 3 | if test -d "$TUT_ID-tutorial-materials"; then
 4 |   echo "------------------------------------------------------------"
 5 |   echo "A folder '$TUT_ID-tutorial-materials' already exists. "
 6 |   echo "If you would like to redownload the materials, delete the "
 7 |   echo "folder using"
 8 |   echo
 9 |   echo "rm -Rf $TUT_ID-tutorial-materials"
10 |   echo
11 |   echo "and then retry this command."
12 |   echo "------------------------------------------------------------"
13 |   exit 1
14 | fi
15 | # Fetch and unpack the materials archive from URL (value filled in before publishing).
16 | URL=MYURL
17 | 
18 | echo "------------------------------------------------------------"
19 | echo "One second, fetching your tutorial materials..."
20 | echo "------------------------------------------------------------"
21 | echo "Downloading..."
22 | set -x
23 | rm -f "$TUT_ID-tutorial-materials-dist.zip"
24 | curl -fL -O "$URL"  # -f: fail on HTTP errors instead of saving the error page; -L: follow redirects
25 | echo "Unpacking..."
26 | unzip -q "$TUT_ID-tutorial-materials-dist.zip"
27 | set +x
28 | 
29 | echo "------------------------------------------------------------"
30 | echo "All done!"
31 | echo "------------------------------------------------------------"
32 | echo "Your tutorial materials are unpacked and ready for use,"
33 | echo "right here in the subdirectory $TUT_ID-tutorial-materials."
34 | echo
35 | echo "Type the following to get started:"
36 | echo "  cd $TUT_ID-tutorial-materials"
37 | echo "  jupyter notebook"
38 | echo
39 | echo "Enjoy the tutorial!"
40 | 


--------------------------------------------------------------------------------
/aux/upload.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | # Publish the tutorial landing page, download script and materials to $TGT.
 3 | set -e
 4 | set -x
 5 | 
 6 | TUT_ID=dsl
 7 | URL=https://andreask.cs.illinois.edu/tutorial/$TUT_ID-tutorial-materials-dist.zip
 8 | GITHUB_URL=https://github.com/inducer/languages-and-codegen-tutorial
 9 | TGT=tiker.net:public_html/tutorial
10 | # Render index.md to HTML with its placeholders filled in, and upload it.
11 | sed s,GITHUB_URL,$GITHUB_URL, index.md | \
12 |   sed s,BROWSE_PATH,$TUT_ID-tutorial-materials/dist/, | \
13 |   sed s,ZIP_NAME,$TUT_ID-tutorial-materials-dist.zip, | \
14 |   pandoc -t html - > index.html
15 | rsync -rav index.html $TGT/
16 | rm index.html
17 | # Upload a copy of tut-pack.run with its tutorial ID and download URL filled in.
18 | cp tut-pack.run tut-pack-custom.run
19 | sed -i s,MY_TUT_ID,$TUT_ID, tut-pack-custom.run
20 | sed -i s,MYURL,$URL, tut-pack-custom.run
21 | rsync -rav tut-pack-custom.run $TGT/tut-pack.run
22 | rm tut-pack-custom.run
23 | # Upload the zip and mirror ../dist; all paths are relative, so run this from its own directory.
24 | rsync -rav --progress ../$TUT_ID-tutorial-materials-dist.zip $TGT
25 | rsync -rav --progress --delete ../dist $TGT/$TUT_ID-tutorial-materials
26 | 
27 | 
28 | echo "COMPLETED"
29 | 


--------------------------------------------------------------------------------
/aux/video-script.txt:
--------------------------------------------------------------------------------
 1 | (Title card)
 2 | 
 3 | Hi, my name is Andreas Kloeckner, and I'm looking forward to having you join me
 4 | for my tutorial "From description to code generation: building high-performance
 5 | tools in Python" at Supercomputing 2015.
 6 | 
 7 | The harsh reality about high-performance computing, to my mind, is that, while
 8 | the mathematical ideas are often quite simple, the code that ultimately
 9 | expresses them is very much not.
10 | 
11 | (SCREENSHOT OF COMPLICATED CODE)
12 | 
13 | And so the constant fight in doing scientific computing with an ambition for
14 | high-performance is to manage this incidental complexity.
15 | 
16 | The idea that is easy in theory, but tricky in practice, is to keep the amount
17 | of code used to specify the computation commensurate with the complexity of the
18 | mathematical ideas.
19 | 
20 | Abstraction, of course, is the mathematical and computer science tool to make
21 | that happen. And it is often easy to build abstractions, just think of the
22 | matrix and vector objects in Matlab or Python's numpy. The crux is, though,
23 | that these abstractions are rarely free in terms of computational resources.
24 | 
25 | So this tutorial is about a set of Python-based open-source tools designed to help with that.
26 | 
27 | * It all starts with the capability to design domain-specific languages.
28 | 
29 | * The next important step is to enrich and transform these languages in ways
30 | that gradually add more implementation detail.
31 | 
32 | * Once there is enough detail for the model to be implemented, code needs to be
33 | generated and actually executed, most likely on high-performance devices such
34 | as GPUs.
35 | 
36 | -----------------
37 | 
38 | To make this as informative, enlightening, and entertaining as possible, we
39 | will do this as follows:
40 | 
41 | The tutorial is organized as a sequence of interactive "notebooks", each
42 | consisting of code interleaved with text and images. As we go, we will work
43 | through these notebooks, and you will see the tools in action, doing the things
44 | that they were designed to do. Some key bits of code we will write together as
45 | a group, and other times you will work through small practice problems on your
46 | own or in small groups.
47 | 
48 | I will provide a virtual machine image for you that contains all the software you need to follow along.
49 | 
50 | -------------------
51 | 
52 | First, I will show you a few bits and pieces of Python, an approachable
53 | high-level language that we will use as a foundation for our work. This should
54 | be enough to keep you going during the tutorial, even if you have never
55 | programmed in Python. As long as you have programmed before, you should be fine.
56 | 
57 | Next, we will learn how to build and transform expression trees, the natural
58 | way of capturing mathematical ideas on a computer. This can help describe many
59 | things, from a PDE to an image processing algorithm.
60 | 
61 | Next, we will worry about executing high-performance code from Python. To do
62 | so, we will use PyOpenCL, a Python package that comfortably lets us execute C
63 | code on GPUs and CPUs.
64 | 
65 | Next, we will think about how to actually generate that code, at runtime, right
66 | before it gets used. To get started with something simple, we can piece
67 | together code like Legos from premade pieces. This is very easy and already
68 | quite versatile.
69 | 
70 | Finally, we will investigate a tool called loopy that can help with more
71 | challenging code generation tasks where you would like to wring the last ounce
72 | of performance out of a machine.
73 | 
74 | -------------------
75 | 
76 | And that's it. At the end of the tutorial, my goal is for you to have a good
77 | grasp of how to build tools that go from a description all the way down to
78 | high-performance code.
79 | 
80 | I am looking forward to seeing you there!


--------------------------------------------------------------------------------
/aux/vm-requirements.txt:
--------------------------------------------------------------------------------
1 | clang-3.8
2 | pocl
3 |   with LLC_HOST_CPU=i686 LLVM_CONFIG=/usr/bin/llvm-config-3.8 ./configure --prefix=/opt/pocl-build
4 | graphviz
5 | 
6 | see pystuff-requirements.txt
7 | 
8 | dd zeros
9 | 


--------------------------------------------------------------------------------
/prepare-all-notebooks.sh:
--------------------------------------------------------------------------------
1 | ipython-demo-tools/prepare-all-notebooks.sh


--------------------------------------------------------------------------------
/slides/.gitignore:
--------------------------------------------------------------------------------
 1 | *.nav
 2 | *.aux
 3 | out/*.pdf
 4 | *.toc
 5 | *.snm
 6 | *.log
 7 | *~
 8 | .*.swp
 9 | *.out
10 | *.emergency
11 | pic-stuttgart.pdf
12 | *.avi
13 | *.mov
14 | *.mpg
15 | *.vrb
16 | *.bbl
17 | *.blg
18 | .sw[op]
19 | out/
20 | simula
21 | 


--------------------------------------------------------------------------------
/slides/06-loopy.tex:
--------------------------------------------------------------------------------
  1 | \documentclass[english,compress]{beamer}
  2 | \nonstopmode%
  3 | \input{settings}
  4 | 
  5 | \logoenable%
  6 | 
  7 | \pgfdeclarelayer{grid}
  8 | \pgfsetlayers{background,grid,main,foreground}
  9 | \def\intd{\, d}
 10 | 
 11 | \def\bigncentered#1{
 12 |   \begin{center}
 13 |     \Huge\bfseries #1
 14 |   \end{center}
 15 | }
 16 | 
 17 | \begin{document}
 18 | 
 19 | \title{%
 20 |    Part 6: Loopy
 21 | }
 22 | 
 23 | \institute{Computer Science $\cdot$ University of Illinois at
 24 | Urbana-Champaign}
 25 | 
 26 | \author{Andreas Klöckner}
 27 | 
 28 | \date{}
 29 | 
 30 | \frame{\titlepage}
 31 | 
 32 | % \begin{frame}{Thanks}
 33 | % 
 34 | %   \begin{itemize}
 35 | %   \item Tim Warburton (Rice)
 36 | %   \item Lucas Wilcox (NPS)
 37 | %   \item Leslie Greengard (NYU)
 38 | %   \item Early adopters (Rob Kirby, Maxim Kuznetsov, Ivan Oseledets)
 39 | %   %\item PyOpenCL, PyCUDA contributors
 40 | %   \item AMD, Nvidia
 41 | %   \end{itemize}
 42 | % 
 43 | % \end{frame}
 44 | % -----------------------------------------------------------------------------
 45 | \section[Loo.py]{Loop Generation}
 46 | % -----------------------------------------------------------------------------
 47 | \subsection{Loo.py}
 48 | % -----------------------------------------------------------------------------
 49 | % {{{
 50 | \begin{frame}{Automating GPU Programming}
 51 |   \begin{beamercolorbox}[sep=3mm]{block body}
 52 |     High-performance programming can be a time-consuming trial-and-error
 53 |     process.
 54 |   \end{beamercolorbox}
 55 |   Obvious idea: Let the computer do it. How?
 56 |   \begin{itemize}
 57 |     \item One way: ``Smart'' compiler, ``dumb'' developer
 58 |       \begin{itemize}
 59 |         \item GPU programming requires complex tradeoffs
 60 |         \item Tradeoffs require heuristics
 61 |         \item Heuristics are fragile
 62 |       \end{itemize}
 63 |     \item Another way: ``Smart'' developer, ``dumb'' compiler
 64 |       \begin{itemize}
 65 |         \item Error-prone
 66 |         \item Expensive in developer time
 67 |         \item User can use manual/automatic tuning
 68 |       \end{itemize}
 69 |   \end{itemize}
 70 |   \uncover<2->{%
 71 |     \begin{tikzpicture} [overlay]
 72 |       \node at (current page.center) [draw,drop shadow,fill=white,
 73 |       inner xsep=0.5cm,inner ysep=0.5cm,thick]
 74 |         {%
 75 |           So compromise!
 76 | 
 77 |           Following: an idea of a compromise.
 78 |         } ;
 79 |     \end{tikzpicture}
 80 |   }
 81 | \end{frame}
 82 | % -----------------------------------------------------------------------------
 83 | \begin{frame}{Setting the Stage}
 84 |   \begin{columns}
 85 |     \column{0.55\textwidth}
 86 |       Idea: Create IR + library of transformations
 87 |       \begin{itemize}
 88 |         \item Start with math-y statement of the operation
 89 |         \item ``Push a few buttons'' to optimize for the target
 90 |           device
 91 |         \item Strongly separate these two parts
 92 |       \end{itemize}
 93 | 
 94 |       \medskip
 95 |       Philosophy:
 96 |       \begin{itemize}
 97 |         \item Avoid ``intelligence''
 98 |         \item User can assume partial responsibility for correctness
 99 |         \item Embedding in Python provides generation/transform
100 |           flexibility
101 |       \end{itemize}
102 |     \column{0.45\textwidth}
103 |       \includegraphics[width=\textwidth]{loopy-crop.pdf}
104 |   \end{columns}
105 |   \uncover<2>{%
106 |     \begin{tikzpicture} [overlay]
107 |       \node [above left=1cm of current page.south east, draw,drop shadow,fill=white,
108 |       inner xsep=0.5cm,inner ysep=0.5cm,thick, text
109 |       width=0.7\textwidth]
110 |         {%
111 |           Loopy is infrastructure.
112 | 
113 |           \medskip
114 |           Auto-tuners and domain-specific
115 |           libraries are ``above'' loopy conceptually.
116 |         } ;
117 |     \end{tikzpicture}
118 |   }
119 | \end{frame}
120 | % -----------------------------------------------------------------------------
121 | \begin{frame}
122 |   \bigncentered{DEMO TIME}
123 | \end{frame}
124 | % -----------------------------------------------------------------------------
125 | \begin{frame}{Capturing Variants}
126 |   \lstinputlisting[basicstyle=\scriptsize]{loopy-variants.py}
127 |   \uncover<2>{%
128 |     \begin{tikzpicture} [overlay]
129 |       \node [above left=1cm of current page.south east, draw,drop shadow,fill=white,
130 |       inner xsep=0.5cm,inner ysep=0.5cm,thick,
131 |       text width=0.5\textwidth]
132 |         {%
133 |           Easy to \emph{non-redundantly} capture multiple variants of
134 |           the same kernel.
135 |         } ;
136 |     \end{tikzpicture}
137 |   }
138 | \end{frame}
139 | % -----------------------------------------------------------------------------
140 | \begin{frame}{Ordering}
141 |   \begin{itemize}
142 |     \item Completely \emph{un}ordered by default
143 |     \item Program only well-formed
144 | 
145 |       \emph{if} domain traversal order does not matter
146 |     \item Dependencies
147 | 
148 |       \emph{can} dictate execution order
149 | 
150 |       \emph{within} largest set of shared loops
151 |   \end{itemize}
152 | \end{frame}
153 | % -----------------------------------------------------------------------------
154 | \begin{frame}{Loo.py vs reality}
155 |   \begin{itemize}
156 |     \item
157 |     Two modes of operation:
158 |     \begin{itemize}
159 |       \item Standalone
160 |       \item In-process
161 |     \end{itemize}
162 |     \item Flat data structure:
163 |       \begin{itemize}
164 |         \item Easy to manipulate
165 |         \item Kernel fusion
166 |       \end{itemize}
167 |     \item Register-your-own:
168 |       \begin{itemize}
169 |         \item Functions
170 |         \item Symbols
171 |         \item Reductions
172 |       \end{itemize}
173 |     \item Literal code `escape hatch'
174 |     \item Predicated execution
175 |     \item Tree-of-domains for data-dependent control flow
176 |   \end{itemize}
177 | \end{frame}
178 | % -----------------------------------------------------------------------------
179 | \begin{frame}{Bonus Features}
180 |   \begin{columns}
181 |     \column{0.2\textwidth}
182 |       \includegraphics[width=\textwidth]{glass-dollar.jpeg}
183 |     \column{0.7\textwidth}
184 |       Free extras:
185 |       \begin{itemize}
186 |         \item A-priori bounds checking
187 |         \item Generate a sequential version of the code
188 |         \item Automatic Benchmarking
189 |         \item Free tuning advice
190 |           \begin{itemize}
191 |             \item Local memory layout
192 |             \item Suboptimal use of hw parallelism
193 |             \item Based on knowledge about target hardware
194 |           \end{itemize}
195 |         \item Automatic Testing
196 |           \begin{itemize}
197 |             \item \dots against sequential version
198 |             \item \dots which is easier to verify
199 |           \end{itemize}
200 |       \end{itemize}
201 |   \end{columns}
202 | \end{frame}
203 | \addimgcredit{Glass dollar: sxc.hu/flaivoloka}
204 | % -----------------------------------------------------------------------------
205 | \begin{frame}
206 |   \bigncentered{DEMO TIME}
207 | \end{frame}
208 | 
209 | % }}}
210 | \end{document}
211 | 
212 | % vim: foldmethod=marker
213 | 


--------------------------------------------------------------------------------
/slides/beamercolorthemeuiuc.sty:
--------------------------------------------------------------------------------
 1 | % Copyright 2004 by Madhusudan Singh <madhusudan.singh@gmail.com>
 2 | %
 3 | % This file may be distributed and/or modified
 4 | %
 5 | % 1. under the LaTeX Project Public License and/or
 6 | % 2. under the GNU Public License.
 7 | %
 8 | % See the file doc/licenses/LICENSE for more details.
 9 | 
10 | \mode<presentation>
11 | 
12 | \usecolortheme{whale}
13 | \usecolortheme{orchid}
14 | 
15 | %\definecolor{nyuviolet}{RGB}{87,6,172}
16 | %\colorlet{nyuviodark}{nyuviolet!80!black}
17 | \definecolor{mygray}{RGB}{200,200,200}
18 | 
19 | % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
20 | \definecolor{uiucblue}{RGB}{0,60,125}
21 | \definecolor{uiucorange}{RGB}{244,127,36}
22 | \definecolor{uiuclightblue}{RGB}{110,139,191}
23 | \definecolor{uiucdarkorange}{RGB}{239,138,28}
24 | 
25 | %\setbeamercolor{alerted text}{fg=!yellow}
26 | 
27 | \setbeamercolor*{palette primary}{fg=black,bg=uiucdarkorange}
28 | \setbeamercolor*{palette secondary}{fg=white,bg=uiucblue}
29 | \setbeamercolor*{palette tertiary}{fg=white,bg=uiucblue}
30 | \setbeamercolor{frametitle}{fg=white,bg=uiucblue}
31 | 
32 | \setbeamercolor*{palette quaternary}{fg=black,bg=uiuclightblue}
33 | 
34 | %\setbeamercolor*{sidebar}{fg=darkblue,bg=orange!75!white}
35 | 
36 | %\setbeamercolor*{palette sidebar primary}{fg=darkblue!10!black}
37 | %\setbeamercolor*{palette sidebar secondary}{fg=white}
38 | %\setbeamercolor*{palette sidebar tertiary}{fg=darkblue!50!black}
39 | %\setbeamercolor*{palette sidebar quaternary}{fg=yellow!10!orange}
40 | 
41 | \setbeamercolor*{titlelike}{bg=uiucblue,fg=white}
42 | 
43 | \setbeamercolor*{block title}{bg=uiucblue,fg=white}
44 | %\setbeamercolor*{block title example}{bg=brown,fg=white}
45 | \setbeamercolor*{structure}{fg=mygray!50!black,bg=white}
46 | %\setbeamercolor{frametitle right}{bg=yellow!60!orange}
47 | 
48 | %\setbeamercolor*{separation line}{}
49 | %\setbeamercolor*{fine separation line}{}
50 | 
51 | \mode<all>
52 | 


--------------------------------------------------------------------------------
/slides/code/loopy-variants.py:
--------------------------------------------------------------------------------
 1 | knl = ...  # placeholder: the base kernel is constructed elsewhere
 2 | # CPU variant: split "i" by 16*4096, then split the resulting "i_inner" by 16.
 3 | def variant_cpu(knl):
 4 |     knl = lp.split_dimension(knl, "i", 16*4096, outer_tag="g.0", slabs=(0, 1))
 5 |     knl = lp.split_dimension(knl, "i_inner", 16,
 6 |             inner_tag="unr")
 7 |     return knl
 8 | # GPU variant. NOTE(review): block_size is not defined in this snippet.
 9 | def variant_gpu(knl):
10 |     knl = lp.split_dimension(knl, "i", 4*256, outer_tag="g.0", slabs=(0, 1))
11 |     knl = lp.split_dimension(knl, "i_inner", block_size,
12 |             outer_tag="unr", inner_tag="l.0")
13 |     return knl
14 | # Every variant is applied to the same base knl, so the kernel is stated once.
15 | for variant in [variant_cpu, variant_gpu]:
16 |     kernel_gen = lp.generate_loop_schedules(variant(knl))
17 |     # ...
18 | 


--------------------------------------------------------------------------------
/slides/code/transpose.cl:
--------------------------------------------------------------------------------
 1 | __kernel void transpose(  // tiled transpose: a_t receives a, transposed
 2 |   __global float *a_t, __global float *a,
 3 |   unsigned a_width, unsigned a_height)
 4 | {
 5 |   int base_idx_a   =
 6 |     get_group_id(0) * BLK_SIZE +
 7 |     get_group_id(1) * A_BLOCK_STRIDE;
 8 |   int base_idx_a_t =
 9 |     get_group_id(1) * BLK_SIZE +
10 |     get_group_id(0) * A_T_BLOCK_STRIDE;
11 |   // BLK_SIZE and the *_BLOCK_STRIDE macros are defined outside this file.
12 |   int glob_idx_a =
13 |     base_idx_a + get_local_id(0)
14 |     + a_width * get_local_id(1);
15 |   int glob_idx_a_t =
16 |     base_idx_a_t + get_local_id(0)
17 |     + a_height * get_local_id(1);
18 |   // Extra column: presumably padding against bank conflicts (confirm).
19 |   __local float a_local[BLK_SIZE][BLK_SIZE+1];
20 |   // a_local is a 2D array, so index it with two subscripts.
21 |   a_local[get_local_id(1)][get_local_id(0)] =
22 |     a[glob_idx_a];
23 |   // The work-group's stores to a_local must land before the swapped read.
24 |   barrier(CLK_LOCAL_MEM_FENCE);
25 | 
26 |   a_t[glob_idx_a_t] =
27 |     a_local[get_local_id(0)][get_local_id(1)];
28 | }
29 | 


--------------------------------------------------------------------------------
/slides/code/transpose.cu:
--------------------------------------------------------------------------------
 1 | __global__ void transpose(  // tiled transpose: A_t receives A, transposed
 2 |     float *A_t, float *A,
 3 |     int a_width, int a_height)
 4 | {
 5 |   int base_idx_a   =
 6 |     blockIdx.x * BLK_SIZE +
 7 |     blockIdx.y * A_BLOCK_STRIDE;
 8 |   int base_idx_a_t =
 9 |     blockIdx.y * BLK_SIZE +
10 |     blockIdx.x * A_T_BLOCK_STRIDE;
11 |   // BLK_SIZE and the *_BLOCK_STRIDE macros are defined outside this file.
12 |   int glob_idx_a =
13 |     base_idx_a + threadIdx.x
14 |     + a_width * threadIdx.y;
15 |   int glob_idx_a_t =
16 |     base_idx_a_t + threadIdx.x
17 |     + a_height * threadIdx.y;
18 |   // Extra column: presumably padding against bank conflicts (confirm).
19 |   __shared__ float A_shared[BLK_SIZE][BLK_SIZE+1];
20 |   // Each thread stages one element of A in the shared tile.
21 |   A_shared[threadIdx.y][threadIdx.x] =
22 |     A[glob_idx_a];
23 |   // The block's stores to A_shared must land before the swapped read.
24 |   __syncthreads();
25 | 
26 |   A_t[glob_idx_a_t] =
27 |     A_shared[threadIdx.x][threadIdx.y];
28 | }
29 | 


--------------------------------------------------------------------------------
/slides/kloeckislides.sty:
--------------------------------------------------------------------------------
  1 | \usepackage[utf8]{inputenc}
  2 | \setcounter{secnumdepth}{3}
  3 | \setcounter{tocdepth}{3}
  4 | \usepackage{amsmath}
  5 | \usepackage{color}
  6 | \usepackage{amssymb}
  7 | %\usepackage{esint}
  8 | \usepackage{verbatim} % for env comment
  9 | \usepackage{listings}
 10 | \usepackage{stmaryrd}
 11 | \usepackage{colortbl}
 12 | \usepackage{babel}
 13 | \usepackage{wasysym}
 14 | 
 15 | \definecolor{green}{RGB}{0, 180, 0}
 16 | \definecolor{red}{RGB}{180, 0, 0}
 17 | \colorlet{grellow}{green!50!yellow}
 18 | \colorlet{codeback}{gray!20}
 19 | 
 20 | \usepackage{multimedia}
 21 | 
 22 | \usepackage{tikz}
 23 | \usetikzlibrary{calc}
 24 | \usetikzlibrary{positioning}
 25 | \usetikzlibrary{fadings}
 26 | \usetikzlibrary{chains}
 27 | \usetikzlibrary{scopes}
 28 | \usetikzlibrary{shadows}
 29 | \usetikzlibrary{arrows}
 30 | \usetikzlibrary{snakes}
 31 | \usetikzlibrary{shapes.misc}
 32 | \usetikzlibrary{shapes.symbols}
 33 | \usetikzlibrary{shapes.multipart}
 34 | \usetikzlibrary{fit}
 35 | \usetikzlibrary{shapes.arrows}
 36 | \usetikzlibrary{shapes.geometric}
 37 | \usetikzlibrary{shapes.callouts}
 38 | \usetikzlibrary{decorations.text}
 39 | 
 40 | \pgfdeclarelayer{background}
 41 | \pgfdeclarelayer{foreground}
 42 | \pgfsetlayers{background,main,foreground}
 43 | 
 44 | \tikzstyle{every picture}+=[remember picture]
 45 | 
 46 | \def\allimgcredits{}
 47 | \makeatletter
 48 | \def\addimgcredit#1{\g@addto@macro\allimgcredits{\item #1}}
 49 | \makeatother
 50 | \def\imagecreditslide{
 51 |   \begin{frame}[shrink,label=image-credits]{Image Credits}
 52 |     \begin{itemize}
 53 |       \allimgcredits
 54 |     \end{itemize}
 55 |   \end{frame}
 56 | }
 57 | 
 58 | \def\gatheredappendix{}
 59 | \makeatletter
 60 | \long\def\addtoappendix#1{
 61 |   \g@addto@macro\gatheredappendix{#1}
 62 | }
 63 | \makeatother
 64 | 
 65 | \newcommand{\cc}{\raisebox{-0.75ex}{\includegraphics[height=3ex]{cc.pdf}}}
 66 | 
 67 | \newcommand{\D}{\mathsf{D}}
 68 | \newcommand{\mathd}{\,\mathsf{d}}
 69 | 
 70 | \newcommand{\avg}[1]{\{#1\}}
 71 | \newcommand{\jump}[1]{\left\llbracket#1\right\rrbracket}
 72 | 
 73 | \newcommand{\questionframe}[1]{
 74 |   \begin{frame}{Questions?}
 75 |     \begin{center}
 76 |     \textbf{\Huge ?}
 77 |     \par#1
 78 |     \end{center}
 79 |   \end{frame}
 80 | }
 81 | 
 82 | \lstset{
 83 |   %language=Python,
 84 |   %alsolanguage=C,
 85 |   showstringspaces=false,
 86 |   basicstyle=\small,
 87 |   stringstyle=\color{blue},
 88 |   columns=flexible,
 89 |   emph={[2]pycuda,numpy,cuda,cl},
 90 |   emphstyle={[2]\color{red}},
 91 |   backgroundcolor=\color{codeback},
 92 |   frame=single,
 93 |   framerule=0pt,
 94 |   framesep=1.5pt,
 95 |   rangebeginprefix=//\ ,
 96 |   rangeendprefix=//\ ,
 97 |   includerangemarker=false,
 98 |   }
 99 | 
100 | \pgfdeclareimage[height=0.8cm]{brown-logo}{brown-logo.pdf}
101 | \def\mylogotext{\pgfuseimage{brown-logo}\hspace*{0.3cm}}
102 | \newcommand{\logoenable}{\logo{\mylogotext}}
103 | \newcommand{\logodisable}{ \logo{} }
104 | \newenvironment{nologo}{\logodisable}{\logoenable}
105 | \newenvironment{noheadfoot}{
106 |   \begingroup
107 |   \begin{nologo}
108 |   \setbeamertemplate{headline}{}
109 |   \setbeamertemplate{footline}{}
110 | }{
111 |   \end{nologo}
112 |   \endgroup
113 | }
114 | 
115 | \newcommand{\symball}[2]{
116 |   \begin{tikzpicture}[baseline=-0.7ex]
117 |     \shadedraw [shading=ball,ball color=#1,use as bounding box] 
118 |       circle (1ex) node at (0.7ex,0) [minimum width=0.7ex] {};
119 | 
120 |     \node [text=white,font=\bfseries] {#2};
121 |   \end{tikzpicture}}
122 | \newcommand{\plusball}{\symball{green}{{\small +}}}
123 | \newcommand{\okball}{\symball{orange}{o}}
124 | \newcommand{\minusball}{\symball{red}{-}}
125 | 
126 | \let\epsilon=\varepsilon
127 | \let\phi=\varphi
128 | 
129 | \newcommand{\subitem}[1]{\begin{itemize}\item #1 \end{itemize}}
130 | \newcommand{\creditto}[1]{
131 |   \begin{tikzpicture}[overlay]
132 |     \node [xshift=1cm,yshift=0.5cm]
133 |       at (current page.south west)
134 |       [font=\scriptsize,fill=gray!30,anchor=south west,opacity=0.5]
135 |       {#1};
136 |   \end{tikzpicture}
137 | }
138 | 
139 | \def\evalprint#1{{\pgfmathtruncatemacro{\mathresult}{#1}\mathresult}}
140 | 
141 | \makeatletter
142 | \newcommand*{\overlaynumber}{\number\beamer@slideinframe}
143 | \makeatother
144 | 


--------------------------------------------------------------------------------
/slides/latexmkrc:
--------------------------------------------------------------------------------
 1 | # http://tex.stackexchange.com/questions/11710/specify-output-directory-when-using-latexmk
 2 | $pdflatex="pdflatex -interaction nonstopmode %O %S";
 3 | $out_dir = 'out';  # build products go into ./out
 4 | $pdf_mode = 1;  # produce PDF output via the $pdflatex command above
 5 | $pdf_previewer = 'xdg-open';
 6 | # Files built when latexmk is run without naming a file explicitly.
 7 | @default_files = ('0[0-9]*tex');
 8 | # Append the asset subdirectories to TeX's input search path.
 9 | $ENV{TEXINPUTS} .=':media';
10 | $ENV{TEXINPUTS} .=':slides';
11 | $ENV{TEXINPUTS} .=':code';
12 | $ENV{TEXINPUTS} .=':vids';
13 | $HOME = $ENV{HOME};  # NOTE(review): $HOME is not used elsewhere in this file
14 | 


--------------------------------------------------------------------------------
/slides/media/amd-logo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/amd-logo.pdf


--------------------------------------------------------------------------------
/slides/media/apple-logo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/apple-logo.pdf


--------------------------------------------------------------------------------
/slides/media/c870.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/c870.png


--------------------------------------------------------------------------------
/slides/media/cl-programs-and-kernels-v2.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}[fragile]{Programs and Kernels}
 2 |   \begin{lstlisting}[gobble=4]
 3 |     prg = cl.Program(context, src)
 4 |   \end{lstlisting}
 5 |   \begin{columns}
 6 |     \column{0.65\textwidth}
 7 |       \begin{itemize}
 8 |         \item \texttt{src}: OpenCL device code
 9 |           \begin{itemize}
10 |             \item Derivative of C99
11 |             \item Functions with \texttt{\_\_kernel} attribute
12 |               can be invoked from host
13 |           \end{itemize}
14 |         \item \texttt{prg.build(options="",\\
15 |           \hspace*{2em}devices=None)}
16 |         \item \texttt{kernel = prg.kernel\_name}
17 |         \item \texttt{kernel(queue,\\
18 |           \hspace*{2em}$(G_x,G_y,G_z)$, $(L_x,L_y,L_z)$, \\
19 |           \hspace*{2em}arg, \dots, \\
20 |           \hspace*{2em}wait\_for=None)}
21 |       \end{itemize}
22 |     \column{0.3\textwidth}
23 |       % boo yuck
24 |       \hspace*{-1cm}\includegraphics[width=1.3\textwidth]{cpu.jpeg}
25 |   \end{columns}
26 | \end{frame}
27 | \begin{frame}[fragile]{Program Objects}
28 |   \begin{lstlisting}[gobble=4]
29 |     kernel(queue, (Gx,Gy,Gz), (Sx,Sy,Sz), arg, ..., wait_for=None)
30 |   \end{lstlisting}
31 |   \begin{columns}
32 |     \column{0.3\textwidth}
33 |       \includegraphics[width=1.3\textwidth]{cpu.jpeg}
34 |     \column{0.65\textwidth}
35 |       \begin{overlayarea}{\textwidth}{0.5\textheight}
36 |         \only<+>{
37 |           \texttt{arg} may be:
38 |           \begin{itemize}
39 |             \item \texttt{None} (a \texttt{NULL} pointer)
40 |             \item \texttt{numpy} sized scalars:
41 |               \texttt{numpy.int64,numpy.float32,\dots}
42 |             \item Anything with buffer interface:\\
43 |               \texttt{numpy.ndarray}, \texttt{str}
44 |             \item Buffer Objects
45 |             \item Also: \texttt{cl.Image}, \texttt{cl.Sampler}, 
46 |               \texttt{cl.LocalMemory}
47 |           \end{itemize}
48 |         }
49 |         \only<+>{
50 |           Explicitly sized scalars:\\
51 |           {\color{red}\ding{54}  Annoying, error-prone.}
52 | 
53 |           \medskip
54 |           Better:
55 | 
56 |           \texttt{%
57 |           kernel.set\_scalar\_arg\_dtypes([\\
58 |           \hspace*{3ex}numpy.int32, None,\\
59 |           \hspace*{3ex}numpy.float32])}
60 |           \medskip
61 | 
62 |           Use \texttt{None} for non-scalars.
63 |         }
64 |       \end{overlayarea}
65 |   \end{columns}
66 | \end{frame}
67 | \addimgcredit{CPU: sxc.hu/dimshik}
68 | 


--------------------------------------------------------------------------------
/slides/media/context.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/context.jpeg


--------------------------------------------------------------------------------
/slides/media/cpu.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/cpu.jpeg


--------------------------------------------------------------------------------
/slides/media/general-dep-graph.tex:
--------------------------------------------------------------------------------
 1 | \begin{tikzpicture}[
 2 |   scale=0.014,thick,
 3 |   annode/.style={xshift=0.1cm},
 4 |   intermed/.style={input,fill=intermed},
 5 |   ]
 6 |     \node [input] (A) at (152,479) [draw,ellipse] {A};
 7 |     \node [intermed] (C) at (80,295) [draw,ellipse] {C};
 8 |     \node [intermed] (B) at (152,387) [draw,ellipse] {B};
 9 |     \node [intermed] (E) at (27,203) [draw,ellipse] {E};
10 |     \node [intermed] (G) at (99,111) [draw,ellipse] {G};
11 |     \node [intermed] (F) at (99,203) [draw,ellipse] {F};
12 |     \node [intermed] (Q) at (211,203) [draw,ellipse] {Q};
13 |     \node [intermed] (P) at (152,295) [draw,ellipse] {P};
14 |     \node [output] (R) at (154,19) [draw,ellipse] {R};
15 |     \draw [->] (C) -- (F);
16 |     \draw (96,249) node [annode] {h};
17 |     \draw [->] (G) -- (R);
18 |     \draw (134.5,65) node [annode] {r};
19 |     \draw [->] (B) -- (C);
20 |     \draw (126.5,341) node [annode] {g};
21 |     \draw [->] (P) -- (R);
22 |     \draw (156.5,157) node [annode] {r};
23 |     \draw [->] (E) -- (G);
24 |     \draw (73.5,157) node [annode] {g};
25 |     \draw [->] (Q) -- (R);
26 |     \draw (193.5,111) node [annode] {r};
27 |     \draw [->] (F) -- (G);
28 |     \draw (103.5,157) node [annode] {g};
29 |     \draw [->] (B) -- (Q);
30 |     \draw (202.5,295) node [annode] {q};
31 |     \draw [->] (A) -- (B);
32 |     \draw (154.5,433) node [annode] {f};
33 |     \draw [->] (B) -- (P);
34 |     \draw (156.5,341) node [annode] {p};
35 |     \draw [->] (C) -- (E);
36 |     \draw (61.5,249) node [annode] {f};
37 | \end{tikzpicture}
38 | 


--------------------------------------------------------------------------------
/slides/media/glass-dollar.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/glass-dollar.jpeg


--------------------------------------------------------------------------------
/slides/media/intel-logo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/intel-logo.pdf


--------------------------------------------------------------------------------
/slides/media/loopy-crop.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/loopy-crop.pdf


--------------------------------------------------------------------------------
/slides/media/memory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/memory.png


--------------------------------------------------------------------------------
/slides/media/nvidia.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/nvidia.pdf


--------------------------------------------------------------------------------
/slides/media/onion.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/onion.jpeg


--------------------------------------------------------------------------------
/slides/media/opencl-11.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/opencl-11.pdf


--------------------------------------------------------------------------------
/slides/media/opencl-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/opencl-logo.png


--------------------------------------------------------------------------------
/slides/media/opencl-overview.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/opencl-overview.pdf


--------------------------------------------------------------------------------
/slides/media/parallel-field.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/parallel-field.jpeg


--------------------------------------------------------------------------------
/slides/media/python-logo-no-shadow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/python-logo-no-shadow.png


--------------------------------------------------------------------------------
/slides/media/question-mark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/question-mark.png


--------------------------------------------------------------------------------
/slides/media/queue.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/queue.jpeg


--------------------------------------------------------------------------------
/slides/media/radar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/radar.png


--------------------------------------------------------------------------------
/slides/media/tree.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/tree.jpeg


--------------------------------------------------------------------------------
/slides/settings.tex:
--------------------------------------------------------------------------------
 1 | %\batchmode
 2 | \usepackage{kloeckislides}
 3 | \nonstopmode
 4 | 
 5 | \usepackage{pifont}
 6 | 
 7 | \useoutertheme{split}
 8 | \useinnertheme{rectangles}
 9 | \usecolortheme{uiuc}
10 | \usetikzlibrary{arrows}
11 | 
12 | \usepackage{ifthen}
13 | 
14 | \pgfdeclareimage[height=0.8cm]{uiuc-logo}{uiuc-logo.pdf}
15 | \def\mylogotext{\pgfuseimage{uiuc-logo}\hspace*{0.3cm}}
16 | %\def\mylogotext{}
17 | 
18 | \AtBeginSection[] {
19 |   \begin{frame}<beamer>
20 |   \frametitle{Outline}
21 |   \tableofcontents[sectionstyle=show/shaded,subsectionstyle=show/show/hide]
22 | \end{frame}
23 | }
24 | \AtBeginSubsection[] {
25 |   \begin{frame}<beamer>
26 |   \frametitle{Outline}
27 |   \tableofcontents[sectionstyle=show/shaded,subsectionstyle=show/shaded/hide]
28 | \end{frame}
29 | }
30 | 
31 | \definecolor{green}{RGB}{0, 180, 0}
32 | \definecolor{red}{RGB}{180, 0, 0}
33 | \colorlet{grellow}{green!50!yellow}
34 | \colorlet{codeback}{gray!20}
35 | 
36 | \DeclareMathOperator{\argmin}{argmin}
37 | \DeclareMathOperator{\argmax}{argmax}
38 | 
39 | 
40 | \lstset{
41 |   language=Python,
42 |   alsolanguage=C,
43 |   rangebeginprefix=\#\ ,
44 |   rangeendprefix=\#\ ,
45 |   }
46 | 
47 | \colorlet{input}{green!30}
48 | \colorlet{output}{red!30}
49 | \colorlet{intermed}{blue!30}
50 | 
51 | \definecolor{fetch}{RGB}{227,110,35}
52 | \definecolor{alu}{RGB}{255,188,24}
53 | \definecolor{context}{RGB}{132,146,175}
54 | 
55 | 
56 | \setbeamertemplate{navigation symbols}{}
57 | 
58 | \let\tmop=\operatorname
59 | 
60 | \usepackage[normalem]{ulem}
61 | 
62 | \def\curl{\operatorname{curl}}
63 | 
64 | \def\checkmark{\textbf{\color{green}\ding{51}}}
65 | \def\crossmark{\textbf{\color{red}\ding{56}}}
66 | 


--------------------------------------------------------------------------------
/slides/slides/barrier.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}{Synchronization}
 2 |   What is a Barrier?
 3 | 
 4 |   \bigskip
 5 |   \begin{center}
 6 |   \begin{tikzpicture}[scale=0.8,
 7 |   thread/.style={blue,very thick,->},
 8 |   barrier/.style={ultra thick},
 9 |   stopped/.style={fill=red,shape=regular polygon,regular polygon sides=8},
10 |   ]
11 |   \draw [barrier] (0,0) -- +(4,0) ;
12 |   \uncover<+>{
13 |     \draw [thread] (1,5) -- +(0,-2) ;
14 |     \draw [thread] (2,5) -- +(0,-3) ;
15 |     \draw [thread] (3,5) -- +(0,-1) ;
16 |   }
17 |   \uncover<+>{
18 |     \draw [thread] (1,5) -- +(0,-3) ;
19 |     \draw [thread] (2,5) -- +(0,-4) ;
20 |     \draw [thread] (3,5) -- +(0,-2) ;
21 |   }
22 |   \uncover<+>{
23 |     \node [stopped] at (2,0) {};
24 |     \draw [thread] (1,5) -- +(0,-4) ;
25 |     \draw [thread] (2,5) -- +(0,-5) ;
26 |     \draw [thread] (3,5) -- +(0,-3) ;
27 |   }
28 |   \uncover<+>{
29 |     \node [stopped] at (2,0) {};
30 |     \node [stopped] at (1,0) {};
31 |     \draw [thread] (1,5) -- +(0,-5) ;
32 |     \draw [thread] (2,5) -- +(0,-5) ;
33 |     \draw [thread] (3,5) -- +(0,-4) ;
34 |   }
35 |   \uncover<+>{
36 |     \node [stopped] at (2,0) {};
37 |     \node [stopped] at (1,0) {};
38 |     \node [stopped] at (3,0) {};
39 |     \draw [thread] (1,5) -- +(0,-5) ;
40 |     \draw [thread] (2,5) -- +(0,-5) ;
41 |     \draw [thread] (3,5) -- +(0,-5) ;
42 |   }
43 |   \uncover<+>{
44 |     \draw [thread] (1,5) -- +(0,-5) ;
45 |     \draw [thread] (2,5) -- +(0,-5) ;
46 |     \draw [thread] (3,5) -- +(0,-5) ;
47 |   }
48 |   \uncover<+>{
49 |     \draw [thread] (1,5) -- +(0,-6) ;
50 |     \draw [thread] (2,5) -- +(0,-6) ;
51 |     \draw [thread] (3,5) -- +(0,-6) ;
52 |   }
53 |   \end{tikzpicture}
54 |   \end{center}
55 | \end{frame}
56 | 
57 | 


--------------------------------------------------------------------------------
/slides/slides/cl-buffer-objects-v4.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}[fragile]{Memory Objects: Buffers}
 2 |   \begin{lstlisting}[gobble=4]
 3 |     buf = cl.Buffer(context, flags, size=0, hostbuf=None)
 4 |   \end{lstlisting}
 5 |   \begin{columns}
 6 |     \column{0.7\textwidth}
 7 |       \begin{overlayarea}{\textwidth}{0.7\textheight}
 8 |         \only<+>{
 9 |           \begin{itemize}
10 |             \item Chunk of device memory
11 |             \item No type information: ``Bag of bytes''
12 |             \item Observe: \emph{Not} tied to device.
13 | 
14 |               $\rightarrow$ no fixed memory address
15 | 
16 |               $\rightarrow$ pointers do \emph{not} survive kernel
17 |               launches
18 | 
19 |               $\rightarrow$ movable between devices
20 | 
21 |               $\rightarrow$ not even allocated before first use!
22 |             \item \texttt{flags}:
23 |               \begin{itemize}
24 |                 \item \texttt{READ\_ONLY/WRITE\_ONLY/READ\_WRITE}
25 |                 \item \{\texttt{ALLOC,COPY,USE}\}\texttt{\_HOST\_PTR}
26 |               \end{itemize}
27 |           \end{itemize}
28 |         }
29 |         \only<+>{
30 |           \texttt{COPY\_HOST\_PTR}:
31 |           \begin{itemize}
32 |             \item Use \texttt{hostbuf} as initial content of buffer
33 |           \end{itemize}
34 |           \texttt{USE\_HOST\_PTR}:
35 |           \begin{itemize}
36 |             \item \texttt{hostbuf} \emph{is} the buffer. 
37 |             \item Caching in device memory is allowed.
38 |           \end{itemize}
39 |           \texttt{ALLOC\_HOST\_PTR}:
40 |           \begin{itemize}
41 |             \item \emph{New} host memory (unrelated to
42 |               \texttt{hostbuf}) is visible from device
43 |               \emph{and} host.
44 |           \end{itemize}
45 |         }
46 |         \only<+>{
47 |           \begin{itemize}
48 |             \item Specify \texttt{hostbuf} or \texttt{size} (or both)
49 |             \item \texttt{hostbuf}: Needs Python Buffer Interface\\
50 |               e.g. \texttt{numpy.ndarray}, \texttt{str}.
51 |               \subitem{Important: Memory layout matters}
52 |             \item Passed to device code as pointers\\
53 |               (e.g. \texttt{float *}, \texttt{int *})
54 |             \item \texttt{enqueue\_copy}(queue, dest, src)
55 |             \item Can be mapped into host address space:\\
56 |               \texttt{cl.MemoryMap}.
57 |           \end{itemize}
58 |         }
59 |       \end{overlayarea}
60 |     \column{0.3\textwidth}
61 |       \includegraphics[width=\textwidth]{memory.png}
62 |   \end{columns}
63 | \end{frame}
64 | \addimgcredit{RAM stick: sxc.hu/gobran11}
65 | 


--------------------------------------------------------------------------------
/slides/slides/cl-command-queue.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}[fragile]{Command Queues and Events}
 2 |   \begin{lstlisting}[gobble=4]
 3 |     queue = cl.CommandQueue(context, device=None, 
 4 |       properties=None | [(prop, value),...])
 5 |   \end{lstlisting}
 6 |   \begin{columns}
 7 |     \column{0.65\textwidth}
 8 |         \begin{itemize}
 9 |           \item Attached to single device
10 |           \item \texttt{cl.command\_queue\_properties}\dots
11 |             \begin{itemize}
12 |               \item \texttt{OUT\_OF\_ORDER\_EXEC\_MODE\_ENABLE}:\\
13 |                 Do not force sequential execution
14 |               \item \texttt{PROFILING\_ENABLE}:\\
15 |                 Gather timing info
16 |             \end{itemize}
17 |         \end{itemize}
18 |     \column{0.35\textwidth}
19 |       \includegraphics[width=\textwidth]{queue.jpeg}
20 |   \end{columns}
21 | \end{frame}
22 | \addimgcredit{Queue: sxc.hu/cobrasoft}
23 | 
24 | 


--------------------------------------------------------------------------------
/slides/slides/cl-command-queues.tex:
--------------------------------------------------------------------------------
 1 | {
 2 |   \newcommand{\brick}[6]{
 3 |     \draw [fill=#4!50]
 4 |       (0,0) rectangle (#1,#2) coordinate [pos=0.5] (brickfront);
 5 |     \draw [fill=#4]
 6 |       (#1,0) -- (#1,0,-1) -- (#1,#2,-1) -- (#1,#2) --cycle;
 7 |     \draw [fill=#4]
 8 |       (0,#2) -- (0,#2,-1) -- (#1,#2,-1) -- (#1,#2) --cycle;
 9 |     #6
10 |     \begin{pgfonlayer}{foreground}
11 |       \node [fill=#4!50,inner xsep=2pt,inner ysep=2pt,opacity=0.7,#5] at (brickfront) { #3 } ;
12 |       \node [#5] at (brickfront) { #3 } ;
13 |     \end{pgfonlayer}
14 |   }
15 |   \newcommand{\drawevt}[2]{
16 |     \fill [#2,opacity=0.5] 
17 |       (0,#1) -- (1.5,#1) -- (1.5,#1,-1)
18 |       -- (1.5,#1+0.2,-1) -- (1.5,#1+0.2) -- (0,#1+0.2) --  cycle ;
19 |   }
20 |   \begin{frame}{OpenCL: Command Queues}
21 |     \begin{columns}
22 |       \column{0.45\textwidth}
23 |         \begin{itemize}
24 |           \item Host and Device run asynchronously
25 |           \item Host submits to queue:
26 |             \uncover{
27 |               \begin{itemize}
28 |                 \item Computations
29 |                 \item Memory Transfers
30 |                 \item Sync primitives
31 |                 \item \dots
32 |               \end{itemize}
33 |             }
34 |           \item Host can wait for\\drained queue
35 |           \item Profiling
36 | 
37 |         \end{itemize}
38 | 
39 |       \column{0.5\textwidth}
40 |         \begin{tikzpicture}
41 |           \brick{1.25}{2}{Host}{gray}{}{}
42 |           \begin{scope}[xshift=2.5cm,yshift=-1.5cm]
43 |             \brick{2.5}{1.25}{Device}{gray}{}{}
44 |           \end{scope}
45 |           \begin{scope}[xshift=2.5cm]
46 |             \brick{0.75}{2}{Queue 1}{blue}{text=white,rotate=90}{
47 |               \foreach\i in {0,0.2,...,1.4}
48 |                 \draw (0,\i) -- (0.75,\i) -- (0.75,\i,-1);
49 |             }
50 |           \end{scope}
51 |           \begin{scope}[xshift=3.5cm]
52 |             \brick{0.75}{2}{Queue 2}{blue}{text=white,rotate=90}{
53 |               \foreach\i in {0,0.2,...,0.9}
54 |                 \draw (0,\i) -- (0.75,\i) -- (0.75,\i,-1);
55 |             }
56 |           \end{scope}
57 | 
58 |           \node [font=\Large] at (5.25,1.25) {\dots} ;
59 | 
60 |           \draw [very thick,->] (1.25,1,-0.5) -| (2,2.5,-0.5) -| (2.875,2,-0.5);
61 |           \draw [very thick,->] (2,2.5,-0.5) -| (3.875,2,-0.5);
62 |           \draw [very thick,->] (2,2.5,-0.5) -| (4.875,2,-0.5);
63 |           \draw [very thick,->] (2.5,-1) -| (0.625,0);
64 |             
65 |         \end{tikzpicture}
66 |     \end{columns}
67 |   \end{frame}
68 | }
69 | 


--------------------------------------------------------------------------------
/slides/slides/cl-compute-dag-v2.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}{Capturing Dependencies}
 2 |   \begin{columns}
 3 |     \column{0.3\textwidth}
 4 |       B = f(A)\\
 5 |       C = g(B)\\
 6 |       E = f(C)\\
 7 |       F = h(C)\\
 8 |       G = g(E,F)\\
 9 |       P = p(B)\\
10 |       Q = q(B)\\
11 |       R = r(G,P,Q)
12 |     \column{0.6\textwidth}
13 |       \begin{center}
14 |         \input{general-dep-graph}
15 |       \end{center}
16 |   \end{columns}
17 |   \uncover<2>{
18 |     \begin{tikzpicture} [overlay]
19 |       \node [above right=1cm of current page.south west, draw,drop shadow,fill=white,
20 |       text width=0.6\textwidth, inner xsep=0.5cm,inner ysep=0.5cm,thick]
21 |         {
22 |           \begin{itemize}
23 |             \item Switch queue to out-of-order mode!
24 | 
25 |             \item Specify dependencies as list of events using
26 |               optional keyword \texttt{wait\_for=}
27 |               to \texttt{enqueue\_XXX}.
28 | 
29 |             \item Can also enqueue barrier.
30 | 
31 |             \item Common use case: Transmit/receive
32 |               from other MPI ranks.
33 | 
34 |             \item Possible in hardware on Nv Fermi, AMD Cayman:
35 |               Submit parallel work to increase machine use.
36 |               \subitem{Not yet ubiquitously implemented}
37 |           \end{itemize}
38 |         } ;
39 |     \end{tikzpicture}
40 |   }
41 | \end{frame}
42 | 


--------------------------------------------------------------------------------
/slides/slides/cl-computing-as-a-service.tex:
--------------------------------------------------------------------------------
  1 | {
  2 | \def\evalprint#1{{\pgfmathtruncatemacro{\mathresult}{#1}\mathresult}}
  3 | \begin{frame}{OpenCL: Computing as a Service}
  4 | 
  5 |   \begin{tikzpicture}[
  6 |     z={(0.5cm,-1cm)},
  7 |     every shadow/.style={shadow xshift=-0.1cm,shadow yshift=0.1cm},
  8 |     memory/.style={fill=blue!40,draw=blue},
  9 |     langarrow/.style={single arrow,shape border rotate=90,
 10 |       single arrow tip angle=165,single arrow head extend=0.6cm,
 11 |       draw,thick,fill=yellow},
 12 |   ]
 13 |     \uncover<+->{
 14 |     \node [draw,inner sep=5mm,fill=green!40,drop shadow,
 15 |       text width=1.5cm,text centered] (host) {Host\\(CPU)} ;
 16 |       \uncover<3-4>{
 17 |         \node [above left=0.2cm of host.south east,font=\tiny,memory,
 18 |           inner sep=0.5mm,minimum width=1.3cm]
 19 |           { Memory } ;
 20 |       }
 21 |     }
 22 |     \uncover<+->{
 23 |       \foreach \i in {0,...,3}
 24 |       {
 25 |         \pgfmathtruncatemacro{\plat}{\i/2}
 26 |         \node 
 27 |           [draw,fill=yellow!50, anchor=west,text width=4.5cm,font=\small] 
 28 |           at ($(host.east)+(1.75+\plat,0,-1.5+\i)$)
 29 |           (cdev\i)
 30 |           {
 31 |             Compute Device \evalprint{mod(\i,2)}
 32 |             {\tiny(Platform \evalprint{\i/2})}\\
 33 |             \begin{tikzpicture}
 34 |               \foreach \j in {0,1,2}
 35 |               {
 36 |                 \foreach \k in {0,1,2,7}
 37 |                   \coordinate (pe\i\j\k) at (0.15*\k,0,0.2*\j) ;
 38 |                 \node 
 39 |                   [draw,fill=orange!40,fit={(pe\i\j0) (pe\i\j7) (0,0.4,0.2*\j) }] 
 40 |                   (unit\i\j)
 41 |                   {};
 42 |                 \foreach \k in {0,1,2,7}
 43 |                   \filldraw 
 44 |                     [fill=red!30]
 45 |                     (pe\i\j\k) ++(-0.05,0) rectangle ++ (0.1,0.4) ;
 46 |                 \node at (4.5*0.15,0.2,0.2*\j) 
 47 |                   [anchor=center,font=\tiny,text width=] 
 48 |                   {$\cdots$} ;
 49 |               }
 50 |               \uncover<4>{
 51 |                 \draw (pe\i27) ++(0.5,0) 
 52 |                   node [anchor=south west,memory,text width=,minimum height=0.8cm]
 53 |                   {Memory};
 54 |               }
 55 |             \end{tikzpicture}
 56 |           } ;
 57 |         \draw [thick] 
 58 |           (host.east) -- ++(1,0) -- ++(0,0,-1.5+\i) -- ++(\plat+0.75,0);
 59 |       }
 60 |     }
 61 | 
 62 |     % memory ------------------------------------------------------------------
 63 |     \uncover<+>{}
 64 |     \uncover<+>{}
 65 | 
 66 |     % platforms ---------------------------------------------------------------
 67 |     \uncover<+>{}
 68 |     \uncover<+>{
 69 |       \node [fit=(cdev0) (cdev1),draw,dashed,thick] (plat0) {} ;
 70 |       \node at (plat0.north west) [anchor=south west]
 71 |         {Platform 0 (e.g. CPUs)} ;
 72 |     }
 73 |     \uncover<+>{
 74 |       \node [fit=(cdev2) (cdev3),draw,dashed,thick] (plat1) {} ;
 75 |       \node at (plat1.south west) [anchor=north west]
 76 |         {Platform 1 (e.g. GPUs)} ;
 77 |     }
 78 | 
 79 |     % hardware ----------------------------------------------------------------
 80 |     \uncover<+>{}
 81 |     \uncover<+-+(2)>{
 82 |       \draw [<-,thick] (cdev0) -- ++(-3,0.35)
 83 |         node [anchor=east,text width=2.5cm] 
 84 |           {(think ``chip'',\\ has memory interface)} ;
 85 |     }
 86 |     \uncover<+-+(1)>{
 87 |       \draw [<-,thick] (unit32.center) -- ++(-3,0.1)
 88 |         node [anchor=east,text width=3.25cm] 
 89 |           {Compute Unit\\(think ``processor'',\\ has insn. fetch)} ;
 90 |     }
 91 |     \uncover<+>{
 92 |       \draw [<-,thick] (pe327) -- ++(-1.5,-1)
 93 |         node [anchor=east,text width=3.35cm] 
 94 |           {Processing Element\\(think ``SIMD lane'')} ;
 95 |     }
 96 | 
 97 |     % programming interfaces --------------------------------------------------
 98 |     \uncover<+>{}
 99 |     \uncover<+-+(1)>{
100 |       \node [fit=(host)] (hostwrap) {} ;
101 |       \node at (hostwrap.south) 
102 |         [anchor=north,langarrow]
103 |         {Python} ;
104 |     }
105 |     \uncover<+->{
106 |       \node [fit=(plat0) (plat1)] (devwrap) {} ;
107 |       \node at (devwrap.south) 
108 |         [anchor=north,draw,langarrow]
109 |         {Device Language: $\sim$ C99} ;
110 |     }
111 |   \end{tikzpicture}
112 | \end{frame}
113 | }
114 | 


--------------------------------------------------------------------------------
/slides/slides/cl-context-v2.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}[fragile]{Contexts}
 2 |   \begin{lstlisting}[gobble=4]
 3 |     context = cl.Context(devices=None | [dev1, dev2], dev_type=None)
 4 |     context = cl.create_some_context(interactive=True)
 5 |   \end{lstlisting}
 6 | 
 7 |   \begin{columns}
 8 |     \column{0.25\textwidth}
 9 |       \includegraphics[width=\textwidth]{context.jpeg}
10 |     \column{0.75\textwidth}
11 |       \begin{itemize}
12 |         \item Spans one or more Devices
13 |         \item Create from device type or list of devices
14 |           \subitem{See docs for \texttt{cl.Platform}, \texttt{cl.Device}}
15 |         \item \texttt{dev\_type}: 
16 |           \texttt{\textit{DEFAULT}},
17 |           \texttt{ALL}, \texttt{CPU}, \texttt{GPU}
18 |         \item Needed to\dots
19 |           \begin{itemize}
20 |             \item \dots allocate Memory Objects
21 |             \item \dots create and build Programs
22 |             \item \dots host Command Queues
23 |             \item \dots execute Grids
24 |           \end{itemize}
25 |       \end{itemize}
26 |   \end{columns}
27 | \end{frame}
28 | \addimgcredit{Context: sxc.hu/svilen001}
29 | 
30 | 


--------------------------------------------------------------------------------
/slides/slides/cl-device.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}{CL ``Compute Device''}
 2 |   \begin{columns}
 3 |     \column{0.25\textwidth}
 4 |       \includegraphics[width=\textwidth]{c870.png}
 5 |     \column{0.75\textwidth}
 6 |       CL Compute Devices:
 7 |       \begin{itemize}
 8 |         \item CPUs, GPUs, accelerators, \dots
 9 |           \subitem{Anything that fits the programming model.}
10 |         \item A processor die with an interface to off-chip memory
11 |         \item Can get list of devices from platform.
12 |       \end{itemize}
13 |   \end{columns}
14 | \end{frame}
15 | 


--------------------------------------------------------------------------------
/slides/slides/cl-platform.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}{CL ``Platform''}
 2 |   \begin{columns}
 3 |     \column{0.25\textwidth}
 4 |       \begin{tikzpicture}[x=1cm,y=2cm]
 5 |         \foreach \i in {1,...,10}
 6 |         {
 7 |           \pgfmathrand
 8 |           \let\myx=\pgfmathresult
 9 |           \pgfmathrand
10 |           \let\myy=\pgfmathresult
11 |           \node at (\myx, \myy) {
12 |             \includegraphics[width=0.6\textwidth]{c870.png}
13 |           } ;
14 |         }
15 |       \end{tikzpicture}
16 |     \column{0.75\textwidth}
17 |     \begin{itemize}
18 |       \item ``Platform'': a collection of devices, all from 
19 |         the same \emph{vendor}.
20 | 
21 |       \item All devices in a platform use same CL driver/implementation.
22 |       \item Multiple platforms can be used from one
23 |         program $\rightarrow$ \emph{ICD}.
24 | 
25 |         \medskip
26 |         \texttt{libOpenCL.so}: ICD loader
27 | 
28 |         \medskip
29 |         \texttt{/etc/OpenCL/vendors/\textit{somename}.icd}:
30 |           Plain text file with name of \texttt{.so} containing 
31 |           CL implementation.
32 | 
33 |     \end{itemize}
34 |   \end{columns}
35 | \end{frame}
36 | 


--------------------------------------------------------------------------------
/slides/slides/cuda-cl-dictionary.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}{OpenCL $\leftrightarrow$ CUDA: A dictionary}
 2 |   \begin{tikzpicture}[overlay]
 3 |     \node [anchor=south east,rotate=10,opacity=0.3] 
 4 |     at ($(current page.south east) + (-0.5cm,2cm)$)
 5 |     { \includegraphics[width=6cm]{dictionary-desat.jpeg} } ;
 6 |   \end{tikzpicture}
 7 | 
 8 |   \begin{tabular}{r|l}
 9 |     \textbf{OpenCL} & \textbf{CUDA} \\
10 |     \hline
11 |     Grid & Grid \\
12 |     Work Group& Block \\
13 |     Work Item & Thread \\
14 |     \texttt{\_\_kernel} & \texttt{\_\_global\_\_} \\
15 |     \texttt{\_\_global} & \texttt{\_\_device\_\_} \\
16 |     \texttt{\_\_local} & \texttt{\_\_shared\_\_} \\
17 |     \texttt{\_\_private} & \texttt{\_\_local\_\_} \\
18 |     \texttt{image$n$d\_t} & \texttt{texture\textless type, $n$, ...\textgreater} \\
19 |     \texttt{barrier(LMF)} & \texttt{\_\_syncthreads()} \\
20 |     \texttt{get\_local\_id(012)} & \texttt{threadIdx.xyz} \\
21 |     \texttt{get\_group\_id(012)} & \texttt{blockIdx.xyz} \\
22 |     \texttt{get\_global\_id(012)} & -- (reimplement) \\
23 |   \end{tabular}
24 | \end{frame}
25 | \addimgcredit{Dictionary: sxc.hu/topfer}
26 | 


--------------------------------------------------------------------------------
/slides/slides/gpu-cl-execution-model.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}{OpenCL: Execution Model}
 2 |   \begin{columns}
 3 |   \column{.3\textwidth}
 4 |     \begin{tikzpicture}[font=\tiny\bfseries,y=-1cm,anchor=north west]
 5 |       \node at (0,-0.4) [inner sep=0] (gridtitle) {$n$D Grid};
 6 |       \foreach \x in {0, 1, 2}
 7 |         \foreach \y in {0, 1}
 8 |           \node at (0.9*\x, 0.75*\y ) (wgroup\x\y) [draw, fill=red!60, rectangle, 
 9 |             text width=0.6cm, text centered, inner sep=1mm]
10 |               {Group $(\x, \y)$} ;
11 | 
12 |       \begin{pgfonlayer}{background}
13 |         \node [draw,thick,fill=red!30,fit=(gridtitle) (wgroup21)] (gridbox) {} ;
14 |       \end{pgfonlayer}
15 | 
16 |       \begin{scope}[yshift=-3cm]
17 |       \node at (0, -0.4) [inner sep=0] (grouptitle) {Work Group $(1,1)$};
18 |       \foreach \x in {0, 1, 2, 3}
19 |         \foreach \y in {0, 1, 2, 3}
20 |           \node at (0.8*\x, .75*\y) [draw, fill=red!90, rectangle, text width=0.5cm, 
21 |             text centered,inner sep=1mm]
22 |             (item\x\y) { Item $(\x, \y)$ };
23 |       \end{scope}
24 | 
25 |       \begin{pgfonlayer}{background}
26 |         \node [draw,thick,fill=red!60,fit=(grouptitle) (item33)] (groupbox) {} ;
27 |       \end{pgfonlayer}
28 | 
29 |       \draw[dashed] (wgroup11.south west) -- (groupbox.north west);
30 |       \draw[dashed] (wgroup11.south east) -- (groupbox.north east);
31 | 
32 |     \end{tikzpicture}
33 | 
34 |   \column{0.7\textwidth}
35 | 
36 |     \begin{itemize}
37 |       \item<+-> Two-tiered Parallelism
38 |         \begin{itemize}
39 |         \item Grid = $N_x\times N_y \times N_z$ work groups
40 |         \item Work group = $S_x \times S_y\times S_z$ work items
41 |         \item Total: $\prod_{i\in\{x,y,z\}} S_i N_i$ work items
42 |         \end{itemize}
43 |       \item<+-> Comm/Sync only within work group
44 |         \begin{itemize}
45 |         \item Work group maps to compute unit
46 |         \end{itemize}
47 |       \item<+-> Grid/Group $\approx$ outer loops in an algorithm
48 |       \item<.-> Device Language:\\
49 |         \texttt{get\_\{global,group,local\}\_\{id,size\}\\(\texttt{axis})}
50 |     \end{itemize}
51 |   \end{columns}
52 | \end{frame}
53 | 


--------------------------------------------------------------------------------
/slides/slides/memory-fence.tex:
--------------------------------------------------------------------------------
  1 | \begin{frame}{Synchronization}
  2 |   What is a Memory Fence?
  3 | 
  4 |   \bigskip
  5 |   \begin{center}
  6 |   \begin{tikzpicture}[scale=0.8,
  7 |   thread/.style={blue,very thick,->},
  8 |   fence/.style={ultra thick},
  9 |   memlocation/.style={thick,draw,fill=blue!20,minimum width=1.5cm},
 10 |   write/.style={thick,->,red,dashed},
 11 |   read/.style={thick,->,green,dashed},
 12 |   meminstr/.style={pos=0.5,font=\small},
 13 |   ]
 14 |   \only<1-5>{
 15 |     \node [memlocation] (mem) at (0,1) { 17 } ;
 16 |   }
 17 |   \only<6-7>{
 18 |     \node [memlocation] (mem) at (0,1) { 18 } ;
 19 |   }
 20 |   \uncover<+-+(1)>{
 21 |     \draw [thread] (-3,5) -- +(0,-1) coordinate (t1write) ;
 22 |     \draw [thread] (3,5) -- +(0,-1) ;
 23 |   }
 24 |   \uncover<+-+(4)>{
 25 |     \draw [write] (t1write) -- (mem) node [meminstr] {write 18};
 26 |   }
 27 |   \uncover<+>{
 28 |     \draw [thread] (-3,5) -- +(0,-2) ;
 29 |     \draw [thread] (3,5) -- +(0,-2) coordinate (t2read);
 30 |   }
 31 |   \uncover<.-.(1)>{
 32 |     \draw [read] (t2read) -- (mem) node [meminstr] {read};
 33 |   }
 34 |   \uncover<+>{
 35 |     \draw [thread] (-3,5) -- +(0,-3) ;
 36 |     \draw [thread] (3,5) -- +(0,-3) coordinate (t2readc);
 37 |     \draw [read] (mem) -- (t2readc) node [meminstr] {17};
 38 |   }
 39 |   \uncover<+>{
 40 |     \draw [thread] (-3,5) -- +(0,-4) ;
 41 |     \draw [thread] (3,5) -- +(0,-4) ;
 42 |   }
 43 |   \uncover<+>{
 44 |     \draw [thread] (-3,5) -- +(0,-5) ;
 45 |     \draw [thread] (3,5) -- +(0,-5) ;
 46 |   }
 47 |   \uncover<+>{
 48 |     \draw [thread] (-3,5) -- +(0,-6) ;
 49 |     \draw [thread] (3,5) -- +(0,-6) ;
 50 |   }
 51 | 
 52 |   \end{tikzpicture}
 53 |   \end{center}
 54 | \end{frame}
 55 | % -----------------------------------------------------------------------------
 56 | \begin{frame}{Synchronization}
 57 |   What is a Memory Fence? An ordering restriction for memory access.
 58 | 
 59 |   \bigskip
 60 |   \begin{center}
 61 |   \begin{tikzpicture}[scale=0.8,
 62 |   thread/.style={blue,very thick,->},
 63 |   fence/.style={ultra thick},
 64 |   memlocation/.style={thick,draw,fill=blue!20,minimum width=1.5cm},
 65 |   write/.style={thick,->,red,dashed},
 66 |   read/.style={thick,->,green,dashed},
 67 |   meminstr/.style={pos=0.5,font=\small},
 68 |   stopped/.style={fill=red,shape=regular polygon,regular polygon sides=8},
 69 |   ]
 70 |   \draw [fence] (-4,0) -- (4,0) ;
 71 |   \only<1-4>{
 72 |     \node [memlocation] (mem) at (0,0) { 17 } ;
 73 |   }
 74 |   \only<5->{
 75 |     \node [memlocation] (mem) at (0,0) { 18 } ;
 76 |   }
 77 |   \uncover<+-+(1)>{
 78 |     \draw [thread] (-3,2) -- +(0,-1) coordinate (t1write) ;
 79 |     \draw [thread] (3,2) -- +(0,-1) ;
 80 |   }
 81 |   \uncover<+-+(2)>{
 82 |     \draw [write] (t1write) -- (mem) node [meminstr] {write 18};
 83 |   }
 84 |   \uncover<+-+(3)>{
 85 |     \draw [thread] (-3,2) -- +(0,-2) ;
 86 |     \draw [thread] (3,2) -- +(0,-2) coordinate (t2read);
 87 |   }
 88 |   \uncover<+-+(1)>{
 89 |     \node [stopped] at (-3,0) {};
 90 |     \node [stopped] at (3,0) {};
 91 |   }
 92 |   \addtocounter{beamerpauses}{2}
 93 | 
 94 |   \uncover<+>{
 95 |     \draw [thread] (-3,2) -- +(0,-3) ;
 96 |     \draw [thread] (3,2) -- +(0,-3) coordinate (t2read);
 97 |   }
 98 |   \uncover<.->{
 99 |     \draw [read] (t2read) -- (mem) node [meminstr] {read};
100 |   }
101 |   \uncover<+->{
102 |     \draw [thread] (-3,2) -- +(0,-4) ;
103 |     \draw [thread] (3,2) -- +(0,-4) coordinate (t2readc);
104 |     \draw [read] (mem) -- (t2readc) node [meminstr] {18};
105 |   }
106 | 
107 |   \end{tikzpicture}
108 |   \end{center}
109 |   %\uncover<+->{
110 |     %Flavors: All \{reads, writes, accesses\} complete\\
111 |     %before continuing.
112 |   %}
113 | \end{frame}
114 | 
115 | 


--------------------------------------------------------------------------------
/slides/slides/what-is-opencl-v2.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}{What is OpenCL?}
 2 |   \begin{columns}
 3 |     \column{0.7\textwidth}
 4 | 
 5 |       OpenCL (Open Computing Language) is an open, royalty-free
 6 |       standard for general purpose parallel programming across CPUs,
 7 |       GPUs and other processors.
 8 |       \hfill{\footnotesize[OpenCL 1.1 spec]}
 9 |       \bigskip
10 | 
11 | 
12 |       \begin{itemize}
13 |         \item Device-neutral (Nv GPU, AMD GPU, Intel/AMD CPU)
14 |         \item Vendor-neutral
15 |         \item Comes with RTCG
16 |       \end{itemize}
17 |       Defines:
18 |       \begin{itemize}
19 |         \item Host-side programming interface (library)
20 |         \item Device-side programming language (!)
21 |       \end{itemize}
22 | 
23 |     \column{0.3\textwidth}
24 |       \includegraphics[width=\textwidth] {opencl-logo.png}
25 | 
26 |   \end{columns}
27 | \end{frame}
28 | 


--------------------------------------------------------------------------------
/slides/slides/why-gpu-scripting-v3.tex:
--------------------------------------------------------------------------------
 1 | \begin{frame}{Why do Scripting for GPUs?}
 2 |   \begin{columns}
 3 |     \column{0.6\textwidth}
 4 |     \begin{itemize}
 5 |       \item GPUs are everything that scripting languages are not.
 6 |         \begin{itemize}
 7 |           \item Highly parallel
 8 |           \item Very architecture-sensitive
 9 |           \item Built for maximum FP/memory throughput
10 |         \end{itemize}
11 |         $\rightarrow$ complement each other
12 |       \item CPU: largely restricted to control tasks ($\sim$1000/sec)
13 |         \begin{itemize}
14 |           \item Scripting fast enough
15 |         \end{itemize}
16 |       \item Python + CUDA = \textbf{PyCUDA}
17 |       \item Python + OpenCL = \textbf{PyOpenCL}
18 |     \end{itemize}
19 |     \column{0.4\textwidth}
20 |       \includegraphics[width=\textwidth]{c870.png}
21 |   \end{columns}
22 | \end{frame}
23 | \addimgcredit{C870 GPU: Nvidia Corp.}
24 | 
25 | 


--------------------------------------------------------------------------------
/slides/update-slides.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | 
 3 | # Copy the slide PDFs found in out/ into the matching chapter
 4 | # directories (../01*, ../03*, ../06*), each as 0-slides.pdf.
 5 | 
 6 | set -e  # abort on the first failing command
 7 | set -x  # trace each command as it is executed
 8 | 
 9 | for i in 01 03 06; do
10 |   # Resolve the chapter directory whose name starts with this prefix.
11 |   tgt_dir=$(echo ../"$i"*)
12 |   # Quote the expansions so the resolved path is not word-split or
13 |   # re-globbed; only the intended glob characters stay unquoted.
14 |   cp out/"$i"-*.pdf "$tgt_dir"/0-slides.pdf
15 | done
16 | 


--------------------------------------------------------------------------------