├── .gitignore ├── .gitmodules ├── 01-intro ├── 0-slides.pdf ├── 1-intro-via-image-processing.ipynb ├── 2-1-Python-Types.ipynb ├── 2-2-Python-Names and Values.ipynb ├── 2-3-Python-Indexing.ipynb ├── 2-4-Python-Control flow.ipynb ├── 2-5-Python-Functions.ipynb ├── 2-6-Python-Objects.ipynb ├── 2-7-Python-A few more things.ipynb ├── 3-1-numpy-Introduction.ipynb ├── 3-2-numpy-Indexing.ipynb ├── 3-3-numpy-Broadcasting.ipynb ├── 3-4-numpy-Tools.ipynb ├── 3-5-numpy-Data Storage.ipynb ├── 4-practice-ordering-tree.ipynb ├── README.rst └── cat.jpeg ├── 02-languages ├── 01-expression-trees.ipynb ├── 02-traversing-trees.ipynb ├── 03-defining-custom-node-types.ipynb ├── 04-accessing-python-syntax-trees.ipynb ├── 05-common-operations.ipynb ├── 06-interoperating-with-sympy.ipynb ├── 07-internal-representations.ipynb ├── 08-practice.ipynb ├── README.rst └── gvmagic.py ├── 03-opencl ├── 0-slides.pdf ├── 1-1-hello-pyopencl.ipynb ├── 1-2-pyopencl-arrays.ipynb ├── 1-3-exercise.ipynb ├── 1-4-ipython-magic.ipynb ├── 2-1-elementwise.ipynb ├── 2-2-reduction.ipynb ├── 2-2a-monte-carlo.ipynb ├── 2-3-scan.ipynb ├── 3-practice-expression-kernel.ipynb ├── 3-practice-hermite-monte-carlo.ipynb ├── 3-practice-thinking-with-scans.ipynb └── README.rst ├── 04-case-studies ├── 01-indexing-and-broadcasting.ipynb ├── 02-einsum.ipynb ├── 03-ufl.ipynb └── README.rst ├── 05-generating-c ├── 01-substitution.ipynb ├── 02-templating.ipynb ├── 03-asts.ipynb ├── 04-practice.ipynb └── README.rst ├── 06-loopy ├── 0-slides.pdf ├── 01-rank-one.ipynb ├── 02-data-layout.ipynb ├── 03-reduction.ipynb ├── 04-intermediate-results.ipynb ├── 05-pde-to-code.ipynb ├── 05a-image-processing-language.ipynb ├── 06-operation-counting.ipynb ├── 07-practice-einsum.ipynb ├── 07-practice-image-processing.ipynb ├── 07-practice-matrix-products.ipynb ├── 08-monte-carlo.ipynb ├── README.rst └── cat.jpeg ├── LICENSE ├── README.rst ├── assemble.sh ├── aux ├── index.md ├── ipython_config.py ├── material-email.txt ├── 
pystuff-requirements.txt ├── sudoers ├── time-planning.ods ├── tut-pack.run ├── upload.sh ├── video-script.txt └── vm-requirements.txt ├── prepare-all-notebooks.sh └── slides ├── .gitignore ├── 01-intro.tex ├── 03-opencl.tex ├── 06-loopy.tex ├── beamercolorthemeuiuc.sty ├── code ├── loopy-variants.py ├── transpose.cl └── transpose.cu ├── kloeckislides.sty ├── latexmkrc ├── media ├── amd-logo.pdf ├── apple-logo.pdf ├── c870.png ├── cl-programs-and-kernels-v2.tex ├── context.jpeg ├── cpu.jpeg ├── general-dep-graph.tex ├── glass-dollar.jpeg ├── intel-logo.pdf ├── loopy-crop.pdf ├── memory.png ├── nvidia.pdf ├── onion.jpeg ├── opencl-11.pdf ├── opencl-logo.png ├── opencl-overview.pdf ├── parallel-field.jpeg ├── python-logo-no-shadow.png ├── question-mark.png ├── queue.jpeg ├── radar.png └── tree.jpeg ├── settings.tex ├── slides ├── barrier.tex ├── cl-buffer-objects-v4.tex ├── cl-command-queue.tex ├── cl-command-queues.tex ├── cl-compute-dag-v2.tex ├── cl-computing-as-a-service.tex ├── cl-context-v2.tex ├── cl-device.tex ├── cl-platform.tex ├── cl-prog-model-hardware.tex ├── cuda-cl-dictionary.tex ├── gpu-cl-execution-model.tex ├── memory-fence.tex ├── what-is-opencl-v2.tex └── why-gpu-scripting-v3.tex └── update-slides.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | .ipynb_checkpoints 3 | *~ 4 | cleared 5 | upload 6 | dist 7 | __pycache__ 8 | *dist.zip 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ipython-demo-tools"] 2 | path = ipython-demo-tools 3 | url = https://github.com/inducer/ipython-demo-tools.git 4 | -------------------------------------------------------------------------------- /01-intro/0-slides.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/01-intro/0-slides.pdf -------------------------------------------------------------------------------- /01-intro/2-1-Python-Types.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Introduction: Types" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Let's evaluate some simple expressions." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "6" 28 | ] 29 | }, 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "#clear\n", 37 | "3*2" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "11" 51 | ] 52 | }, 53 | "execution_count": 2, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "#clear\n", 60 | "5+3*2" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "You can use `type()` to find the *type* of an expression." 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 3, 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "int" 81 | ] 82 | }, 83 | "execution_count": 3, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "#clear\n", 90 | "type(5+3*2)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "Now add decimal points." 
98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 4, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "12.0" 111 | ] 112 | }, 113 | "execution_count": 4, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "#clear\n", 120 | "5+3.5*2" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "metadata": { 127 | "collapsed": false 128 | }, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "float" 134 | ] 135 | }, 136 | "execution_count": 5, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "#clear\n", 143 | "type(5+3.0*2)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "Strings are written with single (``'``) or double quotes (`\"`)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 6, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "'hello'" 164 | ] 165 | }, 166 | "execution_count": 6, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "#clear\n", 173 | "\"hello\"" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "Multiplication and addition work on strings, too." 
181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 2, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "'hello hello hello sc15'" 194 | ] 195 | }, 196 | "execution_count": 2, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "#clear\n", 203 | "3 * 'hello ' + \"sc15\"" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "Lists are written in brackets (`[]`) with commas (`,`)." 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 8, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "text/plain": [ 223 | "[5, 3, 7]" 224 | ] 225 | }, 226 | "execution_count": 8, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "#clear\n", 233 | "[5, 3, 7]" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "List entries don't have to have the same type." 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 9, 246 | "metadata": { 247 | "collapsed": false 248 | }, 249 | "outputs": [ 250 | { 251 | "data": { 252 | "text/plain": [ 253 | "['hi there', 15, [1, 2, 3]]" 254 | ] 255 | }, 256 | "execution_count": 9, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "[\"hi there\", 15, [1,2,3]]" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "\"Multiplication\" and \"addition\" work on lists, too." 
270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 10, 275 | "metadata": { 276 | "collapsed": false 277 | }, 278 | "outputs": [ 279 | { 280 | "data": { 281 | "text/plain": [ 282 | "[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 5, 5, 5]" 283 | ] 284 | }, 285 | "execution_count": 10, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "#clear\n", 292 | "[1,2,3] * 4 + [5, 5, 5]" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "Hmmmmmm. Was that what you expected?" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 1, 305 | "metadata": { 306 | "collapsed": false 307 | }, 308 | "outputs": [ 309 | { 310 | "data": { 311 | "text/plain": [ 312 | "array([ 9, 13, 17])" 313 | ] 314 | }, 315 | "execution_count": 1, 316 | "metadata": {}, 317 | "output_type": "execute_result" 318 | } 319 | ], 320 | "source": [ 321 | "#clear\n", 322 | "import numpy as np\n", 323 | "\n", 324 | "np.array([1,2,3]) * 4 + np.array([5,5,5])" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": { 331 | "collapsed": false 332 | }, 333 | "outputs": [], 334 | "source": [] 335 | } 336 | ], 337 | "metadata": {}, 338 | "nbformat": 4, 339 | "nbformat_minor": 0 340 | } -------------------------------------------------------------------------------- /01-intro/2-4-Python-Control flow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Introduction: Control Flow" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "`for` loops in Python always iterate over something list-like:" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [ 24 | { 25 | "name": 
"stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "0\n", 29 | "1\n", 30 | "2\n", 31 | "3\n", 32 | "4\n", 33 | "5\n", 34 | "6\n", 35 | "7\n", 36 | "8\n", 37 | "9\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "#clear\n", 43 | "for i in range(10):\n", 44 | "\n", 45 | " print(i)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "**Note** that Python does block-structuring by leading spaces.\n", 53 | "\n", 54 | "Also note the trailing \"`:`\"." 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "---\n", 62 | "`if`/`else` are as you would expect them to be:" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 3, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "0 is divisible by 3\n", 77 | "1 is not divisible by 3\n", 78 | "2 is not divisible by 3\n", 79 | "3 is divisible by 3\n", 80 | "4 is not divisible by 3\n", 81 | "5 is not divisible by 3\n", 82 | "6 is divisible by 3\n", 83 | "7 is not divisible by 3\n", 84 | "8 is not divisible by 3\n", 85 | "9 is divisible by 3\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "for i in range(10):\n", 91 | " if i % 3 == 0:\n", 92 | " print(\"{0} is divisible by 3\".format(i))\n", 93 | " else:\n", 94 | " print(\"{0} is not divisible by 3\".format(i))" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "`while` loops exist too:" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 5, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "SOLUTION: 15\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "i = 0\n", 121 | "while True:\n", 122 | " i += 1\n", 123 | " if i**3 + i**2 + i + 1 == 3616:\n", 124 | " break\n", 125 | "\n", 126 | 
"print(\"SOLUTION:\", i)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "----\n", 134 | "Building lists by hand can be a little long. For example, build a list of the squares of integers below 50 divisible by 7:" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "#clear\n", 146 | "mylist = []\n", 147 | "\n", 148 | "for i in range(50):\n", 149 | "\n", 150 | " if i % 7 == 0:\n", 151 | "\n", 152 | " mylist.append(i**2)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 7, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "[0, 49, 196, 441, 784, 1225, 1764, 2401]" 166 | ] 167 | }, 168 | "execution_count": 7, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "mylist" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "Python has something called *list comprehension*:" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 8, 187 | "metadata": { 188 | "collapsed": false 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "#clear\n", 193 | "mylist = [i**2 for i in range(50) if i % 7 == 0]" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 9, 199 | "metadata": { 200 | "collapsed": false 201 | }, 202 | "outputs": [ 203 | { 204 | "data": { 205 | "text/plain": [ 206 | "[0, 49, 196, 441, 784, 1225, 1764, 2401]" 207 | ] 208 | }, 209 | "execution_count": 9, 210 | "metadata": {}, 211 | "output_type": "execute_result" 212 | } 213 | ], 214 | "source": [ 215 | "mylist" 216 | ] 217 | } 218 | ], 219 | "metadata": {}, 220 | "nbformat": 4, 221 | "nbformat_minor": 0 222 | } --------------------------------------------------------------------------------
/01-intro/2-5-Python-Functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Introduction: Functions" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Functions help extract out common code blocks.\n", 15 | "\n", 16 | "Let's define a function `print_greeting()`." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "def print_greeting():\n", 28 | " print(\"Hi there, how are you?\")\n", 29 | " print(\"Long time no see.\")" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "And call it:" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "Hi there, how are you?\n", 51 | "Long time no see.\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "print_greeting()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "That's a bit impersonal." 
64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "#clear\n", 75 | "def print_greeting(name):\n", 76 | "\n", 77 | " print(\"Hi there, {0}, how are you?\".format(name))\n", 78 | "\n", 79 | " print(\"Long time no see.\")" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "Hi there, Andreas, how are you?\n", 94 | "Long time no see.\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "print_greeting(\"Andreas\")" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "But we might not know their name.\n", 107 | "\n", 108 | "(And we just changed the interface of `print_greeting`!)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "#clear\n", 120 | "def print_greeting(name=\"my friend\"):\n", 121 | "\n", 122 | " print(\"Hi there, {0}, how are you?\".format(name))\n", 123 | "\n", 124 | " print(\"Long time no see.\")" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 7, 130 | "metadata": { 131 | "collapsed": false 132 | }, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "Hi there, Andreas, how are you?\n", 139 | "Long time no see.\n", 140 | "Hi there, my friend, how are you?\n", 141 | "Long time no see.\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "print_greeting(\"Andreas\")\n", 147 | "print_greeting()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "----" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Function 
parameters work like variables.\n", 162 | "\n", 163 | "So what does this do?" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 8, 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "[1, 2, 3, 5]\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "def my_func(my_list):\n", 183 | " my_list.append(5)\n", 184 | " \n", 185 | "l = [1,2,3]\n", 186 | "my_func(l)\n", 187 | "print(l)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "Can be very surprising!\n", 195 | "\n", 196 | "Define a better function `my_func_2`:" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 9, 202 | "metadata": { 203 | "collapsed": false 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "#clear\n", 208 | "def my_func_2(my_list):\n", 209 | "\n", 210 | " return my_list + [5]" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 10, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "[1, 2, 3]\n", 225 | "[1, 2, 3, 5]\n" 226 | ] 227 | } 228 | ], 229 | "source": [ 230 | "l = [1,2,3]\n", 231 | "l2 = my_func_2(l)\n", 232 | "print(l)\n", 233 | "print(l2)" 234 | ] 235 | } 236 | ], 237 | "metadata": {}, 238 | "nbformat": 4, 239 | "nbformat_minor": 0 240 | } -------------------------------------------------------------------------------- /01-intro/2-6-Python-Objects.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Objects in Python" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Everything in Python is an 'object'.\n", 15 | "\n", 16 | "Defining custom types of objects is easy:" 17 | 
] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "\n", 28 | "class Employee:\n", 29 | " def __init__(self, name, salary):\n", 30 | " self.name = name\n", 31 | " self.salary = salary\n", 32 | " \n", 33 | " def fire(self):\n", 34 | " self.salary = 0" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "* Functions within the class (type) definition are called 'methods'.\n", 42 | "* They take an explicit `self` parameter, through which the object is passed.\n", 43 | "* `__init__` is the 'constructor'\n", 44 | " * Objects are created by 'calling' the type like a function.\n", 45 | " * Arguments in this call are passed to the constructor" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 11, 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "'Joe'" 59 | ] 60 | }, 61 | "execution_count": 11, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "joe = Employee(\"Joe\", 100000)\n", 68 | "joe.name" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 7, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "100000" 82 | ] 83 | }, 84 | "execution_count": 7, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "#clear\n", 91 | "joe.salary" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "Let's fire Joe." 
99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 8, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "#clear\n", 110 | "joe.fire()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 9, 116 | "metadata": { 117 | "collapsed": false 118 | }, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "0" 124 | ] 125 | }, 126 | "execution_count": 9, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "#clear\n", 133 | "joe.salary" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "## Inheritance" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "Types can be based on other types by inheritance:" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 10, 153 | "metadata": { 154 | "collapsed": false 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "class Boss(Employee):\n", 159 | " def __init__(self, name, salary, supervises):\n", 160 | " super(Boss, self).__init__(name, salary)\n", 161 | " \n", 162 | " self.supervises = supervises\n", 163 | " \n", 164 | " def fire(self):\n", 165 | " for s in self.supervises:\n", 166 | " s.fire()\n", 167 | " \n", 168 | " super(Boss, self).fire()" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 12, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "150000" 182 | ] 183 | }, 184 | "execution_count": 12, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "joe = Employee(\"Joe\", 100000)\n", 191 | "jack = Employee(\"Jack\", 100000)\n", 192 | "mike = Boss(\"Mike\", 150000, [joe, jack])\n", 193 | "\n", 194 | "mike.salary" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 13, 
200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "text/plain": [ 207 | "100000" 208 | ] 209 | }, 210 | "execution_count": 13, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "#clear\n", 217 | "joe.salary" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "Now what happens to Joe's salary if Mike gets fired?" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 14, 230 | "metadata": { 231 | "collapsed": false 232 | }, 233 | "outputs": [], 234 | "source": [ 235 | "#clear\n", 236 | "mike.fire()" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 15, 242 | "metadata": { 243 | "collapsed": false 244 | }, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/plain": [ 249 | "0" 250 | ] 251 | }, 252 | "execution_count": 15, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "#clear\n", 259 | "joe.salary" 260 | ] 261 | } 262 | ], 263 | "metadata": {}, 264 | "nbformat": 4, 265 | "nbformat_minor": 0 266 | } -------------------------------------------------------------------------------- /01-intro/2-7-Python-A few more things.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Introduction: A few more things" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Getting help:\n", 15 | "\n", 16 | "1) Use TAB in IPython" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "a = [1,2,3]" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 
37 | "source": [] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "2) Using `pydoc3` on the command line." 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "3) Online at https://docs.python.org/3/" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "----" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "**A few things to look up in a quiet moment**" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "String formatting" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 2, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "'My name is Andreas and I like hiking'" 85 | ] 86 | }, 87 | "execution_count": 2, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "\"My name is {0} and I like {1}\".format(\"Andreas\", \"hiking\")" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "---\n", 101 | "Dictionaries" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 3, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "5000" 115 | ] 116 | }, 117 | "execution_count": 3, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "prices = {\"Tesla K40\": 5000, \"GTX Titan\":1400}\n", 124 | "prices[\"Tesla K40\"]" 125 | ] 126 | } 127 | ], 128 | "metadata": {}, 129 | "nbformat": 4, 130 | "nbformat_minor": 0 131 | } -------------------------------------------------------------------------------- /01-intro/3-3-numpy-Broadcasting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 |
"metadata": {}, 6 | "source": [ 7 | "# numpy: Broadcasting" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "(3, 3)\n", 33 | "[[0 1 2]\n", 34 | " [3 4 5]\n", 35 | " [6 7 8]]\n", 36 | "(3, 3)\n", 37 | "[[ 4 5 6]\n", 38 | " [ 7 8 9]\n", 39 | " [10 11 12]]\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "a = np.arange(9).reshape(3, 3)\n", 45 | "print(a.shape)\n", 46 | "print(a)\n", 47 | "b = np.arange(4, 4+9).reshape(3, 3)\n", 48 | "print(b.shape)\n", 49 | "print(b)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "array([[ 4, 6, 8],\n", 63 | " [10, 12, 14],\n", 64 | " [16, 18, 20]])" 65 | ] 66 | }, 67 | "execution_count": 3, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "#clear\n", 74 | "a+b" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "So this is easy and one-to-one.\n" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "---\n", 89 | "\n", 90 | "What if the shapes do not match?" 
91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 4, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "(3, 3)\n", 105 | "[[0 1 2]\n", 106 | " [3 4 5]\n", 107 | " [6 7 8]]\n", 108 | "(3,)\n", 109 | "[0 1 2]\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "a = np.arange(9).reshape(3, 3)\n", 115 | "print(a.shape)\n", 116 | "print(a)\n", 117 | "b = np.arange(3)\n", 118 | "print(b.shape)\n", 119 | "print(b)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "What will this do?" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 5, 132 | "metadata": { 133 | "collapsed": false 134 | }, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "array([[ 0, 2, 4],\n", 140 | " [ 3, 5, 7],\n", 141 | " [ 6, 8, 10]])" 142 | ] 143 | }, 144 | "execution_count": 5, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "a+b" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "It has *broadcast* along the last axis!" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "---\n", 165 | "\n", 166 | "Can we broadcast along the *first* axis?" 
167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 6, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "array([[ 0, 1, 2],\n", 180 | " [ 4, 5, 6],\n", 181 | " [ 8, 9, 10]])" 182 | ] 183 | }, 184 | "execution_count": 6, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "#clear\n", 191 | "a+b.reshape(3, 1)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "Rules:\n", 199 | "\n", 200 | "* Shapes are matched axis-by-axis from last to first.\n", 201 | "* A length-1 axis can be *broadcast* if necessary." 202 | ] 203 | } 204 | ], 205 | "metadata": {}, 206 | "nbformat": 4, 207 | "nbformat_minor": 0 208 | } -------------------------------------------------------------------------------- /01-intro/4-practice-ordering-tree.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Practice: Build a recursive data structure" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "You are given an array of floating point numbers called `numbers`. 
These numbers lie between 0 and 1.\n", 15 | "\n", 16 | "Write a function `build_tree(numbers, left, right, max_in_leaf=5)` that builds a \"tree of bins\" data structure, where\n", 17 | "\n", 18 | "* `left` is a lower bound on *numbers*\n", 19 | "* `right` is an upper bound on *numbers*\n", 20 | "* `max_in_leaf` is the largest number of numbers allowed in a leaf node of the tree\n", 21 | "\n", 22 | "Have this function do the following:\n", 23 | "\n", 24 | "* If `numbers` contains at most `max_in_leaf` numbers, return `numbers` unmodified as a 'leaf node'.\n", 25 | "* Otherwise, return a tuple of the form `(left_child, pivot, right_child)`, where `pivot` is the average of `left` and `right`. `left_child` is the result of processing the part of `numbers` that is less than `pivot` through `build_tree`, and `right_child` is the same for the numbers greater than or equal to `pivot`.\n", 26 | "\n", 27 | "Hints:\n", 28 | "\n", 29 | "* look up `len()` to find the length of `numbers`, or use `numbers.shape[0]`" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import numpy as np\n", 41 | "\n", 42 | "numbers = np.random.rand(100)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "def build_tree(numbers, left, right, max_in_leaf=5):\n", 54 | " # ...\n", 55 | " pass" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "#clear\n", 67 | "# Solution\n", 68 | "\n", 69 | "def build_tree(numbers, left, right, max_in_leaf=5):\n", 70 | " if len(numbers) <= max_in_leaf:\n", 71 | " return numbers\n", 72 | "\n", 73 | " pivot = (left + right)/2\n", 74 | " return (build_tree(numbers[numbers < pivot], left, pivot, max_in_leaf),\n", 75 | " pivot,\n", 76
| " build_tree(numbers[numbers >= pivot], pivot, right, max_in_leaf))" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "((((array([ 0.03155442, 0.04969038, 0.00203516, 0.01134467]), 0.0625, array([ 0.08795129, 0.08484712, 0.10400076])), 0.125, ((array([ 0.13288577, 0.1348917 , 0.13717107, 0.13363111]), 0.15625, array([ 0.18361257, 0.16379185, 0.17935313])), 0.1875, array([ 0.18835925]))), 0.25, ((((array([ 0.25471829, 0.25316833, 0.25368532]), 0.265625, array([ 0.2753575 , 0.273414 , 0.27016936])), 0.28125, array([ 0.31082018, 0.29369432, 0.29940896, 0.30908776])), 0.3125, array([ 0.33571279, 0.37308478, 0.33152007, 0.35286179])), 0.375, (array([ 0.42385846, 0.4181284 , 0.41651459, 0.40505667, 0.39770273]), 0.4375, (array([ 0.45339789, 0.45886606, 0.45242226, 0.46320172]), 0.46875, array([ 0.47478645, 0.47411047]))))), 0.5, (((((array([ 0.50499705, 0.50985016]), 0.515625, array([ 0.52229993, 0.51933766, 0.51886349, 0.52999165, 0.52412507])), 0.53125, array([ 0.55858432, 0.54768638, 0.55832187, 0.5325089 , 0.55220628])), 0.5625, (array([ 0.57673859, 0.56823789, 0.5813294 , 0.58937822]), 0.59375, array([ 0.6160641 , 0.61934406, 0.602265 , 0.6162048 ]))), 0.625, (array([ 0.65624928, 0.6413943 , 0.67663038, 0.64642908]), 0.6875, (array([ 0.70809202, 0.68992577, 0.70079716, 0.70850778]), 0.71875, array([ 0.74158749, 0.73977694, 0.73835865])))), 0.75, (((array([ 0.77388481, 0.75894454, 0.75947687, 0.75107273, 0.75294742]), 0.78125, array([ 0.81109003, 0.79674588, 0.78627348, 0.80242775, 0.79621333])), 0.8125, array([ 0.85778992, 0.8153317 , 0.8164691 , 0.84316018])), 0.875, ((array([ 0.88278667, 0.88526777, 0.89802129]), 0.90625, array([ 0.92812108, 0.90822701, 0.91261268])), 0.9375, ((array([ 0.94584389, 0.95214282, 0.93878381, 0.94956855]), 0.953125, array([ 0.96443695, 0.96785209])), 0.96875, 
array([ 0.9805466 , 0.97100946, 0.99383466]))))))\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "tree = build_tree(numbers, 0, 1)\n", 96 | "print(tree)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [] 107 | } 108 | ], 109 | "metadata": { 110 | "kernelspec": { 111 | "display_name": "Python 3", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.5.1+" 126 | } 127 | }, 128 | "nbformat": 4, 129 | "nbformat_minor": 0 130 | } 131 | -------------------------------------------------------------------------------- /01-intro/README.rst: -------------------------------------------------------------------------------- 1 | **Tip:** ``ipynb`` files can be viewed on Github. Just click them. 2 | -------------------------------------------------------------------------------- /01-intro/cat.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/01-intro/cat.jpeg -------------------------------------------------------------------------------- /02-languages/01-expression-trees.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Expression Trees" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "What's an *expression tree*?" 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "Variable('x')" 28 | ] 29 | }, 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "import pymbolic.primitives as p\n", 37 | "x = p.Variable(\"x\")\n", 38 | "y = p.Variable(\"y\")\n", 39 | "x" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "Let's look at what happens with a simple expression:" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "Sum((Variable('x'), 5))" 60 | ] 61 | }, 62 | "execution_count": 2, 63 | "metadata": {}, 64 | "output_type": "execute_result" 65 | } 66 | ], 67 | "source": [ 68 | "#clear\n", 69 | "x+5" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "It does not get evaluated.\n", 77 | "\n", 78 | "---\n", 79 | "\n", 80 | "Let's look at its type and structure in more detail."
81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "u = x+5" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 4, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "pymbolic.primitives.Sum" 105 | ] 106 | }, 107 | "execution_count": 4, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "#clear\n", 114 | "type(u)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 5, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "(Variable('x'), 5)" 128 | ] 129 | }, 130 | "execution_count": 5, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "#clear\n", 137 | "u.children" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "OK, easy. What if we introduce a product?" 
145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 6, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "Sum((Variable('x'), Product((4, Variable('y')))))" 158 | ] 159 | }, 160 | "execution_count": 6, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "u = x + 4*y\n", 167 | "u" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 7, 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/plain": [ 180 | "Variable('x')" 181 | ] 182 | }, 183 | "execution_count": 7, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "#clear\n", 190 | "u.children[0]" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 8, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "Product((4, Variable('y')))" 204 | ] 205 | }, 206 | "execution_count": 8, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "#clear\n", 213 | "u.children[1]" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 9, 219 | "metadata": { 220 | "collapsed": false 221 | }, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "4" 227 | ] 228 | }, 229 | "execution_count": 9, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "#clear\n", 236 | "u.children[1].children[0]" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 22, 242 | "metadata": { 243 | "collapsed": false 244 | }, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/plain": [ 249 | "Variable('y')" 250 | ] 251 | }, 252 | "execution_count": 22, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | 
"source": [ 258 | "#clear\n", 259 | "u.children[1].children[1]" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "This structure is called a *tree*, because there is a *root* and *branches*." 267 | ] 268 | } 269 | ], 270 | "metadata": {}, 271 | "nbformat": 4, 272 | "nbformat_minor": 0 273 | } -------------------------------------------------------------------------------- /02-languages/03-defining-custom-node-types.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Defining Custom Node Types" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Mathematical expressions are only the first step. Most of the time, in mathematical software, the interesting aspects are special \"things\" that are strung together by expressions.\n", 15 | "\n", 16 | "So it would be helpful to be able to define our own expression types:" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import pymbolic.primitives as p\n", 28 | "\n", 29 | "x = p.Variable(\"x\")" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "class DerivativeOperator(p.Expression):\n", 41 | " def __init__(self, operand):\n", 42 | " self.operand = operand\n", 43 | "\n", 44 | " def __getinitargs__(self):\n", 45 | " return (self.operand,)\n", 46 | "\n", 47 | " mapper_method = \"map_derivative_operator\"" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "`__getinitargs__` tells `pymbolic` what the arguments of the constructor were. This is used for printing and comparisons."
55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "Quotient(Variable('x'), DerivativeOperator(Power(Sum((Variable('x'), 23)), 0.5)))" 68 | ] 69 | }, 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "u = x/DerivativeOperator((x + 23)**0.5)\n", 77 | "u" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "We can then also define custom mappers (let's call ours `DerivDoubler`) that operate on these node types:" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 5, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "from pymbolic.mapper import IdentityMapper" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "#clear\n", 107 | "class DerivDoubler(IdentityMapper):\n", 108 | " def map_derivative_operator(self, expr):\n", 109 | " return 2*DerivativeOperator(self.rec(expr.operand))" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Now apply it:" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 7, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "Quotient(Variable('x'), Product((2, DerivativeOperator(Power(Sum((Variable('x'), 23)), 0.5)))))" 130 | ] 131 | }, 132 | "execution_count": 7, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "dd = DerivDoubler()\n", 139 | "\n", 140 | "dd(u)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | 
"outputs": [], 150 | "source": [] 151 | } 152 | ], 153 | "metadata": { 154 | "kernelspec": { 155 | "display_name": "Python 3", 156 | "language": "python", 157 | "name": "python3" 158 | }, 159 | "language_info": { 160 | "codemirror_mode": { 161 | "name": "ipython", 162 | "version": 3 163 | }, 164 | "file_extension": ".py", 165 | "mimetype": "text/x-python", 166 | "name": "python", 167 | "nbconvert_exporter": "python", 168 | "pygments_lexer": "ipython3", 169 | "version": "3.5.0+" 170 | } 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 0 174 | } 175 | -------------------------------------------------------------------------------- /02-languages/04-accessing-python-syntax-trees.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Accessing Python Syntax Trees" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "It is also possible to access code that is written in Python. 
This works using the `ast` module, and works as follows:" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Module(body=[FunctionDef(name='f', args=arguments(args=[arg(arg='x', annotation=None), arg(arg='y', annotation=None)], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=[Return(value=BinOp(left=BinOp(left=BinOp(left=Num(n=2), op=Mult(), right=Name(id='x', ctx=Load())), op=Add(), right=BinOp(left=Name(id='y', ctx=Load()), op=Pow(), right=Num(n=2))), op=Add(), right=Num(n=5)))], decorator_list=[], returns=None)])\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "SRC = \"\"\"\n", 34 | "def f(x, y):\n", 35 | " return 2*x + y**2 + 5\n", 36 | "\"\"\"\n", 37 | "\n", 38 | "import ast\n", 39 | "tree = ast.parse(SRC)\n", 40 | "\n", 41 | "print(ast.dump(tree))" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "It is possible to transcribe the expressions here into the form discussed earlier." 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "2*x + y**2 + 5\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "from pymbolic.interop.ast import ASTToPymbolic\n", 68 | "expr = ASTToPymbolic()(tree.body[0].body[0].value)\n", 69 | "print(expr)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "But beware when defining languages this way. Python has very well-defined semantics, and the user will expect that your way of executing their code is a good match for their mental model of what the code should do. 
As such, it may be better to start with a \"blank slate\" in terms of language design, so as to not run afoul of already formed expectations." 77 | ] 78 | } 79 | ], 80 | "metadata": { 81 | "kernelspec": { 82 | "display_name": "Python 3", 83 | "language": "python", 84 | "name": "python3" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.5.0+" 97 | } 98 | }, 99 | "nbformat": 4, 100 | "nbformat_minor": 0 101 | } 102 | -------------------------------------------------------------------------------- /02-languages/05-common-operations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Common Operations" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## What common operations are supported?" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "Just normal mappers:\n", 22 | "\n", 23 | "* Evaluation\n", 24 | "* Turning expressions into 'human-readable' strings\n", 25 | "* Performing substitution\n", 26 | "* Taking derivatives\n", 27 | "* Finding variables on which an expression depends\n", 28 | "* Code Generation\n", 29 | "\n", 30 | "Also:\n", 31 | "\n", 32 | "* Parsing (i.e. 
turning a string into an expression)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Evaluation" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 1, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "Power(Sum((Power(Variable('x'), 2), Power(Variable('y'), 2))), 0.5)" 53 | ] 54 | }, 55 | "execution_count": 1, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "from pymbolic import parse\n", 62 | "from pymbolic.mapper.evaluator import EvaluationMapper\n", 63 | "\n", 64 | "expr = parse(\"(x**2 + y**2)**0.5\")\n", 65 | "expr" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "(x**2 + y**2)**0.5\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "#clear\n", 85 | "print(expr)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 2, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "17.26267650163207\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "#clear\n", 105 | "evm = EvaluationMapper({\"x\": 17, \"y\": 3})\n", 106 | "\n", 107 | "print(evm(expr))" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "This is just a normal mapper, so its behavior can be overridden as described before." 
115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Finding Independent Variables" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 3, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "{Variable('x'), Variable('y')}" 135 | ] 136 | }, 137 | "execution_count": 3, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "from pymbolic.mapper.dependency import DependencyMapper\n", 144 | "\n", 145 | "depmap = DependencyMapper()\n", 146 | "depmap(expr)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "## Code generation" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 4, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "'pow(x + 4, 17)'" 167 | ] 168 | }, 169 | "execution_count": 4, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "from pymbolic.mapper.c_code import CCodeMapper\n", 176 | "\n", 177 | "ccm = CCodeMapper()\n", 178 | "x = parse(\"x\")\n", 179 | "ccm((x+4)**17)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "(We're using `parse` here just to give us a `Variable(\"x\")` object.)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "## Common subexpressions" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "Often, some parts of an expression occur multiple times in a bigger expression." 
201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 5, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/plain": [ 213 | "'pow(x + 4, 3) + 4 * pow(x + 4, 3) * h * h + 2 * pow(x + 4, 3) * h'" 214 | ] 215 | }, 216 | "execution_count": 5, 217 | "metadata": {}, 218 | "output_type": "execute_result" 219 | } 220 | ], 221 | "source": [ 222 | "u = (x+4)**3\n", 223 | "\n", 224 | "h = parse(\"h\")\n", 225 | "\n", 226 | "expr = u + 2*u*h + 4*u*h**2\n", 227 | "ccm(expr)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "Obviously, that doesn't lead to great code. In particular, the redundancy is carried through to the code side.\n", 235 | "\n", 236 | "There is a mechanism to prevent this redundancy. Individual parts of an expression can be tagged as \"common subexpressions\"." 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 6, 242 | "metadata": { 243 | "collapsed": false 244 | }, 245 | "outputs": [ 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | "_cse0 = pow(x + 4, 3)\n", 251 | "_cse0 + 4 * _cse0 * h * h + 2 * _cse0 * h\n" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "from pymbolic.primitives import CommonSubexpression as CSE\n", 257 | "\n", 258 | "u = CSE((x+4)**3)\n", 259 | "\n", 260 | "h = parse(\"h\")\n", 261 | "\n", 262 | "expr = u + 2*u*h + 4*u*h**2\n", 263 | "\n", 264 | "result = ccm(expr)\n", 265 | "\n", 266 | "for name, value in ccm.cse_name_list:\n", 267 | " print(name, \"=\", value)\n", 268 | " \n", 269 | "print(result)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "(These names can be customized, in case you're wondering.)" 277 | ] 278 | } 279 | ], 280 | "metadata": { 281 | "kernelspec": { 282 | "display_name": "Python 3", 283 | "language": "python", 284 | "name": "python3" 285 | }, 286 | "language_info": 
{ 287 | "codemirror_mode": { 288 | "name": "ipython", 289 | "version": 3 290 | }, 291 | "file_extension": ".py", 292 | "mimetype": "text/x-python", 293 | "name": "python", 294 | "nbconvert_exporter": "python", 295 | "pygments_lexer": "ipython3", 296 | "version": "3.5.0+" 297 | } 298 | }, 299 | "nbformat": 4, 300 | "nbformat_minor": 0 301 | } 302 | -------------------------------------------------------------------------------- /02-languages/06-interoperating-with-sympy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | " # Interacting with `sympy`\n", 8 | " \n", 9 | "`pymbolic` can help take care of many *structural* transformations on your expression trees with great ease. Its main purpose is to help with program transformation after all, not to be a full computer algebra system. That said, if you need a full computer algebra system for things like calculus and simplification, it's easy to get your expressions converted between `pymbolic` and `sympy`:" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": false 17 | }, 18 | "outputs": [ 19 | { 20 | "name": "stderr", 21 | "output_type": "stream", 22 | "text": [ 23 | "/usr/lib/python3/dist-packages/sympy/core/function.py:105: DeprecationWarning: inspect.getargspec() is deprecated, use inspect.signature() instead\n", 24 | " evalargspec = inspect.getargspec(cls.eval)\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "import sympy as sp\n", 30 | "from pymbolic import var\n", 31 | "\n", 32 | "x = var(\"x\")\n", 33 | "y = var(\"y\")" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 6, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "(x**2 + 2*x + 1) / (x**2 + x)\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "expr 
= (x**2 + 2*x + 1)/(x**2 + x)\n", 53 | "print(expr)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "Let's import pymbolic's sympy interoperability code." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 7, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "# pymbolic.interop.sympy in newer versions of pymbolic\n", 72 | "from pymbolic.sympy_interface import (\n", 73 | " PymbolicToSympyMapper, SympyToPymbolicMapper)\n", 74 | "\n", 75 | "p2s = PymbolicToSympyMapper()\n", 76 | "s2p = SympyToPymbolicMapper()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 8, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "(x**2 + 2*x + 1)/(x**2 + x)\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "sympy_expr = p2s(expr)\n", 96 | "print(sympy_expr)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 9, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "(x + 1)/x\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "sympy_result = sp.cancel(sympy_expr)\n", 116 | "print(sympy_result)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 10, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "x**(-1)*(1 + x)\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "result = s2p(sympy_result)\n", 136 | "print(result)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "One thing to note is that `PymbolicToSympyMapper` is a regular `pymbolic` mapper, and its behavior can be overridden in case something about the translation to sympy is not quite what you 
want.\n", 144 | "\n", 145 | "`SympyToPymbolicMapper` also behaves very similarly (and can be overridden similarly) although it is not entirely the same kind of mapper." 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": true 153 | }, 154 | "outputs": [], 155 | "source": [] 156 | } 157 | ], 158 | "metadata": { 159 | "kernelspec": { 160 | "display_name": "Python 3", 161 | "language": "python", 162 | "name": "python3" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 3 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython3", 174 | "version": "3.5.0+" 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 0 179 | } 180 | -------------------------------------------------------------------------------- /02-languages/08-practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Practice: Apply the chain rule" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "1. Define a custom expression node `Derivative(expr, v)` that symbolically represents taking a derivative of an expression `expr` with respect to variable `v`.\n", 15 | "1. Now suppose that, in order to take a derivative by a coordinate `x` (given), what your code actually has to do is consider the derivative in a *reference coordinate system* consisting of coordinates `r` and `s` and therefore needs to apply the chain rule identity\n", 16 | "\n", 17 | "$$ \\frac{d\\text{expr}}{dx} = \\frac{d\\text{expr}}{dr}\\frac{dr}{dx} + \\frac{d\\text{expr}}{ds}\\frac{ds}{dx}$$\n", 18 | "\n", 19 | "Write a `ChainRuleMapper` that applies this identity." 
20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 7, 25 | "metadata": { 26 | "collapsed": false 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "from pymbolic import var\n", 31 | "from pymbolic.primitives import Expression\n", 32 | "from pymbolic.mapper import IdentityMapper\n", 33 | "\n", 34 | "x = var(\"x\")\n", 35 | "r = var(\"r\")\n", 36 | "s = var(\"s\")" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 8, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "class Derivative(Expression):\n", 48 | " # ...\n", 49 | " pass" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "To avoid conflicts with a `Derivative` node type that's already part of pymbolic, we call our mapper method `map_deriv`." 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 32, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "#clear\n", 68 | "# Solution\n", 69 | "\n", 70 | "class Derivative(Expression):\n", 71 | " def __init__(self, expr, v):\n", 72 | " self.expr = expr\n", 73 | " self.v = v\n", 74 | "\n", 75 | " def __getinitargs__(self):\n", 76 | " return (self.expr, self.v)\n", 77 | "\n", 78 | " mapper_method = \"map_deriv\"" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 33, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Call(Variable('sqrt'), (Derivative(Sum((Product((27, Power(Variable('x'), 2))), Call(Variable('exp'), (Variable('x'),)))), Variable('x')),))\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "expr = var(\"sqrt\")(Derivative(27*x**2+var(\"exp\")(x), x))\n", 98 | "print(repr(expr))" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 34, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [], 108 | "source": [ 109 
| "class ChainRuleMapper(IdentityMapper):\n", 110 | " # ...\n", 111 | " pass" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 37, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "#clear\n", 123 | "# Solution\n", 124 | "\n", 125 | "class ChainRuleMapper(IdentityMapper):\n", 126 | " def map_deriv(self, expr):\n", 127 | " return sum(Derivative(expr, ref_sym)*Derivative(ref_sym, x) for ref_sym in [r,s])" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "Now let's test this mapper:" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 38, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/plain": [ 147 | "Call(Variable('sqrt'), (Sum((Product((Derivative(Derivative(Sum((Product((27, Power(Variable('x'), 2))), Call(Variable('exp'), (Variable('x'),)))), Variable('x')), Variable('r')), Derivative(Variable('r'), Variable('x')))), Product((Derivative(Derivative(Sum((Product((27, Power(Variable('x'), 2))), Call(Variable('exp'), (Variable('x'),)))), Variable('x')), Variable('s')), Derivative(Variable('s'), Variable('x')))))),))" 148 | ] 149 | }, 150 | "execution_count": 38, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "crm = ChainRuleMapper()\n", 157 | "crm(expr)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "In case you are wondering why we can only use the 'clumsy', parenthesis-heavy form of the printed expression, it's because we haven't told pymbolic how to write out the shorter form. 
Here's how that can be done:" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 48, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "sqrt(d(d((27*x**2 + exp(x)))/dx)/dr*d(r)/dx + d(d((27*x**2 + exp(x)))/dx)/ds*d(s)/dx)\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "from pymbolic.mapper.stringifier import StringifyMapper, PREC_PRODUCT\n", 184 | "\n", 185 | "class MyStringifyMapper(StringifyMapper):\n", 186 | " def map_deriv(self, expr, enclosing_prec):\n", 187 | " return \"d(%s)/d%s\" % (\n", 188 | " self.rec(expr.expr, PREC_PRODUCT), \n", 189 | " self.rec(expr.v, PREC_PRODUCT))\n", 190 | " \n", 191 | "def stringifier(self):\n", 192 | " return MyStringifyMapper\n", 193 | "\n", 194 | "Derivative.stringifier = stringifier\n", 195 | "print(crm(expr))" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": false 203 | }, 204 | "outputs": [], 205 | "source": [] 206 | } 207 | ], 208 | "metadata": { 209 | "kernelspec": { 210 | "display_name": "Python 3", 211 | "language": "python", 212 | "name": "python3" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 3 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": "python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython3", 224 | "version": "3.5.0+" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 0 229 | } 230 | -------------------------------------------------------------------------------- /02-languages/README.rst: -------------------------------------------------------------------------------- 1 | **Tip:** ``ipynb`` files can be viewed on Github. Just click them. 
2 | -------------------------------------------------------------------------------- /02-languages/gvmagic.py: -------------------------------------------------------------------------------- 1 | """ 2 | Graphviz IPython magic extensions 3 | 4 | Magic methods: 5 | %dot 6 | %%dot 8 | %dotstr "" 9 | %dotobj obj.to_dot() 10 | %dotobjs obj[0].to_dot(), obj[1].to_dot(), ... 11 | 12 | also: %twopi, %neato, %sfdp, %fdp, and %circo magic families. 13 | 14 | Usage: 15 | %load_ext gvmagic 16 | """ 17 | 18 | 19 | from logging import info, error 20 | from subprocess import Popen, PIPE 21 | 22 | from IPython.display import display, SVG 23 | from IPython.core.magic import Magics 24 | from IPython.core.magic import line_cell_magic 25 | from IPython.core.magic import line_magic 26 | from IPython.core.magic import magics_class 27 | 28 | def show_svg(d): 29 | display(SVG(data=d)) 30 | 31 | def run_graphviz(s, layout_engine='dot'): 32 | """Execute dot with a layout and return a raw SVG image, or None.""" 33 | cmd = ['dot', '-Tsvg', '-K', layout_engine] 34 | 35 | dot = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE) 36 | stdoutdata, stderrdata = dot.communicate(s.encode('utf-8')) 37 | status = dot.wait() 38 | if status == 0: 39 | return stdoutdata 40 | else: 41 | fstr = "dot returned {}\n[==== stderr ====]\n{}" 42 | error(fstr.format(status, stderrdata.decode('utf-8'))) 43 | return None 44 | 45 | 46 | @magics_class 47 | class GraphvizMagics(Magics): 48 | 49 | @line_cell_magic 50 | def dot(self, line, cell=None): 51 | self._from_cell(line, cell, 'dot') 52 | 53 | @line_magic 54 | def dotstr(self, line): 55 | self._from_str(line, 'dot') 56 | 57 | @line_magic 58 | def dotobj(self, line): 59 | self._from_obj(line, 'dot') 60 | 61 | @line_magic 62 | def dotobjs(self, line): 63 | self._from_objs(line, 'dot') 64 | 65 | @line_cell_magic 66 | def neato(self, line, cell=None): 67 | self._from_cell(line, cell, 'neato') 68 | 69 | @line_magic 70 | def neatostr(self, line): 71 | self._from_str(line, 
'neato') 72 | 73 | @line_magic 74 | def neatoobj(self, line): 75 | self._from_obj(line, 'neato') 76 | 77 | @line_magic 78 | def neatoobjs(self, line): 79 | self._from_objs(line, 'neato') 80 | 81 | @line_cell_magic 82 | def sfdp(self, line, cell=None): 83 | self._from_cell(line, cell, 'sfdp') 84 | 85 | @line_magic 86 | def sfdpstr(self, line): 87 | self._from_str(line, 'sfdp') 88 | 89 | @line_magic 90 | def sfdpobj(self, line): 91 | self._from_obj(line, 'sfdp') 92 | 93 | @line_magic 94 | def sfdpobjs(self, line): 95 | self._from_objs(line, 'sfdp') 96 | 97 | @line_cell_magic 98 | def fdp(self, line, cell=None): 99 | self._from_cell(line, cell, 'fdp') 100 | 101 | @line_magic 102 | def fdpstr(self, line): 103 | self._from_str(line, 'fdp') 104 | 105 | @line_magic 106 | def fdpobj(self, line): 107 | self._from_obj(line, 'fdp') 108 | 109 | @line_magic 110 | def fdpobjs(self, line): 111 | self._from_objs(line, 'fdp') 112 | 113 | @line_cell_magic 114 | def twopi(self, line, cell=None): 115 | self._from_cell(line, cell, 'twopi') 116 | 117 | @line_magic 118 | def twopistr(self, line): 119 | self._from_str(line, 'twopi') 120 | 121 | @line_magic 122 | def twopiobj(self, line): 123 | self._from_obj(line, 'twopi') 124 | 125 | @line_magic 126 | def twopiobjs(self, line): 127 | self._from_objs(line, 'twopi') 128 | 129 | @line_cell_magic 130 | def circo(self, line, cell=None): 131 | self._from_cell(line, cell, 'circo') 132 | 133 | @line_magic 134 | def circostr(self, line): 135 | self._from_str(line, 'circo') 136 | 137 | @line_magic 138 | def circoobj(self, line): 139 | self._from_obj(line, 'circo') 140 | 141 | @line_magic 142 | def circoobjs(self, line): 143 | self._from_objs(line, 'circo') 144 | 145 | def _from_cell(self, line, cell, layout_engine): 146 | if cell is None: 147 | s = line 148 | else: 149 | s = line + '\n' + cell 150 | data = run_graphviz(s, layout_engine) 151 | if data: 152 | show_svg(data) 153 | 154 | def _from_str(self, line, layout_engine): 155 | s = 
self.shell.ev(line) 156 | data = run_graphviz(s, layout_engine) 157 | if data: 158 | show_svg(data) 159 | 160 | def _from_obj(self, line, layout_engine): 161 | obj = self.shell.ev(line) 162 | try: 163 | s = obj.to_dot() 164 | except AttributeError: 165 | error("expected object to implement 'to_dot()' method") 166 | except TypeError: 167 | error("expected to_dot method to be callable w/o args") 168 | else: 169 | data = run_graphviz(s, layout_engine) 170 | if data: 171 | show_svg(data) 172 | 173 | def _from_objs(self, line, layout_engine): 174 | """dot objects magic""" 175 | objs = self.shell.ev(line) 176 | for i, obj in enumerate(objs): 177 | try: 178 | s = obj.to_dot() 179 | except AttributeError: 180 | error("expected object to implement 'to_dot()' method") 181 | except TypeError: 182 | error("expected to_dot method to be callable w/o args") 183 | else: 184 | data = run_graphviz(s, layout_engine) 185 | if data: 186 | info("object {}:".format(i)) 187 | show_svg(data) 188 | 189 | 190 | def load_ipython_extension(ipython): 191 | """Load the extension in IPython.""" 192 | ipython.register_magics(GraphvizMagics) 193 | 194 | 195 | def unload_ipython_extension(ipython): 196 | """Unload the extension in IPython.""" 197 | pass 198 | -------------------------------------------------------------------------------- /03-opencl/0-slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/03-opencl/0-slides.pdf -------------------------------------------------------------------------------- /03-opencl/1-1-hello-pyopencl.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Hello PyOpenCL" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 
15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pyopencl as cl\n", 19 | "import numpy as np\n", 20 | "import numpy.linalg as la\n", 21 | "\n", 22 | "mf = cl.mem_flags" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "This notebook demonstrates the simplest PyOpenCL workflow that touches all essential pieces:\n", 30 | "\n", 31 | "* Data transfer\n", 32 | "* Kernel compilation\n", 33 | "* Execution" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "a = np.random.rand(50000).astype(np.float32)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "Now create a context `ctx` and a command queue `queue`:" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "#clear\n", 63 | "ctx = cl.create_some_context()\n", 64 | "\n", 65 | "queue = cl.CommandQueue(ctx)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "Now allocate a buffer. 
`Buffer(context, flags, size=None, hostbuf=None)`" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "#clear\n", 84 | "a_buf = cl.Buffer(ctx, mf.READ_WRITE, size=a.nbytes)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Then transfer data:" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "" 105 | ] 106 | }, 107 | "execution_count": 5, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "#clear\n", 114 | "cl.enqueue_copy(queue, a_buf, a)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Here's our kernel source code:" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 6, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "prg = cl.Program(ctx, \"\"\"\n", 133 | " __kernel void twice(__global float *a)\n", 134 | " {\n", 135 | " int gid = get_global_id(0);\n", 136 | " a[gid] = 2*a[gid];\n", 137 | " }\n", 138 | " \"\"\").build()" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "Run the kernel." 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 7, 151 | "metadata": { 152 | "collapsed": false 153 | }, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "" 159 | ] 160 | }, 161 | "execution_count": 7, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "#clear\n", 168 | "prg.twice(queue, a.shape, None, a_buf)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "Copy the data back." 
176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 8, 181 | "metadata": { 182 | "collapsed": false 183 | }, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "" 189 | ] 190 | }, 191 | "execution_count": 8, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "#clear\n", 198 | "result = np.empty_like(a)\n", 199 | "\n", 200 | "cl.enqueue_copy(queue, result, a_buf)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "Check the result." 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 9, 213 | "metadata": { 214 | "collapsed": false 215 | }, 216 | "outputs": [ 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "0.0 128.816\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "#clear\n", 227 | "print(la.norm(result - 2*a), la.norm(a))" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "collapsed": false 235 | }, 236 | "outputs": [], 237 | "source": [] 238 | } 239 | ], 240 | "metadata": { 241 | "kernelspec": { 242 | "display_name": "Python 3", 243 | "language": "python", 244 | "name": "python3" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.5.1+" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 0 261 | } 262 | -------------------------------------------------------------------------------- /03-opencl/1-3-exercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PyOpenCL: An exercise" 8 | ] 9 | }, 10 | { 11 | 
"cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pyopencl as cl\n", 19 | "import numpy as np\n", 20 | "import numpy.linalg as la\n", 21 | "import pyopencl.array\n", 22 | "import pyopencl.clrandom" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "Change the code below to:\n", 30 | " \n", 31 | "* Compute $c_i = a_ib_i$\n", 32 | "* Use work groups of $16\\times 16$ items\n", 33 | "* Benchmark $1\\times 1$ workgroups against $16\\times 16$ workgroups\n", 34 | "\n", 35 | " * Use `time()` from the `time` module. (i.e. `import time`)\n", 36 | " * Use `queue.finish()`." 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "ctx = cl.create_some_context()\n", 48 | "queue = cl.CommandQueue(ctx)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "a = np.random.rand(1024, 1024).astype(np.float32)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "prg = cl.Program(ctx, \"\"\"\n", 71 | " __kernel void twice(__global float *a)\n", 72 | " {\n", 73 | " int gid0 = get_global_id(0);\n", 74 | " int gid1 = get_global_id(1);\n", 75 | " int i = gid1 * 1024 + gid0;\n", 76 | " a[i] = 2*a[i];\n", 77 | " }\n", 78 | " \"\"\").build()\n", 79 | "twice = prg.twice" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "" 93 | ] 94 | }, 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "a_dev = 
cl.array.to_device(queue, a)\n", 102 | "twice(queue, a_dev.shape, None, a_dev.data)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [ 112 | { 113 | "name": "stdout", 114 | "output_type": "stream", 115 | "text": [ 116 | "0.0 591.347\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "print(la.norm(a_dev.get() - 2*a), la.norm(a))" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.5.0+" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 0 155 | } 156 | -------------------------------------------------------------------------------- /03-opencl/1-4-ipython-magic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PyOpenCL: Experimenting in IPython" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "from __future__ import division\n", 19 | "import numpy as np\n", 20 | "import pyopencl as cl\n", 21 | "import pyopencl.array" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Load the PyOpenCL IPython extension:" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": { 35 
| "collapsed": false 36 | }, 37 | "outputs": [ 38 | { 39 | "name": "stderr", 40 | "output_type": "stream", 41 | "text": [ 42 | "/usr/lib/python3/dist-packages/IPython/utils/traitlets.py:504: DeprecationWarning: inspect.getargspec() is deprecated, use inspect.signature() instead\n", 43 | " argspec = inspect.getargspec(c)\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "%load_ext pyopencl.ipython_ext" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Create an OpenCL context and a command queue:" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "ctx = cl.create_some_context()\n", 67 | "queue = cl.CommandQueue(ctx)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Using the kernel 'magic'\n", 75 | "\n", 76 | "Define an OpenCL kernel using the `%%cl_kernel` magic:" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 4, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "%%cl_kernel\n", 88 | "\n", 89 | "__kernel void sum_vector(__global const float *a,\n", 90 | "__global const float *b, __global float *c)\n", 91 | "{\n", 92 | " int gid = get_global_id(0);\n", 93 | " c[gid] = a[gid] + b[gid];\n", 94 | "}" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "This looks for `cl_ctx` or `ctx` in the user namespace to find a PyOpenCL context.\n", 102 | "\n", 103 | "Kernel names are automatically injected into the user namespace, so we can just use `sum_vector` from Python below.\n", 104 | "\n", 105 | "Now create some data to work on:" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 5, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "n = 10000\n", 117 | "\n", 118 | "a = 
cl.array.empty(queue, n, dtype=np.float32)\n", 119 | "a.fill(15)\n", 120 | "\n", 121 | "b_host = np.random.randn(n).astype(np.float32)\n", 122 | "b = cl.array.to_device(queue, b_host)\n", 123 | "\n", 124 | "c = cl.array.empty_like(a)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "Run the kernel:" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 6, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "" 145 | ] 146 | }, 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "sum_vector(queue, (n,), None, a.data, b.data, c.data)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "Check the result using `numpy` operations:" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 7, 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "assert (c.get() == b_host + 15).all()" 172 | ] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "Python 3", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.5.0+" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 0 196 | } 197 | -------------------------------------------------------------------------------- /03-opencl/2-1-elementwise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PyOpenCL Parallel Patterns: 
Map/Elementwise" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Setup code" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import pyopencl as cl\n", 26 | "import pyopencl.array\n", 27 | "import pyopencl.clrandom\n", 28 | "import numpy as np" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "ctx = cl.create_some_context()\n", 40 | "queue = cl.CommandQueue(ctx)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "n = 10**7\n", 52 | "a = cl.clrandom.rand(queue, n, np.float32)\n", 53 | "b = cl.clrandom.rand(queue, n, np.float32)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "## A simple 'target application'" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "We would like to evaluate this linear combination:" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "c1 = 5*a + 6*b" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "A problem with this is that every single operator (all three of them--and easily more for complicated expressions) corresponds to a kernel call, which can lead to high overhead. 
Let's try and avoid that by stuffing the entire operation into one kernel, in turn saving lots of memory traffic:" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 5, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "from pyopencl.elementwise import ElementwiseKernel" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 6, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "#clear\n", 108 | "lin_comb = ElementwiseKernel(ctx,\n", 109 | "\n", 110 | " \"float a, float *x, float b, float *y, float *c\",\n", 111 | "\n", 112 | " \"c[i] = a*x[i] + b*y[i]\")" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 7, 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "" 126 | ] 127 | }, 128 | "execution_count": 7, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "c2 = cl.array.empty_like(a)\n", 135 | "lin_comb(5, a, 6, b, c2)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "0.0\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "import numpy.linalg as la\n", 155 | "print(la.norm(c1.get() - c2.get()))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "## Timing ElementwiseKernel\n", 163 | "\n", 164 | "Did this optimization pay off?" 
165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 9, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "elapsed: 5.4626686573028564 s\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "from time import time\n", 184 | "queue.finish()\n", 185 | "start_time = time()\n", 186 | "\n", 187 | "for i in range(10):\n", 188 | " c1 = 5*a + 6*b\n", 189 | " \n", 190 | "queue.finish()\n", 191 | "print(\"elapsed: {0} s\".format(time()-start_time))" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 10, 197 | "metadata": { 198 | "collapsed": false 199 | }, 200 | "outputs": [ 201 | { 202 | "name": "stdout", 203 | "output_type": "stream", 204 | "text": [ 205 | "elapsed: 2.354213237762451 s\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "from time import time\n", 211 | "queue.finish()\n", 212 | "start_time = time()\n", 213 | "\n", 214 | "for i in range(10):\n", 215 | " lin_comb(5, a, 6, b, c2)\n", 216 | " \n", 217 | "queue.finish()\n", 218 | "print(\"elapsed: {0} s\".format(time()-start_time))" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": false 226 | }, 227 | "outputs": [], 228 | "source": [] 229 | } 230 | ], 231 | "metadata": { 232 | "kernelspec": { 233 | "display_name": "Python 3", 234 | "language": "python", 235 | "name": "python3" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 3 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython3", 247 | "version": "3.5.0+" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 0 252 | } 253 | -------------------------------------------------------------------------------- /03-opencl/2-2-reduction.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PyOpenCL Parallel Patterns: Reduction" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Setup Code" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import pyopencl as cl\n", 26 | "import pyopencl.array\n", 27 | "import pyopencl.clrandom\n", 28 | "import numpy as np" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "ctx = cl.create_some_context()\n", 40 | "queue = cl.CommandQueue(ctx)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "n = 10**7\n", 52 | "x = cl.clrandom.rand(queue, n, np.float64)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Setting up the kernel: Computing a sum of squares\n", 60 | "\n", 61 | "Want to compute the sum of the squares of all entries in `x`.\n", 62 | "\n", 63 | "First, using `numpy`, as `result1` (watch out: `.get()`)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "#clear\n", 75 | "result1 = np.sum(x.get()**2)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "Then, using PyOpenCL:" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 5, 88 | "metadata": { 89 | "collapsed": false 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "from pyopencl.reduction import ReductionKernel" 94 | ] 95 | }, 96 | { 97 | "cell_type": 
"markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "Syntax:\n", 101 | "\n", 102 | "ReductionKernel(context, dtype, neutral, reduce_expr, map_expr, arguments)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "#clear\n", 114 | "rknl = ReductionKernel(ctx, np.float64,\n", 115 | " neutral=\"0\",\n", 116 | " reduce_expr=\"a+b\", map_expr=\"x[i]*x[i]\",\n", 117 | " arguments=\"double *x\")" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## Testing the result" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 7, 130 | "metadata": { 131 | "collapsed": false 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "result2 = rknl(x)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "pyopencl.array.Array" 149 | ] 150 | }, 151 | "execution_count": 8, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "type(result2)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 9, 163 | "metadata": { 164 | "collapsed": false 165 | }, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "()" 171 | ] 172 | }, 173 | "execution_count": 9, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "#clear\n", 180 | "result2.shape" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "Now check the result:" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 10, 193 | "metadata": { 194 | "collapsed": false 195 | }, 196 | "outputs": [ 197 | { 198 | "name": "stdout", 199 | "output_type": "stream", 200 | "text": [ 201 | 
"9.31322574615e-10\n" 202 | ] 203 | } 204 | ], 205 | "source": [ 206 | "#clear\n", 207 | "print(result2.get()-result1)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "* Change this to find maximum.\n", 215 | "* Works on structured types, too.\n", 216 | "* What if you wanted to find maximum *and* location?" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": true 224 | }, 225 | "outputs": [], 226 | "source": [] 227 | } 228 | ], 229 | "metadata": { 230 | "kernelspec": { 231 | "display_name": "Python 3", 232 | "language": "python", 233 | "name": "python3" 234 | }, 235 | "language_info": { 236 | "codemirror_mode": { 237 | "name": "ipython", 238 | "version": 3 239 | }, 240 | "file_extension": ".py", 241 | "mimetype": "text/x-python", 242 | "name": "python", 243 | "nbconvert_exporter": "python", 244 | "pygments_lexer": "ipython3", 245 | "version": "3.5.1+" 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 0 250 | } 251 | -------------------------------------------------------------------------------- /03-opencl/2-2a-monte-carlo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Monte Carlo Method\n", 8 | "\n", 9 | "As a simple example of a Monte Carlo method, we will approximate the value of $\\pi$:" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "import pyopencl as cl\n", 22 | "import pyopencl.array\n", 23 | "import pyopencl.clrandom" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "ctx = cl.create_some_context()\n", 35 | "queue = 
cl.CommandQueue(ctx)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### Boilerplate for Random Number Generator" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "generator_preamble = \"\"\"\n", 54 | "#include \n", 55 | "\n", 56 | "typedef union {\n", 57 | " uint4 v;\n", 58 | " philox4x32_ctr_t c;\n", 59 | "} philox4x32_ctr_vec_union;\n", 60 | "\n", 61 | "\n", 62 | "uint4 philox4x32_bump(uint4 ctr)\n", 63 | "{\n", 64 | " if (++ctr.x == 0)\n", 65 | " if (++ctr.y == 0)\n", 66 | " ++ctr.z;\n", 67 | " return ctr;\n", 68 | "}\n", 69 | "\n", 70 | "uint4 philox4x32_gen(\n", 71 | " uint4 ctr,\n", 72 | " uint2 key,\n", 73 | " uint4 *new_ctr)\n", 74 | "{\n", 75 | " philox4x32_ctr_vec_union result;\n", 76 | " result.c = philox4x32(\n", 77 | " *(philox4x32_ctr_t *) &ctr,\n", 78 | " *(philox4x32_key_t *) &key);\n", 79 | " *new_ctr = philox4x32_bump(ctr);\n", 80 | " return result.v;\n", 81 | "}\n", 82 | "\n", 83 | "float4 philox4x32_f32(\n", 84 | " uint4 ctr,\n", 85 | " uint2 key,\n", 86 | " uint4 *new_ctr)\n", 87 | "{\n", 88 | " *new_ctr = ctr;\n", 89 | " return\n", 90 | " convert_float4(philox4x32_gen(*new_ctr, key, new_ctr))\n", 91 | " * 2.3283064365386963e-10f;\n", 92 | "}\n", 93 | "\"\"\"" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "### Reduction Code" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Complete the sampler code:\n", 108 | "\n", 109 | "```\n", 110 | "mc_preamble_src = \"\"\"\n", 111 | "\n", 112 | "#include \n", 113 | "\n", 114 | "float compute_sample(int i, unsigned int k1)\n", 115 | "{\n", 116 | " uint4 ctr = { 0, 1, 2, 3 };\n", 117 | " uint2 key2 = { i, k1 };\n", 118 | " float4 rng_res = philox4x32_f32(ctr, key2, &(ctr));\n", 119 | " ...\n", 120 | "}\n", 121 | "\"\"\"\n", 122 | "```" 123 
| ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 27, 128 | "metadata": { 129 | "collapsed": true 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "#clear\n", 134 | "mc_preamble_src = \"\"\"\n", 135 | "\n", 136 | "#include \n", 137 | "\n", 138 | "float compute_sample(int i, unsigned int k1)\n", 139 | "{\n", 140 | " uint4 ctr = { 0, 1, 2, 3 };\n", 141 | " uint2 key2 = { i, k1 };\n", 142 | " float4 rng_res = philox4x32_f32(ctr, key2, &(ctr));\n", 143 | " \n", 144 | " cfloat_t samp0 = cfloat_new(rng_res.s0, rng_res.s1);\n", 145 | " cfloat_t samp1 = cfloat_new(rng_res.s2, rng_res.s3);\n", 146 | " \n", 147 | " float result = 0;\n", 148 | " if (cfloat_abs(samp0) <= 1)\n", 149 | " result += 1;\n", 150 | " if (cfloat_abs(samp1) <= 1)\n", 151 | " result += 1;\n", 152 | " \n", 153 | " return result;\n", 154 | "}\n", 155 | "\"\"\"" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 28, 161 | "metadata": { 162 | "collapsed": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "from pyopencl.reduction import ReductionKernel" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "Syntax:\n", 174 | "\n", 175 | "`ReductionKernel(context, dtype, neutral, reduce_expr, map_expr, arguments)`" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 29, 181 | "metadata": { 182 | "collapsed": false 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "#clear\n", 187 | "rknl = ReductionKernel(ctx, np.float32,\n", 188 | " neutral=\"0\",\n", 189 | " reduce_expr=\"a+b\", map_expr=\"compute_sample(i, k1)\",\n", 190 | " arguments=\"unsigned int k1\", preamble=generator_preamble+mc_preamble_src)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 32, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | "3.14154656\n" 205 | ] 206 | } 207 
| ], 208 | "source": [ 209 | "n = 100000000\n", 210 | "\n", 211 | "nsamples_accepted = rknl(15, range=slice(n), queue=queue).get()\n", 212 | "nsamples = 2*n\n", 213 | "approx_pi = 4 * nsamples_accepted/nsamples\n", 214 | "\n", 215 | "print(approx_pi)" 216 | ] 217 | } 218 | ], 219 | "metadata": { 220 | "kernelspec": { 221 | "display_name": "Python 3", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.5.1+" 236 | } 237 | }, 238 | "nbformat": 4, 239 | "nbformat_minor": 0 240 | } 241 | -------------------------------------------------------------------------------- /03-opencl/2-3-scan.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PyOpenCL Parallel Patterns: Scan/Prefix Sum\n", 8 | "\n", 9 | "## Setup Code" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": false 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import pyopencl as cl\n", 21 | "import pyopencl.array\n", 22 | "import pyopencl.clrandom\n", 23 | "import numpy as np\n", 24 | "import numpy.linalg as la" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "ctx = cl.create_some_context()\n", 36 | "queue = cl.CommandQueue(ctx)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "n = 10**7\n", 48 | "x = cl.clrandom.rand(queue, n, np.float64)" 49 | ] 50 | }, 51 | { 52 | "cell_type": 
"markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## Setting up the kernel: Compute the prefix sum of squares\n", 56 | "\n", 57 | "Want to compute the prefix sum of the squares of all entries in `x`.\n", 58 | "\n", 59 | "First, using `numpy`, as `result1`:" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "#clear\n", 71 | "result1 = np.cumsum(x.get()**2)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Then, using PyOpenCL:" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "from pyopencl.scan import GenericScanKernel" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "Syntax:\n", 97 | " \n", 98 | "GSK(context, dtype, arguments, input_expr, scan_expr using `a` and `b`, neutral, output_statement with `item`)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 7, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "#clear\n", 110 | "sknl = GenericScanKernel(ctx, np.float64,\n", 111 | " arguments=\"double *y, double *x\",\n", 112 | " input_expr=\"x[i]*x[i]\",\n", 113 | " scan_expr=\"a+b\", neutral=\"0\",\n", 114 | " output_statement=\"y[i] = item\")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 8, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "" 128 | ] 129 | }, 130 | "execution_count": 8, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "result2 = cl.array.empty_like(x)\n", 137 | "sknl(result2, x)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "## 
Testing the outcome" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 9, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "0.00019364830171\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "print(la.norm(result2.get() - result1))" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "More features:\n", 171 | "\n", 172 | "* Segmented Scan\n", 173 | "* Output stencils\n", 174 | "* Works on structured types" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "kernelspec": { 180 | "display_name": "Python 3", 181 | "language": "python", 182 | "name": "python3" 183 | }, 184 | "language_info": { 185 | "codemirror_mode": { 186 | "name": "ipython", 187 | "version": 3 188 | }, 189 | "file_extension": ".py", 190 | "mimetype": "text/x-python", 191 | "name": "python", 192 | "nbconvert_exporter": "python", 193 | "pygments_lexer": "ipython3", 194 | "version": "3.5.0+" 195 | } 196 | }, 197 | "nbformat": 4, 198 | "nbformat_minor": 0 199 | } 200 | -------------------------------------------------------------------------------- /03-opencl/3-practice-expression-kernel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Practice: Generating a Simple Kernel\n", 8 | "\n", 9 | "The purpose of this practice problem is to generate a simple kernel that applies a user-supplied expression to every entry of an array. Implement a class `ExpressionKernel` that can be used as shown in the test at the end of this notebook." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": { 16 | "collapsed": false 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "import numpy.linalg as la\n", 22 | "\n", 23 | "import pyopencl as cl\n", 24 | "import pyopencl.array\n", 25 | "import pyopencl.clmath\n", 26 | "import pyopencl.clrandom" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "\n", 38 | "class ExpressionKernel:\n", 39 | " def __init__(self, cl_context, expression):\n", 40 | " # ...\n", 41 | " pass\n", 42 | " \n", 43 | " def __call__(self, queue, ary):\n", 44 | " # ...\n", 45 | " pass" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "#clear\n", 57 | "# Solution\n", 58 | "\n", 59 | "class ExpressionKernel:\n", 60 | " def __init__(self, cl_context, expression):\n", 61 | " src = \"\"\"\n", 62 | " kernel void apply(__global double *out, global double *in)\n", 63 | " {\n", 64 | " int i = get_global_id(0);\n", 65 | " double x = in[i];\n", 66 | " out[i] = RESULT;\n", 67 | " }\n", 68 | " \"\"\"\n", 69 | "\n", 70 | " from pymbolic.mapper.c_code import CCodeMapper\n", 71 | " ccm = CCodeMapper()\n", 72 | " src = src.replace(\"RESULT\", ccm(expression))\n", 73 | " self.prg = cl.Program(cl_context, src).build()\n", 74 | " self.knl = self.prg.apply\n", 75 | "\n", 76 | " def __call__(self, queue, ary):\n", 77 | " result = cl.array.empty_like(ary)\n", 78 | " self.knl(queue, ary.shape, None, result.data, ary.data)\n", 79 | " return result" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "To test our implementation, we create a context and an array full of random numbers:" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": { 
93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "cl_context = cl.create_some_context()\n", 98 | "queue = cl.CommandQueue(cl_context)\n", 99 | "\n", 100 | "ary = cl.clrandom.rand(queue, 500, dtype=np.float64)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 6, 106 | "metadata": { 107 | "collapsed": false 108 | }, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "9.76586150045e-16\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "\n", 120 | "from pymbolic import var\n", 121 | "\n", 122 | "x = var(\"x\")\n", 123 | "eknl = ExpressionKernel(cl_context, var(\"sqrt\")(1-x**2))\n", 124 | "\n", 125 | "result = eknl(queue, ary)\n", 126 | "\n", 127 | "diff = result - cl.clmath.sqrt(1-ary**2)\n", 128 | "print(la.norm(diff.get()))" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [] 139 | } 140 | ], 141 | "metadata": { 142 | "kernelspec": { 143 | "display_name": "Python 3", 144 | "language": "python", 145 | "name": "python3" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": { 149 | "name": "ipython", 150 | "version": 3 151 | }, 152 | "file_extension": ".py", 153 | "mimetype": "text/x-python", 154 | "name": "python", 155 | "nbconvert_exporter": "python", 156 | "pygments_lexer": "ipython3", 157 | "version": "3.5.1+" 158 | } 159 | }, 160 | "nbformat": 4, 161 | "nbformat_minor": 0 162 | } 163 | -------------------------------------------------------------------------------- /03-opencl/3-practice-hermite-monte-carlo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Practice: Orthogonality of Hermite Polynomials\n", 8 | "\n", 9 | "In this exercise, modify the Monte-Carlo example to demonstrate the 
orthogonality of the two [Hermite polynomials](https://en.wikipedia.org/wiki/Hermite_polynomials)\n", 10 | "\n", 11 | "* $1$ and\n", 12 | "* $x^2-1$\n", 13 | "\n", 14 | "with respect to the weight $e^{-\\frac{x^2}2}$, i.e. show (numerically, using a Monte Carlo method) that\n", 15 | "\n", 16 | "$$\n", 17 | "\\int_{-\\infty}^\\infty 1 \\cdot (x^2-1) \\cdot e^{-\\frac{x^2}2}dx = 0\n", 18 | "$$\n", 19 | "\n", 20 | "and that\n", 21 | "\n", 22 | "$$\n", 23 | "\\int_{-\\infty}^\\infty (x^2-1)^2 \\cdot e^{-\\frac{x^2}2}dx = 2\\sqrt{2\\pi}.\n", 24 | "$$\n", 25 | "\n", 26 | "Realize that\n", 27 | "$$\n", 28 | "\\int_{-\\infty}^\\infty \\dots \\cdot \\frac{e^{-\\frac{x^2}2}}{\\sqrt{2\\pi}}dx\n", 29 | "$$\n", 30 | "can be evaluated by Monte-Carlo summation of $\\dots$ where the $x$ are normally distributed.\n", 31 | "\n", 32 | "Use the [Box-Muller transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform) to obtain normally-distributed random numbers from the uniformly distributed ones returned by PyOpenCL's random number generator.\n", 33 | "\n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Initialization" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "import numpy as np\n", 52 | "import pyopencl as cl\n", 53 | "import pyopencl.array\n", 54 | "import pyopencl.clrandom\n", 55 | "\n", 56 | "ctx = cl.create_some_context()\n", 57 | "queue = cl.CommandQueue(ctx)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "### Boilerplate for Random Number Generator" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": { 71 | "collapsed": true 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "generator_preamble = \"\"\"\n", 76 | "#include \n", 77 | "\n", 78 | "typedef union {\n", 79 | " uint4 v;\n", 80 | " 
philox4x32_ctr_t c;\n", 81 | "} philox4x32_ctr_vec_union;\n", 82 | "\n", 83 | "\n", 84 | "uint4 philox4x32_bump(uint4 ctr)\n", 85 | "{\n", 86 | " if (++ctr.x == 0)\n", 87 | " if (++ctr.y == 0)\n", 88 | " ++ctr.z;\n", 89 | " return ctr;\n", 90 | "}\n", 91 | "\n", 92 | "uint4 philox4x32_gen(\n", 93 | " uint4 ctr,\n", 94 | " uint2 key,\n", 95 | " uint4 *new_ctr)\n", 96 | "{\n", 97 | " philox4x32_ctr_vec_union result;\n", 98 | " result.c = philox4x32(\n", 99 | " *(philox4x32_ctr_t *) &ctr,\n", 100 | " *(philox4x32_key_t *) &key);\n", 101 | " *new_ctr = philox4x32_bump(ctr);\n", 102 | " return result.v;\n", 103 | "}\n", 104 | "\n", 105 | "float4 philox4x32_f32(\n", 106 | " uint4 ctr,\n", 107 | " uint2 key,\n", 108 | " uint4 *new_ctr)\n", 109 | "{\n", 110 | " *new_ctr = ctr;\n", 111 | " return\n", 112 | " convert_float4(philox4x32_gen(*new_ctr, key, new_ctr))\n", 113 | " * 2.3283064365386963e-10f;\n", 114 | "}\n", 115 | "\"\"\"" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "### Monte-Carlo code" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 36, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "#clear\n", 134 | "\n", 135 | "from mako.template import Template\n", 136 | "\n", 137 | "mc_preamble_src = Template(\"\"\"\n", 138 | "\n", 139 | "#include \n", 140 | "\n", 141 | "float compute_sample(int i, unsigned int k1)\n", 142 | "{\n", 143 | " uint4 ctr = { 0, 1, 2, 3 };\n", 144 | " uint2 key2 = { i, k1 };\n", 145 | " float4 rng_res = philox4x32_f32(ctr, key2, &(ctr));\n", 146 | " \n", 147 | " float r0 = sqrt(-2*log(rng_res.s0));\n", 148 | " float v0 = r0*cos((float) (2*M_PI) * rng_res.s1);\n", 149 | " float v1 = r0*sin((float) (2*M_PI) * rng_res.s1);\n", 150 | "\n", 151 | " float r2 = sqrt(-2*log(rng_res.s2));\n", 152 | " float v2 = r2*cos((float) (2*M_PI) * rng_res.s3);\n", 153 | " float v3 = r2*sin((float) (2*M_PI) * rng_res.s3);\n", 154 | 
" \n", 155 | " float result = 0;\n", 156 | " \n", 157 | " %for x in [\"v0\", \"v1\", \"v2\", \"v3\"]:\n", 158 | " {\n", 159 | " float x = ${x};\n", 160 | " float H2 = x*x - 1;\n", 161 | " result += H2;\n", 162 | " }\n", 163 | " %endfor\n", 164 | " \n", 165 | " return result;\n", 166 | "}\n", 167 | "\"\"\", strict_undefined=True).render()" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 37, 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "#clear\n", 179 | "\n", 180 | "from pyopencl.reduction import ReductionKernel\n", 181 | "\n", 182 | "rknl = ReductionKernel(ctx, np.float32,\n", 183 | " neutral=\"0\",\n", 184 | " reduce_expr=\"a+b\", map_expr=\"compute_sample(i, k1)\",\n", 185 | " arguments=\"unsigned int k1\", preamble=generator_preamble+mc_preamble_src)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 38, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "0.000107572192383\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "#clear\n", 205 | "n = 10000000\n", 206 | "\n", 207 | "nsamples = 4*n\n", 208 | "result = rknl(15, range=slice(n), queue=queue).get() / nsamples\n", 209 | "\n", 210 | "print(result)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": true 218 | }, 219 | "outputs": [], 220 | "source": [] 221 | } 222 | ], 223 | "metadata": { 224 | "kernelspec": { 225 | "display_name": "Python 3", 226 | "language": "python", 227 | "name": "python3" 228 | }, 229 | "language_info": { 230 | "codemirror_mode": { 231 | "name": "ipython", 232 | "version": 3 233 | }, 234 | "file_extension": ".py", 235 | "mimetype": "text/x-python", 236 | "name": "python", 237 | "nbconvert_exporter": "python", 238 | "pygments_lexer": "ipython3", 239 | "version": "3.5.1+" 240 | } 241 | }, 242 | "nbformat": 
4, 243 | "nbformat_minor": 0 244 | } 245 | -------------------------------------------------------------------------------- /03-opencl/README.rst: -------------------------------------------------------------------------------- 1 | **Tip:** ``ipynb`` files can be viewed on Github. Just click them. 2 | -------------------------------------------------------------------------------- /04-case-studies/01-indexing-and-broadcasting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Indexing and Broadcasting in Numpy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Embedding mini-languages in Python has a long tradition. In this section of the tutorial, we will explore some examples of this practice." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "The first example we will consider is so-called *broadcasting* in numpy. It may look shallow at first sight, but it and its associated operations constitute a considerable subset of the array programming language APL." 
22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "import numpy as np" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 4, 38 | "metadata": { 39 | "collapsed": false 40 | }, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "[0 1 2 3]\n", 47 | "[ 0 10 20]\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "a = np.arange(4)\n", 53 | "b = np.arange(3) * 10\n", 54 | "print(a)\n", 55 | "print(b)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 7, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "(4, 1)" 69 | ] 70 | }, 71 | "execution_count": 7, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "a.reshape(-1, 1).shape" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 10, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "array([[ 0, 10, 20],\n", 91 | " [ 1, 11, 21],\n", 92 | " [ 2, 12, 22],\n", 93 | " [ 3, 13, 23]])" 94 | ] 95 | }, 96 | "execution_count": 10, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "x = a.reshape(-1, 1) + b\n", 103 | "x" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 12, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "array([ 6, 46, 86])" 117 | ] 118 | }, 119 | "execution_count": 12, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "#clear\n", 126 | "np.sum(x, axis=0)" 127 | ] 128 | } 129 | ], 130 | "metadata": {}, 131 | "nbformat": 4, 132 | "nbformat_minor": 0 133 | } 
-------------------------------------------------------------------------------- /04-case-studies/02-einsum.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Einstein summation in numpy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "It turns out that `numpy` actually has several more mini-languages embedded in it. This next example is borrowed and slightly generalized from mathematics, where it is called Einstein summation." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "Recall that matrix-matrix multiplication can be expressed by:\n", 22 | "$$\n", 23 | "(AB)_{ij} = \\sum_k A_{ik} B_{kj}$$\n", 24 | "\n", 25 | "Einstein summation is a relatively natural way of generalizing this to arrays with multiple dimensions. The above matrix-matrix multiplication expression, for example, becomes:\n", 26 | "\n", 27 | "$$ (AB)_{ij} = A_{ik} B_{kj}$$\n", 28 | "\n", 29 | "where the implied rule is that repeated indices that are not part of the output will be summed over.\n", 30 | "\n", 31 | "numpy simply takes this convention and turns it into a way of expressing array operations:" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "import numpy as np" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "#clear\n", 54 | "A = np.random.randn(15, 20)\n", 55 | "B = np.random.randn(20, 25)\n", 56 | "\n", 57 | "AB1 = A.dot(B)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | 
"text": [ 71 | "1.21357255039e-14\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "#clear\n", 77 | "AB2 = np.einsum(\"ik,kj->ij\", A, B)\n", 78 | "\n", 79 | "print(np.linalg.norm(AB1 - AB2))" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": true 87 | }, 88 | "outputs": [], 89 | "source": [] 90 | } 91 | ], 92 | "metadata": { 93 | "kernelspec": { 94 | "display_name": "Python 3", 95 | "language": "python", 96 | "name": "python3" 97 | }, 98 | "language_info": { 99 | "codemirror_mode": { 100 | "name": "ipython", 101 | "version": 3 102 | }, 103 | "file_extension": ".py", 104 | "mimetype": "text/x-python", 105 | "name": "python", 106 | "nbconvert_exporter": "python", 107 | "pygments_lexer": "ipython3", 108 | "version": "3.5.0+" 109 | } 110 | }, 111 | "nbformat": 4, 112 | "nbformat_minor": 0 113 | } 114 | -------------------------------------------------------------------------------- /04-case-studies/03-ufl.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# UFL, the 'Unified Form Language'" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "UFL is part of FEniCS, where it is used to describe finite element problems that are to be solved using the framework. 
The appearance of the following code snippet should look sufficiently familiar, and it should be readily apparent how language such as this could be taken in and processed using the tools that we have seen:" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "#clear\n", 26 | "from dolfin import *\n", 27 | "\n", 28 | "\n", 29 | "\n", 30 | "# Create mesh and define function space\n", 31 | "\n", 32 | "mesh = UnitSquareMesh(8, 8)\n", 33 | "\n", 34 | "V = FunctionSpace(mesh, \"Lagrange\", 1)\n", 35 | "\n", 36 | "\n", 37 | "\n", 38 | "# Define boundary condition\n", 39 | "\n", 40 | "u0 = Function(V)\n", 41 | "\n", 42 | "bc = DirichletBC(V, u0, \"x[0] < DOLFIN_EPS || x[0] > 1.0 - DOLFIN_EPS\")\n", 43 | "\n", 44 | "\n", 45 | "\n", 46 | "# Define variational problem\n", 47 | "\n", 48 | "u = TrialFunction(V)\n", 49 | "\n", 50 | "v = TestFunction(V)\n", 51 | "\n", 52 | "f = Expression(\"10*exp(-(pow(x[0] - 0.5, 2) + pow(x[1] - 0.5, 2)) / 0.02)\",\n", 53 | "\n", 54 | " degree=1)\n", 55 | "\n", 56 | "g = Expression(\"sin(5*x[0])\", degree=1)\n", 57 | "\n", 58 | "a = inner(grad(u), grad(v))*dx()\n", 59 | "\n", 60 | "L = f*v*dx() + g*v*ds()" 61 | ] 62 | } 63 | ], 64 | "metadata": {}, 65 | "nbformat": 4, 66 | "nbformat_minor": 0 67 | } -------------------------------------------------------------------------------- /04-case-studies/README.rst: -------------------------------------------------------------------------------- 1 | **Tip:** ``ipynb`` files can be viewed on Github. Just click them. 
2 | -------------------------------------------------------------------------------- /05-generating-c/01-substitution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Generating an OpenCL kernel by Textual Substitution" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The simplest approach to generating code is to simply substitute snippets of text into an existing code \"template\". This can be done using the C preprocessor, simple string-based search and replace, or other string-value interpolation functionality present in the language. The example below demonstrates the latter case:" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "kernel = r\"\"\"\n", 26 | " __kernel void {name}({arguments})\n", 27 | " {{\n", 28 | " int lid = get_local_id(0);\n", 29 | " int gsize = get_global_size(0);\n", 30 | " int work_group_start = get_local_size(0)*get_group_id(0);\n", 31 | " long i;\n", 32 | "\n", 33 | " for (i = work_group_start + lid; i < n; i += gsize)\n", 34 | " {{\n", 35 | " {operation};\n", 36 | " }}\n", 37 | " }}\n", 38 | "\"\"\"" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "One slightly unfortunate fact that plays into using Python's `.format()` facility for this purpose is that opening and closing braces must be escaped by doubling them." 
46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\n", 60 | " __kernel void scale(float *y, float a, float *x)\n", 61 | " {\n", 62 | " int lid = get_local_id(0);\n", 63 | " int gsize = get_global_size(0);\n", 64 | " int work_group_start = get_local_size(0)*get_group_id(0);\n", 65 | " long i;\n", 66 | "\n", 67 | " for (i = work_group_start + lid; i < n; i += gsize)\n", 68 | " {\n", 69 | " y[i] = a*x[i];\n", 70 | " }\n", 71 | " }\n", 72 | "\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "print(kernel.format(\n", 78 | " name=\"scale\",\n", 79 | " arguments=\"float *y, float a, float *x\",\n", 80 | " operation=\"y[i] = a*x[i]\"\n", 81 | "))" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [], 91 | "source": [] 92 | } 93 | ], 94 | "metadata": {}, 95 | "nbformat": 4, 96 | "nbformat_minor": 0 97 | } -------------------------------------------------------------------------------- /05-generating-c/02-templating.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Generating an OpenCL Kernel using Textual Templating" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "A more advanced, but also less lightweight, alternative is the usage of a so-called templating engine, as it is being used to generate web pages.\n", 15 | "\n", 16 | "This offers tremendous flexibility in generation, including the possibility for full flow control, allowing applications such as loop unrolling.\n", 17 | "\n", 18 | "In the example below, we use a templating engine called 'Mako':" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 
| "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "from mako.template import Template" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "tpl = Template(r\"\"\"\n", 41 | " __kernel void ${name}(${arguments})\n", 42 | " {\n", 43 | " int lid = get_local_id(0);\n", 44 | " int gsize = get_global_size(0);\n", 45 | " int work_group_start = get_local_size(0)*get_group_id(0);\n", 46 | " long i;\n", 47 | "\n", 48 | " for (i = work_group_start + lid; i < n; i += gsize)\n", 49 | " {\n", 50 | " %for i_unroll in range(n_unroll):\n", 51 | " ${operation};\n", 52 | " %if i_unroll + 1 < n_unroll:\n", 53 | " i += gsize;\n", 54 | " %endif\n", 55 | " %endfor\n", 56 | " }\n", 57 | " }\n", 58 | "\"\"\", strict_undefined=True)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "\n", 73 | " __kernel void scale(float *y, float a, float *x)\n", 74 | " {\n", 75 | " int lid = get_local_id(0);\n", 76 | " int gsize = get_global_size(0);\n", 77 | " int work_group_start = get_local_size(0)*get_group_id(0);\n", 78 | " long i;\n", 79 | "\n", 80 | " for (i = work_group_start + lid; i < n; i += gsize)\n", 81 | " {\n", 82 | " y[i] = a*x[i];\n", 83 | " i += gsize;\n", 84 | " y[i] = a*x[i];\n", 85 | " }\n", 86 | " }\n", 87 | "\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "print(tpl.render(\n", 93 | " name=\"scale\",\n", 94 | " arguments=\"float *y, float a, float *x\",\n", 95 | " operation=\"y[i] = a*x[i]\",\n", 96 | " n_unroll=2,\n", 97 | "))" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [] 108 | } 109 | ], 110 | "metadata": {}, 111 | "nbformat": 4, 
112 | "nbformat_minor": 0 113 | } -------------------------------------------------------------------------------- /05-generating-c/03-asts.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Generating Code by building a Syntax Tree" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The last, most structured alternative for generating code is to construct the tree data structure representing the syntax, and then transforming this back into source code form.\n", 15 | "\n", 16 | "This approach to code generation is perhaps the most applicable to the programmatic generation of code, and less the generation directly by a user." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "from cgen import *" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "func = FunctionBody(\n", 39 | " FunctionDeclaration(Const(Pointer(Value(\"char\", \"greet\"))), []),\n", 40 | " Block([Statement('return \"hello world\"')])\n", 41 | " )" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "char const *greet()\n", 56 | "{\n", 57 | " return \"hello world\";\n", 58 | "}\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "print(func)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [] 74 | } 75 | ], 76 | "metadata": {}, 77 | "nbformat": 4, 78 | "nbformat_minor": 0 79 | } 
-------------------------------------------------------------------------------- /05-generating-c/04-practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Practice problem: Dimension-Independent Finite Difference Kernel\n", 8 | "\n", 9 | "A particular type of problem that is often tricky to address with a single codebase is handling varying dimensionality, i.e. for example handling 1D, 2D and 3D cases in a single code. In this problem, we will practice that for a simple finite difference code that applies a second-order centered finite difference operator:\n", 10 | "\n", 11 | "$$\n", 12 | "f''(x) \\approx \\frac{f(x+h) - 2 f(x) + f(x-h)}{h^{2}}\n", 13 | "$$\n", 14 | "along each axis, summing the results. This implements an $n$-dimensional Laplacian ($\\triangle$) or div-grad operator.\n", 15 | "\n", 16 | "To keep things simple, we will not worry about boundary conditions. Also, to keep things simple, we will assume that we have exactly 20 data points in each direction, and we will assume the grid spacing $h$ is 1." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 26, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "from mako.template import Template\n", 28 | "import pyopencl as cl\n", 29 | "import pyopencl.array\n", 30 | "import pyopencl.clrandom\n", 31 | "import numpy as np" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 27, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "tpl = Template(\"\"\"\n", 43 | " kernel void fdiff(global float *out, global float * ary)\n", 44 | " {\n", 45 | " out[...] 
= ...\n", 46 | " }\n", 47 | " \"\"\", strict_undefined=True)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 36, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "#clear\n", 59 | "# Solution\n", 60 | "\n", 61 | "\n", 62 | "\n", 63 | "tpl = Template(\"\"\"\n", 64 | "\n", 65 | " kernel void fdiff(global float *out, global float * ary)\n", 66 | "\n", 67 | " {\n", 68 | "\n", 69 | " int ibase = \n", 70 | "\n", 71 | " <% stride = 1 %>\n", 72 | "\n", 73 | " %for iax in range(dim): \n", 74 | "\n", 75 | " + (get_global_id(${iax}) + 1)*${stride}\n", 76 | "\n", 77 | " <% stride *= 20 %>\n", 78 | "\n", 79 | " %endfor\n", 80 | "\n", 81 | " ; \n", 82 | "\n", 83 | " \n", 84 | "\n", 85 | " out[ibase] = -2*${dim}*ary[ibase]\n", 86 | "\n", 87 | " <% stride = 1 %>\n", 88 | "\n", 89 | " %for iax in range(dim): \n", 90 | "\n", 91 | " + ary[ibase - ${stride}] + ary[ibase + ${stride}]\n", 92 | "\n", 93 | " <% stride *= 20 %>\n", 94 | "\n", 95 | " %endfor\n", 96 | "\n", 97 | " ;\n", 98 | "\n", 99 | " }\n", 100 | "\n", 101 | " \"\"\", strict_undefined=True)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 37, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "\n", 116 | " kernel void fdiff(global float *out, global float * ary)\n", 117 | " {\n", 118 | " int ibase = \n", 119 | " \n", 120 | " + (get_global_id(0) + 1)*1\n", 121 | " \n", 122 | " + (get_global_id(1) + 1)*20\n", 123 | " \n", 124 | " + (get_global_id(2) + 1)*400\n", 125 | " \n", 126 | " ; \n", 127 | " \n", 128 | " out[ibase] = -2*3*ary[ibase]\n", 129 | " \n", 130 | " + ary[ibase - 1] + ary[ibase + 1]\n", 131 | " \n", 132 | " + ary[ibase - 20] + ary[ibase + 20]\n", 133 | " \n", 134 | " + ary[ibase - 400] + ary[ibase + 400]\n", 135 | " \n", 136 | " ;\n", 137 | " }\n", 138 | " \n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | 
"dim = 3\n", 144 | "code = tpl.render(dim=dim)\n", 145 | "print(code)\n", 146 | "\n", 147 | "cl_context = cl.create_some_context()\n", 148 | "queue = cl.CommandQueue(cl_context)\n", 149 | "\n", 150 | "prg = cl.Program(cl_context, code).build()\n", 151 | "knl = prg.fdiff\n", 152 | "\n", 153 | "a = cl.clrandom.rand(queue, (20,)*dim, dtype=np.float32)\n", 154 | "out = cl.array.empty_like(a)\n", 155 | "\n", 156 | "knl(queue, (18,)*dim, None, out.data, a.data)\n", 157 | "queue.finish()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "collapsed": false 165 | }, 166 | "outputs": [], 167 | "source": [] 168 | } 169 | ], 170 | "metadata": {}, 171 | "nbformat": 4, 172 | "nbformat_minor": 0 173 | } -------------------------------------------------------------------------------- /05-generating-c/README.rst: -------------------------------------------------------------------------------- 1 | **Tip:** ``ipynb`` files can be viewed on Github. Just click them. 
2 | -------------------------------------------------------------------------------- /06-loopy/0-slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/06-loopy/0-slides.pdf -------------------------------------------------------------------------------- /06-loopy/03-reduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Loopy: Reductions\n", 8 | "\n", 9 | "## Setup code" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 6, 15 | "metadata": { 16 | "collapsed": false, 17 | "jupyter": { 18 | "outputs_hidden": false 19 | } 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import pyopencl as cl\n", 25 | "import pyopencl.array\n", 26 | "import pyopencl.clrandom\n", 27 | "import loopy as lp\n", 28 | "\n", 29 | "from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 7, 35 | "metadata": { 36 | "collapsed": false, 37 | "jupyter": { 38 | "outputs_hidden": false 39 | } 40 | }, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "Choose platform:\n", 47 | "[0] \n", 48 | "[1] \n" 49 | ] 50 | }, 51 | { 52 | "name": "stdin", 53 | "output_type": "stream", 54 | "text": [ 55 | "Choice [0]: \n" 56 | ] 57 | }, 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "Set the environment variable PYOPENCL_CTX='' to avoid being asked again.\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "ctx = cl.create_some_context(interactive=True)\n", 68 | "queue = cl.CommandQueue(ctx)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 8, 74 | "metadata": { 75 | "collapsed": false, 76 | 
"jupyter": { 77 | "outputs_hidden": false 78 | } 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "n = 1024\n", 83 | "a = cl.clrandom.rand(queue, (n, n), dtype=np.float32)\n", 84 | "x = cl.clrandom.rand(queue, (n,), dtype=np.float32)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "## Capturing matrix-vector multiplication" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 9, 97 | "metadata": { 98 | "collapsed": false, 99 | "jupyter": { 100 | "outputs_hidden": false 101 | } 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "knl = lp.make_kernel(\n", 106 | " \"{[i,k]: 0<=i,k`_ I 30 | presented at `Supercomputing '15 `_ in Austin. 31 | 32 | Virtual machine image 33 | --------------------- 34 | 35 | A virtual machine image is available that has all the necessary tools 36 | installed, to allow for easy experimentation. Follow these instructions 37 | to get started: 38 | 39 | 1. Download a version of VirtualBox suitable for your system and install it: 40 | 41 | https://www.virtualbox.org/wiki/Downloads 42 | 43 | 2. Download the machine image itself: 44 | 45 | http://andreask.cs.illinois.edu/tmp/dsl-tutorial.ova 46 | 47 | 3. (Optionally) Check whether the image downloaded correctly using the 48 | md5sum command line tool (Linux/OS X). On Windows, use this 49 | tool: 50 | 51 | http://www.pc-tools.net/win32/md5sums/ 52 | 53 | Compare the computed checksum with the following value: 54 | 6aa97e046293f8811d1749ab046f7f61 55 | 56 | Only proceed once the two match. If they don't, delete the file and 57 | retry the download. 58 | 59 | 4. Open VirtualBox, click "File > Import Appliance", select the 60 | downloaded image and just click "Next" a few times. Once imported, 61 | double-click on the virtual machine to make sure it starts. After a 62 | little while, you should see a simple desktop environment. 63 | 64 | 5. Once all these steps complete successfully, congratulations! You are 65 | good to go. 
I'm looking forward to seeing you at the tutorial. 66 | 67 | 6. Double-click the "Terminal" symbol on the desktop and enter:: 68 | 69 | curl -L https://bit.ly/sc15-dsl | bash 70 | 71 | This will download these materials onto the virtual machine and put them 72 | into a subdirectory called ``sc15-tutorial-materials``. Next, type:: 73 | 74 | ipython3 notebook 75 | 76 | to launch a browser-based interface and get started. 77 | 78 | Software tools 79 | -------------- 80 | 81 | The tutorial demonstrates the use of the following pieces of software: 82 | 83 | Core packages: 84 | 85 | * Python: https://www.python.org 86 | * numpy: https://www.numpy.org 87 | * pymbolic: https://github.com/inducer/pymbolic 88 | * PyOpenCL: https://github.com/pyopencl/pyopencl 89 | * loopy: https://github.com/inducer/loopy 90 | 91 | Supporting packages: 92 | 93 | * matplotlib: http://www.matplotlib.org 94 | * mako: http://www.makotemplates.org 95 | * cgen: https://github.com/inducer/cgen 96 | 97 | All open-source under MIT/BSD licenses. 98 | 99 | License 100 | ------- 101 | 102 | Copyright 2015 Andreas Kloeckner 103 | 104 | Materials are available for use under a Creative Commons CC-BY license. See 105 | included file ``LICENSE`` for details. (I.e. by and large: retain authorship 106 | information, and otherwise do what you want) 107 | -------------------------------------------------------------------------------- /assemble.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/zsh 2 | 3 | set -e 4 | setopt -o EXTENDED_GLOB 5 | 6 | unset PYTHONWARNINGS 7 | 8 | TUT_ID=dsl 9 | 10 | PDF_OUTPUT=0 11 | HTML_OUTPUT=1 12 | 13 | mkdir -p dist 14 | rm -Rf cleared 15 | 16 | ME=$(readlink -f "$0") 17 | DIR=$(dirname "$ME") 18 | MYDIR=$(cd "$DIR" && pwd) 19 | 20 | function with_echo() 21 | { 22 | echo "$@" 23 | "$@" 24 | } 25 | 26 | for nb in [0-9]*/**/*ipynb; do 27 | echo "PROCESSING $nb" 28 | DIR="$(dirname "$nb")" 29 | TRUNK="$(basename "$nb")" 30 | TRUNK="${TRUNK%.ipynb}" 31 | 32 | CONV_DIR="dist/$DIR" 33 | mkdir -p "$CONV_DIR" 34 | CONV_BASE="dist/${nb%.ipynb}" 35 | CONV_PY="${CONV_BASE}.py" 36 | CONV_HTML="${CONV_BASE}.html" 37 | CONV_PDF="${CONV_BASE}.pdf" 38 | 39 | PROCESSED_IPYNB="${CONV_BASE}.ipynb" 40 | "$MYDIR/ipython-demo-tools/prepare-ipynb" remove-marks "$nb" "$PROCESSED_IPYNB" 41 | # if ! test -f "$CONV_PY" || test "$nb" -nt "$CONV_PY"; then 42 | # jupyter-nnbconvert "$PROCESSED_IPYNB" --to=python "--output=${CONV_BASE}" 43 | # fi 44 | if [[ "$HTML_OUTPUT" = "1" ]] && (! test -f "$CONV_HTML" || test "$nb" -nt "$CONV_HTML"); then 45 | with_echo python $(which jupyter-nbconvert) "$PROCESSED_IPYNB" --to=html 46 | fi 47 | if [[ "$PDF_OUTPUT" = "1" ]] && (! 
test -f "$CONV_PDF" || test "$nb" -nt "$CONV_PDF"); then 48 | with_echo python $(which jupyter-nbconvert) "$PROCESSED_IPYNB" --to=pdf 49 | fi 50 | 51 | CONV_DIR="cleared/$DIR" 52 | with_echo mkdir -p "$CONV_DIR" 53 | CONV_IPYNB="cleared/$nb" 54 | with_echo "$MYDIR/ipython-demo-tools/prepare-ipynb" clear-output clear-marked-inputs "$nb" "$CONV_IPYNB" 55 | done 56 | function mkdir_and_cp() 57 | { 58 | dn=$(dirname "$2") 59 | with_echo mkdir -p "$dn" 60 | with_echo cp "$1" "$2" 61 | 62 | } 63 | 64 | for i in [0-9]*/**/*~*ipynb~*.pyc~*\~(#q.)(#qN); do 65 | mkdir_and_cp $i dist/$i 66 | mkdir_and_cp $i cleared/$i 67 | done 68 | for i in slides/out/[0-9]*pdf; do 69 | bn=$(basename "$i") 70 | mkdir_and_cp $i dist/${bn%.pdf}/0-slides.pdf 71 | done 72 | 73 | with_echo cp -R --reflink dist $TUT_ID-tutorial-materials 74 | with_echo cp -R --reflink cleared $TUT_ID-tutorial-materials 75 | with_echo rm -f $TUT_ID-tutorial-materials-dist.zip 76 | with_echo zip -r $TUT_ID-tutorial-materials-dist.zip $TUT_ID-tutorial-materials 77 | with_echo rm -Rf $TUT_ID-tutorial-materials 78 | 79 | -------------------------------------------------------------------------------- /aux/index.md: -------------------------------------------------------------------------------- 1 | # Tutorial Instructions 2 | 3 | ## Material 4 | 5 | * [Browse here](BROWSE_PATH) 6 | * [Browse on GitHub](GITHUB_URL) 7 | * [Download as a zip file](ZIP_NAME) 8 | 9 | ## Getting started with the Virtual Machine 10 | 11 | * Install [Virtualbox](http://virtualbox.org) 12 | * Obtain the machine image. 13 | 14 | If I have announced that it is available online, you can [download it](tutorial.ova) from here, 15 | otherwise grab it from one of the USB sticks being passed around. 16 | 17 | * Open VirtualBox, click "File > Import Appliance", select the downloaded image and just click "Next" a few times. 18 | * Once imported, double-click on the virtual machine to start using it. 
19 | * After a little while, a graphical desktop environment should appear. 20 | * Double-click the "Terminal" icon 21 | * Run the following command: 22 | 23 | ``` 24 | curl -L http://bit.ly/ak-tut-pack | bash 25 | ``` 26 | * Follow the directions on the screen. 27 | -------------------------------------------------------------------------------- /aux/ipython_config.py: -------------------------------------------------------------------------------- 1 | c = get_config() 2 | c.InteractiveShellApp.matplotlib = "inline" 3 | 4 | -------------------------------------------------------------------------------- /aux/material-email.txt: -------------------------------------------------------------------------------- 1 | Dear participant, 2 | 3 | It is my great pleasure to welcome you to the tutorial "Domain-specific 4 | languages to high performance: Code generation and transformation in Python" 5 | 6 | At the tutorial, we will be learning how to use code generation from the 7 | comfortable and powerful scripting language Python to make writing 8 | high-performance, parallel code for CPUs and GPUs easier, and, perhaps, 9 | even fun. 10 | 11 | To make sure you have a good experience and ample opportunity for 12 | experimentation at the tutorial, I would like to ask that you download 13 | and install a virtual machine image that I have prepared specifically 14 | for the tutorial. Follow these instructions to get started: 15 | 16 | 1. Download a version of VirtualBox suitable for your system and install it: 17 | 18 | https://www.virtualbox.org/wiki/Downloads 19 | 20 | 2. Download the machine image itself: 21 | 22 | http://andreask.cs.illinois.edu/tmp/dsl-tutorial.ova 23 | 24 | 3. (Optionally) Check whether the image downloaded correctly using the 25 | md5sum command line tool (Linux/OS X). 
On Windows, use this 26 | tool: 27 | 28 | http://www.pc-tools.net/win32/md5sums/ 29 | 30 | Compare the computed checksum with the following value: 31 | a49989f216970d4b8842eba566a392a6 32 | 33 | Only proceed once the two match. If they don't, delete the file and 34 | retry the download. 35 | 36 | 4. Open VirtualBox, click "File > Import Appliance", select the 37 | downloaded image and just click "Next" a few times. Once imported, 38 | double-click on the virtual machine to make sure it starts. After a 39 | little while, you should see a simple desktop environment. 40 | 41 | 5. Once all these steps complete successfully, congratulations! You are 42 | good to go. I'm looking forward to seeing you at the tutorial. 43 | 44 | See you soon, 45 | Andreas 46 | 47 | -------------------------------------------------------------------------------- /aux/pystuff-requirements.txt: -------------------------------------------------------------------------------- 1 | mako 2 | git+git://github.com/inducer/pytools 3 | git+git://github.com/inducer/pymbolic 4 | git+git://github.com/inducer/cgen 5 | git+git://github.com/inducer/genpy 6 | sympy 7 | git+git://github.com/pyopencl/pyopencl 8 | git+git://github.com/inducer/islpy 9 | git+git://github.com/inducer/loopy 10 | 11 | hg+https://bitbucket.org/inducer/f2py 12 | git+git://github.com/inducer/ply 13 | 14 | -------------------------------------------------------------------------------- /aux/sudoers: -------------------------------------------------------------------------------- 1 | Defaults exempt_group=sudo 2 | 3 | -------------------------------------------------------------------------------- /aux/time-planning.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/aux/time-planning.ods -------------------------------------------------------------------------------- /aux/tut-pack.run: 
-------------------------------------------------------------------------------- 1 | TUT_ID=MY_TUT_ID 2 | set -e 3 | if test -d $TUT_ID-tutorial-materials; then 4 | echo "------------------------------------------------------------" 5 | echo "A folder '$TUT_ID-tutorial-materials' already exists. " 6 | echo "If you would like to redownload the materials, delete the " 7 | echo "folder using" 8 | echo 9 | echo "rm -Rf $TUT_ID-tutorial-materials" 10 | echo 11 | echo "and then retry this command." 12 | echo "------------------------------------------------------------" 13 | exit 1 14 | fi 15 | 16 | URL=MYURL 17 | 18 | echo "------------------------------------------------------------" 19 | echo "One second, fetching your tutorial materials..." 20 | echo "------------------------------------------------------------" 21 | echo "Downloading..." 22 | set -x 23 | rm -f $TUT_ID-tutorial-materials-dist.zip 24 | curl -O $URL 25 | echo "Unpacking..." 26 | unzip -q $TUT_ID-tutorial-materials-dist.zip 27 | set +x 28 | 29 | echo "------------------------------------------------------------" 30 | echo "All done!" 31 | echo "------------------------------------------------------------" 32 | echo "Your tutorial materials are unpacked and ready for use," 33 | echo "right here in the subdirectory $TUT_ID-tutorial-materials." 34 | echo 35 | echo "Type the following to get started:" 36 | echo " cd $TUT_ID-tutorial-materials" 37 | echo " jupyter notebook" 38 | echo 39 | echo "Enjoy the tutorial!" 40 | -------------------------------------------------------------------------------- /aux/upload.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | TUT_ID=dsl 7 | URL=https://andreask.cs.illinois.edu/tutorial/$TUT_ID-tutorial-materials-dist.zip 8 | GITHUB_URL=https://github.com/inducer/languages-and-codegen-tutorial 9 | TGT=tiker.net:public_html/tutorial 10 | 11 | sed s,GITHUB_URL,$GITHUB_URL, index.md | \ 12 | sed s,BROWSE_PATH,$TUT_ID-tutorial-materials/dist/, | \ 13 | sed s,ZIP_NAME,$TUT_ID-tutorial-materials-dist.zip, | \ 14 | pandoc -t html - > index.html 15 | rsync -rav index.html $TGT/ 16 | rm index.html 17 | 18 | cp tut-pack.run tut-pack-custom.run 19 | sed -i s,MY_TUT_ID,$TUT_ID, tut-pack-custom.run 20 | sed -i s,MYURL,$URL, tut-pack-custom.run 21 | rsync -rav tut-pack-custom.run $TGT/tut-pack.run 22 | rm tut-pack-custom.run 23 | 24 | rsync -rav --progress ../$TUT_ID-tutorial-materials-dist.zip $TGT 25 | rsync -rav --progress --delete ../dist $TGT/$TUT_ID-tutorial-materials 26 | 27 | 28 | echo "COMPLETED" 29 | -------------------------------------------------------------------------------- /aux/video-script.txt: -------------------------------------------------------------------------------- 1 | (Title card) 2 | 3 | Hi, my name is Andreas Kloeckner, and I'm looking forward to having you join me 4 | for my tutorial "From description to code generation: building high-performance 5 | tools in Python" at Supercomputing 2015. 6 | 7 | The harsh reality about high-performance computing, to my mind, is that, while 8 | the mathematical ideas are often quite simple, the code that ultimately 9 | expresses them is very much not. 10 | 11 | (SCREENSHOT OF COMPLICATED CODE) 12 | 13 | And so the constant fight in doing scientific computing with an ambition for 14 | high-performance is to manage this incidental complexity. 15 | 16 | The idea that is easy in theory, but tricky in practice, is to keep the amount 17 | of code used to specify the computation commensurate with the complexity of the 18 | mathematical ideas. 
19 | 20 | Abstraction, of course, is the mathematical and computer science tool to make 21 | that happen. And it is often easy to build abstractions, just think of the 22 | matrix and vector objects in Matlab or Python's numpy. The crux is, though, 23 | that these abstractions are rarely free in terms of computational resources. 24 | 25 | So this tutorial is about a set of Python-based open-source tools designed to help with that. 26 | 27 | * It all starts with the capability to design domain-specific languages. 28 | 29 | * The next important step is to enrich and transform these languages in ways 30 | that gradually add more implementation detail. 31 | 32 | * Once there is enough detail for the model to be implemented, code needs to be 33 | generated and actually executed, most likely on high-performance devices such 34 | as GPUs. 35 | 36 | ----------------- 37 | 38 | To make this as informative, enlightening, and entertaining as possible, we 39 | will do this as follows: 40 | 41 | The tutorial is organized as a sequence of interactive "notebooks", each 42 | consisting of code interleaved with text and images. As we go, we will work 43 | through these notebooks, and you will see the tools in action, doing the things 44 | that they were designed to do. Some key bits of code we will write together as 45 | a group, and other times you will work through small practice problems on your 46 | own or in small groups. 47 | 48 | I will provide a virtual machine image for you that contains all the software you need to follow along. 49 | 50 | ------------------- 51 | 52 | First, I will show you a few bits and pieces of Python, an approachable 53 | high-level language that we will use as a foundation for our work. This should 54 | be enough to keep you going during the tutorial, even if you have never 55 | programmed in Python. As long as you have programmed before, you should be fine. 
56 | 57 | Next, we will learn how to build and transform expression trees, the natural 58 | way of capturing mathematical ideas on a computer. This can help describe many 59 | things, from a PDE to an image processing algorithm. 60 | 61 | Next, we will worry about executing high-performance code from Python. To do 62 | so, we will use PyOpenCL, a Python package that comfortably lets us execute C 63 | code on GPUs and CPUs. 64 | 65 | Next, we will think about how to actually generate that code, at runtime, right 66 | before it gets used. To get started with something simple, we can piece 67 | together code like Legos from premade pieces. This is very easy and already 68 | quite versatile. 69 | 70 | Finally, we will investigate a tool called loopy that can help with more 71 | challenging code generation tasks where you would like to wring the last ounce 72 | of performance out of a machine. 73 | 74 | ------------------- 75 | 76 | And that's it. At the end of the tutorial, my goal is for you to have a good 77 | grasp of how to build tools that go from a description all the way down to 78 | high-performance code. 79 | 80 | I am looking forward to seeing you there! 
-------------------------------------------------------------------------------- /aux/vm-requirements.txt: -------------------------------------------------------------------------------- 1 | clang-3.8 2 | pocl 3 | with LLC_HOST_CPU=i686 LLVM_CONFIG=/usr/bin/llvm-config-3.8 ./configure --prefix=/opt/pocl-build 4 | graphviz 5 | 6 | see pystuff-requirements.txt 7 | 8 | dd zeros 9 | -------------------------------------------------------------------------------- /prepare-all-notebooks.sh: -------------------------------------------------------------------------------- 1 | ipython-demo-tools/prepare-all-notebooks.sh -------------------------------------------------------------------------------- /slides/.gitignore: -------------------------------------------------------------------------------- 1 | *.nav 2 | *.aux 3 | out/*.pdf 4 | *.toc 5 | *.snm 6 | *.log 7 | *~ 8 | .*.swp 9 | *.out 10 | *.emergency 11 | pic-stuttgart.pdf 12 | *.avi 13 | *.mov 14 | *.mpg 15 | *.vrb 16 | *.bbl 17 | *.blg 18 | .sw[op] 19 | out/ 20 | simula 21 | -------------------------------------------------------------------------------- /slides/06-loopy.tex: -------------------------------------------------------------------------------- 1 | \documentclass[english,compress]{beamer} 2 | \nonstopmode% 3 | \input{settings} 4 | 5 | \logoenable% 6 | 7 | \pgfdeclarelayer{grid} 8 | \pgfsetlayers{background,grid,main,foreground} 9 | \def\intd{\, d} 10 | 11 | \def\bigncentered#1{ 12 | \begin{center} 13 | \Huge\bfseries #1 14 | \end{center} 15 | } 16 | 17 | \begin{document} 18 | 19 | \title{% 20 | Part 6: Loopy 21 | } 22 | 23 | \institute{Computer Science $\cdot$ University of Illinois at 24 | Urbana-Champaign} 25 | 26 | \author{Andreas Klöckner} 27 | 28 | \date{} 29 | 30 | \frame{\titlepage} 31 | 32 | % \begin{frame}{Thanks} 33 | % 34 | % \begin{itemize} 35 | % \item Tim Warburton (Rice) 36 | % \item Lucas Wilcox (NPS) 37 | % \item Leslie Greengard (NYU) 38 | % \item Early adopters (Rob Kirby, Maxim 
Kuznetsov, Ivan Oseledets) 39 | % %\item PyOpenCL, PyCUDA contributors 40 | % \item AMD, Nvidia 41 | % \end{itemize} 42 | % 43 | % \end{frame} 44 | % ----------------------------------------------------------------------------- 45 | \section[Loo.py]{Loop Generation} 46 | % ----------------------------------------------------------------------------- 47 | \subsection{Loo.py} 48 | % ----------------------------------------------------------------------------- 49 | % {{{ 50 | \begin{frame}{Automating GPU Programming} 51 | \begin{beamercolorbox}[sep=3mm]{block body} 52 | High-performance programming can be a time-consuming trial-and-error 53 | process. 54 | \end{beamercolorbox} 55 | Obvious idea: Let the computer do it. How? 56 | \begin{itemize} 57 | \item One way: ``Smart'' compiler, ``dumb'' developer 58 | \begin{itemize} 59 | \item GPU programming requires complex tradeoffs 60 | \item Tradeoffs require heuristics 61 | \item Heuristics are fragile 62 | \end{itemize} 63 | \item Another way: ``Smart'' developer, ``dumb'' compiler 64 | \begin{itemize} 65 | \item Error-prone 66 | \item Expensive in developer time 67 | \item User can use manual/automatic tuning 68 | \end{itemize} 69 | \end{itemize} 70 | \uncover<2->{% 71 | \begin{tikzpicture} [overlay] 72 | \node at (current page.center) [draw,drop shadow,fill=white, 73 | inner xsep=0.5cm,inner ysep=0.5cm,thick] 74 | {% 75 | So compromise! 76 | 77 | Following: an idea of a compromise. 
78 | } ; 79 | \end{tikzpicture} 80 | } 81 | \end{frame} 82 | % ----------------------------------------------------------------------------- 83 | \begin{frame}{Setting the Stage} 84 | \begin{columns} 85 | \column{0.55\textwidth} 86 | Idea: Create IR + library of transformations 87 | \begin{itemize} 88 | \item Start with math-y statement of the operation 89 | \item ``Push a few buttons'' to optimize for the target 90 | device 91 | \item Strongly separate these two parts 92 | \end{itemize} 93 | 94 | \medskip 95 | Philosophy: 96 | \begin{itemize} 97 | \item Avoid ``intelligence'' 98 | \item User can assume partial responsibility for correctness 99 | \item Embedding in Python provides generation/transform 100 | flexibility 101 | \end{itemize} 102 | \column{0.45\textwidth} 103 | \includegraphics[width=\textwidth]{loopy-crop.pdf} 104 | \end{columns} 105 | \uncover<2>{% 106 | \begin{tikzpicture} [overlay] 107 | \node [above left=1cm of current page.south east, draw,drop shadow,fill=white, 108 | inner xsep=0.5cm,inner ysep=0.5cm,thick, text 109 | width=0.7\textwidth] 110 | {% 111 | Loopy is infrastructure. 112 | 113 | \medskip 114 | Auto-tuners and domain-specific 115 | libraries are ``above'' loopy conceptually. 116 | } ; 117 | \end{tikzpicture} 118 | } 119 | \end{frame} 120 | % ----------------------------------------------------------------------------- 121 | \begin{frame} 122 | \bigncentered{DEMO TIME} 123 | \end{frame} 124 | % ----------------------------------------------------------------------------- 125 | \begin{frame}{Capturing Variants} 126 | \lstinputlisting[basicstyle=\scriptsize]{loopy-variants.py} 127 | \uncover<2>{% 128 | \begin{tikzpicture} [overlay] 129 | \node [above left=1cm of current page.south east, draw,drop shadow,fill=white, 130 | inner xsep=0.5cm,inner ysep=0.5cm,thick, 131 | text width=0.5\textwidth] 132 | {% 133 | Easy to \emph{non-redundantly} capture multiple variants of 134 | the same kernel. 
135 | } ; 136 | \end{tikzpicture} 137 | } 138 | \end{frame} 139 | % ----------------------------------------------------------------------------- 140 | \begin{frame}{Ordering} 141 | \begin{itemize} 142 | \item Completely \emph{un}ordered by default 143 | \item Program only well-formed 144 | 145 | \emph{if} domain traversal order does not matter 146 | \item Dependencies 147 | 148 | \emph{can} dictate execution order 149 | 150 | \emph{within} largest set of shared loops 151 | \end{itemize} 152 | \end{frame} 153 | % ----------------------------------------------------------------------------- 154 | \begin{frame}{Loo.py vs reality} 155 | \begin{itemize} 156 | \item 157 | Two modes of operation: 158 | \begin{itemize} 159 | \item Standalone 160 | \item In-process 161 | \end{itemize} 162 | \item Flat data structure: 163 | \begin{itemize} 164 | \item Easy to manipulate 165 | \item Kernel fusion 166 | \end{itemize} 167 | \item Register-your-own: 168 | \begin{itemize} 169 | \item Functions 170 | \item Symbols 171 | \item Reductions 172 | \end{itemize} 173 | \item Literal code `escape hatch' 174 | \item Predicated execution 175 | \item Tree-of-domains for data-dependent control flow 176 | \end{itemize} 177 | \end{frame} 178 | % ----------------------------------------------------------------------------- 179 | \begin{frame}{Bonus Features} 180 | \begin{columns} 181 | \column{0.2\textwidth} 182 | \includegraphics[width=\textwidth]{glass-dollar.jpeg} 183 | \column{0.7\textwidth} 184 | Free extras: 185 | \begin{itemize} 186 | \item A-priori bounds checking 187 | \item Generate a sequential version of the code 188 | \item Automatic Benchmarking 189 | \item Free tuning advice 190 | \begin{itemize} 191 | \item Local memory layout 192 | \item Suboptimal use of hw parallelism 193 | \item Based on knowledge about target hardware 194 | \end{itemize} 195 | \item Automatic Testing 196 | \begin{itemize} 197 | \item \dots against sequential version 198 | \item \dots which is easier to verify 
199 | \end{itemize} 200 | \end{itemize} 201 | \end{columns} 202 | \end{frame} 203 | \addimgcredit{Glass dollar: sxc.hu/flaivoloka} 204 | % ----------------------------------------------------------------------------- 205 | \begin{frame} 206 | \bigncentered{DEMO TIME} 207 | \end{frame} 208 | 209 | % }}} 210 | \end{document} 211 | 212 | % vim: foldmethod=marker 213 | -------------------------------------------------------------------------------- /slides/beamercolorthemeuiuc.sty: -------------------------------------------------------------------------------- 1 | % Copyright 2004 by Madhusudan Singh 2 | % 3 | % This file may be distributed and/or modified 4 | % 5 | % 1. under the LaTeX Project Public License and/or 6 | % 2. under the GNU Public License. 7 | % 8 | % See the file doc/licenses/LICENSE for more details. 9 | 10 | \mode<presentation> 11 | 12 | \usecolortheme{whale} 13 | \usecolortheme{orchid} 14 | 15 | %\definecolor{nyuviolet}{RGB}{87,6,172} 16 | %\colorlet{nyuviodark}{nyuviolet!80!black} 17 | \definecolor{mygray}{RGB}{200,200,200} 18 | 19 | % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html 20 | \definecolor{uiucblue}{RGB}{0,60,125} 21 | \definecolor{uiucorange}{RGB}{244,127,36} 22 | \definecolor{uiuclightblue}{RGB}{110,139,191} 23 | \definecolor{uiucdarkorange}{RGB}{239,138,28} 24 | 25 | %\setbeamercolor{alerted text}{fg=!yellow} 26 | 27 | \setbeamercolor*{palette primary}{fg=black,bg=uiucdarkorange} 28 | \setbeamercolor*{palette secondary}{fg=white,bg=uiucblue} 29 | \setbeamercolor*{palette tertiary}{fg=white,bg=uiucblue} 30 | \setbeamercolor{frametitle}{fg=white,bg=uiucblue} 31 | 32 | \setbeamercolor*{palette quaternary}{fg=black,bg=uiuclightblue} 33 | 34 | %\setbeamercolor*{sidebar}{fg=darkblue,bg=orange!75!white} 35 | 36 | %\setbeamercolor*{palette sidebar primary}{fg=darkblue!10!black} 37 | %\setbeamercolor*{palette sidebar secondary}{fg=white} 38 | %\setbeamercolor*{palette sidebar tertiary}{fg=darkblue!50!black} 39 | 
%\setbeamercolor*{palette sidebar quaternary}{fg=yellow!10!orange} 40 | 41 | \setbeamercolor*{titlelike}{bg=uiucblue,fg=white} 42 | 43 | \setbeamercolor*{block title}{bg=uiucblue,fg=white} 44 | %\setbeamercolor*{block title example}{bg=brown,fg=white} 45 | \setbeamercolor*{structure}{fg=mygray!50!black,bg=white} 46 | %\setbeamercolor{frametitle right}{bg=yellow!60!orange} 47 | 48 | %\setbeamercolor*{separation line}{} 49 | %\setbeamercolor*{fine separation line}{} 50 | 51 | \mode<all> 52 | -------------------------------------------------------------------------------- /slides/code/loopy-variants.py: -------------------------------------------------------------------------------- 1 | knl = ... 2 | 3 | def variant_cpu(knl): 4 | knl = lp.split_dimension(knl, "i", 16*4096, outer_tag="g.0", slabs=(0, 1)) 5 | knl = lp.split_dimension(knl, "i_inner", 16, 6 | inner_tag="unr") 7 | return knl 8 | 9 | def variant_gpu(knl): 10 | knl = lp.split_dimension(knl, "i", 4*256, outer_tag="g.0", slabs=(0, 1)) 11 | knl = lp.split_dimension(knl, "i_inner", block_size, 12 | outer_tag="unr", inner_tag="l.0") 13 | return knl 14 | 15 | for variant in [variant_cpu, variant_gpu]: 16 | kernel_gen = lp.generate_loop_schedules(variant(knl)) 17 | # ... 
18 | -------------------------------------------------------------------------------- /slides/code/transpose.cl: -------------------------------------------------------------------------------- 1 | /* Copies a into a __local tile, then writes the tile to a_t with the two local ids swapped. Presumably launched with BLK_SIZE x BLK_SIZE work-groups -- confirm at the call site. */ __kernel void transpose( 2 | __global float *a_t, __global float *a, 3 | unsigned a_width, unsigned a_height) 4 | { 5 | int base_idx_a = 6 | get_group_id(0) * BLK_SIZE + 7 | get_group_id(1) * A_BLOCK_STRIDE; 8 | int base_idx_a_t = 9 | get_group_id(1) * BLK_SIZE + 10 | get_group_id(0) * A_T_BLOCK_STRIDE; 11 | 12 | int glob_idx_a = 13 | base_idx_a + get_local_id(0) 14 | + a_width * get_local_id(1); 15 | int glob_idx_a_t = 16 | base_idx_a_t + get_local_id(0) 17 | + a_height * get_local_id(1); 18 | 19 | __local float a_local[BLK_SIZE][BLK_SIZE+1]; 20 | 21 | a_local[get_local_id(1)][get_local_id(0)] = /* a_local is 2-D: index it as [row][column] */ 22 | a[glob_idx_a]; 23 | 24 | barrier(CLK_LOCAL_MEM_FENCE); /* work-group barrier between the tile store and the swapped-index load */ 25 | 26 | a_t[glob_idx_a_t] = 27 | a_local[get_local_id(0)][get_local_id(1)]; 28 | } 29 | -------------------------------------------------------------------------------- /slides/code/transpose.cu: -------------------------------------------------------------------------------- 1 | __global__ void transpose( 2 | float *A_t, float *A, 3 | int a_width, int a_height) 4 | { 5 | int base_idx_a = 6 | blockIdx.x * BLK_SIZE + 7 | blockIdx.y * A_BLOCK_STRIDE; 8 | int base_idx_a_t = 9 | blockIdx.y * BLK_SIZE + 10 | blockIdx.x * A_T_BLOCK_STRIDE; 11 | 12 | int glob_idx_a = 13 | base_idx_a + threadIdx.x 14 | + a_width * threadIdx.y; 15 | int glob_idx_a_t = 16 | base_idx_a_t + threadIdx.x 17 | + a_height * threadIdx.y; 18 | 19 | __shared__ float A_shared[BLK_SIZE][BLK_SIZE+1]; 20 | 21 | A_shared[threadIdx.y][threadIdx.x] = 22 | A[glob_idx_a]; 23 | 24 | __syncthreads(); 25 | 26 | A_t[glob_idx_a_t] = 27 | A_shared[threadIdx.x][threadIdx.y]; 28 | } 29 | -------------------------------------------------------------------------------- /slides/kloeckislides.sty: 
-------------------------------------------------------------------------------- 1 | \usepackage[utf8]{inputenc} 2 | \setcounter{secnumdepth}{3} 3 | \setcounter{tocdepth}{3} 4 | \usepackage{amsmath} 5 | \usepackage{color} 6 | \usepackage{amssymb} 7 | %\usepackage{esint} 8 | \usepackage{verbatim} % for env comment 9 | \usepackage{listings} 10 | \usepackage{stmaryrd} 11 | \usepackage{colortbl} 12 | \usepackage{babel} 13 | \usepackage{wasysym} 14 | 15 | \definecolor{green}{RGB}{0, 180, 0} 16 | \definecolor{red}{RGB}{180, 0, 0} 17 | \colorlet{grellow}{green!50!yellow} 18 | \colorlet{codeback}{gray!20} 19 | 20 | \usepackage{multimedia} 21 | 22 | \usepackage{tikz} 23 | \usetikzlibrary{calc} 24 | \usetikzlibrary{positioning} 25 | \usetikzlibrary{fadings} 26 | \usetikzlibrary{chains} 27 | \usetikzlibrary{scopes} 28 | \usetikzlibrary{shadows} 29 | \usetikzlibrary{arrows} 30 | \usetikzlibrary{snakes} 31 | \usetikzlibrary{shapes.misc} 32 | \usetikzlibrary{shapes.symbols} 33 | \usetikzlibrary{shapes.multipart} 34 | \usetikzlibrary{fit} 35 | \usetikzlibrary{shapes.arrows} 36 | \usetikzlibrary{shapes.geometric} 37 | \usetikzlibrary{shapes.callouts} 38 | \usetikzlibrary{decorations.text} 39 | 40 | \pgfdeclarelayer{background} 41 | \pgfdeclarelayer{foreground} 42 | \pgfsetlayers{background,main,foreground} 43 | 44 | \tikzstyle{every picture}+=[remember picture] 45 | 46 | \def\allimgcredits{} 47 | \makeatletter 48 | \def\addimgcredit#1{\g@addto@macro\allimgcredits{\item #1}} 49 | \makeatother 50 | \def\imagecreditslide{ 51 | \begin{frame}[shrink,label=image-credits]{Image Credits} 52 | \begin{itemize} 53 | \allimgcredits 54 | \end{itemize} 55 | \end{frame} 56 | } 57 | 58 | \def\gatheredappendix{} 59 | \makeatletter 60 | \long\def\addtoappendix#1{ 61 | \g@addto@macro\gatheredappendix{#1} 62 | } 63 | \makeatother 64 | 65 | \newcommand{\cc}{\raisebox{-0.75ex}{\includegraphics[height=3ex]{cc.pdf}}} 66 | 67 | \newcommand{\D}{\mathsf{D}} 68 | \newcommand{\mathd}{\,\mathsf{d}} 69 | 70 | 
\newcommand{\avg}[1]{\{#1\}} 71 | \newcommand{\jump}[1]{\left\llbracket#1\right\rrbracket} 72 | 73 | \newcommand{\questionframe}[1]{ 74 | \begin{frame}{Questions?} 75 | \begin{center} 76 | \textbf{\Huge ?} 77 | \par#1 78 | \end{center} 79 | \end{frame} 80 | } 81 | 82 | \lstset{ 83 | %language=Python, 84 | %alsolanguage=C, 85 | showstringspaces=false, 86 | basicstyle=\small, 87 | stringstyle=\color{blue}, 88 | columns=flexible, 89 | emph={[2]pycuda,numpy,cuda,cl}, 90 | emphstyle={[2]\color{red}}, 91 | backgroundcolor=\color{codeback}, 92 | frame=single, 93 | framerule=0pt, 94 | framesep=1.5pt, 95 | rangebeginprefix=//\ , 96 | rangeendprefix=//\ , 97 | includerangemarker=false, 98 | } 99 | 100 | \pgfdeclareimage[height=0.8cm]{brown-logo}{brown-logo.pdf} 101 | \def\mylogotext{\pgfuseimage{brown-logo}\hspace*{0.3cm}} 102 | \newcommand{\logoenable}{\logo{\mylogotext}} 103 | \newcommand{\logodisable}{ \logo{} } 104 | \newenvironment{nologo}{\logodisable}{\logoenable} 105 | \newenvironment{noheadfoot}{ 106 | \begingroup 107 | \begin{nologo} 108 | \setbeamertemplate{headline}{} 109 | \setbeamertemplate{footline}{} 110 | }{ 111 | \end{nologo} 112 | \endgroup 113 | } 114 | 115 | \newcommand{\symball}[2]{ 116 | \begin{tikzpicture}[baseline=-0.7ex] 117 | \shadedraw [shading=ball,ball color=#1,use as bounding box] 118 | circle (1ex) node at (0.7ex,0) [minimum width=0.7ex] {}; 119 | 120 | \node [text=white,font=\bfseries] {#2}; 121 | \end{tikzpicture}} 122 | \newcommand{\plusball}{\symball{green}{{\small +}}} 123 | \newcommand{\okball}{\symball{orange}{o}} 124 | \newcommand{\minusball}{\symball{red}{-}} 125 | 126 | \let\epsilon=\varepsilon 127 | \let\phi=\varphi 128 | 129 | \newcommand{\subitem}[1]{\begin{itemize}\item #1 \end{itemize}} 130 | \newcommand{\creditto}[1]{ 131 | \begin{tikzpicture}[overlay] 132 | \node [xshift=1cm,yshift=0.5cm] 133 | at (current page.south west) 134 | [font=\scriptsize,fill=gray!30,anchor=south west,opacity=0.5] 135 | {#1}; 136 | \end{tikzpicture} 
137 | } 138 | 139 | \def\evalprint#1{{\pgfmathtruncatemacro{\mathresult}{#1}\mathresult}} 140 | 141 | \makeatletter 142 | \newcommand*{\overlaynumber}{\number\beamer@slideinframe} 143 | \makeatother 144 | -------------------------------------------------------------------------------- /slides/latexmkrc: -------------------------------------------------------------------------------- 1 | # http://tex.stackexchange.com/questions/11710/specify-output-directory-when-using-latexmk 2 | $pdflatex="pdflatex -interaction nonstopmode %O %S"; 3 | $out_dir = 'out'; 4 | $pdf_mode = 1; 5 | $pdf_previewer = 'xdg-open'; 6 | 7 | @default_files = ('0[0-9]*tex'); 8 | 9 | $ENV{TEXINPUTS} .=':media'; 10 | $ENV{TEXINPUTS} .=':slides'; 11 | $ENV{TEXINPUTS} .=':code'; 12 | $ENV{TEXINPUTS} .=':vids'; 13 | $HOME = $ENV{HOME}; 14 | -------------------------------------------------------------------------------- /slides/media/amd-logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/amd-logo.pdf -------------------------------------------------------------------------------- /slides/media/apple-logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/apple-logo.pdf -------------------------------------------------------------------------------- /slides/media/c870.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/c870.png -------------------------------------------------------------------------------- /slides/media/cl-programs-and-kernels-v2.tex: 
-------------------------------------------------------------------------------- 1 | \begin{frame}[fragile]{Programs and Kernels} 2 | \begin{lstlisting}[gobble=4] 3 | prg = cl.Program(context, src) 4 | \end{lstlisting} 5 | \begin{columns} 6 | \column{0.65\textwidth} 7 | \begin{itemize} 8 | \item \texttt{src}: OpenCL device code 9 | \begin{itemize} 10 | \item Derivative of C99 11 | \item Functions with \texttt{\_\_kernel} attribute 12 | can be invoked from host 13 | \end{itemize} 14 | \item \texttt{prg.build(options="",\\ 15 | \hspace*{2em}devices=None)} 16 | \item \texttt{kernel = prg.kernel\_name} 17 | \item \texttt{kernel(queue,\\ 18 | \hspace*{2em}$(G_x,G_y,G_z)$, $(L_x,L_y,L_z)$, \\ 19 | \hspace*{2em}arg, \dots, \\ 20 | \hspace*{2em}wait\_for=None)}\\ 21 | \end{itemize} 22 | \column{0.3\textwidth} 23 | % boo yuck 24 | \hspace*{-1cm}\includegraphics[width=1.3\textwidth]{cpu.jpeg} 25 | \end{columns} 26 | \end{frame} 27 | \begin{frame}[fragile]{Program Objects} 28 | \begin{lstlisting}[gobble=4] 29 | kernel(queue, (Gx,Gy,Gz), (Sx,Sy,Sz), arg, ..., wait_for=None) 30 | \end{lstlisting} 31 | \begin{columns} 32 | \column{0.3\textwidth} 33 | \includegraphics[width=1.3\textwidth]{cpu.jpeg} 34 | \column{0.65\textwidth} 35 | \begin{overlayarea}{\textwidth}{0.5\textheight} 36 | \only<+>{ 37 | \texttt{arg} may be: 38 | \begin{itemize} 39 | \item \texttt{None} (a \texttt{NULL} pointer) 40 | \item \texttt{numpy} sized scalars: 41 | \texttt{numpy.int64,numpy.float32,\dots} 42 | \item Anything with buffer interface:\\ 43 | \texttt{numpy.ndarray}, \texttt{str}\\ 44 | \item Buffer Objects 45 | \item Also: \texttt{cl.Image}, \texttt{cl.Sampler}, 46 | \texttt{cl.LocalMemory} 47 | \end{itemize} 48 | } 49 | \only<+>{ 50 | Explicitly sized scalars:\\ 51 | {\color{red}\ding{54} Annoying, error-prone.} 52 | 53 | \medskip 54 | Better: 55 | 56 | \texttt{% 57 | kernel.set\_scalar\_arg\_dtypes([\\ 58 | \hspace*{3ex}numpy.int32, None,\\ 59 | \hspace*{3ex}numpy.float32])} 60 | \medskip 61 | 62 
| Use \texttt{None} for non-scalars. 63 | } 64 | \end{overlayarea} 65 | \end{columns} 66 | \end{frame} 67 | \addimgcredit{CPU: sxc.hu/dimshik} 68 | -------------------------------------------------------------------------------- /slides/media/context.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/context.jpeg -------------------------------------------------------------------------------- /slides/media/cpu.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/cpu.jpeg -------------------------------------------------------------------------------- /slides/media/general-dep-graph.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=0.014,thick, 3 | annode/.style={xshift=0.1cm}, 4 | intermed/.style={input,fill=intermed}, 5 | ] 6 | \node [input] (A) at (152,479) [draw,ellipse] {A}; 7 | \node [intermed] (C) at (80,295) [draw,ellipse] {C}; 8 | \node [intermed] (B) at (152,387) [draw,ellipse] {B}; 9 | \node [intermed] (E) at (27,203) [draw,ellipse] {E}; 10 | \node [intermed] (G) at (99,111) [draw,ellipse] {G}; 11 | \node [intermed] (F) at (99,203) [draw,ellipse] {F}; 12 | \node [intermed] (Q) at (211,203) [draw,ellipse] {Q}; 13 | \node [intermed] (P) at (152,295) [draw,ellipse] {P}; 14 | \node [output] (R) at (154,19) [draw,ellipse] {R}; 15 | \draw [->] (C) -- (F); 16 | \draw (96,249) node [annode] {h}; 17 | \draw [->] (G) -- (R); 18 | \draw (134.5,65) node [annode] {r}; 19 | \draw [->] (B) -- (C); 20 | \draw (126.5,341) node [annode] {g}; 21 | \draw [->] (P) -- (R); 22 | \draw (156.5,157) node [annode] {r}; 23 | \draw [->] (E) -- (G); 24 | \draw (73.5,157) node [annode] {g}; 25 | 
\draw [->] (Q) -- (R); 26 | \draw (193.5,111) node [annode] {r}; 27 | \draw [->] (F) -- (G); 28 | \draw (103.5,157) node [annode] {g}; 29 | \draw [->] (B) -- (Q); 30 | \draw (202.5,295) node [annode] {q}; 31 | \draw [->] (A) -- (B); 32 | \draw (154.5,433) node [annode] {f}; 33 | \draw [->] (B) -- (P); 34 | \draw (156.5,341) node [annode] {p}; 35 | \draw [->] (C) -- (E); 36 | \draw (61.5,249) node [annode] {f}; 37 | \end{tikzpicture} 38 | -------------------------------------------------------------------------------- /slides/media/glass-dollar.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/glass-dollar.jpeg -------------------------------------------------------------------------------- /slides/media/intel-logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/intel-logo.pdf -------------------------------------------------------------------------------- /slides/media/loopy-crop.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/loopy-crop.pdf -------------------------------------------------------------------------------- /slides/media/memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/memory.png -------------------------------------------------------------------------------- /slides/media/nvidia.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/nvidia.pdf -------------------------------------------------------------------------------- /slides/media/onion.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/onion.jpeg -------------------------------------------------------------------------------- /slides/media/opencl-11.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/opencl-11.pdf -------------------------------------------------------------------------------- /slides/media/opencl-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/opencl-logo.png -------------------------------------------------------------------------------- /slides/media/opencl-overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/opencl-overview.pdf -------------------------------------------------------------------------------- /slides/media/parallel-field.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/parallel-field.jpeg -------------------------------------------------------------------------------- /slides/media/python-logo-no-shadow.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/python-logo-no-shadow.png -------------------------------------------------------------------------------- /slides/media/question-mark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/question-mark.png -------------------------------------------------------------------------------- /slides/media/queue.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/queue.jpeg -------------------------------------------------------------------------------- /slides/media/radar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/radar.png -------------------------------------------------------------------------------- /slides/media/tree.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inducer/languages-and-codegen-tutorial/66c32997a561d37b1f7f09d76844c742830e016e/slides/media/tree.jpeg -------------------------------------------------------------------------------- /slides/settings.tex: -------------------------------------------------------------------------------- 1 | %\batchmode 2 | \usepackage{kloeckislides} 3 | \nonstopmode 4 | 5 | \usepackage{pifont} 6 | 7 | \useoutertheme{split} 8 | \useinnertheme{rectangles} 9 | \usecolortheme{uiuc} 10 | \usetikzlibrary{arrows} 11 | 12 | \usepackage{ifthen} 13 | 14 | 
\pgfdeclareimage[height=0.8cm]{uiuc-logo}{uiuc-logo.pdf} 15 | \def\mylogotext{\pgfuseimage{uiuc-logo}\hspace*{0.3cm}} 16 | %\def\mylogotext{} 17 | 18 | \AtBeginSection[] { 19 | \begin{frame} 20 | \frametitle{Outline} 21 | \tableofcontents[sectionstyle=show/shaded,subsectionstyle=show/show/hide] 22 | \end{frame} 23 | } 24 | \AtBeginSubsection[] { 25 | \begin{frame} 26 | \frametitle{Outline} 27 | \tableofcontents[sectionstyle=show/shaded,subsectionstyle=show/shaded/hide] 28 | \end{frame} 29 | } 30 | 31 | \definecolor{green}{RGB}{0, 180, 0} 32 | \definecolor{red}{RGB}{180, 0, 0} 33 | \colorlet{grellow}{green!50!yellow} 34 | \colorlet{codeback}{gray!20} 35 | 36 | \DeclareMathOperator{\argmin}{argmin} 37 | \DeclareMathOperator{\argmax}{argmax} 38 | 39 | 40 | \lstset{ 41 | language=Python, 42 | alsolanguage=C, 43 | rangebeginprefix=\#\ , 44 | rangeendprefix=\#\ , 45 | } 46 | 47 | \colorlet{input}{green!30} 48 | \colorlet{output}{red!30} 49 | \colorlet{intermed}{blue!30} 50 | 51 | \definecolor{fetch}{RGB}{227,110,35} 52 | \definecolor{alu}{RGB}{255,188,24} 53 | \definecolor{context}{RGB}{132,146,175} 54 | 55 | 56 | \setbeamertemplate{navigation symbols}{} 57 | 58 | \let\tmop=\operatorname 59 | 60 | \usepackage[normalem]{ulem} 61 | 62 | \def\curl{\operatorname{curl}} 63 | 64 | \def\checkmark{\textbf{\color{green}\ding{51}}} 65 | \def\crossmark{\textbf{\color{red}\ding{56}}} 66 | -------------------------------------------------------------------------------- /slides/slides/barrier.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}{Synchronization} 2 | What is a Barrier? 
3 | 4 | \bigskip 5 | \begin{center} 6 | \begin{tikzpicture}[scale=0.8, 7 | thread/.style={blue,very thick,->}, 8 | barrier/.style={ultra thick}, 9 | stopped/.style={fill=red,shape=regular polygon,regular polygon sides=8}, 10 | ] 11 | \draw [barrier] (0,0) -- +(4,0) ; 12 | \uncover<+>{ 13 | \draw [thread] (1,5) -- +(0,-2) ; 14 | \draw [thread] (2,5) -- +(0,-3) ; 15 | \draw [thread] (3,5) -- +(0,-1) ; 16 | } 17 | \uncover<+>{ 18 | \draw [thread] (1,5) -- +(0,-3) ; 19 | \draw [thread] (2,5) -- +(0,-4) ; 20 | \draw [thread] (3,5) -- +(0,-2) ; 21 | } 22 | \uncover<+>{ 23 | \node [stopped] at (2,0) {}; 24 | \draw [thread] (1,5) -- +(0,-4) ; 25 | \draw [thread] (2,5) -- +(0,-5) ; 26 | \draw [thread] (3,5) -- +(0,-3) ; 27 | } 28 | \uncover<+>{ 29 | \node [stopped] at (2,0) {}; 30 | \node [stopped] at (1,0) {}; 31 | \draw [thread] (1,5) -- +(0,-5) ; 32 | \draw [thread] (2,5) -- +(0,-5) ; 33 | \draw [thread] (3,5) -- +(0,-4) ; 34 | } 35 | \uncover<+>{ 36 | \node [stopped] at (2,0) {}; 37 | \node [stopped] at (1,0) {}; 38 | \node [stopped] at (3,0) {}; 39 | \draw [thread] (1,5) -- +(0,-5) ; 40 | \draw [thread] (2,5) -- +(0,-5) ; 41 | \draw [thread] (3,5) -- +(0,-5) ; 42 | } 43 | \uncover<+>{ 44 | \draw [thread] (1,5) -- +(0,-5) ; 45 | \draw [thread] (2,5) -- +(0,-5) ; 46 | \draw [thread] (3,5) -- +(0,-5) ; 47 | } 48 | \uncover<+>{ 49 | \draw [thread] (1,5) -- +(0,-6) ; 50 | \draw [thread] (2,5) -- +(0,-6) ; 51 | \draw [thread] (3,5) -- +(0,-6) ; 52 | } 53 | \end{tikzpicture} 54 | \end{center} 55 | \end{frame} 56 | 57 | -------------------------------------------------------------------------------- /slides/slides/cl-buffer-objects-v4.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}[fragile]{Memory Objects: Buffers} 2 | \begin{lstlisting}[gobble=4] 3 | buf = cl.Buffer(context, flags, size=0, hostbuf=None) 4 | \end{lstlisting} 5 | \begin{columns} 6 | \column{0.7\textwidth} 7 | \begin{overlayarea}{\textwidth}{0.7\textheight} 
8 | \only<+>{ 9 | \begin{itemize} 10 | \item Chunk of device memory 11 | \item No type information: ``Bag of bytes'' 12 | \item Observe: \emph{Not} tied to device. 13 | 14 | $\rightarrow$ no fixed memory address 15 | 16 | $\rightarrow$ pointers do \emph{not} survive kernel 17 | launches 18 | 19 | $\rightarrow$ movable between devices 20 | 21 | $\rightarrow$ not even allocated before first use! 22 | \item \texttt{flags}: 23 | \begin{itemize} 24 | \item \texttt{READ\_ONLY/WRITE\_ONLY/READ\_WRITE} 25 | \item \{\texttt{ALLOC,COPY,USE}\}\texttt{\_HOST\_PTR} 26 | \end{itemize} 27 | \end{itemize} 28 | } 29 | \only<+>{ 30 | \texttt{COPY\_HOST\_PTR}: 31 | \begin{itemize} 32 | \item Use \texttt{hostbuf} as initial content of buffer 33 | \end{itemize} 34 | \texttt{USE\_HOST\_PTR}: 35 | \begin{itemize} 36 | \item \texttt{hostbuf} \emph{is} the buffer. 37 | \item Caching in device memory is allowed. 38 | \end{itemize} 39 | \texttt{ALLOC\_HOST\_PTR}: 40 | \begin{itemize} 41 | \item \emph{New} host memory (unrelated to 42 | \texttt{hostbuf}) is visible from device 43 | \emph{and} host. 44 | \end{itemize} 45 | } 46 | \only<+>{ 47 | \begin{itemize} 48 | \item Specify \texttt{hostbuf} or \texttt{size} (or both) 49 | \item \texttt{hostbuf}: Needs Python Buffer Interface\\ 50 | e.g. \texttt{numpy.ndarray}, \texttt{str}. 51 | \subitem{Important: Memory layout matters} 52 | \item Passed to device code as pointers\\ 53 | (e.g. \texttt{float *}, \texttt{int *}) 54 | \item \texttt{enqueue\_copy}(queue, dest, src) 55 | \item Can be mapped into host address space:\\ 56 | \texttt{cl.MemoryMap}. 
57 | \end{itemize} 58 | } 59 | \end{overlayarea} 60 | \column{0.3\textwidth} 61 | \includegraphics[width=\textwidth]{memory.png} 62 | \end{columns} 63 | \end{frame} 64 | \addimgcredit{RAM stick: sxc.hu/gobran11} 65 | -------------------------------------------------------------------------------- /slides/slides/cl-command-queue.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}[fragile]{Command Queues and Events} 2 | \begin{lstlisting}[gobble=4] 3 | queue = cl.CommandQueue(context, device=None, 4 | properties=None | [(prop, value),...]) 5 | \end{lstlisting} 6 | \begin{columns} 7 | \column{0.65\textwidth} 8 | \begin{itemize} 9 | \item Attached to single device 10 | \item \texttt{cl.command\_queue\_properties}\dots 11 | \begin{itemize} 12 | \item \texttt{OUT\_OF\_ORDER\_EXEC\_MODE\_ENABLE}:\\ 13 | Do not force sequential execution 14 | \item \texttt{PROFILING\_ENABLE}:\\ 15 | Gather timing info 16 | \end{itemize} 17 | \end{itemize} 18 | \column{0.35\textwidth} 19 | \includegraphics[width=\textwidth]{queue.jpeg} 20 | \end{columns} 21 | \end{frame} 22 | \addimgcredit{Queue: sxc.hu/cobrasoft} 23 | 24 | -------------------------------------------------------------------------------- /slides/slides/cl-command-queues.tex: -------------------------------------------------------------------------------- 1 | { 2 | \newcommand{\brick}[6]{ 3 | \draw [fill=#4!50] 4 | (0,0) rectangle (#1,#2) coordinate [pos=0.5] (brickfront); 5 | \draw [fill=#4] 6 | (#1,0) -- (#1,0,-1) -- (#1,#2,-1) -- (#1,#2) --cycle; 7 | \draw [fill=#4] 8 | (0,#2) -- (0,#2,-1) -- (#1,#2,-1) -- (#1,#2) --cycle; 9 | #6 10 | \begin{pgfonlayer}{foreground} 11 | \node [fill=#4!50,inner xsep=2pt,inner ysep=2pt,opacity=0.7,#5] at (brickfront) { #3 } ; 12 | \node [#5] at (brickfront) { #3 } ; 13 | \end{pgfonlayer} 14 | } 15 | \newcommand{\drawevt}[2]{ 16 | \fill [#2,opacity=0.5] 17 | (0,#1) -- (1.5,#1) -- (1.5,#1,-1) 18 | -- (1.5,#1+0.2,-1) -- (1.5,#1+0.2) -- 
(0,#1+0.2) -- cycle ; 19 | } 20 | \begin{frame}{OpenCL: Command Queues} 21 | \begin{columns} 22 | \column{0.45\textwidth} 23 | \begin{itemize} 24 | \item Host and Device run asynchronously 25 | \item Host submits to queue: 26 | \uncover{ 27 | \begin{itemize} 28 | \item Computations 29 | \item Memory Transfers 30 | \item Sync primitives 31 | \item \dots 32 | \end{itemize} 33 | } 34 | \item Host can wait for\\drained queue 35 | \item Profiling 36 | 37 | \end{itemize} 38 | 39 | \column{0.5\textwidth} 40 | \begin{tikzpicture} 41 | \brick{1.25}{2}{Host}{gray}{}{} 42 | \begin{scope}[xshift=2.5cm,yshift=-1.5cm] 43 | \brick{2.5}{1.25}{Device}{gray}{}{} 44 | \end{scope} 45 | \begin{scope}[xshift=2.5cm] 46 | \brick{0.75}{2}{Queue 1}{blue}{text=white,rotate=90}{ 47 | \foreach\i in {0,0.2,...,1.4} 48 | \draw (0,\i) -- (0.75,\i) -- (0.75,\i,-1); 49 | } 50 | \end{scope} 51 | \begin{scope}[xshift=3.5cm] 52 | \brick{0.75}{2}{Queue 2}{blue}{text=white,rotate=90}{ 53 | \foreach\i in {0,0.2,...,0.9} 54 | \draw (0,\i) -- (0.75,\i) -- (0.75,\i,-1); 55 | } 56 | \end{scope} 57 | 58 | \node [font=\Large] at (5.25,1.25) {\dots} ; 59 | 60 | \draw [very thick,->] (1.25,1,-0.5) -| (2,2.5,-0.5) -| (2.875,2,-0.5); 61 | \draw [very thick,->] (2,2.5,-0.5) -| (3.875,2,-0.5); 62 | \draw [very thick,->] (2,2.5,-0.5) -| (4.875,2,-0.5); 63 | \draw [very thick,->] (2.5,-1) -| (0.625,0); 64 | 65 | \end{tikzpicture} 66 | \end{columns} 67 | \end{frame} 68 | } 69 | -------------------------------------------------------------------------------- /slides/slides/cl-compute-dag-v2.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}{Capturing Dependencies} 2 | \begin{columns} 3 | \column{0.3\textwidth} 4 | B = f(A)\\ 5 | C = g(B)\\ 6 | E = f(C)\\ 7 | F = h(C)\\ 8 | G = g(E,F)\\ 9 | P = p(B)\\ 10 | Q = q(B)\\ 11 | R = r(G,P,Q) 12 | \column{0.6\textwidth} 13 | \begin{center} 14 | \input{general-dep-graph} 15 | \end{center} 16 | \end{columns} 17 | \uncover<2>{ 18 
| \begin{tikzpicture} [overlay] 19 | \node [above right=1cm of current page.south west, draw,drop shadow,fill=white, 20 | text width=0.6\textwidth, inner xsep=0.5cm,inner ysep=0.5cm,thick] 21 | { 22 | \begin{itemize} 23 | \item Switch queue to out-of-order mode! 24 | 25 | \item Specify as list of events using 26 | \texttt{wait\_for=} optional keyword 27 | to \texttt{enqueue\_XXX}. 28 | 29 | \item Can also enqueue barrier. 30 | 31 | \item Common use case: Transmit/receive 32 | from other MPI ranks. 33 | 34 | \item Possible in hardware on Nv Fermi, AMD Cayman: 35 | Submit parallel work to increase machine use. 36 | \subitem{Not yet ubiquitously implemented} 37 | \end{itemize} 38 | } ; 39 | \end{tikzpicture} 40 | } 41 | \end{frame} 42 | -------------------------------------------------------------------------------- /slides/slides/cl-computing-as-a-service.tex: -------------------------------------------------------------------------------- 1 | { 2 | \def\evalprint#1{{\pgfmathtruncatemacro{\mathresult}{#1}\mathresult}} 3 | \begin{frame}{OpenCL: Computing as a Service} 4 | 5 | \begin{tikzpicture}[ 6 | z={(0.5cm,-1cm)}, 7 | every shadow/.style={shadow xshift=-0.1cm,shadow yshift=0.1cm}, 8 | memory/.style={fill=blue!40,draw=blue}, 9 | langarrow/.style={single arrow,shape border rotate=90, 10 | single arrow tip angle=165,single arrow head extend=0.6cm, 11 | draw,thick,fill=yellow}, 12 | ] 13 | \uncover<+->{ 14 | \node [draw,inner sep=5mm,fill=green!40,drop shadow, 15 | text width=1.5cm,text centered] (host) {Host\\(CPU)} ; 16 | \uncover<3-4>{ 17 | \node [above left=0.2cm of host.south east,font=\tiny,memory, 18 | inner sep=0.5mm,minimum width=1.3cm] 19 | { Memory } ; 20 | } 21 | } 22 | \uncover<+->{ 23 | \foreach \i in {0,...,3} 24 | { 25 | \pgfmathtruncatemacro{\plat}{\i/2} 26 | \node 27 | [draw,fill=yellow!50, anchor=west,text width=4.5cm,font=\small] 28 | at ($(host.east)+(1.75+\plat,0,-1.5+\i)$) 29 | (cdev\i) 30 | { 31 | Compute Device \evalprint{mod(\i,2)} 32 | 
{\tiny(Platform \evalprint{\i/2})}\\ 33 | \begin{tikzpicture} 34 | \foreach \j in {0,1,2} 35 | { 36 | \foreach \k in {0,1,2,7} 37 | \coordinate (pe\i\j\k) at (0.15*\k,0,0.2*\j) ; 38 | \node 39 | [draw,fill=orange!40,fit={(pe\i\j0) (pe\i\j7) (0,0.4,0.2*\j) }] 40 | (unit\i\j) 41 | {}; 42 | \foreach \k in {0,1,2,7} 43 | \filldraw 44 | [fill=red!30] 45 | (pe\i\j\k) ++(-0.05,0) rectangle ++ (0.1,0.4) ; 46 | \node at (4.5*0.15,0.2,0.2*\j) 47 | [anchor=center,font=\tiny,text width=] 48 | {$\cdots$} ; 49 | } 50 | \uncover<4>{ 51 | \draw (pe\i27) ++(0.5,0) 52 | node [anchor=south west,memory,text width=,minimum height=0.8cm] 53 | {Memory}; 54 | } 55 | \end{tikzpicture} 56 | } ; 57 | \draw [thick] 58 | (host.east) -- ++(1,0) -- ++(0,0,-1.5+\i) -- ++(\plat+0.75,0); 59 | } 60 | } 61 | 62 | % memory ------------------------------------------------------------------ 63 | \uncover<+>{} 64 | \uncover<+>{} 65 | 66 | % platforms --------------------------------------------------------------- 67 | \uncover<+>{} 68 | \uncover<+>{ 69 | \node [fit=(cdev0) (cdev1),draw,dashed,thick] (plat0) {} ; 70 | \node at (plat0.north west) [anchor=south west] 71 | {Platform 0 (e.g. CPUs)} ; 72 | } 73 | \uncover<+>{ 74 | \node [fit=(cdev2) (cdev3),draw,dashed,thick] (plat1) {} ; 75 | \node at (plat1.south west) [anchor=north west] 76 | {Platform 1 (e.g. GPUs)} ; 77 | } 78 | 79 | % hardware ---------------------------------------------------------------- 80 | \uncover<+>{} 81 | \uncover<+-+(2)>{ 82 | \draw [<-,thick] (cdev0) -- ++(-3,0.35) 83 | node [anchor=east,text width=2.5cm] 84 | {(think ``chip'',\\ has memory interface)} ; 85 | } 86 | \uncover<+-+(1)>{ 87 | \draw [<-,thick] (unit32.center) -- ++(-3,0.1) 88 | node [anchor=east,text width=3.25cm] 89 | {Compute Unit\\(think ``processor'',\\ has insn. 
fetch)} ; 90 | } 91 | \uncover<+>{ 92 | \draw [<-,thick] (pe327) -- ++(-1.5,-1) 93 | node [anchor=east,text width=3.35cm] 94 | {Processing Element\\(think ``SIMD lane'')} ; 95 | } 96 | 97 | % programming interfaces -------------------------------------------------- 98 | \uncover<+>{} 99 | \uncover<+-+(1)>{ 100 | \node [fit=(host)] (hostwrap) {} ; 101 | \node at (hostwrap.south) 102 | [anchor=north,langarrow] 103 | {Python} ; 104 | } 105 | \uncover<+->{ 106 | \node [fit=(plat0) (plat1)] (devwrap) {} ; 107 | \node at (devwrap.south) 108 | [anchor=north,draw,langarrow] 109 | {Device Language: $\sim$ C99} ; 110 | } 111 | \end{tikzpicture} 112 | \end{frame} 113 | } 114 | -------------------------------------------------------------------------------- /slides/slides/cl-context-v2.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}[fragile]{Contexts} 2 | \begin{lstlisting}[gobble=4] 3 | context = cl.Context(devices=None | [dev1, dev2], dev_type=None) 4 | context = cl.create_some_context(interactive=True) 5 | \end{lstlisting} 6 | 7 | \begin{columns} 8 | \column{0.25\textwidth} 9 | \includegraphics[width=\textwidth]{context.jpeg} 10 | \column{0.75\textwidth} 11 | \begin{itemize} 12 | \item Spans one or more Devices 13 | \item Create from device type or list of devices 14 | \subitem{See docs for \texttt{cl.Platform}, \texttt{cl.Device}} 15 | \item \texttt{dev\_type}: 16 | \texttt{\textit{DEFAULT}}, 17 | \texttt{ALL}, \texttt{CPU}, \texttt{GPU} 18 | \item Needed to\dots 19 | \begin{itemize} 20 | \item \dots allocate Memory Objects 21 | \item \dots create and build Programs 22 | \item \dots host Command Queues 23 | \item \dots execute Grids 24 | \end{itemize} 25 | \end{itemize} 26 | \end{columns} 27 | \end{frame} 28 | \addimgcredit{Context: sxc.hu/svilen001} 29 | 30 | -------------------------------------------------------------------------------- /slides/slides/cl-device.tex: 
-------------------------------------------------------------------------------- 1 | \begin{frame}{CL ``Compute Device''} 2 | \begin{columns} 3 | \column{0.25\textwidth} 4 | \includegraphics[width=\textwidth]{c870.png} 5 | \column{0.75\textwidth} 6 | CL Compute Devices: 7 | \begin{itemize} 8 | \item CPUs, GPUs, accelerators, \dots 9 | \subitem{Anything that fits the programming model.} 10 | \item A processor die with an interface to off-chip memory 11 | \item Can get list of devices from platform. 12 | \end{itemize} 13 | \end{columns} 14 | \end{frame} 15 | -------------------------------------------------------------------------------- /slides/slides/cl-platform.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}{CL ``Platform''} 2 | \begin{columns} 3 | \column{0.25\textwidth} 4 | \begin{tikzpicture}[x=1cm,y=2cm] 5 | \foreach \i in {1,...,10} 6 | { 7 | \pgfmathrand 8 | \let\myx=\pgfmathresult 9 | \pgfmathrand 10 | \let\myy=\pgfmathresult 11 | \node at (\myx, \myy) { 12 | \includegraphics[width=0.6\textwidth]{c870.png} 13 | } ; 14 | } 15 | \end{tikzpicture} 16 | \column{0.75\textwidth} 17 | \begin{itemize} 18 | \item ``Platform'': a collection of devices, all from 19 | the same \emph{vendor}. 20 | 21 | \item All devices in a platform use same CL driver/implementation. 22 | \item Multiple platforms can be used from one 23 | program $\rightarrow$ \emph{ICD}. 24 | 25 | \medskip 26 | \texttt{libOpenCL.so}: ICD loader 27 | 28 | \medskip 29 | \texttt{/etc/OpenCL/vendors/\textit{somename}.icd}: 30 | Plain text file with name of \texttt{.so} containing 31 | CL implementation. 
32 | 33 | \end{itemize} 34 | \end{columns} 35 | \end{frame} 36 | -------------------------------------------------------------------------------- /slides/slides/cuda-cl-dictionary.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}{OpenCL $\leftrightarrow$ CUDA: A dictionary} 2 | \begin{tikzpicture}[overlay] 3 | \node [anchor=south east,rotate=10,opacity=0.3] 4 | at ($(current page.south east) + (-0.5cm,2cm)$) 5 | { \includegraphics[width=6cm]{dictionary-desat.jpeg} } ; % NOTE(review): dictionary-desat.jpeg is not among the files listed under slides/media -- confirm the image is available before using this slide 6 | \end{tikzpicture} 7 | 8 | \begin{tabular}{r|l} 9 | \textbf{OpenCL} & \textbf{CUDA} \\ 10 | \hline 11 | Grid & Grid \\ 12 | Work Group& Block \\ 13 | Work Item & Thread \\ 14 | \texttt{\_\_kernel} & \texttt{\_\_global\_\_} \\ 15 | \texttt{\_\_global} & \texttt{\_\_device\_\_} \\ 16 | \texttt{\_\_local} & \texttt{\_\_shared\_\_} \\ 17 | \texttt{\_\_private} & \texttt{\_\_local\_\_} \\ 18 | \texttt{image$n$d\_t} & \texttt{texture\textless type, $n$, ...\textgreater} \\ 19 | \texttt{barrier(LMF)} & \texttt{\_\_syncthreads()} \\ 20 | \texttt{get\_local\_id(012)} & \texttt{threadIdx.xyz} \\ 21 | \texttt{get\_group\_id(012)} & \texttt{blockIdx.xyz} \\ 22 | \texttt{get\_global\_id(012)} & -- (reimplement) \\ 23 | \end{tabular} 24 | \end{frame} 25 | \addimgcredit{Dictionary: sxc.hu/topfer} 26 | -------------------------------------------------------------------------------- /slides/slides/gpu-cl-execution-model.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}{OpenCL: Execution Model} 2 | \begin{columns} 3 | \column{.3\textwidth} 4 | \begin{tikzpicture}[font=\tiny\bfseries,y=-1cm,anchor=north west] 5 | \node at (0,-0.4) [inner sep=0] (gridtitle) {$n$D Grid}; 6 | \foreach \x in {0, 1, 2} 7 | \foreach \y in {0, 1} 8 | \node at (0.9*\x, 0.75*\y ) (wgroup\x\y) [draw, fill=red!60, rectangle, 9 | text width=0.6cm, text centered, inner sep=1mm] 10 | {Group $(\x, \y)$} ; 11 | 12 |
\begin{pgfonlayer}{background} 13 | \node [draw,thick,fill=red!30,fit=(gridtitle) (wgroup21)] (gridbox) {} ; 14 | \end{pgfonlayer} 15 | 16 | \begin{scope}[yshift=-3cm] 17 | \node at (0, -0.4) [inner sep=0] (grouptitle) {Work Group $(1,1)$}; % title names the group the dashed connectors start from (wgroup11) 18 | \foreach \x in {0, 1, 2, 3} 19 | \foreach \y in {0, 1, 2, 3} 20 | \node at (0.8*\x, .75*\y) [draw, fill=red!90, rectangle, text width=0.5cm, 21 | text centered,inner sep=1mm] 22 | (item\x\y) { Item $(\x, \y)$ }; 23 | \end{scope} 24 | 25 | \begin{pgfonlayer}{background} 26 | \node [draw,thick,fill=red!60,fit=(grouptitle) (item33)] (groupbox) {} ; 27 | \end{pgfonlayer} 28 | 29 | \draw[dashed] (wgroup11.south west) -- (groupbox.north west); 30 | \draw[dashed] (wgroup11.south east) -- (groupbox.north east); 31 | 32 | \end{tikzpicture} 33 | 34 | \column{0.7\textwidth} 35 | 36 | \begin{itemize} 37 | \item<+-> Two-tiered Parallelism 38 | \begin{itemize} 39 | \item Grid = $N_x\times N_y \times N_z$ work groups 40 | \item Work group = $S_x \times S_y\times S_z$ work items 41 | \item Total: $\prod_{i\in\{x,y,z\}} S_i N_i$ work items 42 | \end{itemize} 43 | \item<+-> Comm/Sync only within work group 44 | \begin{itemize} 45 | \item Work group maps to compute unit 46 | \end{itemize} 47 | \item<+-> Grid/Group $\approx$ outer loops in an algorithm 48 | \item<.-> Device Language:\\ 49 | \texttt{get\_\{global,group,local\}\_\{id,size\}\\(\texttt{axis})} 50 | \end{itemize} 51 | \end{columns} 52 | \end{frame} 53 | -------------------------------------------------------------------------------- /slides/slides/memory-fence.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}{Synchronization} 2 | What is a Memory Fence?
3 | 4 | \bigskip 5 | \begin{center} 6 | \begin{tikzpicture}[scale=0.8, 7 | thread/.style={blue,very thick,->}, 8 | fence/.style={ultra thick}, 9 | memlocation/.style={thick,draw,fill=blue!20,minimum width=1.5cm}, 10 | write/.style={thick,->,red,dashed}, 11 | read/.style={thick,->,green,dashed}, 12 | meminstr/.style={pos=0.5,font=\small}, 13 | ] 14 | \only<1-5>{ 15 | \node [memlocation] (mem) at (0,1) { 17 } ; 16 | } 17 | \only<6-7>{ 18 | \node [memlocation] (mem) at (0,1) { 18 } ; 19 | } 20 | \uncover<+-+(1)>{ 21 | \draw [thread] (-3,5) -- +(0,-1) coordinate (t1write) ; 22 | \draw [thread] (3,5) -- +(0,-1) ; 23 | } 24 | \uncover<+-+(4)>{ 25 | \draw [write] (t1write) -- (mem) node [meminstr] {write 18}; 26 | } 27 | \uncover<+>{ 28 | \draw [thread] (-3,5) -- +(0,-2) ; 29 | \draw [thread] (3,5) -- +(0,-2) coordinate (t2read); 30 | } 31 | \uncover<.-.(1)>{ 32 | \draw [read] (t2read) -- (mem) node [meminstr] {read}; 33 | } 34 | \uncover<+>{ 35 | \draw [thread] (-3,5) -- +(0,-3) ; 36 | \draw [thread] (3,5) -- +(0,-3) coordinate (t2readc); 37 | \draw [read] (mem) -- (t2readc) node [meminstr] {17}; 38 | } 39 | \uncover<+>{ 40 | \draw [thread] (-3,5) -- +(0,-4) ; 41 | \draw [thread] (3,5) -- +(0,-4) ; 42 | } 43 | \uncover<+>{ 44 | \draw [thread] (-3,5) -- +(0,-5) ; 45 | \draw [thread] (3,5) -- +(0,-5) ; 46 | } 47 | \uncover<+>{ 48 | \draw [thread] (-3,5) -- +(0,-6) ; 49 | \draw [thread] (3,5) -- +(0,-6) ; 50 | } 51 | 52 | \end{tikzpicture} 53 | \end{center} 54 | \end{frame} 55 | % ----------------------------------------------------------------------------- 56 | \begin{frame}{Synchronization} 57 | What is a Memory Fence? An ordering restriction for memory access. 
58 | 59 | \bigskip 60 | \begin{center} 61 | \begin{tikzpicture}[scale=0.8, 62 | thread/.style={blue,very thick,->}, 63 | fence/.style={ultra thick}, 64 | memlocation/.style={thick,draw,fill=blue!20,minimum width=1.5cm}, 65 | write/.style={thick,->,red,dashed}, 66 | read/.style={thick,->,green,dashed}, 67 | meminstr/.style={pos=0.5,font=\small}, 68 | stopped/.style={fill=red,shape=regular polygon,regular polygon sides=8}, 69 | ] 70 | \draw [fence] (-4,0) -- (4,0) ; 71 | \only<1-4>{ 72 | \node [memlocation] (mem) at (0,0) { 17 } ; 73 | } 74 | \only<5->{ 75 | \node [memlocation] (mem) at (0,0) { 18 } ; 76 | } 77 | \uncover<+-+(1)>{ 78 | \draw [thread] (-3,2) -- +(0,-1) coordinate (t1write) ; 79 | \draw [thread] (3,2) -- +(0,-1) ; 80 | } 81 | \uncover<+-+(2)>{ 82 | \draw [write] (t1write) -- (mem) node [meminstr] {write 18}; 83 | } 84 | \uncover<+-+(3)>{ 85 | \draw [thread] (-3,2) -- +(0,-2) ; 86 | \draw [thread] (3,2) -- +(0,-2) coordinate (t2read); 87 | } 88 | \uncover<+-+(1)>{ 89 | \node [stopped] at (-3,0) {}; 90 | \node [stopped] at (3,0) {}; 91 | } 92 | \addtocounter{beamerpauses}{2} 93 | 94 | \uncover<+>{ 95 | \draw [thread] (-3,2) -- +(0,-3) ; 96 | \draw [thread] (3,2) -- +(0,-3) coordinate (t2read); 97 | } 98 | \uncover<.->{ 99 | \draw [read] (t2read) -- (mem) node [meminstr] {read}; 100 | } 101 | \uncover<+->{ 102 | \draw [thread] (-3,2) -- +(0,-4) ; 103 | \draw [thread] (3,2) -- +(0,-4) coordinate (t2readc); 104 | \draw [read] (mem) -- (t2readc) node [meminstr] {18}; 105 | } 106 | 107 | \end{tikzpicture} 108 | \end{center} 109 | %\uncover<+->{ 110 | %Flavors: All \{reads, writes, accesses\} complete\\ 111 | %before continuing. 
112 | %} 113 | \end{frame} 114 | 115 | -------------------------------------------------------------------------------- /slides/slides/what-is-opencl-v2.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}{What is OpenCL?} 2 | \begin{columns} 3 | \column{0.7\textwidth} 4 | 5 | OpenCL (Open Computing Language) is an open, royalty-free 6 | standard for general purpose parallel programming across CPUs, 7 | GPUs and other processors. 8 | \hfill{\footnotesize[OpenCL 1.1 spec]} 9 | \bigskip 10 | 11 | 12 | \begin{itemize} 13 | \item Device-neutral (Nv GPU, AMD GPU, Intel/AMD CPU) 14 | \item Vendor-neutral 15 | \item Comes with RTCG 16 | \end{itemize} 17 | Defines: 18 | \begin{itemize} 19 | \item Host-side programming interface (library) 20 | \item Device-side programming language (!) 21 | \end{itemize} 22 | 23 | \column{0.3\textwidth} 24 | \includegraphics[width=\textwidth] {opencl-logo.png} 25 | 26 | \end{columns} 27 | \end{frame} 28 | -------------------------------------------------------------------------------- /slides/slides/why-gpu-scripting-v3.tex: -------------------------------------------------------------------------------- 1 | \begin{frame}{Why do Scripting for GPUs?} 2 | \begin{columns} 3 | \column{0.6\textwidth} 4 | \begin{itemize} 5 | \item GPUs are everything that scripting languages are not. 
6 | \begin{itemize} 7 | \item Highly parallel 8 | \item Very architecture-sensitive 9 | \item Built for maximum FP/memory throughput 10 | \end{itemize} 11 | $\rightarrow$ complement each other 12 | \item CPU: largely restricted to control tasks ($\sim$1000/sec) 13 | \begin{itemize} 14 | \item Scripting fast enough 15 | \end{itemize} 16 | \item Python + CUDA = \textbf{PyCUDA} 17 | \item Python + OpenCL = \textbf{PyOpenCL} 18 | \end{itemize} 19 | \column{0.4\textwidth} 20 | \includegraphics[width=\textwidth]{c870.png} 21 | \end{columns} 22 | \end{frame} 23 | \addimgcredit{C870 GPU: Nvidia Corp.} 24 | 25 | -------------------------------------------------------------------------------- /slides/update-slides.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | set -e 4 | set -x 5 | 6 | for i in 01 03 06; do # numeric prefixes of the directories that receive a slide deck 7 | tgt_dir=$(echo ../"$i"*) # expand the prefix to the matching sibling directory 8 | cp out/"$i"-*.pdf "$tgt_dir/0-slides.pdf" # quoted so the target path is never word-split 9 | done 10 | --------------------------------------------------------------------------------