├── .gitignore ├── LICENSE ├── Neural Networks with Torch (Bay Area Deep Learning School).pdf ├── README.md └── notebooks ├── Deep Learning with Torch.ipynb └── Torch & Autograd Basics.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Lua sources 2 | luac.out 3 | 4 | # Notebooks and data 5 | *.ipynb_checkpoints/ 6 | *.t7 7 | 8 | # luarocks build files 9 | *.src.rock 10 | *.zip 11 | *.tar.gz 12 | *.key 13 | 14 | # Object files 15 | *.o 16 | *.os 17 | *.ko 18 | *.obj 19 | *.elf 20 | 21 | # Precompiled Headers 22 | *.gch 23 | *.pch 24 | 25 | # Libraries 26 | *.lib 27 | *.a 28 | *.la 29 | *.lo 30 | *.def 31 | *.exp 32 | 33 | # Shared objects (inc. Windows DLLs) 34 | *.dll 35 | *.so 36 | *.so.* 37 | *.dylib 38 | 39 | # Executables 40 | *.exe 41 | *.out 42 | *.app 43 | *.i*86 44 | *.x86_64 45 | *.hex 46 | 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Neural Networks with Torch (Bay Area Deep Learning School).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexbw/bayarea-dl-summerschool/fa79e47331416c90f3caac791bf3b1ec52e60c9a/Neural Networks with Torch (Bay Area Deep Learning School).pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bayarea-dl-summerschool 2 | Torch notebooks and slides for the Bay Area Deep Learning Summer School 3 | 4 | ## Installation Instructions 5 | 6 | #### Install anaconda if you don't have it (instructions here for OS X) 7 | ``` 8 | wget http://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh 9 | sh Miniconda-latest-MacOSX-x86_64.sh -b -p $HOME/anaconda 10 | ``` 11 | 12 | #### Add anaconda to your $PATH 13 | ``` 14 | export PATH=$HOME/anaconda/bin:$PATH 15 | ``` 16 | 17 | #### Install Lua & Torch 18 | ``` 19 | conda install lua=5.2 lua-science -c 
alexbw 20 | # Although, you could install other Lua versions like 2.0 (LuaJIT), 5.1, 5.2 and 5.3 21 | ``` 22 | 23 | 24 | #### Clone this repository and start the notebook server 25 | ``` 26 | git clone https://github.com/alexbw/bayarea-dl-summerschool.git 27 | cd bayarea-dl-summerschool 28 | itorch notebook 29 | # Will open a browser tab, then you can navigate to the notebooks 30 | ``` 31 | 32 | -------------------------------------------------------------------------------- /notebooks/Deep Learning with Torch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Hands-on Introduction to Torch\n", 12 | "\n", 13 | "Edited by Ronan Collobert
\n", 14 | "original version by Soumith Chintala\n", 15 | "
\n", 16 | "\n", 17 | "\n", 18 | "[Get started](http://ronan.collobert.com/torch/intro.pdf)
\n", 19 | "[Get this itorch notebook](http://ronan.collobert.com/torch/Deep Learning with Torch.ipynb)\n", 20 | "\n", 21 | "Run itorch\n", 22 | "\n", 23 | "```sh\n", 24 | "itorch notebook\n", 25 | "```" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Goal of this talk\n", 33 | "* Understand torch and the neural networks package at a high-level.\n", 34 | "* Train a small neural network on CPU and GPU" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## What is Torch?" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "Torch is an scientific computing framework based on Lua[JIT] with strong CPU and CUDA backends.\n", 49 | "\n", 50 | "Strong points of Torch:\n", 51 | "\n", 52 | "* Efficient Tensor library (like NumPy) with an efficient CUDA backend\n", 53 | "* Neural Networks package -- build arbitrary acyclic computation graphs with automatic differentiation\n", 54 | " * also with fast CUDA and CPU backends\n", 55 | "* Good community and industry support - several hundred community-built and maintained packages.\n", 56 | "* Easy to use Multi-GPU support and parallelizing neural networks\n", 57 | "\n", 58 | "[http://torch.ch](http://torch.ch)
\n", 59 | "[https://github.com/torch/torch7/wiki/Cheatsheet](https://github.com/torch/torch7/wiki/Cheatsheet)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Before getting started" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "* Based on [Lua](http://lua.org) and C.\n", 74 | "* Currently runs on [LuaJIT](http://luajit.org) (Just-in-time compiler) which is fast and supports FFI.\n", 75 | "* Lua is pretty close to javascript.\n", 76 | " * variables are global by default, unless `local` keyword is used\n", 77 | " * Only has one data structure built-in, a table: `{}`. Doubles as a hash-table and an array.\n", 78 | " * 1-based indexing.\n", 79 | " * `foo:bar()` is the same as `foo.bar(foo)`\n", 80 | " \n", 81 | "* Lua __glues__ C/C++ libraries together\n", 82 | " * __Develop__ fast (scripting language), __run__ fast (minor overhead, C backend)\n", 83 | " \n", 84 | "* The basic brick is the __Tensor__ object\n", 85 | " * n-dimensional array\n", 86 | " * used to store any kind of data\n", 87 | " \n", 88 | "* The __torch__ package provides tensors... _hundred_ of packages are built upon it." 
89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Getting Started\n", 96 | "\n", 97 | "#### Lua Types\n", 98 | "\n", 99 | "Lua has 8 main types:" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "print(type(nil))\n", 111 | "print(type(true))\n", 112 | "print(type(10.4*3))\n", 113 | "print(type(\"Hello world\"))\n", 114 | "print(type(function() print(\"Hello world\") end))\n", 115 | "print(type({a=3, b=4}))\n", 116 | "print(type(torch.Tensor()))" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "The `thread` type will not be covered by this tutorial.\n", 124 | "Note that `userdata` allows to create C objects (like several Torch objects), and define your own type system over them." 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "#### Strings, numbers, tables - a tiny introduction" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "a = 'hello'" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": false 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "print(a)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "b = {}" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "b[1] = a" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": { 182 | "collapsed": false 183 | }, 184 | 
"outputs": [], 185 | "source": [ 186 | "print(b)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "b[2] = 30" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "for i=1,#b do -- the # operator is the length operator in Lua\n", 209 | " print(b[i]) \n", 210 | "end" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "#### Tensors" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "collapsed": false 225 | }, 226 | "outputs": [], 227 | "source": [ 228 | "a = torch.Tensor(5,3) -- construct a 5x3 matrix, uninitialized" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "collapsed": false 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "a = torch.rand(5,3)\n", 240 | "print(a)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "#### Views\n", 248 | "A tensor is a view over a piece of memory (a storage)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "collapsed": false 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "print(a:storage())" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "Torch relies heavily on views:\n", 267 | " - narrow(dim, idx, size)\n", 268 | " - select(dim, idx)\n", 269 | " - unfold(dim, kw, dw)\n", 270 | " - view(dim1, dim2, dim3, ...)\n", 271 | " - index operator [{}]" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "collapsed": false 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | 
"print(a:narrow(1, 3, 2))" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "Remember that view = *pointer* in a storage" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "an = a:narrow(1, 3, 2)\n", 301 | "an:zero()\n", 302 | "print(a)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "a:select(2, 2):fill(3.14)\n", 314 | "print(a)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": { 321 | "collapsed": false 322 | }, 323 | "outputs": [], 324 | "source": [ 325 | "print(a:size())\n", 326 | "print(a:stride())\n", 327 | "print(a:storageOffset())" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": { 334 | "collapsed": false 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "print(a:select(2, 2):size())\n", 339 | "print(a:select(2, 2):stride())\n", 340 | "print(a:select(2, 2):storageOffset())" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "#### Math operations\n", 348 | "\n", 349 | "See [torch documentation](https://github.com/torch/torch7/blob/master/doc/maths.md)\n", 350 | "for a survey on available math operations." 
351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": { 357 | "collapsed": false 358 | }, 359 | "outputs": [], 360 | "source": [ 361 | "b=torch.rand(3,4)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": { 368 | "collapsed": false 369 | }, 370 | "outputs": [], 371 | "source": [ 372 | "-- matrix-matrix multiplication: syntax 1\n", 373 | "a*b " 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": { 380 | "collapsed": false 381 | }, 382 | "outputs": [], 383 | "source": [ 384 | "-- matrix-matrix multiplication: syntax 2\n", 385 | "torch.mm(a,b) " 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": { 392 | "collapsed": false 393 | }, 394 | "outputs": [], 395 | "source": [ 396 | "-- matrix-matrix multiplication: syntax 3\n", 397 | "c=torch.Tensor(5,4)\n", 398 | "c:mm(a,b) -- store the result of a*b in c" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "#### CUDA Tensors\n", 406 | "Tensors can be moved onto GPU using the :cuda function" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "metadata": { 413 | "collapsed": false 414 | }, 415 | "outputs": [], 416 | "source": [ 417 | "require 'cutorch';\n", 418 | "a = a:cuda()\n", 419 | "b = b:cuda()\n", 420 | "c = c:cuda()\n", 421 | "c:mm(a,b) -- done on GPU" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "#### Functions" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": { 435 | "collapsed": false 436 | }, 437 | "outputs": [], 438 | "source": [ 439 | "N = 5\n", 440 | "\n", 441 | "-- make sure everybody has the same random seed\n", 442 | "torch.manualSeed(1234)\n", 443 | "\n", 444 | "-- create a random NxN matrix\n", 445 | "A = torch.rand(N, 
N)\n", 446 | "\n", 447 | "-- make it symmetric positive\n", 448 | "A = A*A:t()\n", 449 | "\n", 450 | "-- make it definite\n", 451 | "A:add(0.001, torch.eye(N))\n", 452 | "\n", 453 | "-- add a linear term\n", 454 | "b = torch.rand(N)\n", 455 | "\n", 456 | "-- create a quadratic form\n", 457 | "function J(x)\n", 458 | " return 0.5*x:dot(A*x)-b:dot(x)\n", 459 | "end" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": {}, 465 | "source": [ 466 | "Function call, here at a random point" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": { 473 | "collapsed": false 474 | }, 475 | "outputs": [], 476 | "source": [ 477 | "print(J(torch.rand(N)))" 478 | ] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "metadata": {}, 483 | "source": [ 484 | "### Exercise: find the minimum of the quadratic function" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": {}, 490 | "source": [ 491 | "#### We can inverse the matrix" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "metadata": { 498 | "collapsed": false 499 | }, 500 | "outputs": [], 501 | "source": [ 502 | "xs = torch.inverse(A)*b\n", 503 | "print(string.format('J(x^*) = %g', J(xs)))" 504 | ] 505 | }, 506 | { 507 | "cell_type": "markdown", 508 | "metadata": {}, 509 | "source": [ 510 | "#### Or we can do a gradient descent!" 
511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": null, 516 | "metadata": { 517 | "collapsed": false 518 | }, 519 | "outputs": [], 520 | "source": [ 521 | "function dJ(x)\n", 522 | " return A*x-b\n", 523 | "end" 524 | ] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "metadata": {}, 529 | "source": [ 530 | "We define some current solution:" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": null, 536 | "metadata": { 537 | "collapsed": false 538 | }, 539 | "outputs": [], 540 | "source": [ 541 | "x = torch.rand(N)" 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": {}, 547 | "source": [ 548 | "and then apply gradient descent (with a given learning rate `lr`) for a while:" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": null, 554 | "metadata": { 555 | "collapsed": false 556 | }, 557 | "outputs": [], 558 | "source": [ 559 | "lr = 0.01\n", 560 | "for i=1,20000 do\n", 561 | " x = x - dJ(x)*lr\n", 562 | " -- we print the value of the objective function every 1000 iterations\n", 563 | " if i % 1000 == 0 then\n", 564 | " print(string.format('at iter %d J(x) = %f', i, J(x)))\n", 565 | " end\n", 566 | "end" 567 | ] 568 | }, 569 | { 570 | "cell_type": "markdown", 571 | "metadata": {}, 572 | "source": [ 573 | "### Neural Networks\n", 574 | "Neural networks in Torch can be constructed using the `nn` package." 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": null, 580 | "metadata": { 581 | "collapsed": false 582 | }, 583 | "outputs": [], 584 | "source": [ 585 | "require 'nn';" 586 | ] 587 | }, 588 | { 589 | "cell_type": "markdown", 590 | "metadata": {}, 591 | "source": [ 592 | "`Modules` are the bricks used to build neural networks. 
Each are themselves neural networks, but can be combined with other networks using `containers` to create complex neural networks" 593 | ] 594 | }, 595 | { 596 | "cell_type": "markdown", 597 | "metadata": {}, 598 | "source": [ 599 | "For example, look at this network that classfies digit images:\n", 600 | "![LeNet](http://fastml.com/images/cifar/lenet5.png)" 601 | ] 602 | }, 603 | { 604 | "cell_type": "markdown", 605 | "metadata": {}, 606 | "source": [ 607 | "It is a simple feed-forward network. \n", 608 | "It takes the input, feeds it through several layers one after the other, and then finally gives the output.\n", 609 | "\n", 610 | "Such a network container is `nn.Sequential` which feeds the input through several layers." 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": null, 616 | "metadata": { 617 | "collapsed": false 618 | }, 619 | "outputs": [], 620 | "source": [ 621 | "net = nn.Sequential()\n", 622 | "\n", 623 | "-- 1 input image channel, 6 output channels, 5x5 convolution kernel\n", 624 | "net:add(nn.SpatialConvolution(1, 6, 5, 5))\n", 625 | "\n", 626 | "-- A max-pooling operation that looks at 2x2 windows and finds the max.\n", 627 | "net:add(nn.SpatialMaxPooling(2,2,2,2))\n", 628 | "\n", 629 | "-- non-linearity\n", 630 | "net:add(nn.Tanh())\n", 631 | "\n", 632 | "-- additional layers\n", 633 | "net:add(nn.SpatialConvolution(6, 16, 5, 5))\n", 634 | "net:add(nn.SpatialMaxPooling(2,2,2,2))\n", 635 | "net:add(nn.Tanh())\n", 636 | "\n", 637 | "-- reshapes from a 3D tensor of 16x5x5 into 1D tensor of 16*5*5\n", 638 | "net:add(nn.View(16*5*5))\n", 639 | "\n", 640 | "-- fully connected layers (matrix multiplication between input and weights)\n", 641 | "net:add(nn.Linear(16*5*5, 120))\n", 642 | "net:add(nn.Tanh())\n", 643 | "net:add(nn.Linear(120, 84))\n", 644 | "net:add(nn.Tanh())\n", 645 | "\n", 646 | "-- 10 is the number of outputs of the network (10 classes)\n", 647 | "net:add(nn.Linear(84, 10))\n", 648 | "print('Lenet5\\n', 
tostring(net));" 649 | ] 650 | }, 651 | { 652 | "cell_type": "markdown", 653 | "metadata": {}, 654 | "source": [ 655 | "Other examples of nn containers are shown in the figure below:\n", 656 | "![containers](https://raw.githubusercontent.com/soumith/ex/gh-pages/assets/nn_containers.png)\n", 657 | "\n", 658 | "Every neural network module in torch has automatic differentiation.\n", 659 | "It has a `:forward(input)` function that computes the output for a given input, flowing the input through the network.\n", 660 | "and it has a `:backward(input, gradient)` function that will differentiate each neuron in the network w.r.t. the gradient that is passed in. This is done via the chain rule." 661 | ] 662 | }, 663 | { 664 | "cell_type": "code", 665 | "execution_count": null, 666 | "metadata": { 667 | "collapsed": false 668 | }, 669 | "outputs": [], 670 | "source": [ 671 | "input = torch.rand(1,32,32) -- pass a random tensor as input to the network" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": null, 677 | "metadata": { 678 | "collapsed": false 679 | }, 680 | "outputs": [], 681 | "source": [ 682 | "output = net:forward(input)" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": null, 688 | "metadata": { 689 | "collapsed": false 690 | }, 691 | "outputs": [], 692 | "source": [ 693 | "print(output)" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": null, 699 | "metadata": { 700 | "collapsed": false 701 | }, 702 | "outputs": [], 703 | "source": [ 704 | "net:zeroGradParameters() -- zero the internal gradient buffers of the network (will come to this later)" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": null, 710 | "metadata": { 711 | "collapsed": false 712 | }, 713 | "outputs": [], 714 | "source": [ 715 | "gradInput = net:backward(input, torch.rand(10))" 716 | ] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "execution_count": null, 721 | "metadata": { 722 | 
"collapsed": false 723 | }, 724 | "outputs": [], 725 | "source": [ 726 | "print(#gradInput)" 727 | ] 728 | }, 729 | { 730 | "cell_type": "markdown", 731 | "metadata": {}, 732 | "source": [ 733 | "One can then update the parameters with" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": null, 739 | "metadata": { 740 | "collapsed": false 741 | }, 742 | "outputs": [], 743 | "source": [ 744 | "net:updateParameters(0.001) -- provide a learning rate" 745 | ] 746 | }, 747 | { 748 | "cell_type": "markdown", 749 | "metadata": {}, 750 | "source": [ 751 | "### Criterion: Defining a loss function\n", 752 | "When you want a model to learn to do something, you give it feedback on how well it is doing. This function that computes an objective measure of the model's performance is called a __loss function__.\n", 753 | "\n", 754 | "A typical loss function takes in the model's output and the groundtruth and computes a value that quantifies the model's performance.\n", 755 | "\n", 756 | "The model then corrects itself to have a smaller loss.\n", 757 | "\n", 758 | "In Torch, loss functions are implemented just like neural network modules, and have automatic differentiation. 
\n", 759 | "They have two functions\n", 760 | " - `forward(input, target)`\n", 761 | " - `backward(input, target)`\n", 762 | "\n", 763 | "For example:" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": null, 769 | "metadata": { 770 | "collapsed": false 771 | }, 772 | "outputs": [], 773 | "source": [ 774 | "-- a negative log-likelihood criterion for multi-class classification\n", 775 | "criterion = nn.CrossEntropyCriterion()\n", 776 | "\n", 777 | "-- let's say the groundtruth was class number: 3\n", 778 | "criterion:forward(output, 3)\n", 779 | "gradients = criterion:backward(output, 3)" 780 | ] 781 | }, 782 | { 783 | "cell_type": "code", 784 | "execution_count": null, 785 | "metadata": { 786 | "collapsed": false 787 | }, 788 | "outputs": [], 789 | "source": [ 790 | "gradInput = net:backward(input, gradients)" 791 | ] 792 | }, 793 | { 794 | "cell_type": "markdown", 795 | "metadata": {}, 796 | "source": [ 797 | "##### Review of what you learnt so far\n", 798 | "* Network can have many layers of computation\n", 799 | "* Network takes an input and produces an output in the `:forward` pass\n", 800 | "* Criterion computes the loss of the network, and its gradients w.r.t. the output of the network.\n", 801 | "* Network takes an (input, gradients) pair in its `:backward` pass and calculates the gradients w.r.t. each layer (and neuron) in the network.\n", 802 | "\n", 803 | "##### Missing details\n", 804 | "> A neural network layer can have learnable parameters or not.\n", 805 | "\n", 806 | "A convolution layer learns its convolution kernels to adapt to the input data and the problem being solved. \n", 807 | "A max-pooling layer has no learnable parameters. 
It only finds the max of local windows.\n", 808 | "\n", 809 | "A layer in torch which has learnable weights, will typically have fields .weight (and optionally, .bias)" 810 | ] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "execution_count": null, 815 | "metadata": { 816 | "collapsed": false 817 | }, 818 | "outputs": [], 819 | "source": [ 820 | "m = nn.SpatialConvolution(1,3,2,2) -- learn 3 2x2 kernels\n", 821 | "print(m.weight) -- initially, the weights are randomly initialized" 822 | ] 823 | }, 824 | { 825 | "cell_type": "code", 826 | "execution_count": null, 827 | "metadata": { 828 | "collapsed": false 829 | }, 830 | "outputs": [], 831 | "source": [ 832 | "print(m.bias) -- The operation in a convolution layer is: output = convolution(input,weight) + bias" 833 | ] 834 | }, 835 | { 836 | "cell_type": "markdown", 837 | "metadata": {}, 838 | "source": [ 839 | "There are also two other important fields in a learnable layer. The gradWeight and gradBias.\n", 840 | "The gradWeight accumulates the gradients w.r.t. each weight in the layer, and the gradBias, w.r.t. each bias in the layer.\n", 841 | "\n", 842 | "#### Training the network\n", 843 | "\n", 844 | "For the network to adjust itself, it typically does this operation (if you do Stochastic Gradient Descent):\n", 845 | "\n", 846 | "> weight = weight - learningRate * gradWeight [equation 1]\n", 847 | "\n", 848 | "This update over time will adjust the network weights such that the output loss is decreasing." 849 | ] 850 | }, 851 | { 852 | "cell_type": "markdown", 853 | "metadata": {}, 854 | "source": [ 855 | "Okay, now it is time to discuss one missing piece. 
Who visits each layer in your neural network and updates the weight according to Equation 1?\n", 856 | " - You can do your own training loop\n", 857 | " - Pro: easy customization for complicated network\n", 858 | " - Con: code duplication\n", 859 | " \n", 860 | " - You can use existing packages\n", 861 | " - [optim](https://github.com/torch/optim)\n", 862 | " - nn.StochasticGradient\n", 863 | "\n", 864 | "We shall use the simple SGD trainer shipped with the neural network module: [__nn.StochasticGradient__](https://github.com/torch/nn/blob/master/doc/training.md#stochasticgradientmodule-criterion).\n", 865 | "\n", 866 | "It has a function :train(dataset) that takes a given dataset and simply trains your network by showing different samples from your dataset to the network." 867 | ] 868 | }, 869 | { 870 | "cell_type": "markdown", 871 | "metadata": {}, 872 | "source": [ 873 | "### What about data?\n", 874 | "Generally, when you have to deal with image, text, audio or video data, you can use standard functions like: [__image.load__](https://github.com/torch/image#res-imageloadfilename-depth-tensortype) or [__audio.load__](https://github.com/soumith/lua---audio#usage) to load your data into a _torch.Tensor_ or a Lua table, as convenient.\n", 875 | "\n", 876 | "Let us now use some simple data to train our network.\n", 877 | "\n", 878 | "We shall use the CIFAR-10 dataset, which has the classes: 'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'. \n", 879 | "The images in CIFAR-10 are of size 3x32x32, i.e. 3-channel color images of 32x32 pixels in size.\n", 880 | "![CIFAR-10 image](https://raw.githubusercontent.com/soumith/ex/gh-pages/assets/cifar10.png)\n", 881 | "\n", 882 | "The dataset has 50,000 training images and 10,000 test images in total.\n", 883 | "\n", 884 | "__We now have 5 steps left to do in training our first torch neural network__\n", 885 | "1. Load and normalize data\n", 886 | "2. Define a Neural Network\n", 887 | "3. 
Define Loss function\n", 888 | "4. Train network on training data\n", 889 | "5. Test network on test data.\n", 890 | "\n", 891 | "#### 1. Load and normalize data\n", 892 | "\n", 893 | "Today, in the interest of time, we prepared the data before-hand into a 4D torch ByteTensor of size 10000x3x32x32 (training) and 10000x3x32x32 (testing)\n", 894 | "Let us download the data..." 895 | ] 896 | }, 897 | { 898 | "cell_type": "code", 899 | "execution_count": null, 900 | "metadata": { 901 | "collapsed": false 902 | }, 903 | "outputs": [], 904 | "source": [ 905 | "os.execute('wget -c https://s3.amazonaws.com/torch7/data/cifar10torchsmall.zip')\n", 906 | "os.execute('unzip -o cifar10torchsmall.zip')" 907 | ] 908 | }, 909 | { 910 | "cell_type": "markdown", 911 | "metadata": {}, 912 | "source": [ 913 | "And let's inspect it!" 914 | ] 915 | }, 916 | { 917 | "cell_type": "code", 918 | "execution_count": null, 919 | "metadata": { 920 | "collapsed": false 921 | }, 922 | "outputs": [], 923 | "source": [ 924 | "trainset = torch.load('cifar10-train.t7')\n", 925 | "testset = torch.load('cifar10-test.t7')\n", 926 | "classes = {'airplane', 'automobile', 'bird', 'cat',\n", 927 | " 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'}" 928 | ] 929 | }, 930 | { 931 | "cell_type": "code", 932 | "execution_count": null, 933 | "metadata": { 934 | "collapsed": false 935 | }, 936 | "outputs": [], 937 | "source": [ 938 | "print(trainset)" 939 | ] 940 | }, 941 | { 942 | "cell_type": "code", 943 | "execution_count": null, 944 | "metadata": { 945 | "collapsed": false 946 | }, 947 | "outputs": [], 948 | "source": [ 949 | "print(#trainset.data)" 950 | ] 951 | }, 952 | { 953 | "cell_type": "markdown", 954 | "metadata": {}, 955 | "source": [ 956 | "For fun, let us display an image:" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": null, 962 | "metadata": { 963 | "collapsed": false 964 | }, 965 | "outputs": [], 966 | "source": [ 967 | "itorch.image(trainset.data[100]) -- display 
the 100-th image in dataset\n", 968 | "print(classes[trainset.label[100]])" 969 | ] 970 | }, 971 | { 972 | "cell_type": "markdown", 973 | "metadata": {}, 974 | "source": [ 975 | "Now, to prepare the dataset to be used with __nn.StochasticGradient__, a couple of things have to be done according to it's [documentation](https://github.com/torch/nn/blob/master/doc/training.md#traindataset).\n", 976 | "1. The dataset has to have a :size() function.\n", 977 | "2. The dataset has to have a [i] index operator, so that dataset[i] returns the ith sample in the datset.\n", 978 | "\n", 979 | "Both can be done quickly:" 980 | ] 981 | }, 982 | { 983 | "cell_type": "code", 984 | "execution_count": null, 985 | "metadata": { 986 | "collapsed": false 987 | }, 988 | "outputs": [], 989 | "source": [ 990 | "-- ignore setmetatable() for now, it is a feature beyond the scope of this tutorial.\n", 991 | "-- It sets the index operator.\n", 992 | "\n", 993 | "setmetatable(trainset, \n", 994 | " {__index = function(t, i) \n", 995 | " return {\n", 996 | " t.data[i],\n", 997 | " t.label[i]\n", 998 | " } \n", 999 | " end}\n", 1000 | ");\n", 1001 | "\n", 1002 | "function trainset:size() \n", 1003 | " return self.data:size(1) \n", 1004 | "end\n", 1005 | "\n", 1006 | "-- converts the data from a ByteTensor to a DoubleTensor.\n", 1007 | "trainset.data = trainset.data:double()" 1008 | ] 1009 | }, 1010 | { 1011 | "cell_type": "code", 1012 | "execution_count": null, 1013 | "metadata": { 1014 | "collapsed": false 1015 | }, 1016 | "outputs": [], 1017 | "source": [ 1018 | "print(trainset:size()) -- just to test" 1019 | ] 1020 | }, 1021 | { 1022 | "cell_type": "code", 1023 | "execution_count": null, 1024 | "metadata": { 1025 | "collapsed": false 1026 | }, 1027 | "outputs": [], 1028 | "source": [ 1029 | "print(trainset[33]) -- load sample number 33.\n", 1030 | "itorch.image(trainset[33][1])" 1031 | ] 1032 | }, 1033 | { 1034 | "cell_type": "markdown", 1035 | "metadata": {}, 1036 | "source": [ 1037 | "__One 
of the most important things you can do in conditioning your data (in general in data-science or machine learning) is to make your data have a mean of 0.0 and standard-deviation of 1.0.__\n", 1038 | "\n", 1039 | "Let us do that as a final step of our data processing.\n", 1040 | "\n", 1041 | "We are going to do a per-channel normalization" 1042 | ] 1043 | }, 1044 | { 1045 | "cell_type": "code", 1046 | "execution_count": null, 1047 | "metadata": { 1048 | "collapsed": false 1049 | }, 1050 | "outputs": [], 1051 | "source": [ 1052 | "-- remember: our dataset is #samples x #channels x #height x #width\n", 1053 | "-- this picks {all images, 1st channel, all vertical pixels, all horizontal pixels}\n", 1054 | "redChannel = trainset.data:select(2, 1)" 1055 | ] 1056 | }, 1057 | { 1058 | "cell_type": "code", 1059 | "execution_count": null, 1060 | "metadata": { 1061 | "collapsed": false 1062 | }, 1063 | "outputs": [], 1064 | "source": [ 1065 | "print(#redChannel)" 1066 | ] 1067 | }, 1068 | { 1069 | "cell_type": "markdown", 1070 | "metadata": {}, 1071 | "source": [ 1072 | "Moving back to mean-subtraction and standard-deviation based scaling, doing this operation is simple, using the indexing operator that we learnt above:" 1073 | ] 1074 | }, 1075 | { 1076 | "cell_type": "code", 1077 | "execution_count": null, 1078 | "metadata": { 1079 | "collapsed": false 1080 | }, 1081 | "outputs": [], 1082 | "source": [ 1083 | "mean = {} -- store the mean, to normalize the test set in the future\n", 1084 | "stdv = {} -- store the standard-deviation for the future\n", 1085 | "for i=1,3 do -- over each image channel\n", 1086 | " mean[i] = trainset.data:select(2, i):mean() -- mean estimation\n", 1087 | " print('Channel ' .. i .. ', Mean: ' .. mean[i])\n", 1088 | " trainset.data:select(2, i):add(-mean[i]) -- mean subtraction\n", 1089 | " \n", 1090 | " stdv[i] = trainset.data:select(2, i):std() -- std estimation\n", 1091 | " print('Channel ' .. i .. ', Standard Deviation: ' .. 
stdv[i])\n", 1092 | " trainset.data:select(2, i):div(stdv[i]) -- std scaling\n", 1093 | "end" 1094 | ] 1095 | }, 1096 | { 1097 | "cell_type": "markdown", 1098 | "metadata": {}, 1099 | "source": [ 1100 | "Our training data is now normalized and ready to be used.\n", 1101 | "\n", 1102 | "#### 2. Time to define our neural network\n" 1103 | ] 1104 | }, 1105 | { 1106 | "cell_type": "markdown", 1107 | "metadata": {}, 1108 | "source": [ 1109 | "We use here a LeNet-like network, with 3 input channels and threshold units (ReLU):" 1110 | ] 1111 | }, 1112 | { 1113 | "cell_type": "code", 1114 | "execution_count": null, 1115 | "metadata": { 1116 | "collapsed": false 1117 | }, 1118 | "outputs": [], 1119 | "source": [ 1120 | "net = nn.Sequential()\n", 1121 | "net:add(nn.SpatialConvolution(3, 6, 5, 5))\n", 1122 | "net:add(nn.SpatialMaxPooling(2,2,2,2))\n", 1123 | "net:add(nn.Threshold())\n", 1124 | "\n", 1125 | "net:add(nn.SpatialConvolution(6, 16, 5, 5))\n", 1126 | "net:add(nn.SpatialMaxPooling(2,2,2,2))\n", 1127 | "net:add(nn.Threshold())\n", 1128 | "\n", 1129 | "net:add(nn.View(16*5*5))\n", 1130 | "\n", 1131 | "net:add(nn.Linear(16*5*5, 120))\n", 1132 | "net:add(nn.Threshold())\n", 1133 | "net:add(nn.Linear(120, 84))\n", 1134 | "net:add(nn.Threshold())\n", 1135 | "net:add(nn.Linear(84, 10))" 1136 | ] 1137 | }, 1138 | { 1139 | "cell_type": "markdown", 1140 | "metadata": {}, 1141 | "source": [ 1142 | "#### 3. Let us define the Loss function\n", 1143 | "\n", 1144 | "Let us use the cross-entropy classification loss. It is well suited for most classification problems." 1145 | ] 1146 | }, 1147 | { 1148 | "cell_type": "code", 1149 | "execution_count": null, 1150 | "metadata": { 1151 | "collapsed": false 1152 | }, 1153 | "outputs": [], 1154 | "source": [ 1155 | "criterion = nn.CrossEntropyCriterion()" 1156 | ] 1157 | }, 1158 | { 1159 | "cell_type": "markdown", 1160 | "metadata": {}, 1161 | "source": [ 1162 | "#### 4. 
Train the neural network\n", 1163 | "\n", 1164 | "This is when things start to get interesting. \n", 1165 | "Let us first define an __nn.StochasticGradient__ object. Then we will give our dataset to this object's ___:train___ function, and that will get the ball rolling." 1166 | ] 1167 | }, 1168 | { 1169 | "cell_type": "code", 1170 | "execution_count": null, 1171 | "metadata": { 1172 | "collapsed": false 1173 | }, 1174 | "outputs": [], 1175 | "source": [ 1176 | "trainer = nn.StochasticGradient(net, criterion)\n", 1177 | "trainer.learningRate = 0.001\n", 1178 | "trainer.maxIteration = 5 -- just do 5 epochs of training." 1179 | ] 1180 | }, 1181 | { 1182 | "cell_type": "code", 1183 | "execution_count": null, 1184 | "metadata": { 1185 | "collapsed": false 1186 | }, 1187 | "outputs": [], 1188 | "source": [ 1189 | "trainer:train(trainset)" 1190 | ] 1191 | }, 1192 | { 1193 | "cell_type": "markdown", 1194 | "metadata": {}, 1195 | "source": [ 1196 | "#### 5. Test the network, print accuracy\n", 1197 | "\n", 1198 | "We have trained the network for 5 passes over the training dataset. \n", 1199 | "But we need to check if the network has learnt anything at all. \n", 1200 | "We will check this by predicting the class label that the neural network outputs, and checking it against the ground-truth. If the prediction is correct, we add the sample to the list of correct predictions." 1201 | ] 1202 | }, 1203 | { 1204 | "cell_type": "markdown", 1205 | "metadata": {}, 1206 | "source": [ 1207 | "Okay, first step. Let us display an image from the test set to get familiar." 
1208 | ] 1209 | }, 1210 | { 1211 | "cell_type": "code", 1212 | "execution_count": null, 1213 | "metadata": { 1214 | "collapsed": false 1215 | }, 1216 | "outputs": [], 1217 | "source": [ 1218 | "print(classes[testset.label[100]])\n", 1219 | "itorch.image(testset.data[100])" 1220 | ] 1221 | }, 1222 | { 1223 | "cell_type": "markdown", 1224 | "metadata": {}, 1225 | "source": [ 1226 | "Now that we are done with that, let us normalize the test data with the mean and standard-deviation from the training data." 1227 | ] 1228 | }, 1229 | { 1230 | "cell_type": "code", 1231 | "execution_count": null, 1232 | "metadata": { 1233 | "collapsed": false 1234 | }, 1235 | "outputs": [], 1236 | "source": [ 1237 | "testset.data = testset.data:double() -- convert from Byte tensor to Double tensor\n", 1238 | "for i=1,3 do -- over each image channel\n", 1239 | " local channel = testset.data:select(2, i)\n", 1240 | " channel:add(-mean[i]) -- mean subtraction\n", 1241 | " channel:div(stdv[i]) -- std scaling\n", 1242 | " print(string.format('channel %d: mean = %f stdv = %f', i, channel:mean(), channel:std()))\n", 1243 | "end" 1244 | ] 1245 | }, 1246 | { 1247 | "cell_type": "code", 1248 | "execution_count": null, 1249 | "metadata": { 1250 | "collapsed": false 1251 | }, 1252 | "outputs": [], 1253 | "source": [ 1254 | "-- for fun, print the mean and standard-deviation of example-100\n", 1255 | "horse = testset.data[100]\n", 1256 | "print(horse:mean(), horse:std())" 1257 | ] 1258 | }, 1259 | { 1260 | "cell_type": "markdown", 1261 | "metadata": {}, 1262 | "source": [ 1263 | "Okay, now let us see what the neural network thinks these examples above are:" 1264 | ] 1265 | }, 1266 | { 1267 | "cell_type": "code", 1268 | "execution_count": null, 1269 | "metadata": { 1270 | "collapsed": false 1271 | }, 1272 | "outputs": [], 1273 | "source": [ 1274 | "print(classes[testset.label[100]])\n", 1275 | "itorch.image(testset.data[100])\n", 1276 | "predicted = net:forward(testset.data[100])" 1277 | ] 1278 | }, 
1279 | { 1280 | "cell_type": "code", 1281 | "execution_count": null, 1282 | "metadata": { 1283 | "collapsed": false 1284 | }, 1285 | "outputs": [], 1286 | "source": [ 1287 | "-- show scores\n", 1288 | "print(predicted)" 1289 | ] 1290 | }, 1291 | { 1292 | "cell_type": "markdown", 1293 | "metadata": {}, 1294 | "source": [ 1295 | "You can see the network predictions. The network assigned a probability to each classes, given the image.\n", 1296 | "\n", 1297 | "To make it clearer, let us tag each probability with it's class-name:" 1298 | ] 1299 | }, 1300 | { 1301 | "cell_type": "code", 1302 | "execution_count": null, 1303 | "metadata": { 1304 | "collapsed": false 1305 | }, 1306 | "outputs": [], 1307 | "source": [ 1308 | "for i=1,predicted:size(1) do\n", 1309 | " print(classes[i], predicted[i])\n", 1310 | "end" 1311 | ] 1312 | }, 1313 | { 1314 | "cell_type": "markdown", 1315 | "metadata": {}, 1316 | "source": [ 1317 | "Alright, fine. How many in total seem to be correct over the test set?" 1318 | ] 1319 | }, 1320 | { 1321 | "cell_type": "code", 1322 | "execution_count": null, 1323 | "metadata": { 1324 | "collapsed": false 1325 | }, 1326 | "outputs": [], 1327 | "source": [ 1328 | "correct = 0\n", 1329 | "for i=1,10000 do\n", 1330 | " local groundtruth = testset.label[i]\n", 1331 | " local prediction = net:forward(testset.data[i])\n", 1332 | " local confidences, indices = torch.sort(prediction, true) -- true means sort in descending order\n", 1333 | " if groundtruth == indices[1] then\n", 1334 | " correct = correct + 1\n", 1335 | " end\n", 1336 | "end" 1337 | ] 1338 | }, 1339 | { 1340 | "cell_type": "code", 1341 | "execution_count": null, 1342 | "metadata": { 1343 | "collapsed": false 1344 | }, 1345 | "outputs": [], 1346 | "source": [ 1347 | "print(correct, 100*correct/10000 .. 
' % ')" 1348 | ] 1349 | }, 1350 | { 1351 | "cell_type": "markdown", 1352 | "metadata": {}, 1353 | "source": [ 1354 | "That looks waaay better than chance, which is 10% accuracy (randomly picking a class out of 10 classes). Seems like the network learnt something.\n", 1355 | "\n", 1356 | "Hmmm, what are the classes that performed well, and the classes that did not perform well:" 1357 | ] 1358 | }, 1359 | { 1360 | "cell_type": "code", 1361 | "execution_count": null, 1362 | "metadata": { 1363 | "collapsed": false 1364 | }, 1365 | "outputs": [], 1366 | "source": [ 1367 | "class_performance = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}\n", 1368 | "for i=1,10000 do\n", 1369 | " local groundtruth = testset.label[i]\n", 1370 | " local prediction = net:forward(testset.data[i])\n", 1371 | " local confidences, indices = torch.sort(prediction, true) -- true means sort in descending order\n", 1372 | " if groundtruth == indices[1] then\n", 1373 | " class_performance[groundtruth] = class_performance[groundtruth] + 1\n", 1374 | " end\n", 1375 | "end" 1376 | ] 1377 | }, 1378 | { 1379 | "cell_type": "code", 1380 | "execution_count": null, 1381 | "metadata": { 1382 | "collapsed": false 1383 | }, 1384 | "outputs": [], 1385 | "source": [ 1386 | "for i=1,#classes do\n", 1387 | " print(classes[i], 100*class_performance[i]/1000 .. ' %')\n", 1388 | "end" 1389 | ] 1390 | }, 1391 | { 1392 | "cell_type": "markdown", 1393 | "metadata": {}, 1394 | "source": [ 1395 | "Okay, so what next? How do we run this neural network on GPUs?\n", 1396 | "\n", 1397 | "#### cunn: neural networks on GPUs using CUDA" 1398 | ] 1399 | }, 1400 | { 1401 | "cell_type": "code", 1402 | "execution_count": null, 1403 | "metadata": { 1404 | "collapsed": false 1405 | }, 1406 | "outputs": [], 1407 | "source": [ 1408 | "require 'cunn'" 1409 | ] 1410 | }, 1411 | { 1412 | "cell_type": "markdown", 1413 | "metadata": {}, 1414 | "source": [ 1415 | "The idea is pretty simple. 
Take a neural network, and transfer it over to GPU:" 1416 | ] 1417 | }, 1418 | { 1419 | "cell_type": "code", 1420 | "execution_count": null, 1421 | "metadata": { 1422 | "collapsed": false 1423 | }, 1424 | "outputs": [], 1425 | "source": [ 1426 | "net = net:cuda()" 1427 | ] 1428 | }, 1429 | { 1430 | "cell_type": "markdown", 1431 | "metadata": {}, 1432 | "source": [ 1433 | "Also, transfer the criterion to GPU:" 1434 | ] 1435 | }, 1436 | { 1437 | "cell_type": "code", 1438 | "execution_count": null, 1439 | "metadata": { 1440 | "collapsed": false 1441 | }, 1442 | "outputs": [], 1443 | "source": [ 1444 | "criterion = criterion:cuda()" 1445 | ] 1446 | }, 1447 | { 1448 | "cell_type": "markdown", 1449 | "metadata": {}, 1450 | "source": [ 1451 | "Ok, now the data:" 1452 | ] 1453 | }, 1454 | { 1455 | "cell_type": "code", 1456 | "execution_count": null, 1457 | "metadata": { 1458 | "collapsed": false 1459 | }, 1460 | "outputs": [], 1461 | "source": [ 1462 | "trainset.data = trainset.data:cuda()" 1463 | ] 1464 | }, 1465 | { 1466 | "cell_type": "markdown", 1467 | "metadata": {}, 1468 | "source": [ 1469 | "Okay, let's train on GPU :) #sosimple" 1470 | ] 1471 | }, 1472 | { 1473 | "cell_type": "code", 1474 | "execution_count": null, 1475 | "metadata": { 1476 | "collapsed": false 1477 | }, 1478 | "outputs": [], 1479 | "source": [ 1480 | "trainer = nn.StochasticGradient(net, criterion)\n", 1481 | "trainer.learningRate = 0.001\n", 1482 | "trainer.maxIteration = 5 -- just do 5 epochs of training." 1483 | ] 1484 | }, 1485 | { 1486 | "cell_type": "code", 1487 | "execution_count": null, 1488 | "metadata": { 1489 | "collapsed": false 1490 | }, 1491 | "outputs": [], 1492 | "source": [ 1493 | "trainer:train(trainset)" 1494 | ] 1495 | }, 1496 | { 1497 | "cell_type": "markdown", 1498 | "metadata": {}, 1499 | "source": [ 1500 | "Why dont we notice MASSIVE speedup compared to CPU?\n", 1501 | "Because your network is realllly small (and because my laptop sux). 
\n", 1502 | "\n", 1503 | "**Exercise:** Try increasing the size of the network (argument 1 and 2 of nn.SpatialConvolution(...), see what kind of speedup you get." 1504 | ] 1505 | }, 1506 | { 1507 | "cell_type": "markdown", 1508 | "metadata": {}, 1509 | "source": [ 1510 | "__Goals achieved:__\n", 1511 | " * Understand torch and the neural networks package at a high-level.\n", 1512 | " * Train a small neural network on CPU and GPU" 1513 | ] 1514 | }, 1515 | { 1516 | "cell_type": "markdown", 1517 | "metadata": {}, 1518 | "source": [ 1519 | "### Where do I go next?" 1520 | ] 1521 | }, 1522 | { 1523 | "cell_type": "markdown", 1524 | "metadata": {}, 1525 | "source": [ 1526 | "* Build crazy graphs of networks, without writing any graphs explicitly: https://github.com/twitter/autograd\n", 1527 | "* Train on imagenet with multiple GPUs: https://github.com/soumith/imagenet-multiGPU.torch\n", 1528 | "* Train recurrent networks with LSTM on text: https://github.com/wojzaremba/lstm\n", 1529 | "\n", 1530 | "* More demos and tutorials: https://github.com/torch/torch7/wiki/Cheatsheet\n", 1531 | "\n", 1532 | "* Chat with developers of Torch: http://gitter.im/torch/torch7\n", 1533 | "* Ask for help: http://groups.google.com/forum/#!forum/torch7" 1534 | ] 1535 | } 1536 | ], 1537 | "metadata": { 1538 | "kernelspec": { 1539 | "display_name": "iTorch", 1540 | "language": "lua", 1541 | "name": "itorch" 1542 | }, 1543 | "language_info": { 1544 | "name": "lua", 1545 | "version": "5.2" 1546 | } 1547 | }, 1548 | "nbformat": 4, 1549 | "nbformat_minor": 0 1550 | } 1551 | -------------------------------------------------------------------------------- /notebooks/Torch & Autograd Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Torch Basics" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | 
"collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "-- Scalar & tensor arithmetic\n", 19 | "A = torch.eye(3)\n", 20 | "b = 4\n", 21 | "c = 2\n", 22 | "print(A*b - c)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": false 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "-- Max\n", 34 | "print(torch.max(torch.FloatTensor{1,3,5}))" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "-- Clamp\n", 46 | "torch.clamp(torch.range(0,4),0,2)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "-- Matrix multiply\n", 58 | "A = torch.eye(3)\n", 59 | "B = torch.ones(3,1)*3\n", 60 | "print(A*B)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "-- Boolean fns\n", 72 | "A = torch.range(1,5)\n", 73 | "print(torch.le(A,3))" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "### Scientific Computing Basics" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "-- Special functions\n", 92 | "require 'cephes'\n", 93 | "print(cephes.gamma(0.5))" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "print(cephes.atan2(3,1))" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "-- Sampling from a distribution\n", 116 | "require 
'randomkit'\n", 117 | "a = torch.zeros(10000)\n", 118 | "randomkit.negative_binomial(a,9,0.3)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "Plot = require 'itorch.Plot'\n", 130 | "local p = Plot()\n", 131 | " :histogram(a,80,1,80)\n", 132 | " :title(\"Histogram of Draws From Negative Binomial\")\n", 133 | " :draw();" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "### Memory-layout" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "a = torch.DoubleTensor(4, 6) -- DoubleTensor, uninitialized memory\n", 152 | "a:uniform() -- fills \"a\" with uniform noise with mean=0, stdev=1" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "print(a)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "b = a:select(1, 3) -- Select from the 1st axis (rows), \n", 175 | " -- the 3rd set of entries" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": { 182 | "collapsed": false 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "print(b)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "b:fill(3);" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "print(b)" 209 | ] 210 | }, 211 | { 212 | 
"cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": false 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "print(a) -- Look at the 3rd row! It's been filled with 3." 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "### Autograd" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "collapsed": false 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "-- Arithmetic is no problem\n", 238 | "grad = require 'autograd'\n", 239 | "function f(a,b,c)\n", 240 | " return a + b * c\n", 241 | "end\n", 242 | "df = grad(f)\n", 243 | "da, val = df(3.5, 2.1, 1.1)\n", 244 | "print(\"Value: \"..val)\n", 245 | "print(\"Gradient: \"..da)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": { 252 | "collapsed": false 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "-- If statements are no problem\n", 257 | "grad = require 'autograd'\n", 258 | "function f(a,b,c)\n", 259 | " if b > c then\n", 260 | " return a * math.sin(b)\n", 261 | " else\n", 262 | " return a + b * c\n", 263 | " end\n", 264 | "end\n", 265 | "g = grad(f)\n", 266 | "da, val = g(3.5, 2.1, 1.1)\n", 267 | "print(\"Value: \"..val)\n", 268 | "print(\"Gradient: \"..da)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": false 276 | }, 277 | "outputs": [], 278 | "source": [ 279 | "-- Of course, works with tensors\n", 280 | "grad = require 'autograd'\n", 281 | "function f(a,b,c)\n", 282 | " if torch.sum(b) > torch.sum(c) then\n", 283 | " return torch.sum(torch.cmul(a,torch.sin(b)))\n", 284 | " else\n", 285 | " return torch.sum(a + torch.cmul(b,c))\n", 286 | " end\n", 287 | "end\n", 288 | "g = grad(f)\n", 289 | "a = torch.randn(3,3)\n", 290 | "b = torch.eye(3,3)\n", 291 | "c = torch.randn(3,3)\n", 292 | "da, val = g(a,b,c)\n", 293 | "print(\"Value: 
\"..val)\n", 294 | "print(\"Gradient: \")\n", 295 | "print(da)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "collapsed": false 303 | }, 304 | "outputs": [], 305 | "source": [ 306 | "-- Autograd for loop\n", 307 | "function f(a,b)\n", 308 | " for i=1,b do\n", 309 | " a = a*a\n", 310 | " end\n", 311 | " return a\n", 312 | "end\n", 313 | "g = grad(f)\n", 314 | "da, val = g(3,2)\n", 315 | "print(\"Value: \"..val)\n", 316 | "print(\"Gradient: \"..da)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "collapsed": false 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "-- Autograd recursive function\n", 328 | "function f(a,b)\n", 329 | " if b == 0 then\n", 330 | " return a\n", 331 | " else\n", 332 | " return f(a*a,b-1)\n", 333 | " end\n", 334 | "end\n", 335 | "g = grad(f)\n", 336 | "da, val = g(3,2)\n", 337 | "print(\"Value: \"..val)\n", 338 | "print(\"Gradient: \"..da)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": { 345 | "collapsed": false 346 | }, 347 | "outputs": [], 348 | "source": [ 349 | "-- New ops aren't a problem\n", 350 | "function f(a)\n", 351 | " return torch.sum(torch.floor(torch.pow(a,3)))\n", 352 | "end\n", 353 | "g = grad(f)\n", 354 | "da, val = g(torch.eye(3))\n", 355 | "print(\"Value: \"..val)\n", 356 | "print(\"Gradient:\")\n", 357 | "print(da)" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [ 368 | "-- New ops aren't a problem\n", 369 | "grad = require 'autograd'\n", 370 | "special = {}\n", 371 | "special.floor = function(x) return torch.floor(x) end\n", 372 | "-- Overload our new mini-module, called \"special\"\n", 373 | "grad.overload.module(\"special\",special,function(module)\n", 374 | " -- Define a gradient for the member function 
\"floor\"\n", 375 | " module.gradient(\"floor\", {\n", 376 | " -- Here's our new partial derivative\n", 377 | " -- (if we had two arguments, \n", 378 | " -- we'd define two functions)\n", 379 | " function(g,ans,x) \n", 380 | " return g\n", 381 | " end\n", 382 | " })\n", 383 | " end)" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": { 390 | "collapsed": false 391 | }, 392 | "outputs": [], 393 | "source": [ 394 | "function f(a)\n", 395 | " return torch.sum(special.floor(torch.pow(a,3)))\n", 396 | "end\n", 397 | "g = grad(f)\n", 398 | "da, val = g(torch.eye(3))\n", 399 | "print(\"Value: \"..val)\n", 400 | "print(\"Gradient:\")\n", 401 | "print(da)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": { 408 | "collapsed": false 409 | }, 410 | "outputs": [], 411 | "source": [ 412 | "function f(a,b)\n", 413 | " c = a * b\n", 414 | " if c > 0 then\n", 415 | " d = torch.log(c)\n", 416 | " else\n", 417 | " d = torch.sin(c)\n", 418 | " end\n", 419 | " return d\n", 420 | "end\n", 421 | "print(f(2,3))" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": { 428 | "collapsed": false 429 | }, 430 | "outputs": [], 431 | "source": [ 432 | "function f(a,b,c)\n", 433 | " if b > c then\n", 434 | " d = a * math.sin(b)\n", 435 | " else\n", 436 | " d = a + b * c\n", 437 | " end\n", 438 | " return d\n", 439 | "end\n", 440 | "print(f(3,2,1))" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "metadata": { 447 | "collapsed": false 448 | }, 449 | "outputs": [], 450 | "source": [ 451 | "grad = require 'autograd'\n", 452 | "g = grad(f)\n", 453 | "print(g(3,2,1))" 454 | ] 455 | }, 456 | { 457 | "cell_type": "raw", 458 | "metadata": { 459 | "collapsed": true 460 | }, 461 | "source": [ 462 | "-- Representation of \"Wengert list\" or \"program\" trace of the evaluation of g(3,2,1)\n", 463 | "a = 3\n", 464 | "\n", 
465 | "b = 2\n", 466 | "\n", 467 | "c = 1\n", 468 | "\n", 469 | "d = a * math.sin(b) = 2.728\n", 470 | "\n", 471 | "return 2.728" 472 | ] 473 | }, 474 | { 475 | "cell_type": "raw", 476 | "metadata": { 477 | "collapsed": true 478 | }, 479 | "source": [ 480 | "-- \"Forward mode\" augmentation of the above program trace, for calculation of dd/da\n", 481 | "a = 3\n", 482 | "da = 1\n", 483 | "b = 2\n", 484 | "db = 0\n", 485 | "c = 1\n", 486 | "dc = 0\n", 487 | "d = a * math.sin(b) = 2.728\n", 488 | "dd = math.sin(b) = 0.909\n", 489 | "return 0.909" 490 | ] 491 | }, 492 | { 493 | "cell_type": "raw", 494 | "metadata": { 495 | "collapsed": true 496 | }, 497 | "source": [ 498 | "-- \"Reverse mode\" augmentation of the above program trace, for calculation of dd/da\n", 499 | "a = 3\n", 500 | "b = 2\n", 501 | "c = 1\n", 502 | "d = a * math.sin(b) = 2.728\n", 503 | "dd = 1\n", 504 | "da = dd * math.sin(b) = 0.909\n", 505 | "return 0.909, 2.728" 506 | ] 507 | } 508 | ], 509 | "metadata": { 510 | "kernelspec": { 511 | "display_name": "iTorch", 512 | "language": "lua", 513 | "name": "itorch" 514 | }, 515 | "language_info": { 516 | "name": "lua", 517 | "version": "5.2" 518 | } 519 | }, 520 | "nbformat": 4, 521 | "nbformat_minor": 0 522 | } 523 | --------------------------------------------------------------------------------