├── .gitignore ├── 01_foundations ├── Code.ipynb └── math.ipynb ├── 02_fundamentals ├── Code.ipynb ├── __pycache__ │ └── helper.cpython-36.pyc └── math.ipynb ├── 03_dlfs └── Code.ipynb ├── 04_extensions ├── Code.ipynb └── Math.ipynb ├── 05_convolutions ├── Code.ipynb ├── Math.ipynb └── Numpy_Convolution_Demos.ipynb ├── 06_rnns ├── Autograd_Simple.ipynb ├── Math.ipynb ├── RNN_DLFS.ipynb └── input.txt ├── 07_PyTorch └── Code.ipynb ├── LICENSE ├── README.md └── lincoln ├── .gitignore ├── LICENSE ├── README.md ├── lincoln.png ├── lincoln ├── activations.py ├── base.py ├── conv.py ├── dense.py ├── layers.py ├── losses.py ├── network.py ├── optimizers.py ├── pytorch │ ├── layers.py │ ├── model.py │ ├── preprocessor.py │ ├── train.py │ └── utils.py ├── reshape.py ├── train.py └── utils │ ├── mnist.py │ └── np_utils.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__* 2 | *.pyc* 3 | *.ipynb_checkpoints* 4 | *.DS_Store* 5 | *.c 6 | *.so 7 | *.o 8 | *.txt 9 | 10 | *data/* 11 | *.pkl* 12 | 13 | *.pt 14 | *ubyte 15 | 16 | */utils/data* 17 | 18 | *.vscode -------------------------------------------------------------------------------- /01_foundations/math.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "$$ f'(x) $$ is a function\n", 8 | "\n", 9 | "$$ f'(a) $$ is a number when $f$ is a function of one variable" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Derivative math" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "$$ \\frac{df}{dx}(a) = \n", 24 | "\\lim_{\\Delta \\to 0} \\frac{{f \\left( {a + \\Delta } \\right) - f\\left( a - \\Delta \\right)}}{2 * \\Delta } $$\n", 25 | "\n", 26 | "$$ \\frac{df}{dx}\\bigr\\rvert_{x=a} = \n", 27 | "\\lim_{\\Delta \\to 0} \\frac{{f \\left( {a + \\Delta } \\right) - f\\left( a - \\Delta \\right)}}{2 * \\Delta } $$" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "$$ f_2(f_1(x)) = y $$\n", 35 | "\n", 36 | "$$ f_1(x) = u $$\n", 37 | "\n", 38 | "$$ \\frac{df_2}{dx}(x) = \\frac{df_2}{du}(f_1(x)) * \\frac{df_1}{dx}(x) $$" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "$$ \\frac{df_3}{dx}(x) = \\frac{df_3}{dv}(f_2(f_1(x))) * \\frac{df_2}{du}(f_1(x)) * \\frac{df_1}{dx}(x) $$" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "$$ \\frac{df}{dx}\\bigr\\rvert_{x=a} = \n", 53 | "\\lim_{\\Delta \\to 0} \\frac{{f \\left( {a + \\Delta } \\right) - f\\left( a - \\Delta \\right)}}{2 * \\Delta } $$" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "# Function with multiple inputs example" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "$$ f(x, y) = s$$\n", 68 | "\n", 69 | "$$ a = a(x, y) = x + y $$\n", 70 | "\n", 71 | "$$ s = \\sigma(a) $$" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "$$ f(x, y) = s(a(x, y)) $$\n", 79 | "\n", 80 | "$$ \\frac{\\partial f}{\\partial x} = \\frac{\\partial \\sigma}{\\partial u}(a(x, y)) * \\frac{\\partial a}{\\partial x}(x, y) \\\\ = \\frac{\\partial \\sigma}{\\partial u}(x + y) * \\frac{\\partial a}{\\partial x}(x, y)$$" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86
| "source": [ 87 | "# Matrix multiplication example" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "$$ X = \\begin{bmatrix}\n", 95 | "x_{11} & x_{12} & x_{13} \\\\\n", 96 | "x_{21} & x_{22} & x_{23} \\\\\n", 97 | "x_{31} & x_{32} & x_{33}\n", 98 | "\\end{bmatrix} $$\n", 99 | "\n", 100 | "$$ W = \\begin{bmatrix}\n", 101 | "w_{11} & w_{12} \\\\\n", 102 | "w_{21} & w_{22} \\\\\n", 103 | "w_{31} & w_{32} \\\\\n", 104 | "\\end{bmatrix} $$" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "$$ \\nu(X, W) = X * W = \\begin{bmatrix}\n", 112 | "x_{11} * w_{11} + x_{12} * w_{21} + x_{13} * w_{31} &\n", 113 | "x_{11} * w_{12} + x_{12} * w_{22} + x_{13} * w_{32}\n", 114 | "\\\\\n", 115 | "x_{21} * w_{11} + x_{22} * w_{21} + x_{23} * w_{31} &\n", 116 | "x_{21} * w_{12} + x_{22} * w_{22} + x_{23} * w_{32}\n", 117 | "\\\\\n", 118 | "x_{31} * w_{11} + x_{32} * w_{21} + x_{33} * w_{31} &\n", 119 | "x_{31} * w_{12} + x_{32} * w_{22} + x_{33} * w_{32}\n", 120 | "\\end{bmatrix} = \n", 121 | "\\begin{bmatrix}\n", 122 | "XW_{11} &\n", 123 | "XW_{12}\n", 124 | "\\\\\n", 125 | "XW_{21} &\n", 126 | "XW_{22}\n", 127 | "\\\\\n", 128 | "XW_{31} &\n", 129 | "XW_{32}\n", 130 | "\\end{bmatrix}\n", 131 | "$$" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "$$\n", 139 | "\\sigma(XW_{11}) = \\sigma(x_{11} * w_{11} + x_{12} * w_{21} + x_{13} * w_{31}) \\\\\n", 140 | "\\sigma(XW_{12}) = \\sigma(x_{11} * w_{12} + x_{12} * w_{22} + x_{13} * w_{32}) \\\\\n", 141 | "\\cdots\n", 142 | "$$" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "$$ \\sigma(X * W) = \\begin{bmatrix}\n", 150 | "\\sigma(x_{11} * w_{11} + x_{12} * w_{21} + x_{13} * w_{31}) &\n", 151 | "\\sigma(x_{11} * w_{12} + x_{12} * w_{22} + x_{13} * w_{32})\n", 152 | "\\\\\n", 153 | "\\sigma(x_{21} * w_{11} + x_{22} * w_{21} + x_{23} * w_{31}) &\n", 154 | "\\sigma(x_{21} * w_{12} + x_{22} * w_{22} + x_{23} * w_{32})\n", 155 | "\\\\\n", 156 | "\\sigma(x_{31} * w_{11} + x_{32} * w_{21} + x_{33} * w_{31}) &\n", 157 | "\\sigma(x_{31} * w_{12} + x_{32} * w_{22} + x_{33} * w_{32})\n", 158 | "\\end{bmatrix} = \n", 159 | "\\begin{bmatrix}\n", 160 | "\\sigma(XW_{11}) & \\sigma(XW_{12})\\\\\n", 161 | "\\sigma(XW_{21}) & \\sigma(XW_{22})\\\\\n", 162 | "\\sigma(XW_{31}) & \\sigma(XW_{32})\n", 163 | "\\end{bmatrix}\n", 164 | "$$" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "$$ L = \\Lambda(\\sigma(X * W)) = \\Lambda(\\begin{bmatrix}\n", 172 | "\\sigma(XW_{11}) & \\sigma(XW_{12})\\\\\n", 173 | "\\sigma(XW_{21}) & \\sigma(XW_{22})\\\\\n", 174 | "\\sigma(XW_{31}) & \\sigma(XW_{32})\n", 175 | "\\end{bmatrix}) = \\sigma(XW_{11}) + \\sigma(XW_{12}) + \\sigma(XW_{21}) + \\sigma(XW_{22}) + \\sigma(XW_{31}) + \\sigma(XW_{32})\n", 176 | "$$" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "$$ \\frac{\\partial \\Lambda}{\\partial u}(X) = \n", 184 | "\\begin{bmatrix}\n", 185 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{11}) & \n", 186 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{12}) & \n", 187 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{13}) \\\\\n", 188 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{21}) & \n", 189 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{22}) & \n", 190 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{23}) \\\\\n", 191 | "\\frac{\\partial 
\\Lambda}{\\partial u}(x_{31}) & \n", 192 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{32}) & \n", 193 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{33}) \n", 194 | "\\end{bmatrix} $$" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "$$ S = \\begin{bmatrix}\n", 202 | "s_{11} & s_{12} \\\\\n", 203 | "s_{21} & s_{22} \\\\\n", 204 | "s_{31} & s_{32} \\\\\n", 205 | "\\end{bmatrix} $$\n", 206 | "\n", 207 | "$$ \\frac{\\partial \\Lambda}{\\partial u}(S) = \\begin{bmatrix}\n", 208 | "1 & 1\\\\\n", 209 | "1 & 1\\\\\n", 210 | "1 & 1\n", 211 | "\\end{bmatrix}) $$" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "$$ \\frac{\\partial \\sigma}{\\partial u}(N) = \\begin{bmatrix}\n", 219 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) &\n", 220 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{12}) \\\\\n", 221 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{21}) &\n", 222 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{22}) \\\\\n", 223 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{31}) &\n", 224 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{32})\n", 225 | "\\end{bmatrix} $$" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "$ L = \\Lambda(\\sigma(\\nu(X, W))) $" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "$ \\frac{\\partial \\Lambda}{\\partial u}(X) = \n", 240 | "\\frac{\\partial \\nu}{\\partial X}(X, W) *\n", 241 | "\\frac{\\partial \\sigma}{\\partial u}(N) *\n", 242 | "\\frac{\\partial \\Lambda}{\\partial u}(S) $" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "$$ \\frac{\\partial \\Lambda}{\\partial u}(N) = \\frac{\\partial \\Lambda}{\\partial u}(N) $$" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "$$ \\frac{\\partial \\Lambda}{\\partial X}(X) = \n", 257 | "\\frac{\\partial \\Lambda}{\\partial u}(S) * ? = \n", 258 | "\\begin{bmatrix}\n", 259 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) &\n", 260 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{12}) \\\\\n", 261 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{21}) &\n", 262 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{22}) \\\\\n", 263 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{31}) &\n", 264 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{32})\n", 265 | "\\end{bmatrix} * ? 
$$" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "$$ \\sigma(XW_{11}) = \\sigma(x_{11} * w_{11} + x_{12} * w_{21} + x_{13} * w_{31}) $$" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "$$ \\frac{\\partial \\Lambda}{\\partial X}(X) = \n", 280 | "\\begin{bmatrix}\n", 281 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{11}) & \n", 282 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{12}) & \n", 283 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{13}) \\\\\n", 284 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{21}) & \n", 285 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{22}) & \n", 286 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{23}) \\\\\n", 287 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{31}) & \n", 288 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{32}) & \n", 289 | "\\frac{\\partial \\Lambda}{\\partial u}(x_{33}) \n", 290 | "\\end{bmatrix} $$" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "$$ \\sigma(XW_{11}) = \\sigma(x_{11} * w_{11} + x_{12} * w_{21} + x_{13} * w_{31}) $$" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "$$ \\frac{\\partial \\sigma(XW_{11})}{\\partial X} = \\begin{bmatrix}\n", 305 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) * w_{11} & \n", 306 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) * w_{21} & \n", 307 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) * w_{31} \\\\\n", 308 | "0 &\n", 309 | "0 & \n", 310 | "0 \\\\\n", 311 | "0 & \n", 312 | "0 & \n", 313 | "0 \n", 314 | "\\end{bmatrix} $$" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "$$ \\sigma(XW_{32}) = \\sigma(x_{31} * w_{12} + x_{32} * w_{22} + x_{33} * w_{32}) $$" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "$$ \\frac{\\partial \\sigma(XW_{32})}{\\partial X} = \\begin{bmatrix}\n", 329 | "0 & \n", 330 | "0 & \n", 331 | "0 \\\\\n", 332 | "0 &\n", 333 | "0 & \n", 334 | "0 \\\\\n", 335 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{32}) * w_{12} & \n", 336 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{32}) * w_{22} & \n", 337 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{32}) * w_{32} \n", 338 | "\\end{bmatrix} $$" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "# Derivative calculation for matrix multiplication example" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "This calculation is in the appendix of the book; it may be easier to follow here than it is to follow there." 
353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": {}, 358 | "source": [ 359 | "$$ \\frac{\\partial \\Lambda}{\\partial X}(S) = \n", 360 | "\\frac{\\partial \\sigma(XW_{11})}{\\partial X} + \n", 361 | "\\frac{\\partial \\sigma(XW_{12})}{\\partial X} + \n", 362 | "\\frac{\\partial \\sigma(XW_{21})}{\\partial X} + \n", 363 | "\\frac{\\partial \\sigma(XW_{22})}{\\partial X} + \n", 364 | "\\frac{\\partial \\sigma(XW_{31})}{\\partial X} + \n", 365 | "\\frac{\\partial \\sigma(XW_{32})}{\\partial X} = \n", 366 | "\\begin{bmatrix}\n", 367 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) * w_{11} & \n", 368 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) * w_{21} & \n", 369 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) * w_{31} \\\\\n", 370 | "0 &\n", 371 | "0 & \n", 372 | "0 \\\\\n", 373 | "0 & \n", 374 | "0 & \n", 375 | "0 \\end{bmatrix} +\n", 376 | "\\begin{bmatrix}\n", 377 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{12}) * w_{12} & \n", 378 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{12}) * w_{22} & \n", 379 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{12}) * w_{32} \\\\\n", 380 | "0 &\n", 381 | "0 & \n", 382 | "0 \\\\\n", 383 | "0 & \n", 384 | "0 & \n", 385 | "0 \\end{bmatrix} + \n", 386 | "\\begin{bmatrix}\n", 387 | "0 & \n", 388 | "0 & \n", 389 | "0 \\\\\n", 390 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{21}) * w_{11} &\n", 391 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{21}) * w_{21} & \n", 392 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{21}) * w_{31} \\\\\n", 393 | "0 & \n", 394 | "0 & \n", 395 | "0 \\end{bmatrix} + \n", 396 | "\\begin{bmatrix}\n", 397 | "0 & \n", 398 | "0 & \n", 399 | "0 \\\\\n", 400 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{22}) * w_{12} &\n", 401 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{22}) * w_{22} & \n", 402 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{22}) * w_{32} \\\\\n", 403 | "0 & \n", 404 | "0 & \n", 405 | "0 \\end{bmatrix} +\n", 406 | "\\begin{bmatrix}\n", 407 | "0 & \n", 408 | "0 &\n", 409 | "0 \\\\\n", 410 | "0 &\n", 411 | "0 & \n", 412 | "0 \\\\\n", 413 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{31}) * w_{11} &\n", 414 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{31}) * w_{21} & \n", 415 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{31}) * w_{31} \\end{bmatrix} +\n", 416 | "\\begin{bmatrix}\n", 417 | "0 &\n", 418 | "0 &\n", 419 | "0 \\\\\n", 420 | "0 &\n", 421 | "0 & \n", 422 | "0 \\\\\n", 423 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{32}) * w_{12} & \n", 424 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{32}) * w_{22} & \n", 425 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{32}) * w_{32} \\end{bmatrix}\n", 426 | "$$" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "$$ \\frac{\\partial \\Lambda}{\\partial X}(S) = \n", 434 | "\\begin{bmatrix}\n", 435 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) * w_{11} + \\frac{\\partial \\sigma}{\\partial u}(XW_{12}) * w_{12} & \n", 436 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) * w_{21} + \\frac{\\partial \\sigma}{\\partial u}(XW_{12}) * w_{22} & \n", 437 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) * w_{31} + \\frac{\\partial \\sigma}{\\partial u}(XW_{12}) * w_{32} \\\\ \n", 438 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{21}) * w_{11} + \\frac{\\partial \\sigma}{\\partial u}(XW_{22}) * w_{12} & \n", 439 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{21}) * w_{21} + \\frac{\\partial \\sigma}{\\partial u}(XW_{22}) * w_{22} & \n", 440 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{21}) * 
w_{31} + \\frac{\\partial \\sigma}{\\partial u}(XW_{22}) * w_{32} \\\\ \n", 441 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{31}) * w_{11} + \\frac{\\partial \\sigma}{\\partial u}(XW_{32}) * w_{12} & \n", 442 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{31}) * w_{21} + \\frac{\\partial \\sigma}{\\partial u}(XW_{32}) * w_{22} & \n", 443 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{31}) * w_{31} + \\frac{\\partial \\sigma}{\\partial u}(XW_{32}) * w_{32} \\end{bmatrix} \n", 444 | "$$" 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": {}, 450 | "source": [ 451 | "$$ W = \\begin{bmatrix}\n", 452 | "w_{11} & w_{12} \\\\\n", 453 | "w_{21} & w_{22} \\\\\n", 454 | "w_{31} & w_{32} \\end{bmatrix} $$" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": {}, 460 | "source": [ 461 | "$$ \\frac{\\partial \\Lambda}{\\partial X}(X) = \n", 462 | "\\frac{\\partial \\Lambda}{\\partial u}(N) * \\frac{\\partial \\nu}{\\partial X}(X, W) = \n", 463 | "\\frac{\\partial \\Lambda}{\\partial u}(N) * W^T\n", 464 | "$$" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "$$ \n", 472 | "\\frac{\\partial \\Lambda}{\\partial u}(N) = \n", 473 | "\\begin{bmatrix}\n", 474 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) &\n", 475 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{12}) \\\\\n", 476 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{21}) &\n", 477 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{22}) \\\\\n", 478 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{31}) &\n", 479 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{32})\n", 480 | "\\end{bmatrix}\n", 481 | "$$" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "$$ \\frac{\\partial \\Lambda}{\\partial X}(X) = \n", 489 | "\\begin{bmatrix}\n", 490 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{11}) &\n", 491 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{12}) \\\\\n", 492 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{21}) &\n", 493 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{22}) \\\\\n", 494 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{31}) &\n", 495 | "\\frac{\\partial \\sigma}{\\partial u}(XW_{32})\n", 496 | "\\end{bmatrix} * \n", 497 | "\\begin{bmatrix}\n", 498 | "w_{11} & w_{21} & w_{31} \\\\\n", 499 | "w_{12} & w_{22} & w_{32} \\\\\n", 500 | "\\end{bmatrix} = \\frac{\\partial \\Lambda}{\\partial u}(N) * W^T\n", 501 | "$$" 502 | ] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "metadata": {}, 507 | "source": [ 508 | "Like meat off the bone!"
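The closed form above is easy to verify numerically. A minimal sketch (added here; the sum `L` plays the role of $\Lambda$ and `sigma` is the logistic function, neither taken from the book's code):

```python
import numpy as np

np.random.seed(0)
X = np.random.randn(3, 3)
W = np.random.randn(3, 2)
sigma = lambda u: 1.0 / (1.0 + np.exp(-u))

L = lambda X: sigma(X @ W).sum()  # Lambda(sigma(nu(X, W)))

# analytic gradient: elementwise sigma'(N), then matrix-multiply by W^T
N = X @ W
analytic = (sigma(N) * (1 - sigma(N))) @ W.T

# central-difference estimate, one entry of X at a time
eps, numeric = 1e-6, np.zeros_like(X)
for i in range(3):
    for j in range(3):
        Xp, Xm = X.copy(), X.copy()
        Xp[i, j] += eps
        Xm[i, j] -= eps
        numeric[i, j] = (L(Xp) - L(Xm)) / (2 * eps)

assert np.allclose(analytic, numeric, atol=1e-6)
```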
509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "$$ \\frac{\\partial \\sigma}{\\partial X}(X, W) = \\frac{\\partial \\sigma}{\\partial u}(N) * W^T $$ \n", 516 | "\n", 517 | "$$ \\frac{\\partial \\sigma}{\\partial W}(X, W) = X^T * \\frac{\\partial \\sigma}{\\partial u}(N) $$ " 518 | ] 519 | } 520 | ], 521 | "metadata": { 522 | "kernelspec": { 523 | "display_name": "Python 3", 524 | "language": "python", 525 | "name": "python3" 526 | }, 527 | "language_info": { 528 | "codemirror_mode": { 529 | "name": "ipython", 530 | "version": 3 531 | }, 532 | "file_extension": ".py", 533 | "mimetype": "text/x-python", 534 | "name": "python", 535 | "nbconvert_exporter": "python", 536 | "pygments_lexer": "ipython3", 537 | "version": "3.7.4" 538 | } 539 | }, 540 | "nbformat": 4, 541 | "nbformat_minor": 2 542 | } 543 | -------------------------------------------------------------------------------- /02_fundamentals/__pycache__/helper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SethHWeidman/DLFS_code/f4ec4de43049ef990d0f4ddece81223cef3a0e91/02_fundamentals/__pycache__/helper.cpython-36.pyc -------------------------------------------------------------------------------- /02_fundamentals/math.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Forward matrix multiplication math for linear regression" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "$$ p_{batch} = X_{batch} * W = \\begin{bmatrix}\n", 15 | "x_{11} & x_{12} & x_{13} & \\ldots & x_{1k} \\\\\n", 16 | "x_{21} & x_{22} & x_{23} & \\ldots & x_{2k} \\\\\n", 17 | "x_{31} & x_{32} & x_{33} & \\ldots & x_{3k}\n", 18 | "\\end{bmatrix} * \\begin{bmatrix} w_1 \\\\ w_2 \\\\ w_3 \\\\ \\vdots \\\\ w_k \\end{bmatrix}\n", 19 | "=\n", 20 | "\\begin{bmatrix}\n", 21 | "x_{11} * w_1 + x_{12} * w_2 + x_{13} * w_3 + \\ldots + & x_{1k} * w_k \\\\\n", 22 | "x_{21} * w_1 + x_{22} * w_2 + x_{23} * w_3 + \\ldots + & x_{2k} * w_k \\\\\n", 23 | "x_{31} * w_1 + x_{32} * w_2 + x_{33} * w_3 + \\ldots + & x_{3k} * w_k\n", 24 | "\\end{bmatrix}\n", 25 | "=\n", 26 | "\\begin{bmatrix}\n", 27 | "p_1 \\\\\n", 28 | "p_2 \\\\\n", 29 | "p_3\n", 30 | "\\end{bmatrix} $$" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "$$ p_{batch\\_with\\_bias} = \n", 38 | "\\begin{bmatrix}\n", 39 | "x_{11} * w_1 + x_{12} * w_2 + x_{13} * w_3 + \\ldots + & x_{1k} * w_k + b \\\\\n", 40 | "x_{21} * w_1 + x_{22} * w_2 + x_{23} * w_3 + \\ldots + & x_{2k} * w_k + b\\\\\n", 41 | "x_{31} * w_1 + x_{32} * w_2 + x_{33} * w_3 + \\ldots + & x_{3k} * w_k + b\n", 42 | "\\end{bmatrix}\n", 43 | "=\n", 44 | "\\begin{bmatrix}\n", 45 | "p_1 \\\\\n", 46 | "p_2 \\\\\n", 47 | "p_3\n", 48 | "\\end{bmatrix} $$" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "$$ MSE(p_{batch}, y_{batch}) = MSE(\\begin{bmatrix}\n", 56 | "p_1 \\\\\n", 57 | "p_2 \\\\\n", 58 | "p_3\n", 59 | "\\end{bmatrix}, \\begin{bmatrix}\n", 60 | "y_1 \\\\\n", 61 | "y_2 \\\\\n", 62 | "y_3\n", 63 | "\\end{bmatrix}) = \\frac{(y_1 - p_1)^2 + (y_2 - p_2)^2 + (y_3 - p_3)^2}{3} $$" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "$$ p_i = x_i * W + b = w_1 * x_{i1} + w_2 * x_{i2} + \\ldots + w_k * x_{ik} + b $$" 71 | 
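These formulas translate directly into NumPy. A small added sketch (the shapes here are illustrative, not from the book): a batch of 3 observations with $k = 4$ features:

```python
import numpy as np

np.random.seed(0)
X_batch = np.random.randn(3, 4)   # 3 observations, k = 4 features
W = np.random.randn(4, 1)         # one weight per feature
b = 0.5                           # bias, added to every observation
y_batch = np.random.randn(3, 1)

p_batch = X_batch @ W + b                # predictions p_1, p_2, p_3
mse = np.mean((y_batch - p_batch) ** 2)  # mean squared error over the batch
```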
] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# Sigmoid derivative" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "$$ \\frac{\\partial \\sigma}{\\partial u}(x) = \\sigma(x) * (1 - \\sigma(x)) $$ " 85 | ] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "Python 3", 91 | "language": "python", 92 | "name": "python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.7.4" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 2 109 | } 110 | -------------------------------------------------------------------------------- /03_dlfs/Code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "from numpy import ndarray\n", 11 | "\n", 12 | "from typing import List" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def assert_same_shape(array: ndarray,\n", 22 | " array_grad: ndarray):\n", 23 | " assert array.shape == array_grad.shape, \\\n", 24 | " '''\n", 25 | " Two ndarrays should have the same shape;\n", 26 | " instead, first ndarray's shape is {0}\n", 27 | " and second ndarray's shape is {1}.\n", 28 | " '''.format(tuple(array.shape), tuple(array_grad.shape))\n", 29 | " return None" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "# `Operation` and `ParamOperation`" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "class Operation(object):\n", 46 | " '''\n", 47 | " Base class for an \"operation\" in a neural network.\n", 48 | " '''\n", 49 | " def __init__(self):\n", 50 | " pass\n", 51 | "\n", 52 | " def forward(self, input_: ndarray):\n", 53 | " '''\n", 54 | " Stores input in the self.input_ instance variable\n", 55 | " Calls the self._output() function.\n", 56 | " '''\n", 57 | " self.input_ = input_\n", 58 | "\n", 59 | " self.output = self._output()\n", 60 | "\n", 61 | " return self.output\n", 62 | "\n", 63 | "\n", 64 | " def backward(self, output_grad: ndarray) -> ndarray:\n", 65 | " '''\n", 66 | " Calls the self._input_grad() function.\n", 67 | " Checks that the appropriate shapes match.\n", 68 | " '''\n", 69 | " assert_same_shape(self.output, output_grad)\n", 70 | "\n", 71 | " self.input_grad = self._input_grad(output_grad)\n", 72 | "\n", 73 | " assert_same_shape(self.input_, self.input_grad)\n", 74 | " return self.input_grad\n", 75 | "\n", 76 | "\n", 77 | " def _output(self) -> ndarray:\n", 78 | " '''\n", 79 | " The _output method must be defined for each Operation\n", 80 | " '''\n", 81 | " raise NotImplementedError()\n", 82 | "\n", 83 | "\n", 84 | " def _input_grad(self, output_grad: ndarray) -> ndarray:\n", 85 | " '''\n", 86 | " The _input_grad method must be defined for each Operation\n", 87 | " '''\n", 88 | " raise NotImplementedError()" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "class ParamOperation(Operation):\n",
98 | " '''\n", 99 | " An Operation with parameters.\n", 100 | " '''\n", 101 | "\n", 102 | " def __init__(self, param: ndarray) -> ndarray:\n", 103 | " '''\n", 104 | " The ParamOperation method\n", 105 | " '''\n", 106 | " super().__init__()\n", 107 | " self.param = param\n", 108 | "\n", 109 | " def backward(self, output_grad: ndarray) -> ndarray:\n", 110 | " '''\n", 111 | " Calls self._input_grad and self._param_grad.\n", 112 | " Checks appropriate shapes.\n", 113 | " '''\n", 114 | "\n", 115 | " assert_same_shape(self.output, output_grad)\n", 116 | "\n", 117 | " self.input_grad = self._input_grad(output_grad)\n", 118 | " self.param_grad = self._param_grad(output_grad)\n", 119 | "\n", 120 | " assert_same_shape(self.input_, self.input_grad)\n", 121 | " assert_same_shape(self.param, self.param_grad)\n", 122 | "\n", 123 | " return self.input_grad\n", 124 | "\n", 125 | " def _param_grad(self, output_grad: ndarray) -> ndarray:\n", 126 | " '''\n", 127 | " Every subclass of ParamOperation must implement _param_grad.\n", 128 | " '''\n", 129 | " raise NotImplementedError()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## Specific `Operation`s" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 5, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "class WeightMultiply(ParamOperation):\n", 146 | " '''\n", 147 | " Weight multiplication operation for a neural network.\n", 148 | " '''\n", 149 | "\n", 150 | " def __init__(self, W: ndarray):\n", 151 | " '''\n", 152 | " Initialize Operation with self.param = W.\n", 153 | " '''\n", 154 | " super().__init__(W)\n", 155 | "\n", 156 | " def _output(self) -> ndarray:\n", 157 | " '''\n", 158 | " Compute output.\n", 159 | " '''\n", 160 | " return np.dot(self.input_, self.param)\n", 161 | "\n", 162 | " def _input_grad(self, output_grad: ndarray) -> ndarray:\n", 163 | " '''\n", 164 | " Compute input gradient.\n", 165 | " '''\n", 166 | " return np.dot(output_grad, np.transpose(self.param, (1, 0)))\n", 167 | "\n", 168 | " def _param_grad(self, output_grad: ndarray) -> ndarray:\n", 169 | " '''\n", 170 | " Compute parameter gradient.\n", 171 | " ''' \n", 172 | " return np.dot(np.transpose(self.input_, (1, 0)), output_grad)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 6, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "class BiasAdd(ParamOperation):\n", 182 | " '''\n", 183 | " Compute bias addition.\n", 184 | " '''\n", 185 | "\n", 186 | " def __init__(self,\n", 187 | " B: ndarray):\n", 188 | " '''\n", 189 | " Initialize Operation with self.param = B.\n", 190 | " Check appropriate shape.\n", 191 | " '''\n", 192 | " assert B.shape[0] == 1\n", 193 | " \n", 194 | " super().__init__(B)\n", 195 | "\n", 196 | " def _output(self) -> ndarray:\n", 197 | " '''\n", 198 | " Compute output.\n", 199 | " '''\n", 200 | " return self.input_ + self.param\n", 201 | "\n", 202 | " def _input_grad(self, output_grad: ndarray) -> ndarray:\n", 203 | " '''\n", 204 | " Compute input gradient.\n", 205 | " '''\n", 206 | " return np.ones_like(self.input_) * output_grad\n", 207 | "\n", 208 | " def _param_grad(self, output_grad: ndarray) -> ndarray:\n", 209 | " '''\n", 210 | " Compute parameter gradient.\n", 211 | " '''\n", 212 | " param_grad = np.ones_like(self.param) * output_grad\n", 213 | " return np.sum(param_grad, axis=0).reshape(1, param_grad.shape[1])" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 7, 219 | 
"metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "class Sigmoid(Operation):\n", 223 | " '''\n", 224 | " Sigmoid activation function.\n", 225 | " '''\n", 226 | "\n", 227 | " def __init__(self) -> None:\n", 228 | " '''Pass'''\n", 229 | " super().__init__()\n", 230 | "\n", 231 | " def _output(self) -> ndarray:\n", 232 | " '''\n", 233 | " Compute output.\n", 234 | " '''\n", 235 | " return 1.0/(1.0+np.exp(-1.0 * self.input_))\n", 236 | "\n", 237 | " def _input_grad(self, output_grad: ndarray) -> ndarray:\n", 238 | " '''\n", 239 | " Compute input gradient.\n", 240 | " '''\n", 241 | " sigmoid_backward = self.output * (1.0 - self.output)\n", 242 | " input_grad = sigmoid_backward * output_grad\n", 243 | " return input_grad" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 8, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "class Linear(Operation):\n", 253 | " '''\n", 254 | " \"Identity\" activation function\n", 255 | " '''\n", 256 | "\n", 257 | " def __init__(self) -> None:\n", 258 | " '''Pass''' \n", 259 | " super().__init__()\n", 260 | "\n", 261 | " def _output(self) -> ndarray:\n", 262 | " '''Pass through'''\n", 263 | " return self.input_\n", 264 | "\n", 265 | " def _input_grad(self, output_grad: ndarray) -> ndarray:\n", 266 | " '''Pass through'''\n", 267 | " return output_grad" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "# `Layer` and `Dense`" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 9, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "class Layer(object):\n", 284 | " '''\n", 285 | " A \"layer\" of neurons in a neural network.\n", 286 | " '''\n", 287 | "\n", 288 | " def __init__(self,\n", 289 | " neurons: int):\n", 290 | " '''\n", 291 | " The number of \"neurons\" roughly corresponds to the \"breadth\" of the layer\n", 292 | " '''\n", 293 | " self.neurons = neurons\n", 294 | " self.first = True\n", 295 | " self.params: List[ndarray] = []\n", 296 | " self.param_grads: List[ndarray] = []\n", 297 | " self.operations: List[Operation] = []\n", 298 | "\n", 299 | " def _setup_layer(self, num_in: int) -> None:\n", 300 | " '''\n", 301 | " The _setup_layer function must be implemented for each layer\n", 302 | " '''\n", 303 | " raise NotImplementedError()\n", 304 | "\n", 305 | " def forward(self, input_: ndarray) -> ndarray:\n", 306 | " '''\n", 307 | " Passes input forward through a series of operations\n", 308 | " ''' \n", 309 | " if self.first:\n", 310 | " self._setup_layer(input_)\n", 311 | " self.first = False\n", 312 | "\n", 313 | " self.input_ = input_\n", 314 | "\n", 315 | " for operation in self.operations:\n", 316 | "\n", 317 | " input_ = operation.forward(input_)\n", 318 | "\n", 319 | " self.output = input_\n", 320 | "\n", 321 | " return self.output\n", 322 | "\n", 323 | " def backward(self, output_grad: ndarray) -> ndarray:\n", 324 | " '''\n", 325 | " Passes output_grad backward through a series of operations\n", 326 | " Checks appropriate shapes\n", 327 | " '''\n", 328 | "\n", 329 | " assert_same_shape(self.output, output_grad)\n", 330 | "\n", 331 | " for operation in reversed(self.operations):\n", 332 | " output_grad = operation.backward(output_grad)\n", 333 | "\n", 334 | " input_grad = output_grad\n", 335 | " \n", 336 | " self._param_grads()\n", 337 | "\n", 338 | " return input_grad\n", 339 | "\n", 340 | " def _param_grads(self) -> ndarray:\n", 341 | " '''\n", 342 | " Extracts the _param_grads from a layer's operations\n", 
343 | " '''\n", 344 | "\n", 345 | " self.param_grads = []\n", 346 | " for operation in self.operations:\n", 347 | " if issubclass(operation.__class__, ParamOperation):\n", 348 | " self.param_grads.append(operation.param_grad)\n", 349 | "\n", 350 | " def _params(self) -> ndarray:\n", 351 | " '''\n", 352 | " Extracts the _params from a layer's operations\n", 353 | " '''\n", 354 | "\n", 355 | " self.params = []\n", 356 | " for operation in self.operations:\n", 357 | " if issubclass(operation.__class__, ParamOperation):\n", 358 | " self.params.append(operation.param)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 10, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "class Dense(Layer):\n", 368 | " '''\n", 369 | " A fully connected layer which inherits from \"Layer\"\n", 370 | " '''\n", 371 | " def __init__(self,\n", 372 | " neurons: int,\n", 373 | " activation: Operation = Sigmoid()):\n", 374 | " '''\n", 375 | " Requires an activation function upon initialization\n", 376 | " '''\n", 377 | " super().__init__(neurons)\n", 378 | " self.activation = activation\n", 379 | "\n", 380 | " def _setup_layer(self, input_: ndarray) -> None:\n", 381 | " '''\n", 382 | " Defines the operations of a fully connected layer.\n", 383 | " '''\n", 384 | " if self.seed:\n", 385 | " np.random.seed(self.seed)\n", 386 | "\n", 387 | " self.params = []\n", 388 | "\n", 389 | " # weights\n", 390 | " self.params.append(np.random.randn(input_.shape[1], self.neurons))\n", 391 | "\n", 392 | " # bias\n", 393 | " self.params.append(np.random.randn(1, self.neurons))\n", 394 | "\n", 395 | " self.operations = [WeightMultiply(self.params[0]),\n", 396 | " BiasAdd(self.params[1]),\n", 397 | " self.activation]\n", 398 | "\n", 399 | " return None" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "# `Loss` and `MeanSquaredError`" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 11, 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [ 415 | "class Loss(object):\n", 416 | " '''\n", 417 | " The \"loss\" of a neural network\n", 418 | " '''\n", 419 | "\n", 420 | " def __init__(self):\n", 421 | " '''Pass'''\n", 422 | " pass\n", 423 | "\n", 424 | " def forward(self, prediction: ndarray, target: ndarray) -> float:\n", 425 | " '''\n", 426 | " Computes the actual loss value\n", 427 | " '''\n", 428 | " assert_same_shape(prediction, target)\n", 429 | "\n", 430 | " self.prediction = prediction\n", 431 | " self.target = target\n", 432 | "\n", 433 | " loss_value = self._output()\n", 434 | "\n", 435 | " return loss_value\n", 436 | "\n", 437 | " def backward(self) -> ndarray:\n", 438 | " '''\n", 439 | " Computes gradient of the loss value with respect to the input to the loss function\n", 440 | " '''\n", 441 | " self.input_grad = self._input_grad()\n", 442 | "\n", 443 | " assert_same_shape(self.prediction, self.input_grad)\n", 444 | "\n", 445 | " return self.input_grad\n", 446 | "\n", 447 | " def _output(self) -> float:\n", 448 | " '''\n", 449 | " Every subclass of \"Loss\" must implement the _output function.\n", 450 | " '''\n", 451 | " raise NotImplementedError()\n", 452 | "\n", 453 | " def _input_grad(self) -> ndarray:\n", 454 | " '''\n", 455 | " Every subclass of \"Loss\" must implement the _input_grad function.\n", 456 | " '''\n", 457 | " raise NotImplementedError()" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 12, 463 | "metadata": {}, 464 | "outputs": [], 465 | "source": [ 466 | 
"class MeanSquaredError(Loss):\n", 467 | "\n", 468 | " def __init__(self) -> None:\n", 469 | " '''Pass'''\n", 470 | " super().__init__()\n", 471 | "\n", 472 | " def _output(self) -> float:\n", 473 | " '''\n", 474 | " Computes the per-observation squared error loss\n", 475 | " '''\n", 476 | " loss = (\n", 477 | " np.sum(np.power(self.prediction - self.target, 2)) / \n", 478 | " self.prediction.shape[0]\n", 479 | " )\n", 480 | "\n", 481 | " return loss\n", 482 | "\n", 483 | " def _input_grad(self) -> ndarray:\n", 484 | " '''\n", 485 | " Computes the loss gradient with respect to the input for MSE loss\n", 486 | " ''' \n", 487 | "\n", 488 | " return 2.0 * (self.prediction - self.target) / self.prediction.shape[0]" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "# `NeuralNetwork`" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 13, 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [ 504 | "class NeuralNetwork(object):\n", 505 | " '''\n", 506 | " The class for a neural network.\n", 507 | " '''\n", 508 | " def __init__(self, \n", 509 | " layers: List[Layer],\n", 510 | " loss: Loss,\n", 511 | " seed: int = 1) -> None:\n", 512 | " '''\n", 513 | " Neural networks need layers, and a loss.\n", 514 | " '''\n", 515 | " self.layers = layers\n", 516 | " self.loss = loss\n", 517 | " self.seed = seed\n", 518 | " if seed:\n", 519 | " for layer in self.layers:\n", 520 | " setattr(layer, \"seed\", self.seed) \n", 521 | "\n", 522 | " def forward(self, x_batch: ndarray) -> ndarray:\n", 523 | " '''\n", 524 | " Passes data forward through a series of layers.\n", 525 | " '''\n", 526 | " x_out = x_batch\n", 527 | " for layer in self.layers:\n", 528 | " x_out = layer.forward(x_out)\n", 529 | "\n", 530 | " return x_out\n", 531 | "\n", 532 | " def backward(self, loss_grad: ndarray) -> None:\n", 533 | " '''\n", 534 | " Passes data backward through a series of layers.\n", 535 | " '''\n", 536 | "\n", 537 | " grad = loss_grad\n", 538 | " for layer in reversed(self.layers):\n", 539 | " grad = layer.backward(grad)\n", 540 | "\n", 541 | " return None\n", 542 | "\n", 543 | " def train_batch(self,\n", 544 | " x_batch: ndarray,\n", 545 | " y_batch: ndarray) -> float:\n", 546 | " '''\n", 547 | " Passes data forward through the layers.\n", 548 | " Computes the loss.\n", 549 | " Passes data backward through the layers.\n", 550 | " '''\n", 551 | " \n", 552 | " predictions = self.forward(x_batch)\n", 553 | "\n", 554 | " loss = self.loss.forward(predictions, y_batch)\n", 555 | "\n", 556 | " self.backward(self.loss.backward())\n", 557 | "\n", 558 | " return loss\n", 559 | " \n", 560 | " def params(self):\n", 561 | " '''\n", 562 | " Gets the parameters for the network.\n", 563 | " '''\n", 564 | " for layer in self.layers:\n", 565 | " yield from layer.params\n", 566 | "\n", 567 | " def param_grads(self):\n", 568 | " '''\n", 569 | " Gets the gradient of the loss with respect to the parameters for the network.\n", 570 | " '''\n", 571 | " for layer in self.layers:\n", 572 | " yield from layer.param_grads " 573 | ] 574 | }, 575 | { 576 | "cell_type": "markdown", 577 | "metadata": {}, 578 | "source": [ 579 | "# `Optimizer` and `SGD`" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 14, 585 | "metadata": {}, 586 | "outputs": [], 587 | "source": [ 588 | "class Optimizer(object):\n", 589 | " '''\n", 590 | " Base class for a neural network optimizer.\n", 591 | " '''\n", 592 | " def __init__(self,\n", 593 | " lr: float = 
0.01):\n", 594 | " '''\n", 595 | " Every optimizer must have an initial learning rate.\n", 596 | " '''\n", 597 | " self.lr = lr\n", 598 | "\n", 599 | " def step(self) -> None:\n", 600 | " '''\n", 601 | " Every optimizer must implement the \"step\" function.\n", 602 | " '''\n", 603 | " pass" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 15, 609 | "metadata": {}, 610 | "outputs": [], 611 | "source": [ 612 | "class SGD(Optimizer):\n", 613 | " '''\n", 614 | " Stochasitc gradient descent optimizer.\n", 615 | " ''' \n", 616 | " def __init__(self,\n", 617 | " lr: float = 0.01) -> None:\n", 618 | " '''Pass'''\n", 619 | " super().__init__(lr)\n", 620 | "\n", 621 | " def step(self):\n", 622 | " '''\n", 623 | " For each parameter, adjust in the appropriate direction, with the magnitude of the adjustment \n", 624 | " based on the learning rate.\n", 625 | " '''\n", 626 | " for (param, param_grad) in zip(self.net.params(),\n", 627 | " self.net.param_grads()):\n", 628 | "\n", 629 | " param -= self.lr * param_grad" 630 | ] 631 | }, 632 | { 633 | "cell_type": "markdown", 634 | "metadata": {}, 635 | "source": [ 636 | "# `Trainer`" 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": 16, 642 | "metadata": {}, 643 | "outputs": [], 644 | "source": [ 645 | "from copy import deepcopy\n", 646 | "from typing import Tuple\n", 647 | "\n", 648 | "class Trainer(object):\n", 649 | " '''\n", 650 | " Trains a neural network\n", 651 | " '''\n", 652 | " def __init__(self,\n", 653 | " net: NeuralNetwork,\n", 654 | " optim: Optimizer) -> None:\n", 655 | " '''\n", 656 | " Requires a neural network and an optimizer in order for training to occur. \n", 657 | " Assign the neural network as an instance variable to the optimizer.\n", 658 | " '''\n", 659 | " self.net = net\n", 660 | " self.optim = optim\n", 661 | " self.best_loss = 1e9\n", 662 | " setattr(self.optim, 'net', self.net)\n", 663 | " \n", 664 | " def generate_batches(self,\n", 665 | " X: ndarray,\n", 666 | " y: ndarray,\n", 667 | " size: int = 32) -> Tuple[ndarray]:\n", 668 | " '''\n", 669 | " Generates batches for training \n", 670 | " '''\n", 671 | " assert X.shape[0] == y.shape[0], \\\n", 672 | " '''\n", 673 | " features and target must have the same number of rows, instead\n", 674 | " features has {0} and target has {1}\n", 675 | " '''.format(X.shape[0], y.shape[0])\n", 676 | "\n", 677 | " N = X.shape[0]\n", 678 | "\n", 679 | " for ii in range(0, N, size):\n", 680 | " X_batch, y_batch = X[ii:ii+size], y[ii:ii+size]\n", 681 | "\n", 682 | " yield X_batch, y_batch\n", 683 | "\n", 684 | " \n", 685 | " def fit(self, X_train: ndarray, y_train: ndarray,\n", 686 | " X_test: ndarray, y_test: ndarray,\n", 687 | " epochs: int=100,\n", 688 | " eval_every: int=10,\n", 689 | " batch_size: int=32,\n", 690 | " seed: int = 1,\n", 691 | " restart: bool = True)-> None:\n", 692 | " '''\n", 693 | " Fits the neural network on the training data for a certain number of epochs.\n", 694 | " Every \"eval_every\" epochs, it evaluated the neural network on the testing data.\n", 695 | " '''\n", 696 | "\n", 697 | " np.random.seed(seed)\n", 698 | " if restart:\n", 699 | " for layer in self.net.layers:\n", 700 | " layer.first = True\n", 701 | "\n", 702 | " self.best_loss = 1e9\n", 703 | "\n", 704 | " for e in range(epochs):\n", 705 | "\n", 706 | " if (e+1) % eval_every == 0:\n", 707 | " \n", 708 | " # for early stopping\n", 709 | " last_model = deepcopy(self.net)\n", 710 | "\n", 711 | " X_train, y_train = permute_data(X_train, y_train)\n", 712 
| "\n", 713 | " batch_generator = self.generate_batches(X_train, y_train,\n", 714 | " batch_size)\n", 715 | "\n", 716 | " for ii, (X_batch, y_batch) in enumerate(batch_generator):\n", 717 | "\n", 718 | " self.net.train_batch(X_batch, y_batch)\n", 719 | "\n", 720 | " self.optim.step()\n", 721 | "\n", 722 | " if (e+1) % eval_every == 0:\n", 723 | "\n", 724 | " test_preds = self.net.forward(X_test)\n", 725 | " loss = self.net.loss.forward(test_preds, y_test)\n", 726 | "\n", 727 | " if loss < self.best_loss:\n", 728 | " print(f\"Validation loss after {e+1} epochs is {loss:.3f}\")\n", 729 | " self.best_loss = loss\n", 730 | " else:\n", 731 | " print(f\"\"\"Loss increased after epoch {e+1}, final loss was {self.best_loss:.3f}, using the model from epoch {e+1-eval_every}\"\"\")\n", 732 | " self.net = last_model\n", 733 | " # ensure self.optim is still updating self.net\n", 734 | " setattr(self.optim, 'net', self.net)\n", 735 | " break" 736 | ] 737 | }, 738 | { 739 | "cell_type": "markdown", 740 | "metadata": {}, 741 | "source": [ 742 | "#### Evaluation metrics" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 17, 748 | "metadata": {}, 749 | "outputs": [], 750 | "source": [ 751 | "def mae(y_true: ndarray, y_pred: ndarray):\n", 752 | " '''\n", 753 | " Compute mean absolute error for a neural network.\n", 754 | " ''' \n", 755 | " return np.mean(np.abs(y_true - y_pred))\n", 756 | "\n", 757 | "def rmse(y_true: ndarray, y_pred: ndarray):\n", 758 | " '''\n", 759 | " Compute root mean squared error for a neural network.\n", 760 | " '''\n", 761 | " return np.sqrt(np.mean(np.power(y_true - y_pred, 2)))\n", 762 | "\n", 763 | "def eval_regression_model(model: NeuralNetwork,\n", 764 | " X_test: ndarray,\n", 765 | " y_test: ndarray):\n", 766 | " '''\n", 767 | " Compute mae and rmse for a neural network.\n", 768 | " '''\n", 769 | " preds = model.forward(X_test)\n", 770 | " preds = preds.reshape(-1, 1)\n", 771 | " print(\"Mean absolute error: {:.2f}\".format(mae(preds, y_test)))\n", 772 | " print()\n", 773 | " print(\"Root mean squared error {:.2f}\".format(rmse(preds, y_test)))" 774 | ] 775 | }, 776 | { 777 | "cell_type": "code", 778 | "execution_count": 18, 779 | "metadata": {}, 780 | "outputs": [], 781 | "source": [ 782 | "lr = NeuralNetwork(\n", 783 | " layers=[Dense(neurons=1,\n", 784 | " activation=Linear())],\n", 785 | " loss=MeanSquaredError(),\n", 786 | " seed=20190501\n", 787 | ")\n", 788 | "\n", 789 | "nn = NeuralNetwork(\n", 790 | " layers=[Dense(neurons=13,\n", 791 | " activation=Sigmoid()),\n", 792 | " Dense(neurons=1,\n", 793 | " activation=Linear())],\n", 794 | " loss=MeanSquaredError(),\n", 795 | " seed=20190501\n", 796 | ")\n", 797 | "\n", 798 | "dl = NeuralNetwork(\n", 799 | " layers=[Dense(neurons=13,\n", 800 | " activation=Sigmoid()),\n", 801 | " Dense(neurons=13,\n", 802 | " activation=Sigmoid()),\n", 803 | " Dense(neurons=1,\n", 804 | " activation=Linear())],\n", 805 | " loss=MeanSquaredError(),\n", 806 | " seed=20190501\n", 807 | ")" 808 | ] 809 | }, 810 | { 811 | "cell_type": "markdown", 812 | "metadata": {}, 813 | "source": [ 814 | "### Read in the data, train-test split etc." 
815 | ] 816 | }, 817 | { 818 | "cell_type": "code", 819 | "execution_count": 19, 820 | "metadata": {}, 821 | "outputs": [], 822 | "source": [ 823 | "from sklearn.datasets import load_boston\n", 824 | "\n", 825 | "boston = load_boston()\n", 826 | "data = boston.data\n", 827 | "target = boston.target\n", 828 | "features = boston.feature_names" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": 20, 834 | "metadata": {}, 835 | "outputs": [], 836 | "source": [ 837 | "# Scaling the data\n", 838 | "from sklearn.preprocessing import StandardScaler\n", 839 | "s = StandardScaler()\n", 840 | "data = s.fit_transform(data)" 841 | ] 842 | }, 843 | { 844 | "cell_type": "code", 845 | "execution_count": 21, 846 | "metadata": {}, 847 | "outputs": [], 848 | "source": [ 849 | "def to_2d_np(a: ndarray, \n", 850 | " type: str=\"col\") -> ndarray:\n", 851 | " '''\n", 852 | " Turns a 1D Tensor into 2D\n", 853 | " '''\n", 854 | "\n", 855 | " assert a.ndim == 1, \\\n", 856 | " \"Input tensors must be 1 dimensional\"\n", 857 | " \n", 858 | " if type == \"col\": \n", 859 | " return a.reshape(-1, 1)\n", 860 | " elif type == \"row\":\n", 861 | " return a.reshape(1, -1)" 862 | ] 863 | }, 864 | { 865 | "cell_type": "code", 866 | "execution_count": 22, 867 | "metadata": {}, 868 | "outputs": [], 869 | "source": [ 870 | "from sklearn.model_selection import train_test_split\n", 871 | "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.3, random_state=80718)\n", 872 | "\n", 873 | "# make target 2d array\n", 874 | "y_train, y_test = to_2d_np(y_train), to_2d_np(y_test)" 875 | ] 876 | }, 877 | { 878 | "cell_type": "markdown", 879 | "metadata": {}, 880 | "source": [ 881 | "### Train the three models" 882 | ] 883 | }, 884 | { 885 | "cell_type": "code", 886 | "execution_count": 23, 887 | "metadata": {}, 888 | "outputs": [], 889 | "source": [ 890 | "# helper function\n", 891 | "\n", 892 | "def permute_data(X, y):\n", 893 | " perm = np.random.permutation(X.shape[0])\n", 894 | " return X[perm], y[perm]" 895 | ] 896 | }, 897 | { 898 | "cell_type": "code", 899 | "execution_count": 24, 900 | "metadata": {}, 901 | "outputs": [ 902 | { 903 | "name": "stdout", 904 | "output_type": "stream", 905 | "text": [ 906 | "Validation loss after 10 epochs is 30.293\n", 907 | "Validation loss after 20 epochs is 28.469\n", 908 | "Validation loss after 30 epochs is 26.293\n", 909 | "Validation loss after 40 epochs is 25.541\n", 910 | "Validation loss after 50 epochs is 25.087\n", 911 | "\n", 912 | "Mean absolute error: 3.52\n", 913 | "\n", 914 | "Root mean squared error 5.01\n" 915 | ] 916 | } 917 | ], 918 | "source": [ 919 | "trainer = Trainer(lr, SGD(lr=0.01))\n", 920 | "\n", 921 | "trainer.fit(X_train, y_train, X_test, y_test,\n", 922 | " epochs = 50,\n", 923 | " eval_every = 10,\n", 924 | " seed=20190501);\n", 925 | "print()\n", 926 | "eval_regression_model(lr, X_test, y_test)" 927 | ] 928 | }, 929 | { 930 | "cell_type": "code", 931 | "execution_count": 25, 932 | "metadata": {}, 933 | "outputs": [ 934 | { 935 | "name": "stdout", 936 | "output_type": "stream", 937 | "text": [ 938 | "Validation loss after 10 epochs is 27.435\n", 939 | "Validation loss after 20 epochs is 21.839\n", 940 | "Validation loss after 30 epochs is 18.918\n", 941 | "Validation loss after 40 epochs is 17.195\n", 942 | "Validation loss after 50 epochs is 16.215\n", 943 | "\n", 944 | "Mean absolute error: 2.60\n", 945 | "\n", 946 | "Root mean squared error 4.03\n" 947 | ] 948 | } 949 | ], 950 | "source": [ 951 | "trainer = 
Trainer(nn, SGD(lr=0.01))\n", 952 | "\n", 953 | "trainer.fit(X_train, y_train, X_test, y_test,\n", 954 | " epochs = 50,\n", 955 | " eval_every = 10,\n", 956 | " seed=20190501);\n", 957 | "print()\n", 958 | "eval_regression_model(nn, X_test, y_test)" 959 | ] 960 | }, 961 | { 962 | "cell_type": "code", 963 | "execution_count": 26, 964 | "metadata": {}, 965 | "outputs": [ 966 | { 967 | "name": "stdout", 968 | "output_type": "stream", 969 | "text": [ 970 | "Validation loss after 10 epochs is 44.143\n", 971 | "Validation loss after 20 epochs is 25.278\n", 972 | "Validation loss after 30 epochs is 22.339\n", 973 | "Validation loss after 40 epochs is 16.500\n", 974 | "Validation loss after 50 epochs is 14.655\n", 975 | "\n", 976 | "Mean absolute error: 2.45\n", 977 | "\n", 978 | "Root mean squared error 3.83\n" 979 | ] 980 | } 981 | ], 982 | "source": [ 983 | "trainer = Trainer(dl, SGD(lr=0.01))\n", 984 | "\n", 985 | "trainer.fit(X_train, y_train, X_test, y_test,\n", 986 | " epochs = 50,\n", 987 | " eval_every = 10,\n", 988 | " seed=20190501);\n", 989 | "print()\n", 990 | "eval_regression_model(dl, X_test, y_test)" 991 | ] 992 | } 993 | ], 994 | "metadata": { 995 | "kernelspec": { 996 | "display_name": "Python 3", 997 | "language": "python", 998 | "name": "python3" 999 | }, 1000 | "language_info": { 1001 | "codemirror_mode": { 1002 | "name": "ipython", 1003 | "version": 3 1004 | }, 1005 | "file_extension": ".py", 1006 | "mimetype": "text/x-python", 1007 | "name": "python", 1008 | "nbconvert_exporter": "python", 1009 | "pygments_lexer": "ipython3", 1010 | "version": "3.7.4" 1011 | } 1012 | }, 1013 | "nbformat": 4, 1014 | "nbformat_minor": 2 1015 | } 1016 | -------------------------------------------------------------------------------- /04_extensions/Code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This notebook contains experiments for:\n", 8 | "\n", 9 | "* Loss functions\n", 10 | "* Learning rate decay\n", 11 | "* Optimizers\n", 12 | "* Weight initialization" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# `lincoln` imports" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import numpy as np\n", 29 | "\n", 30 | "from lincoln import activations\n", 31 | "from lincoln import layers\n", 32 | "from lincoln import losses\n", 33 | "from lincoln import optimizers\n", 34 | "from lincoln import network\n", 35 | "from lincoln import train\n", 36 | "from lincoln.utils import mnist\n", 37 | "\n", 38 | "RANDOM_SEED = 190119" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "X_train, y_train, X_test, y_test = mnist.load()" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "60000" 59 | ] 60 | }, 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "num_labels = len(y_train)\n", 68 | "num_labels" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# one-hot encode\n", 78 | "num_labels = len(y_train)\n", 79 | "train_labels = np.zeros((num_labels, 10))\n", 80 | "for i in 
range(num_labels):\n", 81 | " train_labels[i][y_train[i]] = 1\n", 82 | "\n", 83 | "num_labels = len(y_test)\n", 84 | "test_labels = np.zeros((num_labels, 10))\n", 85 | "for i in range(num_labels):\n", 86 | " test_labels[i][y_test[i]] = 1" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "# MNIST Demos" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "# Scale data to mean 0, variance 1" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 5, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "X_train, X_test = X_train - np.mean(X_train), X_test - np.mean(X_train)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "(-33.318421449829934,\n", 121 | " 221.68157855017006,\n", 122 | " -33.318421449829934,\n", 123 | " 221.68157855017006)" 124 | ] 125 | }, 126 | "execution_count": 6, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "np.min(X_train), np.max(X_train), np.min(X_test), np.max(X_test)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 7, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "X_train, X_test = X_train / np.std(X_train), X_test / np.std(X_train)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 8, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/plain": [ 152 | "(-0.424073894391566, 2.821543345689335, -0.424073894391566, 2.821543345689335)" 153 | ] 154 | }, 155 | "execution_count": 8, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "np.min(X_train), np.max(X_train), np.min(X_test), np.max(X_test)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 9, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "def calc_accuracy_model(model, test_set):\n", 171 | " return print(\n", 172 | " '''The model validation accuracy is: {0:.2f}%'''.format(\n", 173 | " np.equal(np.argmax(model.forward(test_set), axis=1), y_test).sum()\n", 174 | " * 100.0\n", 175 | " / test_set.shape[0]\n", 176 | " )\n", 177 | " )" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "## Softmax cross entropy" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "### Trying sigmoid activation" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 10, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "Validation loss after 5 epochs is 0.836\n", 204 | "Validation loss after 10 epochs is 0.718\n", 205 | "Validation loss after 15 epochs is 0.659\n", 206 | "Validation loss after 20 epochs is 0.638\n", 207 | "Validation loss after 25 epochs is 0.627\n", 208 | "Validation loss after 30 epochs is 0.619\n", 209 | "Validation loss after 35 epochs is 0.558\n", 210 | "Validation loss after 40 epochs is 0.506\n", 211 | "Validation loss after 45 epochs is 0.499\n", 212 | "Validation loss after 50 epochs is 0.495\n", 213 | "The model validation accuracy is: 57.16%\n" 214 | ] 215 | } 216 | ], 217 | "source": [ 218 | "model = network.NeuralNetwork(\n", 219 | " layers=[\n", 220 | " layers.Dense(neurons=89, activation=activations.Sigmoid()),\n", 
221 | " layers.Dense(neurons=10, activation=activations.Sigmoid()),\n", 222 | " ],\n", 223 | " loss=losses.MeanSquaredError(normalize=False),\n", 224 | " seed=RANDOM_SEED,\n", 225 | ")\n", 226 | "\n", 227 | "trainer = train.Trainer(model, optimizers.SGD(0.1))\n", 228 | "trainer.fit(\n", 229 | " X_train,\n", 230 | " train_labels,\n", 231 | " X_test,\n", 232 | " test_labels,\n", 233 | " epochs=50,\n", 234 | " eval_every=5,\n", 235 | " seed=RANDOM_SEED,\n", 236 | " batch_size=60,\n", 237 | ")\n", 238 | "\n", 239 | "calc_accuracy_model(model, X_test)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "Note: even if we normalize the outputs of a classification model with mean squared error loss, it still doesn't help:" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 11, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "Validation loss after 5 epochs is 0.573\n", 259 | "\n", 260 | "Loss increased after epoch 10, final loss was 0.573, \n", 261 | "using the model from epoch 5\n", 262 | "The model validation accuracy is: 62.54%\n" 263 | ] 264 | } 265 | ], 266 | "source": [ 267 | "model = network.NeuralNetwork(\n", 268 | " layers=[\n", 269 | " layers.Dense(neurons=89, activation=activations.Sigmoid()),\n", 270 | " layers.Dense(neurons=10, activation=activations.Sigmoid()),\n", 271 | " ],\n", 272 | " loss=losses.MeanSquaredError(normalize=True),\n", 273 | " seed=RANDOM_SEED,\n", 274 | ")\n", 275 | "\n", 276 | "trainer = train.Trainer(model, optimizers.SGD(0.1))\n", 277 | "trainer.fit(\n", 278 | " X_train,\n", 279 | " train_labels,\n", 280 | " X_test,\n", 281 | " test_labels,\n", 282 | " epochs=50,\n", 283 | " eval_every=5,\n", 284 | " seed=RANDOM_SEED,\n", 285 | " batch_size=60,\n", 286 | ")\n", 287 | "\n", 288 | "calc_accuracy_model(model, X_test)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "The reason is that we should be using softmax cross entropy loss!" 
296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 12, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "Validation loss after 5 epochs is 0.719\n", 308 | "Validation loss after 10 epochs is 0.611\n", 309 | "Validation loss after 15 epochs is 0.559\n", 310 | "Validation loss after 20 epochs is 0.530\n", 311 | "Validation loss after 25 epochs is 0.505\n", 312 | "Validation loss after 30 epochs is 0.488\n", 313 | "Validation loss after 35 epochs is 0.475\n", 314 | "Validation loss after 40 epochs is 0.467\n", 315 | "Validation loss after 45 epochs is 0.459\n", 316 | "Validation loss after 50 epochs is 0.453\n", 317 | "\n", 318 | "The model validation accuracy is: 92.61%\n" 319 | ] 320 | } 321 | ], 322 | "source": [ 323 | "model = network.NeuralNetwork(\n", 324 | "    layers=[\n", 325 | "        layers.Dense(neurons=89, activation=activations.Sigmoid()),\n", 326 | "        layers.Dense(neurons=10, activation=activations.Linear()),\n", 327 | "    ],\n", 328 | "    loss=losses.SoftmaxCrossEntropy(),\n", 329 | "    seed=RANDOM_SEED,\n", 330 | ")\n", 331 | "\n", 332 | "trainer = train.Trainer(model, optimizers.SGD(0.1))\n", 333 | "trainer.fit(\n", 334 | "    X_train,\n", 335 | "    train_labels,\n", 336 | "    X_test,\n", 337 | "    test_labels,\n", 338 | "    epochs=50,\n", 339 | "    eval_every=5,\n", 340 | "    seed=RANDOM_SEED,\n", 341 | "    batch_size=60,\n", 342 | ")\n", 343 | "print()\n", 344 | "calc_accuracy_model(model, X_test)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "metadata": {}, 350 | "source": [ 351 | "## SGD Momentum" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 13, 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "name": "stdout", 361 | "output_type": "stream", 362 | "text": [ 363 | "Validation loss after 5 epochs is 0.413\n", 364 | "Validation loss after 10 epochs is 0.361\n", 365 | "\n", 366 | "Loss increased after epoch 15, final loss was 0.361, \n", 367 | "using the model from epoch 10\n", 368 | "The model validation accuracy is: 94.10%\n" 369 | ] 370 | } 371 | ], 372 | "source": [ 373 | "model = network.NeuralNetwork(\n", 374 | "    layers=[\n", 375 | "        layers.Dense(neurons=89, activation=activations.Sigmoid()),\n", 376 | "        layers.Dense(neurons=10, activation=activations.Linear()),\n", 377 | "    ],\n", 378 | "    loss=losses.SoftmaxCrossEntropy(),\n", 379 | "    seed=RANDOM_SEED,\n", 380 | ")\n", 381 | "\n", 382 | "optim = optimizers.SGDMomentum(0.1, momentum=0.9)\n", 383 | "\n", 384 | "trainer = train.Trainer(model, optim)\n", 385 | "trainer.fit(\n", 386 | "    X_train,\n", 387 | "    train_labels,\n", 388 | "    X_test,\n", 389 | "    test_labels,\n", 390 | "    epochs=50,\n", 391 | "    eval_every=5,\n", 392 | "    seed=RANDOM_SEED,\n", 393 | "    batch_size=60,\n", 394 | ")\n", 395 | "\n", 396 | "calc_accuracy_model(model, X_test)" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "## Different learning rate decay" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 14, 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "name": "stdout", 413 | "output_type": "stream", 414 | "text": [ 415 | "Validation loss after 5 epochs is 0.376\n", 416 | "Validation loss after 10 epochs is 0.328\n", 417 | "\n", 418 | "Loss increased after epoch 15, final loss was 0.328, \n", 419 | "using the model from epoch 10\n", 420 | "The model validation accuracy is: 94.76%\n" 421 | ] 422 | } 423 | ], 424 | "source": [ 425 | "model = 
network.NeuralNetwork(\n", 426 | " layers=[\n", 427 | " layers.Dense(neurons=89, activation=activations.Sigmoid()),\n", 428 | " layers.Dense(neurons=10, activation=activations.Linear()),\n", 429 | " ],\n", 430 | " loss=losses.SoftmaxCrossEntropy(),\n", 431 | " seed=RANDOM_SEED,\n", 432 | ")\n", 433 | "\n", 434 | "optimizer = optimizers.SGDMomentum(0.15, momentum=0.9, final_lr=0.05, decay_type='linear')\n", 435 | "\n", 436 | "trainer = train.Trainer(model, optimizer)\n", 437 | "trainer.fit(\n", 438 | " X_train,\n", 439 | " train_labels,\n", 440 | " X_test,\n", 441 | " test_labels,\n", 442 | " epochs=25,\n", 443 | " eval_every=5,\n", 444 | " seed=RANDOM_SEED,\n", 445 | " batch_size=60,\n", 446 | ")\n", 447 | "\n", 448 | "calc_accuracy_model(model, X_test)" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 15, 454 | "metadata": {}, 455 | "outputs": [ 456 | { 457 | "name": "stdout", 458 | "output_type": "stream", 459 | "text": [ 460 | "Validation loss after 5 epochs is 0.387\n", 461 | "Validation loss after 10 epochs is 0.336\n", 462 | "\n", 463 | "Loss increased after epoch 15, final loss was 0.336, \n", 464 | "using the model from epoch 10\n", 465 | "The model validation accuracy is: 94.81%\n" 466 | ] 467 | } 468 | ], 469 | "source": [ 470 | "model = network.NeuralNetwork(\n", 471 | " layers=[\n", 472 | " layers.Dense(neurons=89, activation=activations.Sigmoid()),\n", 473 | " layers.Dense(neurons=10, activation=activations.Linear()),\n", 474 | " ],\n", 475 | " loss=losses.SoftmaxCrossEntropy(),\n", 476 | " seed=RANDOM_SEED,\n", 477 | ")\n", 478 | "\n", 479 | "optimizer = optimizers.SGDMomentum(0.2, momentum=0.9, final_lr=0.05, decay_type='exponential')\n", 480 | "\n", 481 | "trainer = train.Trainer(model, optimizer)\n", 482 | "trainer.fit(\n", 483 | " X_train,\n", 484 | " train_labels,\n", 485 | " X_test,\n", 486 | " test_labels,\n", 487 | " epochs=25,\n", 488 | " eval_every=5,\n", 489 | " seed=RANDOM_SEED,\n", 490 | " batch_size=60,\n", 491 | ")\n", 492 | "\n", 493 | "calc_accuracy_model(model, X_test)" 494 | ] 495 | }, 496 | { 497 | "cell_type": "markdown", 498 | "metadata": {}, 499 | "source": [ 500 | "## Changing weight init" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 16, 506 | "metadata": {}, 507 | "outputs": [ 508 | { 509 | "name": "stdout", 510 | "output_type": "stream", 511 | "text": [ 512 | "Validation loss after 5 epochs is 0.169\n", 513 | "Validation loss after 10 epochs is 0.160\n", 514 | "\n", 515 | "Loss increased after epoch 15, final loss was 0.160, \n", 516 | "using the model from epoch 10\n", 517 | "The model validation accuracy is: 97.46%\n" 518 | ] 519 | } 520 | ], 521 | "source": [ 522 | "model = network.NeuralNetwork(\n", 523 | " layers=[\n", 524 | " layers.Dense(neurons=89, activation=activations.Sigmoid(), weight_init=\"glorot\"),\n", 525 | " layers.Dense(neurons=10, activation=activations.Linear(), weight_init=\"glorot\"),\n", 526 | " ],\n", 527 | " loss=losses.SoftmaxCrossEntropy(),\n", 528 | " seed=RANDOM_SEED,\n", 529 | ")\n", 530 | "\n", 531 | "optimizer = optimizers.SGDMomentum(0.2, momentum=0.9, final_lr=0.05, decay_type='exponential')\n", 532 | "\n", 533 | "trainer = train.Trainer(model, optimizer)\n", 534 | "trainer.fit(\n", 535 | " X_train,\n", 536 | " train_labels,\n", 537 | " X_test,\n", 538 | " test_labels,\n", 539 | " epochs=25,\n", 540 | " eval_every=5,\n", 541 | " seed=RANDOM_SEED,\n", 542 | " batch_size=60,\n", 543 | ")\n", 544 | "\n", 545 | "calc_accuracy_model(model, X_test)" 546 | ] 547 | 
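
The "glorot" initialization requested above has a standard textbook definition: weights drawn with variance scaled by the layer's fan-in and fan-out. A minimal sketch of that definition (assuming numpy imported as np; the function name below is our own, and lincoln's weight_init="glorot" may differ in detail):

    def glorot_normal(n_in, n_out, rng):
        # scaling the standard deviation by fan-in + fan-out keeps the variance
        # of activations and gradients roughly constant from layer to layer
        scale = np.sqrt(2.0 / (n_in + n_out))
        return rng.normal(loc=0.0, scale=scale, size=(n_in, n_out))

    rng = np.random.RandomState(190119)  # any fixed seed
    W = glorot_normal(784, 89, rng)
    print(W.std())  # close to sqrt(2 / (784 + 89)), about 0.048
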
} 548 | ], 549 | "metadata": { 550 | "kernelspec": { 551 | "display_name": "Python 3", 552 | "language": "python", 553 | "name": "python3" 554 | }, 555 | "language_info": { 556 | "codemirror_mode": { 557 | "name": "ipython", 558 | "version": 3 559 | }, 560 | "file_extension": ".py", 561 | "mimetype": "text/x-python", 562 | "name": "python", 563 | "nbconvert_exporter": "python", 564 | "pygments_lexer": "ipython3", 565 | "version": "3.11.4" 566 | } 567 | }, 568 | "nbformat": 4, 569 | "nbformat_minor": 2 570 | } 571 | -------------------------------------------------------------------------------- /05_convolutions/Code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from numpy import ndarray" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Helpers" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "def assert_same_shape(output: ndarray, \n", 34 | "                      output_grad: ndarray):\n", 35 | "    assert output.shape == output_grad.shape, \\\n", 36 | "        '''\n", 37 | "        Two ndarrays should have the same shape; instead, first ndarray's shape is {0}\n", 38 | "        and second ndarray's shape is {1}.\n", 39 | "        '''.format(tuple(output.shape), tuple(output_grad.shape))\n", 40 | "    return None" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "def assert_dim(t: ndarray,\n", 50 | "               dim: int):\n", 51 | "    assert len(t.shape) == dim, \\\n", 52 | "        '''\n", 53 | "        Tensor expected to have dimension {0}, instead has dimension {1}\n", 54 | "        '''.format(dim, len(t.shape))\n", 55 | "    return None" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# 1D Convolution" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "1 input, 1 output" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Padding" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 4, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "input_1d = np.array([1,2,3,4,5])\n", 86 | "param_1d = np.array([1,1,1])" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "def _pad_1d(inp: ndarray,\n", 96 | "            num: int) -> ndarray:\n", 97 | "    z = np.array([0])\n", 98 | "    z = np.repeat(z, num)\n", 99 | "    return np.concatenate([z, inp, z])" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 6, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "array([0, 1, 2, 3, 4, 5, 0])" 111 | ] 112 | }, 113 | "execution_count": 6, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "_pad_1d(input_1d, 1)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "### Forward" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 7, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "def conv_1d(inp: ndarray, \n", 136 | 
" param: ndarray) -> ndarray:\n", 137 | " \n", 138 | " # assert correct dimensions\n", 139 | " assert_dim(inp, 1)\n", 140 | " assert_dim(param, 1)\n", 141 | " \n", 142 | " # pad the input\n", 143 | " param_len = param.shape[0]\n", 144 | " param_mid = param_len // 2\n", 145 | " inp_pad = _pad_1d(inp, param_mid)\n", 146 | " \n", 147 | " # initialize the output\n", 148 | " out = np.zeros(inp.shape)\n", 149 | " \n", 150 | " # perform the 1d convolution\n", 151 | " for o in range(out.shape[0]):\n", 152 | " for p in range(param_len):\n", 153 | " out[o] += param[p] * inp_pad[o+p]\n", 154 | "\n", 155 | " # ensure shapes didn't change \n", 156 | " assert_same_shape(inp, out)\n", 157 | "\n", 158 | " return out" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 8, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "def conv_1d_sum(inp: ndarray, \n", 168 | " param: ndarray) -> ndarray:\n", 169 | " out = conv_1d(inp, param)\n", 170 | " return np.sum(out)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 9, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "39.0" 182 | ] 183 | }, 184 | "execution_count": 9, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "conv_1d_sum(input_1d, param_1d)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "## Testing gradients" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 10, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "4\n", 210 | "0\n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "np.random.seed(190220)\n", 216 | "print(np.random.randint(0, input_1d.shape[0]))\n", 217 | "print(np.random.randint(0, param_1d.shape[0]))" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 11, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "input_1d_2 = np.array([1,2,3,4,6])\n", 227 | "param_1d = np.array([1,1,1])" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 12, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "name": "stdout", 237 | "output_type": "stream", 238 | "text": [ 239 | "0.0\n" 240 | ] 241 | } 242 | ], 243 | "source": [ 244 | "print(conv_1d_sum(input_1d_2, param_1d) - conv_1d_sum(input_1d_2, param_1d))" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 13, 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": [ 256 | "10.0\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "input_1d = np.array([1,2,3,4,5])\n", 262 | "param_1d_2 = np.array([2,1,1])\n", 263 | "\n", 264 | "print(conv_1d_sum(input_1d, param_1d_2) - conv_1d_sum(input_1d, param_1d))" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "## Gradients" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 14, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "def _param_grad_1d(inp: ndarray, \n", 281 | " param: ndarray, \n", 282 | " output_grad: ndarray = None) -> ndarray:\n", 283 | " \n", 284 | " param_len = param.shape[0]\n", 285 | " param_mid = param_len // 2\n", 286 | " input_pad = _pad_1d(inp, param_mid)\n", 287 | " \n", 288 | " if output_grad is None:\n", 289 | " output_grad = np.ones_like(inp)\n", 290 | " else:\n", 291 | " 
assert_same_shape(inp, output_grad)\n", 292 | "\n", 293 | "    # Zero padded 1 dimensional convolution\n", 294 | "    param_grad = np.zeros_like(param)\n", 295 | "    input_grad = np.zeros_like(inp)\n", 296 | "\n", 297 | "    for o in range(inp.shape[0]):\n", 298 | "        for p in range(param.shape[0]):\n", 299 | "            param_grad[p] += input_pad[o+p] * output_grad[o]\n", 300 | "    \n", 301 | "    assert_same_shape(param_grad, param)\n", 302 | "    \n", 303 | "    return param_grad" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 15, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "def _input_grad_1d(inp: ndarray, \n", 313 | "                   param: ndarray, \n", 314 | "                   output_grad: ndarray = None) -> ndarray:\n", 315 | "    \n", 316 | "    param_len = param.shape[0]\n", 317 | "    param_mid = param_len // 2\n", 318 | "    inp_pad = _pad_1d(inp, param_mid)\n", 319 | "    \n", 320 | "    if output_grad is None:\n", 321 | "        output_grad = np.ones_like(inp)\n", 322 | "    else:\n", 323 | "        assert_same_shape(inp, output_grad)\n", 324 | "    \n", 325 | "    output_pad = _pad_1d(output_grad, param_mid)\n", 326 | "    \n", 327 | "    # Zero padded 1 dimensional convolution\n", 328 | "    param_grad = np.zeros_like(param)\n", 329 | "    input_grad = np.zeros_like(inp)\n", 330 | "\n", 331 | "    for o in range(inp.shape[0]):\n", 332 | "        for f in range(param.shape[0]):\n", 333 | "            input_grad[o] += output_pad[o+param_len-f-1] * param[f]\n", 334 | "    \n", 335 | "    assert_same_shape(input_grad, inp)\n", 336 | "    \n", 337 | "    return input_grad" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 16, 343 | "metadata": {}, 344 | "outputs": [ 345 | { 346 | "data": { 347 | "text/plain": [ 348 | "array([2, 3, 3, 3, 2])" 349 | ] 350 | }, 351 | "execution_count": 16, 352 | "metadata": {}, 353 | "output_type": "execute_result" 354 | } 355 | ], 356 | "source": [ 357 | "_input_grad_1d(input_1d, param_1d)" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 17, 363 | "metadata": {}, 364 | "outputs": [ 365 | { 366 | "data": { 367 | "text/plain": [ 368 | "array([10, 15, 14])" 369 | ] 370 | }, 371 | "execution_count": 17, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [ 377 | "_param_grad_1d(input_1d, param_1d)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "Works!"
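
The one-off checks above can be automated with a small central-difference checker. A sketch that reuses conv_1d_sum and _input_grad_1d exactly as defined in this notebook (the helper name is our own):

    def grad_check_input_1d(inp, param, delta=1e-4):
        analytic = _input_grad_1d(inp, param)
        for i in range(inp.shape[0]):
            plus, minus = inp.astype(float), inp.astype(float)
            plus[i] += delta
            minus[i] -= delta
            numeric = (conv_1d_sum(plus, param) - conv_1d_sum(minus, param)) / (2 * delta)
            # the convolution is linear in its input, so these agree essentially exactly
            assert abs(numeric - analytic[i]) < 1e-6, (i, numeric, analytic[i])
        print("input gradients match at every position")

    grad_check_input_1d(input_1d, param_1d)
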
385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "## Batch size of 2" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": {}, 397 | "source": [ 398 | "### Pad" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 18, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "input_1d_batch = np.array([[0,1,2,3,4,5,6], \n", 408 | " [1,2,3,4,5,6,7]])" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 19, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "def _pad_1d(inp: ndarray,\n", 418 | " num: int) -> ndarray:\n", 419 | " z = np.array([0])\n", 420 | " z = np.repeat(z, num)\n", 421 | " return np.concatenate([z, inp, z])" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 20, 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [ 430 | "def _pad_1d_batch(inp: ndarray, \n", 431 | " num: int) -> ndarray:\n", 432 | " outs = [_pad_1d(obs, num) for obs in inp]\n", 433 | " return np.stack(outs)" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 21, 439 | "metadata": { 440 | "scrolled": true 441 | }, 442 | "outputs": [ 443 | { 444 | "data": { 445 | "text/plain": [ 446 | "array([[0, 0, 1, 2, 3, 4, 5, 6, 0],\n", 447 | " [0, 1, 2, 3, 4, 5, 6, 7, 0]])" 448 | ] 449 | }, 450 | "execution_count": 21, 451 | "metadata": {}, 452 | "output_type": "execute_result" 453 | } 454 | ], 455 | "source": [ 456 | "_pad_1d_batch(input_1d_batch, 1)" 457 | ] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": {}, 462 | "source": [ 463 | "### Forward" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 22, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "def conv_1d_batch(inp: ndarray, \n", 473 | " param: ndarray) -> ndarray:\n", 474 | "\n", 475 | " outs = [conv_1d(obs, param) for obs in inp]\n", 476 | " return np.stack(outs)" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": 23, 482 | "metadata": {}, 483 | "outputs": [ 484 | { 485 | "data": { 486 | "text/plain": [ 487 | "array([[ 1., 3., 6., 9., 12., 15., 11.],\n", 488 | " [ 3., 6., 9., 12., 15., 18., 13.]])" 489 | ] 490 | }, 491 | "execution_count": 23, 492 | "metadata": {}, 493 | "output_type": "execute_result" 494 | } 495 | ], 496 | "source": [ 497 | "conv_1d_batch(input_1d_batch, param_1d)" 498 | ] 499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "metadata": {}, 503 | "source": [ 504 | "### Gradient" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 24, 510 | "metadata": {}, 511 | "outputs": [], 512 | "source": [ 513 | "def input_grad_1d_batch(inp: ndarray, \n", 514 | " param: ndarray) -> ndarray:\n", 515 | "\n", 516 | " out = conv_1d_batch(inp, param)\n", 517 | " \n", 518 | " out_grad = np.ones_like(out)\n", 519 | " \n", 520 | " batch_size = out_grad.shape[0]\n", 521 | " \n", 522 | " grads = [_input_grad_1d(inp[i], param, out_grad[i]) for i in range(batch_size)] \n", 523 | "\n", 524 | " return np.stack(grads)" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 25, 530 | "metadata": {}, 531 | "outputs": [], 532 | "source": [ 533 | "def param_grad_1d_batch(inp: ndarray, \n", 534 | " param: ndarray) -> ndarray:\n", 535 | "\n", 536 | " output_grad = np.ones_like(inp)\n", 537 | " \n", 538 | " inp_pad = _pad_1d_batch(inp, 1)\n", 539 | " out_pad = _pad_1d_batch(inp, 1)\n", 540 | "\n", 541 | " param_grad = np.zeros_like(param) 
\n", 542 | " \n", 543 | " for i in range(inp.shape[0]):\n", 544 | " for o in range(inp.shape[1]):\n", 545 | " for p in range(param.shape[0]):\n", 546 | " param_grad[p] += inp_pad[i][o+p] * output_grad[i][o] \n", 547 | "\n", 548 | " return param_grad" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": {}, 554 | "source": [ 555 | "## Checking gradients for `conv_1d_batch`" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 26, 561 | "metadata": {}, 562 | "outputs": [], 563 | "source": [ 564 | "def conv_1d_batch_sum(inp: ndarray, \n", 565 | " fil: ndarray) -> ndarray:\n", 566 | " out = conv_1d_batch(inp, fil)\n", 567 | " return np.sum(out)" 568 | ] 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": 27, 573 | "metadata": {}, 574 | "outputs": [ 575 | { 576 | "data": { 577 | "text/plain": [ 578 | "133.0" 579 | ] 580 | }, 581 | "execution_count": 27, 582 | "metadata": {}, 583 | "output_type": "execute_result" 584 | } 585 | ], 586 | "source": [ 587 | "conv_1d_batch_sum(input_1d_batch, param_1d)" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": 28, 593 | "metadata": {}, 594 | "outputs": [ 595 | { 596 | "name": "stdout", 597 | "output_type": "stream", 598 | "text": [ 599 | "0\n", 600 | "2\n" 601 | ] 602 | } 603 | ], 604 | "source": [ 605 | "print(np.random.randint(0, input_1d_batch.shape[0]))\n", 606 | "print(np.random.randint(0, input_1d_batch.shape[1]))" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": 29, 612 | "metadata": {}, 613 | "outputs": [ 614 | { 615 | "data": { 616 | "text/plain": [ 617 | "3.0" 618 | ] 619 | }, 620 | "execution_count": 29, 621 | "metadata": {}, 622 | "output_type": "execute_result" 623 | } 624 | ], 625 | "source": [ 626 | "input_1d_batch_2 = input_1d_batch.copy()\n", 627 | "input_1d_batch_2[0][2] += 1\n", 628 | "conv_1d_batch_sum(input_1d_batch_2, param_1d) - conv_1d_batch_sum(input_1d_batch, param_1d)" 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": 30, 634 | "metadata": {}, 635 | "outputs": [ 636 | { 637 | "data": { 638 | "text/plain": [ 639 | "array([[2, 3, 3, 3, 3, 3, 2],\n", 640 | " [2, 3, 3, 3, 3, 3, 2]])" 641 | ] 642 | }, 643 | "execution_count": 30, 644 | "metadata": {}, 645 | "output_type": "execute_result" 646 | } 647 | ], 648 | "source": [ 649 | "input_grad_1d_batch(input_1d_batch, param_1d)" 650 | ] 651 | }, 652 | { 653 | "cell_type": "code", 654 | "execution_count": 31, 655 | "metadata": {}, 656 | "outputs": [ 657 | { 658 | "name": "stdout", 659 | "output_type": "stream", 660 | "text": [ 661 | "2\n" 662 | ] 663 | } 664 | ], 665 | "source": [ 666 | "print(np.random.randint(0, param_1d.shape[0]))" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": 32, 672 | "metadata": {}, 673 | "outputs": [ 674 | { 675 | "data": { 676 | "text/plain": [ 677 | "48.0" 678 | ] 679 | }, 680 | "execution_count": 32, 681 | "metadata": {}, 682 | "output_type": "execute_result" 683 | } 684 | ], 685 | "source": [ 686 | "param_1d_2 = param_1d.copy()\n", 687 | "param_1d_2[2] += 1\n", 688 | "conv_1d_batch_sum(input_1d_batch, param_1d_2) - conv_1d_batch_sum(input_1d_batch, param_1d) " 689 | ] 690 | }, 691 | { 692 | "cell_type": "code", 693 | "execution_count": 33, 694 | "metadata": {}, 695 | "outputs": [ 696 | { 697 | "data": { 698 | "text/plain": [ 699 | "array([36, 49, 48])" 700 | ] 701 | }, 702 | "execution_count": 33, 703 | "metadata": {}, 704 | "output_type": "execute_result" 705 | } 706 | ], 707 | "source": [ 
708 | "param_grad_1d_batch(input_1d_batch, param_1d)" 709 | ] 710 | }, 711 | { 712 | "cell_type": "markdown", 713 | "metadata": {}, 714 | "source": [ 715 | "# 2D Convolutions" 716 | ] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "execution_count": 34, 721 | "metadata": {}, 722 | "outputs": [], 723 | "source": [ 724 | "imgs_2d_batch = np.random.randn(3, 28, 28)" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": 35, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [ 733 | "param_2d = np.random.randn(3, 3)" 734 | ] 735 | }, 736 | { 737 | "cell_type": "markdown", 738 | "metadata": {}, 739 | "source": [ 740 | "## Padding" 741 | ] 742 | }, 743 | { 744 | "cell_type": "code", 745 | "execution_count": 36, 746 | "metadata": {}, 747 | "outputs": [], 748 | "source": [ 749 | "def _pad_2d(inp: ndarray, \n", 750 | " num: int):\n", 751 | " '''\n", 752 | " Input is a 3 dimensional tensor, first dimension batch size\n", 753 | " '''\n", 754 | " outs = [_pad_2d_obs(obs, num) for obs in inp]\n", 755 | "\n", 756 | " return np.stack(outs)" 757 | ] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": 37, 762 | "metadata": {}, 763 | "outputs": [], 764 | "source": [ 765 | "def _pad_2d_obs(inp: ndarray, \n", 766 | " num: int):\n", 767 | " '''\n", 768 | " Input is a 2 dimensional, square, 2D Tensor\n", 769 | " '''\n", 770 | " inp_pad = _pad_1d_batch(inp, num)\n", 771 | "\n", 772 | " other = np.zeros((num, inp.shape[0] + num * 2))\n", 773 | "\n", 774 | " return np.concatenate([other, inp_pad, other])" 775 | ] 776 | }, 777 | { 778 | "cell_type": "code", 779 | "execution_count": 38, 780 | "metadata": {}, 781 | "outputs": [ 782 | { 783 | "data": { 784 | "text/plain": [ 785 | "(3, 30, 30)" 786 | ] 787 | }, 788 | "execution_count": 38, 789 | "metadata": {}, 790 | "output_type": "execute_result" 791 | } 792 | ], 793 | "source": [ 794 | "_pad_2d(imgs_2d_batch, 1).shape" 795 | ] 796 | }, 797 | { 798 | "cell_type": "markdown", 799 | "metadata": {}, 800 | "source": [ 801 | "## Compute output" 802 | ] 803 | }, 804 | { 805 | "cell_type": "code", 806 | "execution_count": 39, 807 | "metadata": {}, 808 | "outputs": [], 809 | "source": [ 810 | "def _compute_output_obs_2d(obs: ndarray, \n", 811 | " param: ndarray):\n", 812 | " '''\n", 813 | " Obs is a 2d square Tensor, so is param\n", 814 | " '''\n", 815 | " param_mid = param.shape[0] // 2\n", 816 | " \n", 817 | " obs_pad = _pad_2d_obs(obs, param_mid)\n", 818 | " \n", 819 | " out = np.zeros_like(obs)\n", 820 | " \n", 821 | " for o_w in range(out.shape[0]):\n", 822 | " for o_h in range(out.shape[1]):\n", 823 | " for p_w in range(param.shape[0]):\n", 824 | " for p_h in range(param.shape[1]):\n", 825 | " out[o_w][o_h] += param[p_w][p_h] * obs_pad[o_w+p_w][o_h+p_h]\n", 826 | " return out" 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": 40, 832 | "metadata": {}, 833 | "outputs": [], 834 | "source": [ 835 | "def _compute_output_2d(img_batch: ndarray,\n", 836 | " param: ndarray):\n", 837 | " \n", 838 | " assert_dim(img_batch, 3)\n", 839 | " \n", 840 | " outs = [_compute_output_obs_2d(obs, param) for obs in img_batch]\n", 841 | " \n", 842 | " return np.stack(outs)" 843 | ] 844 | }, 845 | { 846 | "cell_type": "code", 847 | "execution_count": 41, 848 | "metadata": {}, 849 | "outputs": [ 850 | { 851 | "data": { 852 | "text/plain": [ 853 | "(3, 28, 28)" 854 | ] 855 | }, 856 | "execution_count": 41, 857 | "metadata": {}, 858 | "output_type": "execute_result" 859 | } 860 | ], 861 | "source": [ 862 | 
"_compute_output_2d(imgs_2d_batch, param_2d).shape" 863 | ] 864 | }, 865 | { 866 | "cell_type": "markdown", 867 | "metadata": {}, 868 | "source": [ 869 | "### Param grads" 870 | ] 871 | }, 872 | { 873 | "cell_type": "code", 874 | "execution_count": 42, 875 | "metadata": {}, 876 | "outputs": [], 877 | "source": [ 878 | "def _compute_grads_obs_2d(input_obs: ndarray,\n", 879 | " output_grad_obs: ndarray, \n", 880 | " param: ndarray) -> ndarray:\n", 881 | " '''\n", 882 | " input_obs: 2D Tensor representing the input observation\n", 883 | " output_grad_obs: 2D Tensor representing the output gradient \n", 884 | " param: 2D filter\n", 885 | " '''\n", 886 | " \n", 887 | " param_size = param.shape[0]\n", 888 | " output_obs_pad = _pad_2d_obs(output_grad_obs, param_size // 2)\n", 889 | " input_grad = np.zeros_like(input_obs)\n", 890 | "\n", 891 | " for i_w in range(input_obs.shape[0]):\n", 892 | " for i_h in range(input_obs.shape[1]):\n", 893 | " for p_w in range(param_size):\n", 894 | " for p_h in range(param_size):\n", 895 | " input_grad[i_w][i_h] += output_obs_pad[i_w+param_size-p_w-1][i_h+param_size-p_h-1] \\\n", 896 | " * param[p_w][p_h]\n", 897 | "\n", 898 | " return input_grad\n", 899 | "\n", 900 | "def _compute_grads_2d(inp: ndarray,\n", 901 | " output_grad: ndarray, \n", 902 | " param: ndarray) -> ndarray:\n", 903 | "\n", 904 | " grads = [_compute_grads_obs_2d(inp[i], output_grad[i], param) for i in range(output_grad.shape[0])] \n", 905 | "\n", 906 | " return np.stack(grads)\n", 907 | "\n", 908 | "\n", 909 | "def _param_grad_2d(inp: ndarray,\n", 910 | " output_grad: ndarray, \n", 911 | " param: ndarray) -> ndarray:\n", 912 | "\n", 913 | " param_size = param.shape[0]\n", 914 | " inp_pad = _pad_2d(inp, param_size // 2)\n", 915 | "\n", 916 | " param_grad = np.zeros_like(param)\n", 917 | " img_shape = output_grad.shape[1:]\n", 918 | " \n", 919 | " for i in range(inp.shape[0]):\n", 920 | " for o_w in range(img_shape[0]):\n", 921 | " for o_h in range(img_shape[1]):\n", 922 | " for p_w in range(param_size):\n", 923 | " for p_h in range(param_size):\n", 924 | " param_grad[p_w][p_h] += inp_pad[i][o_w+p_w][o_h+p_h] \\\n", 925 | " * output_grad[i][o_w][o_h]\n", 926 | " return param_grad" 927 | ] 928 | }, 929 | { 930 | "cell_type": "code", 931 | "execution_count": 43, 932 | "metadata": {}, 933 | "outputs": [], 934 | "source": [ 935 | "img_grads = _compute_grads_2d(imgs_2d_batch, \n", 936 | " np.ones_like(imgs_2d_batch),\n", 937 | " param_2d)" 938 | ] 939 | }, 940 | { 941 | "cell_type": "code", 942 | "execution_count": 44, 943 | "metadata": {}, 944 | "outputs": [ 945 | { 946 | "data": { 947 | "text/plain": [ 948 | "(3, 28, 28)" 949 | ] 950 | }, 951 | "execution_count": 44, 952 | "metadata": {}, 953 | "output_type": "execute_result" 954 | } 955 | ], 956 | "source": [ 957 | "img_grads.shape" 958 | ] 959 | }, 960 | { 961 | "cell_type": "code", 962 | "execution_count": 45, 963 | "metadata": {}, 964 | "outputs": [ 965 | { 966 | "data": { 967 | "text/plain": [ 968 | "(3, 3)" 969 | ] 970 | }, 971 | "execution_count": 45, 972 | "metadata": {}, 973 | "output_type": "execute_result" 974 | } 975 | ], 976 | "source": [ 977 | "param_grad = _param_grad_2d(imgs_2d_batch, \n", 978 | " np.ones_like(imgs_2d_batch),\n", 979 | " param_2d)\n", 980 | "param_grad.shape" 981 | ] 982 | }, 983 | { 984 | "cell_type": "markdown", 985 | "metadata": {}, 986 | "source": [ 987 | "## Testing gradients" 988 | ] 989 | }, 990 | { 991 | "cell_type": "markdown", 992 | "metadata": {}, 993 | "source": [ 994 | "### Input" 995 | ] 996 | }, 997 | 
{ 998 | "cell_type": "code", 999 | "execution_count": 46, 1000 | "metadata": {}, 1001 | "outputs": [ 1002 | { 1003 | "name": "stdout", 1004 | "output_type": "stream", 1005 | "text": [ 1006 | "0\n", 1007 | "6\n", 1008 | "18\n" 1009 | ] 1010 | } 1011 | ], 1012 | "source": [ 1013 | "print(np.random.randint(0, imgs_2d_batch.shape[0]))\n", 1014 | "print(np.random.randint(0, imgs_2d_batch.shape[1]))\n", 1015 | "print(np.random.randint(0, imgs_2d_batch.shape[2]))" 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "code", 1020 | "execution_count": 47, 1021 | "metadata": {}, 1022 | "outputs": [], 1023 | "source": [ 1024 | "imgs_2d_batch_2 = imgs_2d_batch.copy()\n", 1025 | "imgs_2d_batch_2[0][6][18] += 1" 1026 | ] 1027 | }, 1028 | { 1029 | "cell_type": "code", 1030 | "execution_count": 48, 1031 | "metadata": {}, 1032 | "outputs": [], 1033 | "source": [ 1034 | "def _compute_output_2d_sum(img_batch: ndarray,\n", 1035 | " param: ndarray):\n", 1036 | " \n", 1037 | " out = _compute_output_2d(img_batch, param)\n", 1038 | " \n", 1039 | " return out.sum()" 1040 | ] 1041 | }, 1042 | { 1043 | "cell_type": "code", 1044 | "execution_count": 49, 1045 | "metadata": {}, 1046 | "outputs": [ 1047 | { 1048 | "data": { 1049 | "text/plain": [ 1050 | "-3.1843477398599163" 1051 | ] 1052 | }, 1053 | "execution_count": 49, 1054 | "metadata": {}, 1055 | "output_type": "execute_result" 1056 | } 1057 | ], 1058 | "source": [ 1059 | "_compute_output_2d_sum(imgs_2d_batch_2, param_2d) - \\\n", 1060 | "_compute_output_2d_sum(imgs_2d_batch, param_2d)" 1061 | ] 1062 | }, 1063 | { 1064 | "cell_type": "code", 1065 | "execution_count": 50, 1066 | "metadata": {}, 1067 | "outputs": [ 1068 | { 1069 | "data": { 1070 | "text/plain": [ 1071 | "-3.184347739859924" 1072 | ] 1073 | }, 1074 | "execution_count": 50, 1075 | "metadata": {}, 1076 | "output_type": "execute_result" 1077 | } 1078 | ], 1079 | "source": [ 1080 | "img_grads[0][6][18]" 1081 | ] 1082 | }, 1083 | { 1084 | "cell_type": "markdown", 1085 | "metadata": {}, 1086 | "source": [ 1087 | "### Param" 1088 | ] 1089 | }, 1090 | { 1091 | "cell_type": "code", 1092 | "execution_count": 51, 1093 | "metadata": {}, 1094 | "outputs": [ 1095 | { 1096 | "name": "stdout", 1097 | "output_type": "stream", 1098 | "text": [ 1099 | "0\n", 1100 | "2\n" 1101 | ] 1102 | } 1103 | ], 1104 | "source": [ 1105 | "print(np.random.randint(0, param_2d.shape[0]))\n", 1106 | "print(np.random.randint(0, param_2d.shape[1]))" 1107 | ] 1108 | }, 1109 | { 1110 | "cell_type": "code", 1111 | "execution_count": 52, 1112 | "metadata": {}, 1113 | "outputs": [], 1114 | "source": [ 1115 | "param_2d_2 = param_2d.copy()\n", 1116 | "param_2d_2[0][2] += 1" 1117 | ] 1118 | }, 1119 | { 1120 | "cell_type": "code", 1121 | "execution_count": 53, 1122 | "metadata": {}, 1123 | "outputs": [ 1124 | { 1125 | "data": { 1126 | "text/plain": [ 1127 | "5.53349015923007" 1128 | ] 1129 | }, 1130 | "execution_count": 53, 1131 | "metadata": {}, 1132 | "output_type": "execute_result" 1133 | } 1134 | ], 1135 | "source": [ 1136 | "_compute_output_2d_sum(imgs_2d_batch, param_2d_2) - _compute_output_2d_sum(imgs_2d_batch, param_2d)" 1137 | ] 1138 | }, 1139 | { 1140 | "cell_type": "code", 1141 | "execution_count": 54, 1142 | "metadata": {}, 1143 | "outputs": [ 1144 | { 1145 | "data": { 1146 | "text/plain": [ 1147 | "5.533490159230001" 1148 | ] 1149 | }, 1150 | "execution_count": 54, 1151 | "metadata": {}, 1152 | "output_type": "execute_result" 1153 | } 1154 | ], 1155 | "source": [ 1156 | "param_grad[0][2]" 1157 | ] 1158 | }, 1159 | { 1160 | "cell_type": 
"markdown", 1161 | "metadata": {}, 1162 | "source": [ 1163 | "## With channels + batch size" 1164 | ] 1165 | }, 1166 | { 1167 | "cell_type": "markdown", 1168 | "metadata": {}, 1169 | "source": [ 1170 | "### Helper" 1171 | ] 1172 | }, 1173 | { 1174 | "cell_type": "code", 1175 | "execution_count": 55, 1176 | "metadata": {}, 1177 | "outputs": [], 1178 | "source": [ 1179 | "def _pad_2d_channel(inp: ndarray, \n", 1180 | " num: int):\n", 1181 | " '''\n", 1182 | " inp has dimension [num_channels, image_width, image_height] \n", 1183 | " '''\n", 1184 | " return np.stack([_pad_2d_obs(channel, num) for channel in inp])\n", 1185 | "\n", 1186 | "def _pad_conv_input(inp: ndarray,\n", 1187 | " num: int): \n", 1188 | " '''\n", 1189 | " inp has dimension [batch_size, num_channels, image_width, image_height]\n", 1190 | " ''' \n", 1191 | " return np.stack([_pad_2d_channel(obs, num) for obs in inp])" 1192 | ] 1193 | }, 1194 | { 1195 | "cell_type": "markdown", 1196 | "metadata": {}, 1197 | "source": [ 1198 | "### Forward" 1199 | ] 1200 | }, 1201 | { 1202 | "cell_type": "code", 1203 | "execution_count": 56, 1204 | "metadata": {}, 1205 | "outputs": [], 1206 | "source": [ 1207 | "def _compute_output_obs(obs: ndarray, \n", 1208 | " param: ndarray):\n", 1209 | " '''\n", 1210 | " obs: [channels, img_width, img_height]\n", 1211 | " param: [in_channels, out_channels, fil_width, fil_height] \n", 1212 | " '''\n", 1213 | " assert_dim(obs, 3)\n", 1214 | " assert_dim(param, 4)\n", 1215 | " \n", 1216 | " param_size = param.shape[2]\n", 1217 | " param_mid = param_size // 2\n", 1218 | " obs_pad = _pad_2d_channel(obs, param_mid)\n", 1219 | " \n", 1220 | " in_channels = param.shape[0]\n", 1221 | " out_channels = param.shape[1]\n", 1222 | " img_size = obs.shape[1]\n", 1223 | " \n", 1224 | " out = np.zeros((out_channels,) + obs.shape[1:])\n", 1225 | " for c_in in range(in_channels):\n", 1226 | " for c_out in range(out_channels):\n", 1227 | " for o_w in range(img_size):\n", 1228 | " for o_h in range(img_size):\n", 1229 | " for p_w in range(param_size):\n", 1230 | " for p_h in range(param_size):\n", 1231 | " out[c_out][o_w][o_h] += \\\n", 1232 | " param[c_in][c_out][p_w][p_h] * obs_pad[c_in][o_w+p_w][o_h+p_h]\n", 1233 | " return out \n", 1234 | "\n", 1235 | "def _output(inp: ndarray,\n", 1236 | " param: ndarray) -> ndarray:\n", 1237 | " '''\n", 1238 | " obs: [batch_size, channels, img_width, img_height]\n", 1239 | " fil: [in_channels, out_channels, fil_width, fil_height] \n", 1240 | " '''\n", 1241 | " outs = [_compute_output_obs(obs, param) for obs in inp] \n", 1242 | "\n", 1243 | " return np.stack(outs)" 1244 | ] 1245 | }, 1246 | { 1247 | "cell_type": "markdown", 1248 | "metadata": {}, 1249 | "source": [ 1250 | "### Backward" 1251 | ] 1252 | }, 1253 | { 1254 | "cell_type": "code", 1255 | "execution_count": 57, 1256 | "metadata": {}, 1257 | "outputs": [], 1258 | "source": [ 1259 | "def _compute_grads_obs(input_obs: ndarray,\n", 1260 | " output_grad_obs: ndarray,\n", 1261 | " param: ndarray) -> ndarray:\n", 1262 | " '''\n", 1263 | " input_obs: [in_channels, img_width, img_height]\n", 1264 | " output_grad_obs: [out_channels, img_width, img_height]\n", 1265 | " param: [in_channels, out_channels, img_width, img_height] \n", 1266 | " '''\n", 1267 | " input_grad = np.zeros_like(input_obs) \n", 1268 | " param_size = param.shape[2]\n", 1269 | " param_mid = param_size // 2\n", 1270 | " img_size = input_obs.shape[1]\n", 1271 | " in_channels = input_obs.shape[0]\n", 1272 | " out_channels = param.shape[1]\n", 1273 | " output_obs_pad = 
_pad_2d_channel(output_grad_obs, param_mid)\n", 1274 | " \n", 1275 | " for c_in in range(in_channels):\n", 1276 | " for c_out in range(out_channels):\n", 1277 | " for i_w in range(input_obs.shape[1]):\n", 1278 | " for i_h in range(input_obs.shape[2]):\n", 1279 | " for p_w in range(param_size):\n", 1280 | " for p_h in range(param_size):\n", 1281 | " input_grad[c_in][i_w][i_h] += \\\n", 1282 | " output_obs_pad[c_out][i_w+param_size-p_w-1][i_h+param_size-p_h-1] \\\n", 1283 | " * param[c_in][c_out][p_w][p_h]\n", 1284 | " return input_grad\n", 1285 | "\n", 1286 | "def _input_grad(inp: ndarray,\n", 1287 | " output_grad: ndarray, \n", 1288 | " param: ndarray) -> ndarray:\n", 1289 | "\n", 1290 | " grads = [_compute_grads_obs(inp[i], output_grad[i], param) for i in range(output_grad.shape[0])] \n", 1291 | "\n", 1292 | " return np.stack(grads)" 1293 | ] 1294 | }, 1295 | { 1296 | "cell_type": "code", 1297 | "execution_count": 58, 1298 | "metadata": {}, 1299 | "outputs": [], 1300 | "source": [ 1301 | "def _param_grad(inp: ndarray,\n", 1302 | " output_grad: ndarray, \n", 1303 | " param: ndarray) -> ndarray:\n", 1304 | " '''\n", 1305 | " inp: [in_channels, img_width, img_height]\n", 1306 | " output_grad_obs: [out_channels, img_width, img_height]\n", 1307 | " param: [in_channels, out_channels, img_width, img_height] \n", 1308 | " '''\n", 1309 | " param_grad = np.zeros_like(param) \n", 1310 | " param_size = param.shape[2]\n", 1311 | " param_mid = param_size // 2\n", 1312 | " img_size = inp.shape[2]\n", 1313 | " in_channels = inp.shape[1]\n", 1314 | " out_channels = output_grad.shape[1] \n", 1315 | "\n", 1316 | " inp_pad = _pad_conv_input(inp, param_mid)\n", 1317 | " img_shape = output_grad.shape[2:]\n", 1318 | "\n", 1319 | " for i in range(inp.shape[0]):\n", 1320 | " for c_in in range(in_channels):\n", 1321 | " for c_out in range(out_channels):\n", 1322 | " for o_w in range(img_shape[0]):\n", 1323 | " for o_h in range(img_shape[1]):\n", 1324 | " for p_w in range(param_size):\n", 1325 | " for p_h in range(param_size):\n", 1326 | " param_grad[c_in][c_out][p_w][p_h] += \\\n", 1327 | " inp_pad[i][c_in][o_w+p_w][o_h+p_h] \\\n", 1328 | " * output_grad[i][c_out][o_w][o_h]\n", 1329 | " return param_grad" 1330 | ] 1331 | }, 1332 | { 1333 | "cell_type": "markdown", 1334 | "metadata": {}, 1335 | "source": [ 1336 | "## Testing gradients" 1337 | ] 1338 | }, 1339 | { 1340 | "cell_type": "code", 1341 | "execution_count": 59, 1342 | "metadata": {}, 1343 | "outputs": [], 1344 | "source": [ 1345 | "cifar_imgs = np.random.randn(10, 3, 32, 32)\n", 1346 | "cifar_param = np.random.randn(3, 16, 5, 5)" 1347 | ] 1348 | }, 1349 | { 1350 | "cell_type": "code", 1351 | "execution_count": 60, 1352 | "metadata": {}, 1353 | "outputs": [ 1354 | { 1355 | "name": "stdout", 1356 | "output_type": "stream", 1357 | "text": [ 1358 | "3\n", 1359 | "1\n", 1360 | "2\n", 1361 | "19\n", 1362 | "\n", 1363 | "0\n", 1364 | "8\n", 1365 | "0\n", 1366 | "2\n" 1367 | ] 1368 | } 1369 | ], 1370 | "source": [ 1371 | "print(np.random.randint(0, cifar_imgs.shape[0]))\n", 1372 | "print(np.random.randint(0, cifar_imgs.shape[1]))\n", 1373 | "print(np.random.randint(0, cifar_imgs.shape[2]))\n", 1374 | "print(np.random.randint(0, cifar_imgs.shape[3]))\n", 1375 | "print()\n", 1376 | "print(np.random.randint(0, cifar_param.shape[0]))\n", 1377 | "print(np.random.randint(0, cifar_param.shape[1]))\n", 1378 | "print(np.random.randint(0, cifar_param.shape[2]))\n", 1379 | "print(np.random.randint(0, cifar_param.shape[3]))" 1380 | ] 1381 | }, 1382 | { 1383 | "cell_type": 
"code", 1384 | "execution_count": 61, 1385 | "metadata": {}, 1386 | "outputs": [], 1387 | "source": [ 1388 | "def _compute_output_sum(imgs: ndarray,\n", 1389 | " param: ndarray):\n", 1390 | " return _output(imgs, param).sum()" 1391 | ] 1392 | }, 1393 | { 1394 | "cell_type": "markdown", 1395 | "metadata": {}, 1396 | "source": [ 1397 | "### Input grad" 1398 | ] 1399 | }, 1400 | { 1401 | "cell_type": "code", 1402 | "execution_count": 62, 1403 | "metadata": {}, 1404 | "outputs": [], 1405 | "source": [ 1406 | "cifar_imgs_2 = cifar_imgs.copy()\n", 1407 | "cifar_imgs_2[3][1][2][19] += 1" 1408 | ] 1409 | }, 1410 | { 1411 | "cell_type": "code", 1412 | "execution_count": 63, 1413 | "metadata": {}, 1414 | "outputs": [ 1415 | { 1416 | "data": { 1417 | "text/plain": [ 1418 | "2.345298758707486" 1419 | ] 1420 | }, 1421 | "execution_count": 63, 1422 | "metadata": {}, 1423 | "output_type": "execute_result" 1424 | } 1425 | ], 1426 | "source": [ 1427 | "_compute_output_sum(cifar_imgs_2, cifar_param) - _compute_output_sum(cifar_imgs, cifar_param)" 1428 | ] 1429 | }, 1430 | { 1431 | "cell_type": "code", 1432 | "execution_count": 64, 1433 | "metadata": {}, 1434 | "outputs": [ 1435 | { 1436 | "data": { 1437 | "text/plain": [ 1438 | "2.3452987587074423" 1439 | ] 1440 | }, 1441 | "execution_count": 64, 1442 | "metadata": {}, 1443 | "output_type": "execute_result" 1444 | } 1445 | ], 1446 | "source": [ 1447 | "_input_grad(cifar_imgs,\n", 1448 | " np.ones((10, 16, 32, 32)),\n", 1449 | " cifar_param)[3][1][2][19]" 1450 | ] 1451 | }, 1452 | { 1453 | "cell_type": "markdown", 1454 | "metadata": {}, 1455 | "source": [ 1456 | "### Param grad" 1457 | ] 1458 | }, 1459 | { 1460 | "cell_type": "code", 1461 | "execution_count": 65, 1462 | "metadata": {}, 1463 | "outputs": [], 1464 | "source": [ 1465 | "cifar_param_2 = cifar_param.copy()\n", 1466 | "cifar_param_2[0][8][0][2] += 1" 1467 | ] 1468 | }, 1469 | { 1470 | "cell_type": "code", 1471 | "execution_count": 66, 1472 | "metadata": {}, 1473 | "outputs": [ 1474 | { 1475 | "data": { 1476 | "text/plain": [ 1477 | "-47.09123124155292" 1478 | ] 1479 | }, 1480 | "execution_count": 66, 1481 | "metadata": {}, 1482 | "output_type": "execute_result" 1483 | } 1484 | ], 1485 | "source": [ 1486 | "_compute_output_sum(cifar_imgs, cifar_param_2) - _compute_output_sum(cifar_imgs, cifar_param)" 1487 | ] 1488 | }, 1489 | { 1490 | "cell_type": "code", 1491 | "execution_count": 67, 1492 | "metadata": {}, 1493 | "outputs": [ 1494 | { 1495 | "data": { 1496 | "text/plain": [ 1497 | "-47.0912312415532" 1498 | ] 1499 | }, 1500 | "execution_count": 67, 1501 | "metadata": {}, 1502 | "output_type": "execute_result" 1503 | } 1504 | ], 1505 | "source": [ 1506 | "_param_grad(cifar_imgs,\n", 1507 | " np.ones((10, 16, 32, 32)),\n", 1508 | " cifar_param)[0][8][0][2]" 1509 | ] 1510 | } 1511 | ], 1512 | "metadata": { 1513 | "kernelspec": { 1514 | "display_name": "Python 3", 1515 | "language": "python", 1516 | "name": "python3" 1517 | }, 1518 | "language_info": { 1519 | "codemirror_mode": { 1520 | "name": "ipython", 1521 | "version": 3 1522 | }, 1523 | "file_extension": ".py", 1524 | "mimetype": "text/x-python", 1525 | "name": "python", 1526 | "nbconvert_exporter": "python", 1527 | "pygments_lexer": "ipython3", 1528 | "version": "3.7.4" 1529 | } 1530 | }, 1531 | "nbformat": 4, 1532 | "nbformat_minor": 2 1533 | } 1534 | -------------------------------------------------------------------------------- /05_convolutions/Math.ipynb: -------------------------------------------------------------------------------- 1 | { 
2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "np.random.seed(20190420)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 5, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/plain": [ 21 | "array([[5, 1, 0, 6],\n", 22 | " [3, 5, 5, 6],\n", 23 | " [9, 0, 3, 2],\n", 24 | " [4, 7, 0, 7]])" 25 | ] 26 | }, 27 | "execution_count": 5, 28 | "metadata": {}, 29 | "output_type": "execute_result" 30 | } 31 | ], 32 | "source": [ 33 | "np.random.randint(0, 10, size=(4,4))" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "$$ A = \\begin{bmatrix}5 & 1 & 0 & 6 \\\\ \n", 41 | "3 & 5 & 5 & 6 \\\\ \n", 42 | "9 & 0 & 3 & 2 \\\\ \n", 43 | "4 & 7 & 0 & 7 \\end{bmatrix} $$" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "$$ \\text{MaxPool}(A) = \\begin{bmatrix}5 & 6 \\\\ \n", 51 | "9 & 7 \\end{bmatrix} $$" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 8, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "(3.5, 4.25, 5.0, 3.0)" 63 | ] 64 | }, 65 | "execution_count": 8, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "np.mean([5,1,3,5]), np.mean([0,5,6,6]), np.mean([9,0,4,7]), np.mean([3,0,2,7])" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [] 80 | } 81 | ], 82 | "metadata": { 83 | "kernelspec": { 84 | "display_name": "Python 3", 85 | "language": "python", 86 | "name": "python3" 87 | }, 88 | "language_info": { 89 | "codemirror_mode": { 90 | "name": "ipython", 91 | "version": 3 92 | }, 93 | "file_extension": ".py", 94 | "mimetype": "text/x-python", 95 | "name": "python", 96 | "nbconvert_exporter": "python", 97 | "pygments_lexer": "ipython3", 98 | "version": "3.6.6" 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /05_convolutions/Numpy_Convolution_Demos.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Convolution demos" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "In this notebook, we use the batch, multi-channel convolution operation implemented in Numpy (that you can find [here](../lincoln/lincoln/conv.py)) to train a small convolutional neural network to more than 90% accuracy on MNIST." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "\n", 25 | "import lincoln\n", 26 | "from lincoln.layers import Dense\n", 27 | "from lincoln.losses import SoftmaxCrossEntropy, MeanSquaredError\n", 28 | "from lincoln.optimizers import Optimizer, SGD, SGDMomentum\n", 29 | "from lincoln.activations import Sigmoid, Tanh, Linear, ReLU\n", 30 | "from lincoln.network import NeuralNetwork\n", 31 | "from lincoln.train import Trainer\n", 32 | "from lincoln.utils import mnist\n", 33 | "from lincoln.layers import Conv2D\n", 34 | "\n", 35 | "X_train, y_train, X_test, y_test = mnist.load()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "%load_ext autoreload\n", 45 | "%autoreload 2" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "X_train, X_test = X_train - np.mean(X_train), X_test - np.mean(X_train)\n", 55 | "X_train, X_test = X_train / np.std(X_train), X_test / np.std(X_train)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "X_train_conv, X_test_conv = X_train.reshape(-1, 1, 28, 28), X_test.reshape(-1, 1, 28, 28)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "num_labels = len(y_train)\n", 74 | "train_labels = np.zeros((num_labels, 10))\n", 75 | "for i in range(num_labels):\n", 76 | " train_labels[i][y_train[i]] = 1\n", 77 | "\n", 78 | "num_labels = len(y_test)\n", 79 | "test_labels = np.zeros((num_labels, 10))\n", 80 | "for i in range(num_labels):\n", 81 | " test_labels[i][y_test[i]] = 1" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 6, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "def calc_accuracy_model(model, test_set):\n", 91 | " return print(f'''The model validation accuracy is: \n", 92 | " {np.equal(np.argmax(model.forward(test_set, inference=True), axis=1), y_test).sum() * 100.0 / test_set.shape[0]:.2f}%''')" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "# CNN from scratch" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 7, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "Validation accuracy after 100 batches is 86.85%\n", 112 | "Validation accuracy after 200 batches is 83.78%\n", 113 | "Validation accuracy after 300 batches is 90.42%\n", 114 | "Validation accuracy after 400 batches is 89.08%\n", 115 | "Validation accuracy after 500 batches is 90.01%\n", 116 | "Validation accuracy after 600 batches is 90.57%\n", 117 | "Validation accuracy after 700 batches is 84.27%\n", 118 | "Validation accuracy after 800 batches is 91.85%\n", 119 | "Validation accuracy after 900 batches is 92.50%\n", 120 | "Validation loss after 1 epochs is 3.615\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "model = NeuralNetwork(\n", 126 | " layers=[Conv2D(out_channels=16,\n", 127 | " param_size=5,\n", 128 | " dropout=0.8,\n", 129 | " weight_init=\"glorot\",\n", 130 | " flatten=True,\n", 131 | " activation=Tanh()),\n", 132 | " Dense(neurons=10, \n", 133 | " activation=Linear())],\n", 134 | " loss = SoftmaxCrossEntropy(), \n", 135 | 
"seed=20190402)\n", 136 | "\n", 137 | "trainer = Trainer(model, SGDMomentum(lr = 0.1, momentum=0.9))\n", 138 | "trainer.fit(X_train_conv, train_labels, X_test_conv, test_labels,\n", 139 | " epochs = 1,\n", 140 | " eval_every = 1,\n", 141 | " seed=20190402,\n", 142 | " batch_size=60,\n", 143 | " conv_testing=True);" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 8, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "The model validation accuracy is: \n", 156 | " 90.31%\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "calc_accuracy_model(model, X_test_conv)" 162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python 3", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.7.4" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 2 186 | } 187 | -------------------------------------------------------------------------------- /06_rnns/Autograd_Simple.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Simple automatic differentiation illustration" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from typing import Union, List\n", 17 | "\n", 18 | "import numpy as np\n", 19 | "\n", 20 | "np.set_printoptions(precision=4)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "7" 32 | ] 33 | }, 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "a = 3\n", 41 | "a.__add__(4)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "[2 3 1 0]\n", 54 | "Addition using '__add__': [6 7 5 4]\n", 55 | "Addition using '+': [6 7 5 4]\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "a = np.array([2,3,1,0])\n", 61 | "\n", 62 | "print(a)\n", 63 | "print(\"Addition using '__add__':\", a.__add__(4))\n", 64 | "print(\"Addition using '+':\", a + 4)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "Numberable = Union[float, int]\n", 74 | "\n", 75 | "def ensure_number(num: Numberable):\n", 76 | " if isinstance(num, NumberWithGrad):\n", 77 | " return num\n", 78 | " else:\n", 79 | " return NumberWithGrad(num) \n", 80 | "\n", 81 | "class NumberWithGrad(object):\n", 82 | " \n", 83 | " def __init__(self, \n", 84 | " num: Numberable,\n", 85 | " depends_on: List[Numberable] = None,\n", 86 | " creation_op: str = ''):\n", 87 | " self.num = num\n", 88 | " self.grad = None\n", 89 | " self.depends_on = depends_on or []\n", 90 | " self.creation_op = creation_op\n", 91 | "\n", 92 | " def __add__(self, \n", 93 | " other: Numberable):\n", 94 | " return NumberWithGrad(self.num + ensure_number(other).num,\n", 95 | " depends_on = [self, ensure_number(other)],\n", 96 | " 
creation_op = 'add')\n", 97 | " \n", 98 | " def __mul__(self,\n", 99 | " other: Numberable = None):\n", 100 | "\n", 101 | " return NumberWithGrad(self.num * ensure_number(other).num,\n", 102 | " depends_on = [self, ensure_number(other)],\n", 103 | " creation_op = 'mul')\n", 104 | " \n", 105 | " def backward(self, backward_grad: Numberable = None):\n", 106 | " if backward_grad is None: # first time calling backward\n", 107 | " self.grad = 1\n", 108 | " else: \n", 109 | " # These lines allow gradients to accumulate.\n", 110 | " # If the gradient doesn't exist yet, simply set it equal\n", 111 | " # to backward_grad\n", 112 | " if self.grad is None:\n", 113 | " self.grad = backward_grad\n", 114 | " # Otherwise, simply add backward_grad to the existing gradient\n", 115 | " else:\n", 116 | " self.grad += backward_grad\n", 117 | " \n", 118 | " if self.creation_op == \"add\":\n", 119 | " # Simply send backward self.grad, since increasing either of these \n", 120 | " # elements will increase the output by that same amount\n", 121 | " self.depends_on[0].backward(self.grad)\n", 122 | " self.depends_on[1].backward(self.grad) \n", 123 | "\n", 124 | " if self.creation_op == \"mul\":\n", 125 | "\n", 126 | " # Calculate the derivative with respect to the first element\n", 127 | " new = self.depends_on[1] * self.grad\n", 128 | " # Send backward the derivative with respect to that element\n", 129 | " self.depends_on[0].backward(new.num)\n", 130 | "\n", 131 | " # Calculate the derivative with respect to the second element\n", 132 | " new = self.depends_on[0] * self.grad\n", 133 | " # Send backward the derivative with respect to that element\n", 134 | " self.depends_on[1].backward(new.num)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 5, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "name": "stdout", 144 | "output_type": "stream", 145 | "text": [ 146 | "4\n", 147 | "1\n" 148 | ] 149 | } 150 | ], 151 | "source": [ 152 | "a = NumberWithGrad(3)\n", 153 | "b = a * 4\n", 154 | "c = b + 3\n", 155 | "c.backward()\n", 156 | "print(a.grad) # as expected\n", 157 | "print(b.grad) # as expected" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 6, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "a = NumberWithGrad(3)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 7, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "b = a * 4\n", 176 | "c = b + 3\n", 177 | "d = (a + 2)\n", 178 | "e = c * d \n", 179 | "e.backward() " 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 8, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "35" 191 | ] 192 | }, 193 | "execution_count": 8, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "a.grad # as expected" 200 | ] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": "Python 3", 206 | "language": "python", 207 | "name": "python3" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.7.4" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 2 224 | } 225 | -------------------------------------------------------------------------------- /06_rnns/Math.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "$$ abcdb \\rightarrow \n", 8 | "\\begin{bmatrix} \\begin{bmatrix} 1 \\\\ 0 \\\\ 0 \\\\ 0 \\end{bmatrix} & \\begin{bmatrix} 0 \\\\ 1 \\\\ 0 \\\\ 0 \\end{bmatrix} & \\begin{bmatrix} 0 \\\\ 0 \\\\ 1 \\\\ 0 \\end{bmatrix} & \\begin{bmatrix} 0 \\\\ 0 \\\\ 0 \\\\ 1 \\end{bmatrix} & \\begin{bmatrix} 0 \\\\ 1 \\\\ 0 \\\\ 0 \\end{bmatrix} \\end{bmatrix} = \\begin{bmatrix} 1 & 0 & 0 & 0 & 0 \\\\ 0 & 1 & 0 & 0 & 1 \\\\ 0 & 0 & 1 & 0 & 0 \\\\ 0 & 0 & 0 & 1 & 0 \\end{bmatrix} $$\n", 9 | "\n", 10 | "$$ bcdba \\rightarrow \\begin{bmatrix} \n", 11 | "\\begin{bmatrix} 0 \\\\ 1 \\\\ 0 \\\\ 0 \\end{bmatrix} & \\begin{bmatrix} 0 \\\\ 0 \\\\ 1 \\\\ 0 \\end{bmatrix} & \\begin{bmatrix} 0 \\\\ 0 \\\\ 0 \\\\ 1 \\end{bmatrix} & \\begin{bmatrix} 0 \\\\ 1 \\\\ 0 \\\\ 0 \\end{bmatrix} & \\begin{bmatrix} 1 \\\\ 0 \\\\ 0 \\\\ 0 \\end{bmatrix} \\end{bmatrix} = \\begin{bmatrix} 0 & 0 & 0 & 0 & 1 \\\\ 1 & 0 & 0 & 1 & 0 \\\\ 0 & 1 & 0 & 0 & 0 \\\\ 0 & 0 & 1 & 0 & 0 \\end{bmatrix} $$" 12 | ] 13 | } 14 | ], 15 | "metadata": { 16 | "kernelspec": { 17 | "display_name": "Python 3", 18 | "language": "python", 19 | "name": "python3" 20 | }, 21 | "language_info": { 22 | "codemirror_mode": { 23 | "name": "ipython", 24 | "version": 3 25 | }, 26 | "file_extension": ".py", 27 | "mimetype": "text/x-python", 28 | "name": "python", 29 | "nbconvert_exporter": "python", 30 | "pygments_lexer": "ipython3", 31 | "version": "3.7.4" 32 | } 33 | }, 34 | "nbformat": 4, 35 | "nbformat_minor": 2 36 | } 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Seth Weidman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning From Scratch code 2 | 3 | This repo contains all the code from the book [Deep Learning From Scratch](https://www.amazon.com/Deep-Learning-Scratch-Building-Principles/dp/1492041416), published by O'Reilly in September 2019. 
4 | 5 | It was mostly for me to keep the code I was writing for the book organized, but my hope is that readers can clone this repo and step through the code systematically themselves to better understand the concepts. 6 | 7 | ## Structure 8 | 9 | Each chapter has two notebooks: a `Code` notebook and a `Math` notebook. Each `Code` notebook contains the Python code for the corresponding chapter and can be run start to finish to generate the results from the chapters. The `Math` notebooks were just for me to store the LaTeX equations used in the book, taking advantage of Jupyter's LaTeX rendering functionality. 10 | 11 | ### `lincoln` 12 | 13 | In the notebooks in the Chapters 4, 5, and 7 folders, I import classes from `lincoln`, rather than putting those classes in the Jupyter Notebook itself. `lincoln` is not currently a `pip` installable library; the way I'd recommend to be able to `import` it and run these notebooks is to add a line like the following to your `.bashrc` file: 14 | 15 | ```bash 16 | export PYTHONPATH=$PYTHONPATH:/Users/seth/development/DLFS_code/lincoln 17 | ``` 18 | 19 | This will cause Python to search this path for a module called `lincoln` when you run the `import` command (of course, you'll have to replace the path above with the relevant path on your machine once you clone this repo). Then, simply `source` your `.bashrc` file before running the `jupyter notebook` command and you should be good to go. 20 | 21 | ### Chapter 5: Numpy Convolution Demos 22 | 23 | While I don't spend much time delving into the details in the main text of the book, I have implemented the batch, multi-channel convolution operation in pure Numpy (I do describe how to do this and share the code in the book's Appendix). In [this notebook](05_convolutions/Numpy_Convolution_Demos.ipynb), I demonstrate using this operation to train a single-layer CNN from scratch in pure Numpy to get over 90% accuracy on MNIST. 24 | -------------------------------------------------------------------------------- /lincoln/.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__* 2 | *.pyc* 3 | *.ipynb_checkpoints* 4 | *.DS_Store* 5 | *.c 6 | *.so 7 | *.o 8 | *.txt 9 | 10 | *data/* 11 | *.pkl* 12 | 13 | *.pt 14 | *ubyte 15 | -------------------------------------------------------------------------------- /lincoln/LICENSE: -------------------------------------------------------------------------------- 1 | =========== 2 | MIT License 3 | =========== 4 | 5 | Copyright (c) 2018, Seth Weidman & Mat Leonard 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. -------------------------------------------------------------------------------- /lincoln/README.md: -------------------------------------------------------------------------------- 1 | # Lincoln 2 | 3 | "A Deep Learning library by the people, for the people." 4 | 5 | ![](lincoln.png) 6 | 7 | ## Description 8 | 9 | "Lincoln" is a minimal Deep Learning library accompanying the book "Deep Learning From Scratch", published by O'Reilly in September 2019. 10 | 11 | It is intended for beginners who want to understand the key components of how Deep Learning works by walking through a clean, minimal implementation. 12 | -------------------------------------------------------------------------------- /lincoln/lincoln.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SethHWeidman/DLFS_code/f4ec4de43049ef990d0f4ddece81223cef3a0e91/lincoln/lincoln.png -------------------------------------------------------------------------------- /lincoln/lincoln/activations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lincoln import base 3 | 4 | 5 | class Linear(base.Operation): 6 | """ 7 | Linear activation function 8 | """ 9 | 10 | def __init__(self) -> None: 11 | super().__init__() 12 | 13 | def _output(self) -> np.ndarray: 14 | return self.input_ 15 | 16 | def _input_grad(self, output_grad: np.ndarray) -> np.ndarray: 17 | return output_grad 18 | 19 | 20 | class Sigmoid(base.Operation): 21 | """ 22 | Sigmoid activation function 23 | """ 24 | 25 | def __init__(self) -> None: 26 | super().__init__() 27 | 28 | def _output(self) -> np.ndarray: 29 | return 1.0 / (1.0 + np.exp(-1.0 * self.input_)) 30 | 31 | def _input_grad(self, output_grad: np.ndarray) -> np.ndarray: 32 | sigmoid_backward = self.output * (1.0 - self.output) 33 | input_grad = sigmoid_backward * output_grad 34 | return input_grad 35 | 36 | 37 | class Tanh(base.Operation): 38 | """ 39 | Hyperbolic tangent activation function 40 | """ 41 | 42 | def __init__(self) -> None: 43 | super().__init__() 44 | 45 | def _output(self) -> np.ndarray: 46 | return np.tanh(self.input_) 47 | 48 | def _input_grad(self, output_grad: np.ndarray) -> np.ndarray: 49 | 50 | return output_grad * (1 - self.output * self.output) 51 | 52 | 53 | class ReLU(base.Operation): 54 | """ 55 | ReLU activation function 56 | """ 57 | 58 | def __init__(self) -> None: 59 | super().__init__() 60 | 61 | def _output(self) -> np.ndarray: 62 | return np.clip(self.input_, 0, None) 63 | 64 | def _input_grad(self, output_grad: np.ndarray) -> np.ndarray: 65 | # pass gradients through only where the unit was active 66 | mask = self.output > 0 67 | return output_grad * mask 68 | -------------------------------------------------------------------------------- /lincoln/lincoln/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from lincoln.utils import np_utils 4 | 5 | 6 | class Operation(object): 7 | 8 | def __init__(self): 9 | pass 10 | 11 | def forward(self, input_: np.ndarray) -> np.ndarray: 12 | 13 | self.input_ = input_ 14 | 15 | self.output = self._output() 16 | 17 | return self.output 18 | 19 | def backward(self, output_grad: np.ndarray) -> 
np.ndarray: 20 | 21 | np_utils.assert_same_shape(self.output, output_grad) 22 | 23 | self.input_grad = self._input_grad(output_grad) 24 | 25 | np_utils.assert_same_shape(self.input_, self.input_grad) 26 | 27 | return self.input_grad 28 | 29 | def _output(self) -> np.ndarray: 30 | raise NotImplementedError() 31 | 32 | def _input_grad(self, output_grad: np.ndarray) -> np.ndarray: 33 | raise NotImplementedError() 34 | 35 | 36 | class ParamOperation(Operation): 37 | 38 | def __init__(self, param: np.ndarray) -> np.ndarray: 39 | super().__init__() 40 | self.param = param 41 | 42 | def backward(self, output_grad: np.ndarray) -> np.ndarray: 43 | 44 | np_utils.assert_same_shape(self.output, output_grad) 45 | 46 | self.input_grad = self._input_grad(output_grad) 47 | self.param_grad = self._param_grad(output_grad) 48 | 49 | np_utils.assert_same_shape(self.input_, self.input_grad) 50 | 51 | return self.input_grad 52 | 53 | def _param_grad(self, output_grad: np.ndarray) -> np.ndarray: 54 | raise NotImplementedError() 55 | -------------------------------------------------------------------------------- /lincoln/lincoln/conv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from lincoln import base 4 | 5 | 6 | class Conv2D_Op(base.ParamOperation): 7 | 8 | def __init__(self, W: np.ndarray): 9 | super().__init__(W) 10 | self.param_size = W.shape[2] 11 | self.param_pad = self.param_size // 2 12 | 13 | def _pad_1d(self, inp: np.ndarray) -> np.ndarray: 14 | z = np.array([0]) 15 | z = np.repeat(z, self.param_pad) 16 | return np.concatenate([z, inp, z]) 17 | 18 | def _pad_1d_batch(self, inp: np.ndarray) -> np.ndarray: 19 | outs = [self._pad_1d(obs) for obs in inp] 20 | return np.stack(outs) 21 | 22 | def _pad_2d_obs(self, inp: np.ndarray): 23 | """ 24 | Input is a 2 dimensional, square, 2D Tensor 25 | """ 26 | inp_pad = self._pad_1d_batch(inp) 27 | 28 | other = np.zeros((self.param_pad, inp.shape[0] + self.param_pad * 2)) 29 | 30 | return np.concatenate([other, inp_pad, other]) 31 | 32 | def _pad_2d_channel(self, inp: np.ndarray): 33 | """ 34 | inp has dimension [num_channels, image_width, image_height] 35 | """ 36 | return np.stack([self._pad_2d_obs(channel) for channel in inp]) 37 | 38 | def _get_image_patches(self, input_: np.ndarray): 39 | imgs_batch_pad = np.stack([self._pad_2d_channel(obs) for obs in input_]) 40 | patches = [] 41 | img_height = imgs_batch_pad.shape[2] 42 | for h in range(img_height - self.param_size + 1): 43 | for w in range(img_height - self.param_size + 1): 44 | patch = imgs_batch_pad[:, :, h : h + self.param_size, w : w + self.param_size] 45 | patches.append(patch) 46 | return np.stack(patches) 47 | 48 | def _output(self): 49 | """ 50 | conv_in: [batch_size, channels, img_width, img_height] 51 | param: [in_channels, out_channels, fil_width, fil_height] 52 | """ 53 | # assert_dim(obs, 4) 54 | # assert_dim(param, 4) 55 | batch_size = self.input_.shape[0] 56 | img_height = self.input_.shape[2] 57 | img_size = self.input_.shape[2] * self.input_.shape[3] 58 | patch_size = self.param.shape[0] * self.param.shape[2] * self.param.shape[3] 59 | 60 | patches = self._get_image_patches(self.input_) 61 | 62 | patches_reshaped = patches.transpose(1, 0, 2, 3, 4).reshape(batch_size, img_size, -1) 63 | 64 | param_reshaped = self.param.transpose(0, 2, 3, 1).reshape(patch_size, -1) 65 | 66 | output_reshaped = ( 67 | np.matmul(patches_reshaped, param_reshaped) 68 | .reshape(batch_size, img_height, img_height, -1) 69 | .transpose(0, 3, 1, 2) 
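# back to [batch_size, out_channels, img_height, img_width]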
70 | ) 71 | 72 | return output_reshaped 73 | 74 | def _input_grad(self, output_grad: np.ndarray) -> np.ndarray: 75 | 76 | batch_size = self.input_.shape[0] 77 | img_size = self.input_.shape[2] * self.input_.shape[3] 78 | img_height = self.input_.shape[2] 79 | 80 | output_patches = ( 81 | self._get_image_patches(output_grad) 82 | .transpose(1, 0, 2, 3, 4) 83 | .reshape(batch_size * img_size, -1) 84 | ) 85 | 86 | param_reshaped = self.param.reshape(self.param.shape[0], -1).transpose(1, 0) 87 | 88 | return ( 89 | np.matmul(output_patches, param_reshaped) 90 | .reshape(batch_size, img_height, img_height, self.param.shape[0]) 91 | .transpose(0, 3, 1, 2) 92 | ) 93 | 94 | def _param_grad(self, output_grad: np.ndarray) -> np.ndarray: 95 | 96 | batch_size = self.input_.shape[0] 97 | img_size = self.input_.shape[2] * self.input_.shape[3] 98 | in_channels = self.param.shape[0] 99 | out_channels = self.param.shape[1] 100 | 101 | in_patches_reshape = ( 102 | self._get_image_patches(self.input_).reshape(batch_size * img_size, -1).transpose(1, 0) 103 | ) 104 | 105 | out_grad_reshape = output_grad.transpose(0, 2, 3, 1).reshape(batch_size * img_size, -1) 106 | 107 | return ( 108 | np.matmul(in_patches_reshape, out_grad_reshape) 109 | .reshape(in_channels, self.param_size, self.param_size, out_channels) 110 | .transpose(0, 3, 1, 2) 111 | ) 112 | -------------------------------------------------------------------------------- /lincoln/lincoln/dense.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from lincoln import base 4 | 5 | 6 | class WeightMultiply(base.ParamOperation): 7 | 8 | def __init__(self, W: np.ndarray): 9 | super().__init__(W) 10 | 11 | def _output(self) -> np.ndarray: 12 | return np.matmul(self.input_, self.param) 13 | 14 | def _input_grad(self, output_grad: np.ndarray) -> np.ndarray: 15 | return np.matmul(output_grad, self.param.transpose(1, 0)) 16 | 17 | def _param_grad(self, output_grad: np.ndarray) -> np.ndarray: 18 | return np.matmul(self.input_.transpose(1, 0), output_grad) 19 | 20 | 21 | class BiasAdd(base.ParamOperation): 22 | 23 | def __init__(self, B: np.ndarray): 24 | super().__init__(B) 25 | 26 | def _output(self) -> np.ndarray: 27 | return self.input_ + self.param 28 | 29 | def _input_grad(self, output_grad: np.ndarray) -> np.ndarray: 30 | return np.ones_like(self.input_) * output_grad 31 | 32 | def _param_grad(self, output_grad: np.ndarray) -> np.ndarray: 33 | output_grad_reshape = np.sum(output_grad, axis=0).reshape(1, -1) 34 | param_grad = np.ones_like(self.param) 35 | return param_grad * output_grad_reshape 36 | -------------------------------------------------------------------------------- /lincoln/lincoln/layers.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | 5 | from lincoln import activations 6 | from lincoln import base 7 | from lincoln import conv 8 | from lincoln import dense 9 | from lincoln import reshape 10 | from lincoln.utils import np_utils 11 | 12 | 13 | class Layer(object): 14 | 15 | def __init__(self, neurons: int) -> None: 16 | self.neurons = neurons 17 | self.first = True 18 | self.params: List[np.ndarray] = [] 19 | self.param_grads: List[np.ndarray] = [] 20 | self.operations: List[base.Operation] = [] 21 | 22 | def _setup_layer(self, input_: np.ndarray) -> None: 23 | pass 24 | 25 | def forward(self, input_: np.ndarray) -> np.ndarray: 26 | 27 | if self.first: 28 | self._setup_layer(input_) 29 | 
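# params are built from the first input's shape; after this, setup is skipped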
self.first = False 30 | 31 | self.input_ = input_ 32 | 33 | for operation in self.operations: 34 | 35 | input_ = operation.forward(input_) 36 | 37 | self.output = input_ 38 | 39 | return self.output 40 | 41 | def backward(self, output_grad: np.ndarray) -> np.ndarray: 42 | 43 | np_utils.assert_same_shape(self.output, output_grad) 44 | 45 | for operation in self.operations[::-1]: 46 | output_grad = operation.backward(output_grad) 47 | 48 | input_grad = output_grad 49 | 50 | np_utils.assert_same_shape(self.input_, input_grad) 51 | 52 | self._param_grads() 53 | 54 | return input_grad 55 | 56 | def _param_grads(self) -> None: 57 | 58 | self.param_grads = [] 59 | for operation in self.operations: 60 | if issubclass(operation.__class__, base.ParamOperation): 61 | self.param_grads.append(operation.param_grad) 62 | 63 | def _params(self) -> None: 64 | 65 | self.params = [] 66 | for operation in self.operations: 67 | if issubclass(operation.__class__, base.ParamOperation): 68 | self.params.append(operation.param) 69 | 70 | 71 | class Dense(Layer): 72 | 73 | def __init__( 74 | self, 75 | neurons: int, 76 | activation: base.Operation = activations.Linear(), 77 | conv_in: bool = False, 78 | dropout: float = 1.0, weight_init: str = "standard", 79 | ) -> None: 80 | super().__init__(neurons) 81 | self.activation = activation 82 | self.conv_in = conv_in 83 | self.dropout = dropout 84 | self.weight_init = weight_init 85 | 86 | def _setup_layer(self, input_: np.ndarray) -> None: 87 | np.random.seed(self.seed) 88 | num_in = input_.shape[1] 89 | 90 | if self.weight_init == "glorot": 91 | scale = np.sqrt(2 / (num_in + self.neurons)) 92 | else: 93 | scale = 1.0 94 | 95 | # weights 96 | self.params = [] 97 | self.params.append(np.random.normal(loc=0, scale=scale, size=(num_in, self.neurons))) 98 | 99 | # bias 100 | self.params.append(np.random.normal(loc=0, scale=scale, size=(1, self.neurons))) 101 | 102 | self.operations = [ 103 | dense.WeightMultiply(self.params[0]), 104 | dense.BiasAdd(self.params[1]), 105 | self.activation, 106 | ] 107 | 108 | return None 109 | 110 | 111 | class Conv2D(Layer): 112 | """ 113 | Once we define all the Operations and the outline of a layer, 114 | all that remains to implement here is the _setup_layer function! 
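The filter bank is created lazily in _setup_layer, once the number of input channels is known.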
115 | """ 116 | 117 | def __init__( 118 | self, 119 | out_channels: int, 120 | param_size: int, 121 | dropout: int = 1.0, 122 | weight_init: str = "normal", 123 | activation: base.Operation = activations.Linear(), 124 | flatten: bool = False, 125 | ) -> None: 126 | super().__init__(out_channels) 127 | self.param_size = param_size 128 | self.activation = activation 129 | self.flatten = flatten 130 | self.dropout = dropout 131 | self.weight_init = weight_init 132 | self.out_channels = out_channels 133 | 134 | def _setup_layer(self, input_: np.ndarray) -> np.ndarray: 135 | 136 | self.params = [] 137 | in_channels = input_.shape[1] 138 | 139 | if self.weight_init == "glorot": 140 | scale = 2 / (in_channels + self.out_channels) 141 | else: 142 | scale = 1.0 143 | 144 | conv_param = np.random.normal( 145 | loc=0, 146 | scale=scale, 147 | size=( 148 | input_.shape[1], # input channels 149 | self.out_channels, 150 | self.param_size, 151 | self.param_size, 152 | ), 153 | ) 154 | 155 | self.params.append(conv_param) 156 | 157 | self.operations = [] 158 | self.operations.append(conv.Conv2D_Op(conv_param)) 159 | self.operations.append(self.activation) 160 | 161 | if self.flatten: 162 | self.operations.append(reshape.Flatten()) 163 | 164 | if self.dropout < 1.0: 165 | self.operations.append(dropout.Dropout(self.dropout)) 166 | 167 | return None 168 | -------------------------------------------------------------------------------- /lincoln/lincoln/losses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from lincoln.utils import np_utils 4 | 5 | 6 | class Loss(object): 7 | 8 | def __init__(self): 9 | pass 10 | 11 | def forward(self, prediction: np.ndarray, target: np.ndarray) -> float: 12 | 13 | # batch size x num_classes 14 | np_utils.assert_same_shape(prediction, target) 15 | 16 | self.prediction = prediction 17 | self.target = target 18 | 19 | self.output = self._output() 20 | 21 | return self.output 22 | 23 | def backward(self) -> np.ndarray: 24 | 25 | self.input_grad = self._input_grad() 26 | 27 | np_utils.assert_same_shape(self.prediction, self.input_grad) 28 | 29 | return self.input_grad 30 | 31 | def _output(self) -> float: 32 | raise NotImplementedError() 33 | 34 | def _input_grad(self) -> np.ndarray: 35 | raise NotImplementedError() 36 | 37 | 38 | class MeanSquaredError(Loss): 39 | 40 | def __init__(self, normalize: bool = False) -> None: 41 | super().__init__() 42 | self.normalize = normalize 43 | 44 | def _output(self) -> float: 45 | 46 | if self.normalize: 47 | self.prediction = self.prediction / self.prediction.sum(axis=1, keepdims=True) 48 | 49 | loss = np.sum(np.power(self.prediction - self.target, 2)) / self.prediction.shape[0] 50 | 51 | return loss 52 | 53 | def _input_grad(self) -> np.ndarray: 54 | 55 | return 2.0 * (self.prediction - self.target) / self.prediction.shape[0] 56 | 57 | 58 | class SoftmaxCrossEntropy(Loss): 59 | def __init__(self, eps: float = 1e-9) -> None: 60 | super().__init__() 61 | self.eps = eps 62 | self.single_class = False 63 | 64 | def _output(self) -> float: 65 | 66 | # if the network is just outputting probabilities 67 | # of just belonging to one class: 68 | if self.target.shape[1] == 0: 69 | self.single_class = True 70 | 71 | # if "single_class", apply the "normalize" operation defined above: 72 | if self.single_class: 73 | self.prediction, self.target = np_utils.normalize(self.prediction), np_utils.normalize( 74 | self.target 75 | ) 76 | 77 | # applying the softmax function to each row 
(observation) 78 | softmax_preds = np_utils.softmax(self.prediction, axis=1) 79 | 80 | # clipping the softmax output to prevent numeric instability 81 | self.softmax_preds = np.clip(softmax_preds, self.eps, 1 - self.eps) 82 | 83 | # actual loss computation 84 | softmax_cross_entropy_loss = -1.0 * self.target * np.log(self.softmax_preds) - ( 85 | 1.0 - self.target 86 | ) * np.log(1 - self.softmax_preds) 87 | 88 | return np.sum(softmax_cross_entropy_loss) / self.prediction.shape[0] 89 | 90 | def _input_grad(self) -> np.ndarray: 91 | 92 | # if "single_class", "un-normalize" probabilities before returning gradient: 93 | if self.single_class: 94 | return np_utils.unnormalize(self.softmax_preds - self.target) 95 | else: 96 | return (self.softmax_preds - self.target) / self.prediction.shape[0] 97 | -------------------------------------------------------------------------------- /lincoln/lincoln/network.py: -------------------------------------------------------------------------------- 1 | import typing 2 | import numpy as np 3 | 4 | from lincoln import layers 5 | from lincoln import losses 6 | 7 | 8 | class LayerBlock(object): 9 | 10 | def __init__(self, layers: typing.List[layers.Layer]): 11 | super().__init__() 12 | self.layers = layers 13 | 14 | def forward(self, X_batch: np.ndarray) -> np.ndarray: 15 | 16 | X_out = X_batch 17 | for layer in self.layers: 18 | X_out = layer.forward(X_out) 19 | 20 | return X_out 21 | 22 | def backward(self, loss_grad: np.ndarray) -> np.ndarray: 23 | 24 | grad = loss_grad 25 | for layer in reversed(self.layers): 26 | grad = layer.backward(grad) 27 | 28 | return grad 29 | 30 | def params(self): 31 | for layer in self.layers: 32 | yield from layer.params 33 | 34 | def param_grads(self): 35 | for layer in self.layers: 36 | yield from layer.param_grads 37 | 38 | def __iter__(self): 39 | return iter(self.layers) 40 | 41 | def __repr__(self): 42 | layer_strs = [str(layer) for layer in self.layers] 43 | return f"{self.__class__.__name__}(\n " + ",\n ".join(layer_strs) + ")" 44 | 45 | 46 | class NeuralNetwork(LayerBlock): 47 | """ 48 | Just a list of layers that runs forwards and backwards 49 | """ 50 | 51 | def __init__( 52 | self, 53 | layers: typing.List[layers.Layer], 54 | loss: losses.Loss = losses.MeanSquaredError, 55 | seed: int = 1, 56 | ): 57 | super().__init__(layers) 58 | self.loss = loss 59 | self.seed = seed 60 | if seed: 61 | for layer in self.layers: 62 | setattr(layer, "seed", self.seed) 63 | 64 | def forward_loss(self, X_batch: np.ndarray, y_batch: np.ndarray) -> float: 65 | 66 | prediction = self.forward(X_batch) 67 | return self.loss.forward(prediction, y_batch) 68 | 69 | def train_batch(self, X_batch: np.ndarray, y_batch: np.ndarray) -> float: 70 | 71 | prediction = self.forward(X_batch) 72 | 73 | batch_loss = self.loss.forward(prediction, y_batch) 74 | loss_grad = self.loss.backward() 75 | 76 | self.backward(loss_grad) 77 | 78 | return batch_loss 79 | -------------------------------------------------------------------------------- /lincoln/lincoln/optimizers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Optimizer(object): 5 | def __init__(self, lr: float = 0.01, final_lr: float = 0, decay_type: str = None) -> None: 6 | self.lr = lr 7 | self.final_lr = final_lr 8 | self.decay_type = decay_type 9 | self.first = True 10 | 11 | def _setup_decay(self) -> None: 12 | 13 | if not self.decay_type: 14 | return 15 | elif self.decay_type == "exponential": 16 | self.decay_per_epoch = 
np.power(self.final_lr / self.lr, 1.0 / (self.max_epochs - 1)) 17 | elif self.decay_type == "linear": 18 | self.decay_per_epoch = (self.lr - self.final_lr) / (self.max_epochs - 1) 19 | 20 | def _decay_lr(self) -> None: 21 | 22 | if not self.decay_type: 23 | return 24 | 25 | if self.decay_type == "exponential": 26 | self.lr *= self.decay_per_epoch 27 | 28 | elif self.decay_type == "linear": 29 | self.lr -= self.decay_per_epoch 30 | 31 | def step(self) -> None: 32 | 33 | for param, param_grad in zip(self.net.params(), self.net.param_grads()): 34 | self._update_rule(param=param, grad=param_grad) 35 | 36 | def _update_rule(self, **kwargs) -> None: 37 | raise NotImplementedError() 38 | 39 | 40 | class SGD(Optimizer): 41 | def __init__(self, lr: float = 0.01, final_lr: float = 0, decay_type: str = None) -> None: 42 | super().__init__(lr, final_lr, decay_type) 43 | 44 | def _update_rule(self, **kwargs) -> None: 45 | 46 | update = self.lr * kwargs["grad"] 47 | kwargs["param"] -= update 48 | 49 | 50 | class SGDMomentum(Optimizer): 51 | def __init__( 52 | self, lr: float = 0.01, final_lr: float = 0, decay_type: str = None, momentum: float = 0.9 53 | ) -> None: 54 | super().__init__(lr, final_lr, decay_type) 55 | self.momentum = momentum 56 | 57 | def step(self) -> None: 58 | if self.first: 59 | self.velocities = [np.zeros_like(param) for param in self.net.params()] 60 | self.first = False 61 | 62 | for param, param_grad, velocity in zip( 63 | self.net.params(), self.net.param_grads(), self.velocities 64 | ): 65 | self._update_rule(param=param, grad=param_grad, velocity=velocity) 66 | 67 | def _update_rule(self, **kwargs) -> None: 68 | 69 | # Update velocity 70 | kwargs["velocity"] *= self.momentum 71 | kwargs["velocity"] += self.lr * kwargs["grad"] 72 | 73 | # Use this to update parameters 74 | kwargs["param"] -= kwargs["velocity"] 75 | 76 | 77 | class AdaGrad(Optimizer): 78 | def __init__( 79 | self, lr: float = 0.01, final_lr_exp: float = 0, final_lr_linear: float = 0 80 | ) -> None: 81 | super().__init__(lr, final_lr_exp, final_lr_linear) 82 | self.eps = 1e-7 83 | 84 | def step(self) -> None: 85 | if self.first: 86 | self.sum_squares = [np.zeros_like(param) for param in self.net.params()] 87 | self.first = False 88 | 89 | for param, param_grad, sum_square in zip( 90 | self.net.params(), self.net.param_grads(), self.sum_squares 91 | ): 92 | self._update_rule(param=param, grad=param_grad, sum_square=sum_square) 93 | 94 | def _update_rule(self, **kwargs) -> None: 95 | 96 | # Update running sum of squares 97 | kwargs["sum_square"] += self.eps + np.power(kwargs["grad"], 2) 98 | 99 | # Scale learning rate by the running sum of squares 100 | lr = np.divide(self.lr, np.sqrt(kwargs["sum_square"])) 101 | 102 | # Use this to update parameters 103 | kwargs["param"] -= lr * kwargs["grad"] 104 | 105 | 106 | class RegularizedSGD(Optimizer): 107 | def __init__(self, lr: float = 0.01, alpha: float = 0.1) -> None: 108 | super().__init__() 109 | self.lr = lr 110 | self.alpha = alpha 111 | 112 | def step(self) -> None: 113 | 114 | for param, param_grad in zip(self.net.params(), self.net.param_grads()): 115 | 116 | self._update_rule(param=param, grad=param_grad) 117 | 118 | def _update_rule(self, **kwargs) -> None: 119 | 120 | # Use this to update parameters 121 | kwargs["param"] -= self.lr * kwargs["grad"] + self.alpha * kwargs["param"] 122 | -------------------------------------------------------------------------------- /lincoln/lincoln/pytorch/layers.py: 
-------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch import Tensor 3 | 4 | 5 | def inference_mode(m: nn.Module): 6 | m.eval() 7 | 8 | 9 | class PyTorchLayer(nn.Module): 10 | 11 | def __init__(self) -> None: 12 | super().__init__() 13 | 14 | def forward(self, x: Tensor, inference: bool = False) -> Tensor: 15 | raise NotImplementedError() 16 | 17 | 18 | class DenseLayer(PyTorchLayer): 19 | def __init__( 20 | self, 21 | input_size: int, 22 | neurons: int, 23 | dropout: float = 1.0, 24 | activation: nn.Module = None, 25 | ) -> None: 26 | 27 | super().__init__() 28 | self.linear = nn.Linear(input_size, neurons) 29 | self.activation = activation 30 | if dropout < 1.0: 31 | self.dropout = nn.Dropout(1 - dropout) 32 | 33 | def forward(self, x: Tensor, inference: bool = False) -> Tensor: 34 | if inference: 35 | self.apply(inference_mode) 36 | 37 | x = self.linear(x) # does weight multiplication + bias 38 | if self.activation: 39 | x = self.activation(x) 40 | if hasattr(self, "dropout"): 41 | x = self.dropout(x) 42 | 43 | return x 44 | -------------------------------------------------------------------------------- /lincoln/lincoln/pytorch/model.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | from torch import nn, Tensor 4 | 5 | 6 | class PyTorchModel(nn.Module): 7 | 8 | def __init__(self) -> None: 9 | super().__init__() 10 | 11 | def forward(self, x: Tensor) -> Tuple[Tensor]: 12 | raise NotImplementedError() 13 | -------------------------------------------------------------------------------- /lincoln/lincoln/pytorch/preprocessor.py: -------------------------------------------------------------------------------- 1 | from torch import Tensor 2 | 3 | 4 | class PyTorchPreprocessor(): 5 | def __init__(self): 6 | pass 7 | 8 | def transform(self, x: Tensor) -> Tensor: 9 | raise NotImplementedError() 10 | 11 | 12 | class ConvNetPreprocessor(PyTorchPreprocessor): 13 | def __init__(self): 14 | pass 15 | 16 | def transform(self, x: Tensor) -> Tensor: 17 | return x.permute(0, 3, 1, 2) 18 | -------------------------------------------------------------------------------- /lincoln/lincoln/pytorch/train.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch.optim import Optimizer 6 | from torch.optim import lr_scheduler 7 | from torch.nn.modules.loss import _Loss 8 | from torch.utils.data import DataLoader 9 | 10 | from .utils import permute_data 11 | from .model import PyTorchModel 12 | 13 | 14 | class PyTorchTrainer(object): 15 | def __init__(self, 16 | model: PyTorchModel, 17 | optim: Optimizer, 18 | criterion: _Loss): 19 | self.model = model 20 | self.optim = optim 21 | self.loss = criterion 22 | self._check_optim_net_aligned() 23 | 24 | def _check_optim_net_aligned(self): 25 | assert self.optim.param_groups[0]['params']\ 26 | == list(self.model.parameters()) 27 | 28 | def _generate_batches(self, 29 | X: Tensor, 30 | y: Tensor, 31 | size: int = 32) -> Tuple[Tensor]: 32 | 33 | N = X.shape[0] 34 | 35 | for ii in range(0, N, size): 36 | X_batch, y_batch = X[ii:ii+size], y[ii:ii+size] 37 | 38 | yield X_batch, y_batch 39 | 40 | def fit(self, X_train: Tensor = None, 41 | y_train: Tensor = None, 42 | X_test: Tensor = None, 43 | y_test: Tensor = None, 44 | train_dataloader: DataLoader = None, 45 | test_dataloader: DataLoader = None, 46 | 
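# either raw tensors or DataLoaders may be passed in; raw tensors are batched manually below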
epochs: int=100, 47 | eval_every: int=10, 48 | batch_size: int=32, 49 | final_lr_exp: float = None): 50 | 51 | init_lr = self.optim.param_groups[0]['lr'] 52 | if final_lr_exp: 53 | decay = (final_lr_exp / init_lr) ** (1.0 / (epochs + 1)) 54 | scheduler = lr_scheduler.ExponentialLR(self.optim, gamma=decay) 55 | for e in range(epochs): 56 | 57 | if final_lr_exp: 58 | scheduler.step() 59 | 60 | if not train_dataloader: 61 | X_train, y_train = permute_data(X_train, y_train) 62 | 63 | batch_generator = self._generate_batches(X_train, y_train, 64 | batch_size) 65 | 66 | self.model.train() 67 | 68 | for ii, (X_batch, y_batch) in enumerate(batch_generator): 69 | 70 | self.optim.zero_grad() # zero the gradient buffers 71 | 72 | output = self.model(X_batch)[0] 73 | 74 | loss = self.loss(output, y_batch) 75 | loss.backward() 76 | self.optim.step() 77 | 78 | if e % eval_every == 0: 79 | with torch.no_grad(): 80 | self.model.eval() 81 | output = self.model(X_test)[0] 82 | loss = self.loss(output, y_test) 83 | print("The loss after", e+1, "epochs was", loss.item()) 84 | 85 | else: 86 | for X_batch, y_batch in train_dataloader: 87 | 88 | self.optim.zero_grad() 89 | 90 | output = self.model(X_batch)[0] 91 | 92 | loss = self.loss(output, y_batch) 93 | loss.backward() 94 | self.optim.step() 95 | 96 | if e % eval_every == 0: 97 | with torch.no_grad(): 98 | self.model.eval() 99 | losses = [] 100 | for X_batch, y_batch in test_dataloader: 101 | output = self.model(X_batch)[0] 102 | loss = self.loss(output, y_batch) 103 | losses.append(loss.item()) 104 | print("The loss after", e+1, "epochs was", 105 | round(torch.Tensor(losses).mean().item(), 4)) 106 | -------------------------------------------------------------------------------- /lincoln/lincoln/pytorch/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | 4 | from typing import Tuple 5 | 6 | 7 | def permute_data(X: Tensor, y: Tensor, seed=1) -> Tuple[Tensor]: 8 | perm = torch.randperm(X.shape[0]) 9 | return X[perm], y[perm] 10 | 11 | 12 | def assert_dim(t: Tensor, 13 | dim: int): 14 | assert len(t.shape) == dim, \ 15 | ''' 16 | Tensor expected to have dimension {0}, instead has dimension {1} 17 | '''.format(dim, len(t.shape)) 18 | return None 19 | -------------------------------------------------------------------------------- /lincoln/lincoln/reshape.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from lincoln import base 4 | 5 | 6 | class Flatten(base.Operation): 7 | def __init__(self): 8 | super().__init__() 9 | 10 | def _output(self) -> np.ndarray: 11 | return self.input_.reshape(self.input_.shape[0], -1) 12 | 13 | def _input_grad(self, output_grad: np.ndarray) -> np.ndarray: 14 | return output_grad.reshape(self.input_.shape) 15 | -------------------------------------------------------------------------------- /lincoln/lincoln/train.py: -------------------------------------------------------------------------------- 1 | import typing 2 | from copy import deepcopy 3 | 4 | import numpy as np 5 | 6 | from lincoln import network 7 | from lincoln import optimizers 8 | from lincoln.utils import np_utils 9 | 10 | 11 | class Trainer(object): 12 | """ 13 | Trains a neural network, given data and an optimizer 14 | """ 15 | 16 | def __init__(self, net: network.NeuralNetwork, optim: optimizers.Optimizer) -> None: 17 | self.net = net 18 | self.optim = optim 19 | self.best_loss = 1e9 20 | setattr(self.optim, 
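# give the optimizer a reference to the network whose parameters it will update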
"net", self.net) 21 | 22 | def fit( 23 | self, 24 | X_train: np.ndarray, 25 | y_train: np.ndarray, 26 | X_test: np.ndarray, 27 | y_test: np.ndarray, 28 | epochs: int = 100, 29 | eval_every: int = 10, 30 | batch_size: int = 32, 31 | seed: int = 1, 32 | restart: bool = True, 33 | early_stopping: bool = True, 34 | conv_testing: bool = False, 35 | ) -> None: 36 | 37 | setattr(self.optim, "max_epochs", epochs) 38 | self.optim._setup_decay() 39 | 40 | np.random.seed(seed) 41 | if restart: 42 | for layer in self.net.layers: 43 | layer.first = True 44 | 45 | self.best_loss = 1e9 46 | 47 | for e in range(epochs): 48 | 49 | if (e + 1) % eval_every == 0: 50 | 51 | last_model = deepcopy(self.net) 52 | 53 | X_train, y_train = np_utils.permute_data(X_train, y_train) 54 | 55 | batch_generator = self.generate_batches(X_train, y_train, batch_size) 56 | 57 | for ii, (X_batch, y_batch) in enumerate(batch_generator): 58 | 59 | self.net.train_batch(X_batch, y_batch) 60 | 61 | self.optim.step() 62 | 63 | if conv_testing: 64 | if ii % 10 == 0: 65 | test_preds = self.net.forward(X_batch) 66 | batch_loss = self.net.loss.forward(test_preds, y_batch) 67 | print("batch", ii, "loss", batch_loss) 68 | 69 | if ii % 100 == 0 and ii > 0: 70 | print( 71 | "Validation accuracy after", 72 | ii, 73 | "batches is", 74 | """{0:.2f}%""".format( 75 | np.equal( 76 | np.argmax(self.net.forward(X_test), axis=1), 77 | np.argmax(y_test, axis=1), 78 | ).sum() 79 | * 100.0 80 | / X_test.shape[0] 81 | ), 82 | ) 83 | 84 | if (e + 1) % eval_every == 0: 85 | 86 | test_preds = self.net.forward(X_test) 87 | loss = self.net.loss.forward(test_preds, y_test) 88 | 89 | if early_stopping: 90 | if loss < self.best_loss: 91 | print(f"Validation loss after {e+1} epochs is {loss:.3f}") 92 | self.best_loss = loss 93 | else: 94 | print() 95 | print( 96 | "Loss increased after epoch {0}, final loss was {1:.3f},".format( 97 | e + 1, self.best_loss 98 | ), 99 | "\nusing the model from epoch {0}".format(e + 1 - eval_every), 100 | ) 101 | self.net = last_model 102 | # ensure self.optim is still updating self.net 103 | setattr(self.optim, "net", self.net) 104 | break 105 | else: 106 | print(f"Validation loss after {e+1} epochs is {loss:.3f}") 107 | 108 | if self.optim.final_lr: 109 | self.optim._decay_lr() 110 | 111 | def generate_batches( 112 | self, X: np.ndarray, y: np.ndarray, size: int = 32 113 | ) -> typing.Generator[typing.Tuple[np.ndarray]]: 114 | 115 | assert ( 116 | X.shape[0] == y.shape[0] 117 | ), """ 118 | features and target must have the same number of rows, instead 119 | features has {0} and target has {1} 120 | """.format( 121 | X.shape[0], y.shape[0] 122 | ) 123 | 124 | N = X.shape[0] 125 | 126 | for ii in range(0, N, size): 127 | X_batch, y_batch = X[ii : ii + size], y[ii : ii + size] 128 | 129 | yield X_batch, y_batch 130 | -------------------------------------------------------------------------------- /lincoln/lincoln/utils/mnist.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Get the directory where mnist.py is located 4 | MODULE_DIR = os.path.dirname(os.path.abspath(__file__)) 5 | 6 | import pickle 7 | 8 | from torchvision import datasets 9 | 10 | 11 | def download_mnist(): 12 | # Downloads to './data' by default 13 | train_dataset = datasets.MNIST("./data", train=True, download=True) 14 | test_dataset = datasets.MNIST("./data", train=False, download=True) 15 | 16 | # Convert to numpy arrays in the same format as the original code 17 | train_images = 
train_dataset.data.numpy().reshape(-1, 28 * 28) 18 | train_labels = train_dataset.targets.numpy() 19 | test_images = test_dataset.data.numpy().reshape(-1, 28 * 28) 20 | test_labels = test_dataset.targets.numpy() 21 | 22 | # Save in the same format as the original code 23 | mnist = { 24 | "training_images": train_images, 25 | "training_labels": train_labels, 26 | "test_images": test_images, 27 | "test_labels": test_labels, 28 | } 29 | 30 | # Use absolute path for saving 31 | pkl_path = os.path.join(MODULE_DIR, "mnist.pkl") 32 | with open(pkl_path, "wb") as f: 33 | pickle.dump(mnist, f) 34 | print("Save complete.") 35 | 36 | 37 | def load(): 38 | # Use absolute path for loading 39 | pkl_path = os.path.join(MODULE_DIR, "mnist.pkl") 40 | with open(pkl_path, "rb") as f: 41 | mnist = pickle.load(f) 42 | return ( 43 | mnist["training_images"], 44 | mnist["training_labels"], 45 | mnist["test_images"], 46 | mnist["test_labels"], 47 | ) 48 | 49 | 50 | if __name__ == "__main__": 51 | download_mnist() 52 | -------------------------------------------------------------------------------- /lincoln/lincoln/utils/np_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | import numpy as np 3 | from scipy import special 4 | 5 | 6 | def to_2d(a: np.ndarray, type: str = "col") -> np.ndarray: 7 | """ 8 | Turns a 1D Tensor into 2D 9 | """ 10 | 11 | assert a.ndim == 1, "Input tensors must be 1 dimensional" 12 | 13 | if type == "col": 14 | return a.reshape(-1, 1) 15 | elif type == "row": 16 | return a.reshape(1, -1) 17 | 18 | 19 | def normalize(a: np.ndarray): 20 | other = 1 - a 21 | return np.concatenate([a, other], axis=1) 22 | 23 | 24 | def unnormalize(a: np.ndarray): 25 | return a[np.newaxis, 0] 26 | 27 | 28 | def permute_data(X: np.ndarray, y: np.ndarray): 29 | perm = np.random.permutation(X.shape[0]) 30 | return X[perm], y[perm] 31 | 32 | 33 | Batch = Tuple[np.ndarray, np.ndarray] 34 | 35 | 36 | def generate_batch( 37 | X: np.ndarray, y: np.ndarray, start: int = 0, batch_size: int = 10 38 | ) -> Batch: 39 | 40 | assert (X.ndim == 2) and (y.ndim == 2), "X and Y must be 2 dimensional" 41 | 42 | if start + batch_size > X.shape[0]: 43 | batch_size = X.shape[0] - start 44 | 45 | X_batch, y_batch = X[start : start + batch_size], y[start : start + batch_size] 46 | 47 | return X_batch, y_batch 48 | 49 | 50 | def assert_same_shape(output: np.ndarray, output_grad: np.ndarray): 51 | assert ( 52 | output.shape == output_grad.shape 53 | ), """ 54 | Two tensors should have the same shape; 55 | instead, first Tensor's shape is {0} 56 | and second Tensor's shape is {1}. 
57 | """.format( 58 | tuple(output_grad.shape), tuple(output.shape) 59 | ) 60 | return None 61 | 62 | 63 | def assert_dim(t: np.ndarray, dim: int): 64 | assert ( 65 | t.ndim == dim 66 | ), """ 67 | Tensor expected to have dimension {0}, instead has dimension {1} 68 | """.format( 69 | dim, len(t.shape) 70 | ) 71 | return None 72 | 73 | 74 | def softmax(x: np.ndarray, axis=None) -> np.ndarray: 75 | return np.exp(x - special.logsumexp(x, axis=axis, keepdims=True)) 76 | -------------------------------------------------------------------------------- /lincoln/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | 5 | setup( 6 | ext_modules = cythonize("lincoln/operations/conv_cy.pyx"), 7 | include_dirs=[numpy.get_include()] 8 | ) 9 | --------------------------------------------------------------------------------