├── res
    ├── 2.1
    │   ├── compile.sh
    │   ├── slides.pdf
    │   └── quiz.tex
    ├── 1.0
    │   └── slides.pdf
    ├── 1.1
    │   └── slides.pdf
    ├── 1.2
    │   └── slides.pdf
    ├── 2.0
    │   ├── slides.pdf
    │   ├── compile.sh
    │   └── quiz.tex
    ├── 2.2
    │   ├── slides.pdf
    │   └── script.lua
    ├── 3.0
    │   ├── slides.pdf
    │   ├── lin.lua
    │   ├── 3conv.lua
    │   └── 3conv-pool.lua
    ├── 3.1
    │   ├── slides.pdf
    │   └── script.lua
    ├── 3.2
    │   ├── slides.pdf
    │   ├── LeNet5.lua
    │   ├── AlexNet.lua
    │   └── GoogLeNet.lua
    ├── 3.3
    │   ├── slides.pdf
    │   └── train.lua
    ├── 3.4
    │   └── slides.pdf
    ├── 4.0
    │   └── slides.pdf
    ├── 4.1
    │   └── slides.pdf
    ├── 4.2
    │   ├── slides.pdf
    │   └── script.lua
    ├── 4.3
    │   └── slides.pdf
    └── README.md
└── README.md


/res/2.1/compile.sh:
--------------------------------------------------------------------------------
1 | rubber --pdf quiz.tex
2 | 


--------------------------------------------------------------------------------
/res/1.0/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/1.0/slides.pdf


--------------------------------------------------------------------------------
/res/1.1/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/1.1/slides.pdf


--------------------------------------------------------------------------------
/res/1.2/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/1.2/slides.pdf


--------------------------------------------------------------------------------
/res/2.0/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/2.0/slides.pdf


--------------------------------------------------------------------------------
/res/2.1/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/2.1/slides.pdf


--------------------------------------------------------------------------------
/res/2.2/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/2.2/slides.pdf


--------------------------------------------------------------------------------
/res/3.0/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/3.0/slides.pdf


--------------------------------------------------------------------------------
/res/3.1/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/3.1/slides.pdf


--------------------------------------------------------------------------------
/res/3.2/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/3.2/slides.pdf


--------------------------------------------------------------------------------
/res/3.3/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/3.3/slides.pdf


--------------------------------------------------------------------------------
/res/3.4/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/3.4/slides.pdf


--------------------------------------------------------------------------------
/res/4.0/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/4.0/slides.pdf


--------------------------------------------------------------------------------
/res/4.1/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/4.1/slides.pdf


--------------------------------------------------------------------------------
/res/4.2/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/4.2/slides.pdf


--------------------------------------------------------------------------------
/res/4.3/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Atcold/torch-Video-Tutorials/HEAD/res/4.3/slides.pdf


--------------------------------------------------------------------------------
/res/2.0/compile.sh:
--------------------------------------------------------------------------------
1 | # Builds 35 different quizzes, naming them testXX, XX = 01 -> 35
2 | 
3 | for i in 0{1..9} {10..35}; do
4 |     rubber --pdf --jobname test$i quiz.tex
5 | done
6 | 


--------------------------------------------------------------------------------
/res/3.0/lin.lua:
--------------------------------------------------------------------------------
 1 | -- Huge MLP: three equally wide ReLU hidden layers and a linear K-way output
 2 | 
 3 | require 'nn'
 4 | 
 5 | local n = 256 * 256 * 3   -- size of the input vector
 6 | local K = 1000            -- size of the output vector
 7 | local hidden = 2 * n      -- width shared by every hidden layer
 8 | 
 9 | local net = nn.Sequential()
10 | -- First layer
11 | net:add(nn.Linear(n, hidden)):add(nn.ReLU())
12 | -- Second layer
13 | net:add(nn.Linear(hidden, hidden)):add(nn.ReLU())
14 | -- Third layer
15 | net:add(nn.Linear(hidden, hidden)):add(nn.ReLU())
16 | -- Output
17 | net:add(nn.Linear(hidden, K))
18 | 
19 | return net
20 | 


--------------------------------------------------------------------------------
/res/3.0/3conv.lua:
--------------------------------------------------------------------------------
 1 | -- Basic CNN: three 5x5 convolutions (stride 1, padding 2) and a linear classifier
 2 | 
 3 | require 'nn'
 4 | 
 5 | local K = 1000
 6 | local net = nn.Sequential()
 7 | 
 8 | -- First layer
 9 | net:add(nn.SpatialConvolution(3, 6, 5, 5, 1, 1, 2, 2)):add(nn.ReLU())
10 | -- Second layer
11 | net:add(nn.SpatialConvolution(6, 6, 5, 5, 1, 1, 2, 2)):add(nn.ReLU())
12 | -- Third layer
13 | net:add(nn.SpatialConvolution(6, 6, 5, 5, 1, 1, 2, 2)):add(nn.ReLU())
14 | -- MLP: flatten the feature maps into a vector
15 | net:add(nn.View(-1))
16 | 
17 | -- Prints the sizes of all layers output
18 | -- x = torch.rand(3, 256, 256)
19 | -- net:forward(x)
20 | -- net:apply(function (m) print(m.output:size()) end)
21 | 
22 | -- Output: 6 * 256 * 256 = 393216 features for the 3x256x256 input above
23 | net:add(nn.Linear(6 * 256 * 256, K))
24 | 
25 | return net
26 | 


--------------------------------------------------------------------------------
/res/3.0/3conv-pool.lua:
--------------------------------------------------------------------------------
 1 | -- CNN with pooling layers
 2 | 
 3 | require 'nn'
 4 | 
 5 | local K = 1000
 6 | local net = nn.Sequential()
 7 | 
 8 | -- First layer: 5x5 convolution with stride 2 and padding 2
 9 | net:add(nn.SpatialConvolution(3, 6, 5, 5, 2, 2, 2, 2)):add(nn.ReLU())
10 | -- Second layer: 5x5 convolution (stride 1, padding 2) followed by 2x2 max pooling
11 | net:add(nn.SpatialConvolution(6, 6, 5, 5, 1, 1, 2, 2)):add(nn.ReLU())
12 | net:add(nn.SpatialMaxPooling(2, 2, 2, 2))
13 | -- Third layer: same structure as the second one
14 | net:add(nn.SpatialConvolution(6, 6, 5, 5, 1, 1, 2, 2)):add(nn.ReLU())
15 | net:add(nn.SpatialMaxPooling(2, 2, 2, 2))
16 | -- MLP: flatten the feature maps into a vector
17 | net:add(nn.View(-1))
18 | 
19 | -- Prints the sizes of all layers output
20 | -- x = torch.rand(3, 256, 256)
21 | -- net:forward(x)
22 | -- net:apply(function (m) print(m.output:size()) end)
23 | 
24 | -- Output: 6 * 32 * 32 = 6144 features for the 3x256x256 input above
25 | net:add(nn.Linear(6 * 32 * 32, K))
26 | 
27 | return net
28 | 


--------------------------------------------------------------------------------
/res/3.2/LeNet5.lua:
--------------------------------------------------------------------------------
 1 | -- LeNet-5 style network: two conv / max-pool / tanh stages, then three linear layers
 2 | 
 3 | require 'nn' -- this file builds nn modules; load the package like the res/3.0 models do
 4 | 
 5 | local net = nn.Sequential()
 6 | 
 7 | -- 1 input image channel, 6 output channels, 5x5 convolution kernel
 8 | net:add(nn.SpatialConvolution(1, 6, 5, 5))
 9 | 
10 | -- A max-pooling operation that looks at 2x2 windows and finds the max.
11 | net:add(nn.SpatialMaxPooling(2,2,2,2))
12 | 
13 | -- non-linearity
14 | net:add(nn.Tanh())
15 | 
16 | -- second stage: 6 input channels, 16 output channels, 5x5 kernel, then pool + tanh
17 | net:add(nn.SpatialConvolution(6, 16, 5, 5))
18 | net:add(nn.SpatialMaxPooling(2,2,2,2))
19 | net:add(nn.Tanh())
20 | 
21 | -- reshapes from a 3D tensor of 16x5x5 into 1D tensor of 16*5*5
22 | net:add(nn.View(16*5*5))
23 | 
24 | -- fully connected layers (matrix multiplication between input and weights)
25 | net:add(nn.Linear(16*5*5, 120))
26 | net:add(nn.Tanh())
27 | net:add(nn.Linear(120, 84))
28 | net:add(nn.Tanh())
29 | 
30 | -- 10 is the number of outputs of the network (10 classes)
31 | net:add(nn.Linear(84, 10))
32 | 
33 | return net
34 | 


--------------------------------------------------------------------------------
/res/3.1/script.lua:
--------------------------------------------------------------------------------
 1 | -- qlua -lenv
 2 | torch.manualSeed(0)
 3 | net = require '3conv-pool'
 4 | require 'image'
 5 | 
 6 | x = image.scale(image.lena(), 256, 256)
 7 | = #x
 8 | image.display{image = x, legend = 'img'}
 9 | = #net:forward(x)
10 | 
11 | -- qlua
12 | = net:get(1)
13 | -- th
14 | net:get(1)
15 | {net:get(1)}
16 | 
17 | -- Input / output planes
18 | -- Stride
19 | -- Kernel size
20 | -- Check size of output
21 | -- Check size of kernel / gradKernel
22 | -- Check size of bias / gradBias
23 | 
24 | image.display{image = net:get(1).weight, legend = 'k(1)', zoom = 18, padding = 2}
25 | 
26 | function show(x, t) -- x: module whose `output` field is displayed; t: legend of the window
27 |    print(x) -- print the module itself to the console
28 |    image.display{image = x.output, legend = t, scaleeach=true} -- display the module's stored output
29 | end
30 | 
31 | show(net:get(1), 'y(1)')
32 | show(net:get(2), 'y(1)+')
33 | 
34 | show(net:get(3), 'y(2)')
35 | show(net:get(4), 'y(2)+')
36 | show(net:get(5), 'pool[y(2)+]')
37 | 
38 | show(net:get(6), 'y(3)')
39 | show(net:get(7), 'y(3)+')
40 | show(net:get(8), 'pool[y(3)+]')
41 | 


--------------------------------------------------------------------------------
/res/2.0/quiz.tex:
--------------------------------------------------------------------------------
 1 | \documentclass{article}
 2 | \usepackage{pgf}
 3 | \usepackage{amsmath,amssymb,bm}
 4 | % Random int
 5 | \pgfmathsetseed{\number\pdfrandomseed}
 6 | \newcommand\rint{\pgfmathparse{random(10)}\pgfmathresult}
 7 | \begin{document}
 8 | 
 9 | \title{Quiz: forward propagation dimensionality}
10 | \author{BME595 DeepLearning}
11 | \date{\today}
12 | \maketitle
13 | 
14 | Given a network with the size vector $\bm{s}=(\rint, \rint, \rint, \rint)^\top$, determine $n$, $K$, $L$ and write the dimensionality of:
15 | %
16 | \begin{align*}
17 |   \bm{x}            \\
18 |   \bm{\hat x}       \\
19 |   \bm{a}^{(1)}      \\
20 |   \bm{\hat a}^{(1)} \\
21 |   \bm{\Theta}^{(1)} \\
22 |   \bm{z}^{(2)}      \\
23 |   \bm{a}^{(2)}      \\
24 |   \bm{\hat a}^{(2)} \\
25 |   \bm{\Theta}^{(2)} \\
26 |   \bm{z}^{(3)}      \\
27 |   \bm{a}^{(3)}      \\
28 |   \bm{\hat a}^{(3)} \\
29 |   \bm{\Theta}^{(3)} \\
30 |   \bm{z}^{(4)}      \\
31 |   \bm{a}^{(4)}      \\
32 |   \bm{\hat a}^{(4)} \\
33 |   h_{\bm{\Theta}}(\bm{x}) \\
34 |   \bm{y}
35 | \end{align*}
36 | 
37 | \end{document}
38 | 


--------------------------------------------------------------------------------
/res/3.3/train.lua:
--------------------------------------------------------------------------------
 1 | local cuda = false
 2 | require 'nn'
 3 | 
 4 | torch.manualSeed(1234)
 5 | local model = nn.Sequential()
 6 | local n = 2
 7 | local K = 1
 8 | local s = {n, 5, 5, K}
 9 | model:add(nn.Linear(s[1], s[2]))
10 | model:add(nn.Tanh())
11 | model:add(nn.Linear(s[2], s[3]))
12 | model:add(nn.Tanh())
13 | model:add(nn.Linear(s[3], s[4]))
14 | 
15 | local loss = nn.MSECriterion()
16 | 
17 | local m = 128
18 | local X = torch.DoubleTensor(m, n) --CudaTensor
19 | local Y = torch.DoubleTensor(m)    --CudaTensor
20 | 
21 | for i = 1, m do
22 |    local x = torch.randn(2)
23 |    local y = x[1] * x[2] > 0 and -1 or 1
24 |    X[i]:copy(x) -- fine also for Cuda
25 |    Y[i] = y     -- fine also for Cuda
26 | end
27 | 
28 | if cuda then
29 |    -- Running on GPU
30 |    require 'cunn'
31 |    model:cuda()
32 |    loss:cuda()
33 |    X = X:cuda()
34 |    Y = Y:cuda()
35 | end
36 | 
37 | local theta, gradTheta = model:getParameters()
38 | 
39 | local optimState = {learningRate = 0.15}
40 | 
41 | require 'optim'
42 | 
43 | local function feval(theta) -- optim.sgd closure, built once: returns the loss J and gradTheta
44 |    gradTheta:zero() -- clear the gradient left over from the previous step
45 |    local h_x = model:forward(X)
46 |    local J = loss:forward(h_x, Y)
47 |    print(J) -- just for debugging purpose
48 |    local dJ_dh_x = loss:backward(h_x, Y)
49 |    model:backward(X, dJ_dh_x) -- computes and updates gradTheta
50 |    return J, gradTheta
51 | end
52 | for epoch = 1, 1e3 do -- each epoch is one SGD step on the whole (X, Y) set
53 |    optim.sgd(feval, theta, optimState)
54 | end
55 | 
56 | print('Prev J: 0.1758')
57 | net = model
58 | 


--------------------------------------------------------------------------------
/res/2.1/quiz.tex:
--------------------------------------------------------------------------------
 1 | \documentclass{article}
 2 | \usepackage{pgf}
 3 | \usepackage{amsmath,amssymb,bm}
 4 | % Random int
 5 | \pgfmathsetseed{\number\pdfrandomseed}
 6 | \newcommand\rint{\pgfmathparse{random(10)}\pgfmathresult}
 7 | \begin{document}
 8 | 
 9 | \title{Quiz: backward propagation intuition}
10 | \author{BME595 DeepLearning}
11 | \date{\today}
12 | \maketitle
13 | 
14 | Given equations $(1)$ and $(2)$ and that $h_{\bm{\Theta}}(\bm{x}) := \bm{a}^{(L)}$ and $E = E(h_{\bm{\Theta}}(\bm{x}))$, A) \underline{define} $\bm{\delta}^{(\ell)}$ and B) show (or prove) where equations $(3)-(5)$ come from.
15 | %
16 | \begin{align}
17 |   \bm{a}^{(1)}&=\bm{x}, \bm{\hat a}^{(1)}=\genfrac{[}{]}{0pt}{1}{+1}{\bm{a}^{(1)}} \\
18 |   \bm{z}^{(\ell+1)}&=\bm{\Theta}^{(\ell)}\bm{\hat a}^{(\ell)}, \bm{a}^{(\ell)}=\sigma(\bm{z}^{(\ell)}), \bm{\hat a}^{(\ell)}=\genfrac{[}{]}{0pt}{1}{+1}{\bm{a}^{(\ell)}} \\
19 |   \bm{\delta}^{(L)}&=\nabla_h E\odot\sigma^\prime(\bm{z}^{(L)}) \\
20 |   \bm{\delta}^{(\ell)}&=[(\bm{\Theta}^{(\ell)})^\top\bm{\delta}^{(\ell+1)}]\odot\sigma^\prime(\bm{z}^{(\ell)}) \\
21 |   \frac{\partial E}{\partial\bm{\Theta}^{(\ell)}}&=\bm{\delta}^{(\ell+1)}(\bm{\hat a}^{(\ell)})^\top
22 | \end{align}
23 | %
24 | Finally, C) discuss $\bm{\delta}^{(1)}$ and $\frac{\partial E}{\partial \bm{x}}$.
25 | 
26 | As a reminder, $\mathcal{L}(\bm{\Theta}) = \frac{1}{m} \sum_{i=1}^m E(h_{\bm{\Theta}}(\bm{x}^{(i)}))$. Sometimes, $\mathcal{L}(\bm{\Theta})$ is written $J(\bm{\Theta})$ or $J(\bm{\theta})$.
27 | 
28 | \end{document}
29 | 


--------------------------------------------------------------------------------
/res/3.2/AlexNet.lua:
--------------------------------------------------------------------------------
 1 | -- AlexNet-style network: two parallel convolutional branches and a classifier
 2 | 
 3 | require 'nn' -- this file builds nn modules; load the package like the res/3.0 models do
 4 | 
 5 | local fb1 = nn.Sequential() -- branch 1
 6 | fb1:add(nn.SpatialConvolution(3,48,11,11,4,4,2,2))       -- 224 -> 55
 7 | fb1:add(nn.ReLU(true))
 8 | fb1:add(nn.SpatialMaxPooling(3,3,2,2))                   -- 55 ->  27
 9 | fb1:add(nn.SpatialConvolution(48,128,5,5,1,1,2,2))       --  27 -> 27
10 | fb1:add(nn.ReLU(true))
11 | fb1:add(nn.SpatialMaxPooling(3,3,2,2))                   --  27 ->  13
12 | fb1:add(nn.SpatialConvolution(128,192,3,3,1,1,1,1))      --  13 ->  13
13 | fb1:add(nn.ReLU(true))
14 | fb1:add(nn.SpatialConvolution(192,192,3,3,1,1,1,1))      --  13 ->  13
15 | fb1:add(nn.ReLU(true))
16 | fb1:add(nn.SpatialConvolution(192,128,3,3,1,1,1,1))      --  13 ->  13
17 | fb1:add(nn.ReLU(true))
18 | fb1:add(nn.SpatialMaxPooling(3,3,2,2))                   -- 13 -> 6
19 | 
20 | local fb2 = fb1:clone() -- branch 2: same layers as branch 1
21 | for _, conv in ipairs(fb2:findModules('nn.SpatialConvolution')) do
22 |    conv:reset() -- reset branch 2's weights
23 | end
24 | 
25 | -- Both branches receive the same input; their outputs are joined along dimension 2
26 | local features = nn.Concat(2)
27 | features:add(fb1)
28 | features:add(fb2)
29 | 
30 | -- Classifier (fully connected layers)
31 | -- 256*6*6: 2 branches x 128 output planes each, 6x6 maps after the last pooling
32 | local nClasses = 1e3
33 | local classifier = nn.Sequential()
34 | classifier:add(nn.View(256*6*6))
35 | classifier:add(nn.Dropout(0.5))
36 | classifier:add(nn.Linear(256*6*6, 4096))
37 | classifier:add(nn.Threshold(0, 1e-6))
38 | classifier:add(nn.Dropout(0.5))
39 | classifier:add(nn.Linear(4096, 4096))
40 | classifier:add(nn.Threshold(0, 1e-6))
41 | classifier:add(nn.Linear(4096, nClasses))
42 | classifier:add(nn.LogSoftMax())
43 | 
44 | local model = nn.Sequential():add(features):add(classifier)
45 | return model
46 | 


--------------------------------------------------------------------------------
/res/4.2/script.lua:
--------------------------------------------------------------------------------
  1 | require 'nngraph'
  2 | require 'pretty-nn'
  3 | torch.manualSeed(0)
  4 | 
  5 | -- nn vs. nngraph
  6 | net = nn.Sequential();
  7 | net:add(nn.Linear(20, 10));
  8 | net:add(nn.Tanh());
  9 | net:add(nn.Linear(10, 10));
 10 | net:add(nn.Tanh());
 11 | net:add(nn.Linear(10, 1));
 12 | net
 13 | 
 14 | h1 = net.modules[1]()
 15 | h1
 16 | {h1}
 17 | 
 18 | net.modules[1].bias:view(1, -1)
 19 | h1.data.module.bias:view(1, -1)
 20 | 
 21 | h2 = net.modules[5](net.modules[4](net.modules[3](net.modules[2](h1))))
 22 | gNet = nn.gModule({h1}, {h2})
 23 | 
 24 | graph.dot(gNet.fg, 'mlp', 'mlp')
 25 | $ open mlp.svg
 26 | 
 27 | x = torch.randn(20)
 28 | net:forward(x)
 29 | gNet:forward(x)
 30 | 
 31 | function nn.gModule:getNode(id) -- returns the module stored in the forward node whose id is `id`
 32 |    for _, n in ipairs(self.forwardnodes) do -- scan this graph's forward nodes in order
 33 |       if n.id == id then return n.data.module end -- first node with a matching id wins
 34 |    end
 35 |    return nil -- no forward node has this id
 36 | end
 37 | 
 38 | gNet:getNode(4)
 39 | {gNet:getNode(4)}
 40 | 
 41 | -- Dash - notation
 42 | g1 = - nn.Linear(20, 10)
 43 | 
 44 | g2 = g1
 45 |    - nn.Tanh()
 46 |    - nn.Linear(10, 10)
 47 |    - nn.Tanh()
 48 |    - nn.Linear(10, 1)
 49 | mlp = nn.gModule({g1}, {g2})
 50 | 
 51 | graph.dot(mlp.fg, 'mlp2', 'mlp2')
 52 | $ open mlp2.svg
 53 | 
 54 | -- Fancy architecture
 55 | input = - nn.Identity()
 56 | L1 = input
 57 |    - nn.Linear(10, 20)
 58 |    - nn.Tanh()
 59 | L2 = {input, L1}
 60 |    - nn.JoinTable(1)
 61 |    - nn.Linear(30, 60)
 62 |    - nn.Tanh()
 63 | L3 = {L1, L2}
 64 |    - nn.JoinTable(1)
 65 |    - nn.Linear(80, 1)
 66 |    - nn.Tanh()
 67 | g = nn.gModule({input},{L3})
 68 | 
 69 | graph.dot(g.fg, 'fancy', 'fancy')
 70 | $ open fancy.svg
 71 | 
 72 | -- A RNN example in nngraph
 73 | n = 3
 74 | K = 1
 75 | d = 5
 76 | nHL = 2
 77 | T = 4
 78 | xx = - nn.Identity()
 79 | hh1 = - nn.Identity()
 80 | hh2 = - nn.Identity()
 81 | h1 = {xx, hh1} - nn.JoinTable(1) - nn.Linear(n+d, d) - nn.Tanh()
 82 | h2 = {h1, hh2} - nn.JoinTable(1) - nn.Linear(2*d, d) - nn.Tanh()
 83 | yy = h2 - nn.Linear(d, K) - nn.Tanh()
 84 | rnn = nn.gModule({xx, hh1, hh2}, {h1, h2, yy})
 85 | x = torch.randn(n)
 86 | h0 = torch.zeros(d)
 87 | rnn:forward({x, h0, h0})
 88 | graph.dot(rnn.fg, 'myRnn', 'myRnn')
 89 | $ open myRnn.svg
 90 | 
 91 | -- Using https://github.com/e-lab/torch7-demos/tree/master/RNN-train-sample
 92 | RNN = require 'RNN'
 93 | timeNet, net = RNN.getModel(n, d, nHL, K, T)
 94 | graph.dot(net.fg, 'net', 'net')
 95 | $ open net.svg
 96 | net:forward({x, h0, h0})
 97 | graph.dot(net.fg, 'netFw', 'netFw')
 98 | $ open netFw.svg
 99 | graph.dot(timeNet.fg, 'timeNet', 'timeNet')
100 | $ open timeNet.svg
101 | 


--------------------------------------------------------------------------------
/res/2.2/script.lua:
--------------------------------------------------------------------------------
  1 | -- Recording script, not a runnable script
  2 | 
  3 | require 'nn';
  4 | lin = nn.Linear(5, 3)
  5 | lin
  6 | {lin}
  7 | lin.weight
  8 | lin.bias
  9 | Theta_1 = torch.cat(lin.bias, lin.weight, 2) -- New Tensor
 10 | Theta_1
 11 | lin:zeroGradParameters()
 12 | 
 13 | sig = nn.Sigmoid()
 14 | {sig}
 15 | require 'gnuplot';
 16 | z = torch.linspace(-10, 10, 21)
 17 | gnuplot.plot(z, sig:forward(z))
 18 | -- Forward pass: once through the nn modules, once by hand for comparison
 19 | x = torch.randn(5)
 20 | a1 = x
 21 | h_Theta = sig:forward(lin:forward(x)):clone() -- clone: keep a copy independent of sig's output
 22 | z2 = Theta_1 * torch.cat(torch.ones(1), x, 1) -- prepend 1 to x so the bias column of Theta_1 applies
 23 | a2 = z2:clone():apply(function (z) return 1/(1 + math.exp(-z)) end) -- manual sigmoid of z2
 24 | 
 25 | -- Backward pass
 26 | loss = nn.MSECriterion()
 27 | ? nn.MSECriterion
 28 | loss
 29 | loss.sizeAverage = false
 30 | y = torch.rand(3)
 31 | -- forward(input, target)
 32 | E = loss:forward(h_Theta, y)
 33 | E
 34 | (h_Theta - y):pow(2):sum()
 35 | 
 36 | dE_dh = loss:updateGradInput(h_Theta, y):clone()
 37 | dE_dh
 38 | 2 * (h_Theta - y)
 39 | 
 40 | delta2 = sig:updateGradInput(z2, dE_dh)
 41 | dE_dh:clone():cmul(a2):cmul(1 - a2)
 42 | 
 43 | lin:accGradParameters(x, delta2)
 44 | {lin}
 45 | lin.gradWeight
 46 | lin.gradBias
 47 | delta2:view(-1, 1) * torch.cat(torch.ones(1), x, 1):view(1, -1)
 48 | 
 49 | lin_gradInput = lin:updateGradInput(x, delta2)
 50 | lin.weight:t() * delta2
 51 | 
 52 | net = nn.Sequential()
 53 | net:add(lin);
 54 | net:add(sig);
 55 | net
 56 | 
 57 | -- While true
 58 | pred = net:forward(x)
 59 | pred
 60 | h_Theta
 61 | err = loss:forward(pred, y)
 62 | err
 63 | E
 64 | gradCriterion = loss:backward(pred, y)
 65 | gradCriterion
 66 | dE_dh
 67 | net:zeroGradParameters()
 68 | net:get(1)
 69 | torch.cat(net:get(1).gradBias, net:get(1).gradWeight, 2)
 70 | 
 71 | oldWeight = net:get(1).weight:clone()
 72 | oldBias = net:get(1).bias:clone()
 73 | etha = 0.01
 74 | net:updateParameters(etha)
 75 | net:get(1).weight
 76 | oldWeight - 0.01 * net:get(1).gradWeight
 77 | net:get(1).bias
 78 | oldBias - 0.01 * net:get(1).gradBias
 79 | 
 80 | -- X: design matrix
 81 | -- Y: labels / targets matrix / vector
 82 | 
 83 | for i = 1, m do
 84 |    local pred = net:forward(X[i])
 85 |    local err = criterion:forward(pred, Y[i])
 86 |    local gradCriterion = criterion:backward(pred, Y[i])
 87 |    net:zeroGradParameters()
 88 |    net:backward(X[i], gradCriterion)
 89 |    net:updateParameters(learningRate)
 90 | end
 91 | 
 92 | for i = 1, m, batchSize do -- mini-batch training: i is the first sample of each batch
 93 |    net:zeroGradParameters() -- start every batch from zeroed gradients
 94 |    for j = 0, batchSize - 1 do -- offsets 0 .. batchSize - 1 cover all batchSize samples
 95 |       if i+j > m then break end -- the last batch may be shorter than batchSize
 96 |       local pred = net:forward(X[i+j])
 97 |       local err = criterion:forward(pred, Y[i+j])
 98 |       local gradCriterion = criterion:backward(pred, Y[i+j])
 99 |       net:backward(X[i+j], gradCriterion) -- gradients add up over the batch (zeroed once above)
100 |    end
101 |    net:updateParameters(learningRate) -- one parameter update per batch
102 | end
103 | 
104 | dataset = {}
105 | function dataset:size() return m end
106 | for i = 1, m do dataset[i] = {X[i], Y[i]} end
107 | trainer = nn.StochasticGradient(net, loss)
108 | trainer:train(dataset)
109 | 
110 | 


--------------------------------------------------------------------------------
/res/README.md:
--------------------------------------------------------------------------------
 1 | # Tools and tips
 2 | 
 3 | Since this is an open source project, I feel the need to share with you the tools I'm using and some useful tips.
 4 | I'll take *Practical 2.2* as an example, which summarises all my experience (it being my latest video at the time of writing).
 5 | 
 6 | **Hardware**: *MacBook Pro* (15-inch, Late 2011), *Samsung Galaxy Note Pro* 12.2 (64GB, Late 2015).
 7 | 
 8 | **Software**: *Microsoft PowerPoint* 15.24, *LaTeXiT* 2.8.1, *Camtasia* 2.10.6, *Avidemux* 2.6.12 for *Mac* and *ezPDF* 2.6.9.10, *Mobizen* 3.1.1.11 for *Android*.
 9 | 
10 | ## Plot
11 | 
12 | I start by creating the first slide, with title and subtitle, rigorously on **black** background.
13 | 
14 | Then, I write a mathematical and a coding plot.
15 | This means, on a piece of paper, I do all derivations beforehand (which took a day for back-prop) and I type on my keyboard the required coding, which I later copy into a (non-executable) script for the recording.
16 | 
17 | Now comes the time for some pretty math.
18 | I start up *LaTeXiT* and write the first terms of the most important formulas, in **white**, with `36 pt`.
19 | Further derivations and explanations are done at recording time, so that the speed of presentation is sustainable.
20 | I use `\bm{}` for my bold math, and `^\top` for transposition.
21 | 
22 | Show time!
23 | 
24 | ## The math
25 | 
26 | After converting the `ppt` into a `pdf` and having sent it to my *Note*, I fire up the first software I've ever bought: *ezPDF*.
27 | It's simply a must-have, if you own a *Note*, but I won't wander off right now.
28 | I choose *Page layout* and select *Horizontal scroll*, *No seamless continuous page scroll*, *No page flipping*, *Immersive mode*.
29 | I run *Mobizen* and start recording.
30 | Also on my *Mac* I start recording the audio with *Camtasia*, so that I don't have the sound of the *S pen* writing on the screen and I can also exploit the noise cancellation, particularly helpful when you have flatmates...
31 | I choose the *Brush* tool with a very light pastel palette and start the magic.
32 | 
33 | ### Tips
34 | 
35 | Anytime I make a mistake, I simply start over (I'll cut away anything and everything in the video editing phase).
36 | If I have to cough, sneeze, burp or xxx, I go on, better out than in.
37 | I'll cut it out later and laugh at it.
38 | 
39 | ## The coding
40 | 
41 | Back on my *Mac* I can start the coding part.
42 | I pull up the script I made before, send it to *Gist* and load it up on my *Note*.
43 | Then I start the recording with *Camtasia*, 3, 2, 1, action!
44 | 
45 | ### Tips
46 | 
47 | Same as before, plus: if I have to think (yes, sometimes I need to do that as well), I can press `⌘⇧2` to pause momentarily the recording.
48 | This is particularly helpful when I have to copy some web addresses or bigger chunks of text.
49 | 
50 | ## The editing
51 | 
52 | Now that everything has been recorded (it's roughly 2 hours of video), I can start the chopping off (ending up with roughly 1 hour of video) and add effects like: zooming, masking, mouse torch, cartoonish rectangle drawing, text patches.
53 | 
54 | ### Tips
55 | 
56 | I **do apply compression** to my audio by choosing *Dynamic Processor* and setting *Compression* to `-14 dB` and leave the other settings unchanged.
57 | I have also to choose the canvas size to `720p HD` and zoom the video accordingly (*need to write zoom factor here*).
58 | 
59 | ## The summary
60 | 
61 | After having spent 2 hours recording the videos and 8 hours editing them, I have a *somehow* rough idea of what I've been talking about all the way through (at this time I usually simply want to puke...).
62 | It's time to write the *Overview* in my slide, using `typewriter font` and colours for coding elements, and record that as well.
63 | Usually it takes 5 minutes of recording and 30 minutes of editing.
64 | 
65 | And this is pretty much it.
66 | I'll add something else, if it comes to mind.
67 | 
68 | ## Packaging and shipping
69 | 
70 | Lastly, I open *Avidemux*, load the overview video, append the math and the coding ones, select *Copy* for both audio and video streams, choose *MP4 muxer* and click on *Save*.
71 | At the end, I fire up *YouTube*, upload my video and cry: I just pushed 48 hours of my life online.
72 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Torch Video Tutorials
 2 | 
 3 | > *Light your way in Deep Learning with* Torch :flashlight:
 4 | 
 5 | This aims to be a growing collection of introductory video tutorials on the [*Torch*](http://torch.ch) ecosystem.
 6 | *Torch* is one of the fastest and most flexible frameworks existing for Machine and Deep Learning.
 7 | And yes, flexibility used to come with an intimidating learning curve... until now.
 8 | 
 9 | Enjoy these videos, transcripts and quizzes (which you can find in the [`res`](res) folder, together with some notes about how I made these videos).
10 | 
11 | 
12 | ## 1 - Get the basics straight
13 | 
14 | ### 1.0 - An overview on *Lua* ([slides](res/1.0/slides.pdf))
15 | 
16 | [![Practical 1.0 - Lua](http://img.youtube.com/vi/QLYLOPeI92g/0.jpg)](https://youtu.be/QLYLOPeI92g?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
17 | 
18 | ### 1.1 - An overview on *Torch*’s `Tensor`s ([slides](res/1.1/slides.pdf))
19 | 
20 | [![Practical 1.1 - Torch](http://img.youtube.com/vi/o3aRgD1uzsc/0.jpg)](https://youtu.be/o3aRgD1uzsc?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
21 | 
22 | ### 1.2 - An overview on *Torch*’s `image` package ([slides](res/1.2/slides.pdf))
23 | 
24 | [![Practical 1.2 - image package](http://img.youtube.com/vi/dEjvydjcwOE/0.jpg)](https://youtu.be/dEjvydjcwOE?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
25 | 
26 | 
27 | ## 2 - Artificial Neural Networks
28 | 
29 | ### 2.0 - Neural Networks – feed forward (inference) ([slides](res/2.0/slides.pdf), [quiz](res/2.0/quiz.tex))
30 | 
31 | [![Practical 2.0 – NN forward](http://img.youtube.com/vi/hxA0wxibv8g/0.jpg)](https://youtu.be/hxA0wxibv8g?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
32 | 
33 | ### 2.1 - Neural Networks – back propagation (training) ([slides](res/2.1/slides.pdf), [quiz](res/2.1/quiz.tex))
34 | 
35 | [![Practical 2.1 - NN backward](http://img.youtube.com/vi/VaQUx7m3oR4/0.jpg)](https://youtu.be/VaQUx7m3oR4?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
36 | 
37 | ### 2.2 - Neural Networks – An overview on *Torch*’s `nn` package ([slides](res/2.2/slides.pdf), [script](res/2.2/script.lua))
38 | 
39 | [![Practical 2.2 - nn package](http://img.youtube.com/vi/atZYdZ8hVCw/0.jpg)](https://youtu.be/atZYdZ8hVCw?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
40 | 
41 | 
42 | ## 3 - Convolutional Neural Networks
43 | 
44 | ### 3.0 - CNN – Basics ([slides](res/3.0/slides.pdf), [`lin`](res/3.0/lin.lua), [`3conv`](res/3.0/3conv.lua), [`3conv-pool`](res/3.0/3conv-pool.lua))
45 | 
46 | [![Practical 3.0 - CNN basics](http://img.youtube.com/vi/kwCbmx3tFwY/0.jpg)](https://youtu.be/kwCbmx3tFwY?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
47 | 
48 | ### 3.1 - CNN – Internals ([slides](res/3.1/slides.pdf), [script](res/3.1/script.lua), [`3conv-pool`](res/3.0/3conv-pool.lua))
49 | 
50 | [![Practical 3.1 - CNN internals](http://img.youtube.com/vi/BCensUz_gQ8/0.jpg)](https://youtu.be/BCensUz_gQ8?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
51 | 
52 | ### 3.2 - CNN – Architectures ([slides](res/3.2/slides.pdf), [`LeNet5`](res/3.2/LeNet5.lua), [`AlexNet`](res/3.2/AlexNet.lua), [`GoogLeNet`](res/3.2/GoogLeNet.lua))
53 | 
54 | [![Practical 3.2 - CNN models](http://img.youtube.com/vi/LYYwUr0vCjg/0.jpg)](https://youtu.be/LYYwUr0vCjg?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
55 | 
56 | ### 3.3 - CNN – Training ([slides](res/3.3/slides.pdf), [`train.lua`](res/3.3/train.lua))
57 | 
58 | [![Practical 3.3 - CNN training](http://img.youtube.com/vi/kcOJEplX7i0/0.jpg)](https://youtu.be/kcOJEplX7i0?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
59 | 
60 | ### 3.4 - CNN – Loss functions ([slides](res/3.4/slides.pdf))
61 | 
62 | [![Practical 3.4 - CNN loss](http://img.youtube.com/vi/ejr6eaJKtcs/0.jpg)](https://youtu.be/ejr6eaJKtcs?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
63 | 
64 | 
65 | ## 4 - Recurrent Neural Networks
66 | 
67 | ### 4.0 - RNN – Vectors and sequences ([slides](res/4.0/slides.pdf))
68 | 
69 | [![Practical 4.0 - RNN, vec and seq](http://img.youtube.com/vi/bUIAsEw7_9U/0.jpg)](https://youtu.be/bUIAsEw7_9U?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
70 | 
71 | ### 4.1 - RNN – Forward and backward ([slides](res/4.1/slides.pdf))
72 | 
73 | [![Practical 4.1 - RNN, fwd and back](http://img.youtube.com/vi/WwslsYQX77s/0.jpg)](https://youtu.be/WwslsYQX77s?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
74 | 
75 | ### 4.2 - RNN – `nngraph` package ([slides](res/4.2/slides.pdf), [script](res/4.2/script.lua))
76 | 
77 | [![Practical 4.2 - nngraph package](http://img.youtube.com/vi/FL_VTcp9jvw/0.jpg)](https://youtu.be/FL_VTcp9jvw?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
78 | 
79 | ### 4.3 - RNN – Training ([slides](res/4.3/slides.pdf))
80 | 
81 | [![Practical 4.3 - RNN training](http://img.youtube.com/vi/lRN0wayLTeo/0.jpg)](https://youtu.be/lRN0wayLTeo?list=PLLHTzKZzVU9ebuL6DCclzI54MrPNFGqbW)
82 | 
83 | LSTM and training with `rnn` package coming soon! :blush:
84 | 


--------------------------------------------------------------------------------
/res/3.2/GoogLeNet.lua:
--------------------------------------------------------------------------------
  1 | --------------------------------------------------------------------------------
  2 | -- GoogLeNet neural network architecture
  3 | -- Reliable implementation of http://arxiv.org/abs/1409.4842
  4 | --------------------------------------------------------------------------------
  5 | -- Alfredo Canziani, Mar 16
  6 | --------------------------------------------------------------------------------
  7 | 
  8 | --require 'cudnn'  -- GPU packages are left disabled; the aliases below stand in
  9 | --require 'cunn'
 10 | require 'nn'
 11 | local cunn = nn  -- alias so that cunn.* refers to nn (unused below)
 12 | local cudnn = nn -- alias so that every cudnn.* below resolves to the nn module
 13 | local nClasses = 1e3 -- output size of the last Linear layer of each classifier
 14 | 
 15 | -- Short constructor names used throughout the file (looked up via the alias)
 16 | local SC  = cudnn.SpatialConvolution
 17 | local SMP = cudnn.SpatialMaxPooling
 18 | local RLU = cudnn.ReLU
 19 | 
 20 | -- Inception module: four parallel branches joined along the plane dimension ---
 21 | local function inc(input_size, config) -- inception
 22 |    -- input_size: input planes; config: {{n1x1}, {red3, n3x3}, {red5, n5x5}, {poolK, nPool}}
 23 |    local n1x1 = config[1][1]
 24 |    local red3, n3x3 = config[2][1], config[2][2]
 25 |    local red5, n5x5 = config[3][1], config[3][2]
 26 |    local poolK, nPool = config[4][1], config[4][2]
 27 |    local tower = nn.Concat(2) -- concat dim: 2 with a batch dim (would be 1 without)
 28 | 
 29 |    -- Branch 1: 1x1 convolution
 30 |    tower:add(nn.Sequential():add(SC(input_size, n1x1, 1, 1)):add(RLU(true)))
 31 |    -- Branch 2: 1x1 reduction, then 3x3 convolution (stride 1, pad 1)
 32 |    local branch3 = nn.Sequential():add(SC(input_size, red3, 1, 1)):add(RLU(true))
 33 |    tower:add(branch3:add(SC(red3, n3x3, 3, 3, 1, 1, 1, 1)):add(RLU(true)))
 34 |    -- Branch 3: 1x1 reduction, then 5x5 convolution (stride 1, pad 2)
 35 |    local branch5 = nn.Sequential():add(SC(input_size, red5, 1, 1)):add(RLU(true))
 36 |    tower:add(branch5:add(SC(red5, n5x5, 5, 5, 1, 1, 2, 2)):add(RLU(true)))
 37 |    -- Branch 4: max pooling (stride 1, pad 1), then 1x1 projection
 38 |    local branchPool = nn.Sequential():add(SMP(poolK, poolK, 1, 1, 1, 1))
 39 |    tower:add(branchPool:add(SC(input_size, nPool, 1, 1)):add(RLU(true)))
 40 | 
 41 |    -- Returns the nn.Concat container; its output carries
 42 |    -- n1x1 + n3x3 + n5x5 + nPool planes
 43 |    return tower
 44 | end
 45 | 
 46 | -- Factorised first convolution: one 7x7 convolution per input plane -----------
 47 | local function fac()
 48 |    -- Per-plane pipeline: view one slice as 1 x 224 x 224, then 7x7 conv (1 -> 8)
 49 |    local plane = nn.Sequential():add(nn.Contiguous())
 50 |    plane:add(nn.View(-1, 1, 224, 224)):add(SC(1, 8, 7, 7, 2, 2, 3, 3))
 51 | 
 52 |    -- Slice the input along dim 2, run one pipeline per slice, join along dim 2
 53 |    local perChannel = nn.Parallel(2, 2)
 54 |    perChannel:add(plane)         -- R
 55 |    perChannel:add(plane:clone()) -- G
 56 |    perChannel:add(plane:clone()) -- B
 57 | 
 58 |    -- Mix the 3 x 8 = 24 per-plane maps into 64 planes with a 1x1 convolution
 59 |    local stem = nn.Sequential():add(perChannel):add(RLU(true))
 60 |    stem:add(SC(24, 64, 1, 1)):add(RLU(true))
 61 | 
 62 |    return stem
 63 | end
 64 | 
 65 | --function createModel(nGPU)
 66 | 
 67 | --[[
 68 | 
 69 |    +-------+      +-------+        +-------+
 70 |    | main0 +--+---> main1 +----+---> main2 +----+
 71 |    +-------+  |   +-------+    |   +-------+    |
 72 |               |                |                |
 73 |               | +----------+   | +----------+   | +----------+
 74 |               +-> softMax0 +-+ +-> softMax1 +-+ +-> softMax2 +-+
 75 |                 +----------+ |   +----------+ |   +----------+ |
 76 |                              |                |                |   +-------+
 77 |                              +----------------v----------------v--->  out  |
 78 |                                                                    +-------+
 79 | --]]
 80 | 
 81 | -- Building blocks ----------------------------------------------------------
 82 | local main0 = nn.Sequential() -- first stage: stem up to inception 4(a)
 83 |    :add(fac()) -- 1
 84 |    --:add(SC(3, 64, 7, 7, 2, 2, 3, 3))
 85 |    :add(SMP(3, 3, 2, 2):ceil())
 86 |    :add(SC(64, 64, 1, 1)):add(RLU(true)) -- 2
 87 |    :add(SC(64, 192, 3, 3, 1, 1, 1, 1)):add(RLU(true)) -- 3
 88 |    :add(SMP(3, 3, 2, 2):ceil())
 89 |    :add(inc(192, {{ 64}, { 96,128}, {16, 32}, {3, 32}})) -- 4,5 / 3(a)
 90 |    :add(inc(256, {{128}, {128,192}, {32, 96}, {3, 64}})) -- 6,7 / 3(b)
 91 |    :add(SMP(3, 3, 2, 2):ceil())
 92 |    :add(inc(480, {{192}, { 96,208}, {16, 48}, {3, 64}})) -- 8,9 / 4(a)
 93 | 
 94 | local main1 = nn.Sequential() -- second stage: inception 4(b) to 4(d)
 95 |    :add(inc(512, {{160}, {112,224}, {24, 64}, {3, 64}})) -- 10,11 / 4(b)
 96 |    :add(inc(512, {{128}, {128,256}, {24, 64}, {3, 64}})) -- 12,13 / 4(c)
 97 |    :add(inc(512, {{112}, {144,288}, {32, 64}, {3, 64}})) -- 14,15 / 4(d)
 98 | 
 99 | local main2 = nn.Sequential() -- third stage: inception 4(e) to 5(b)
100 |    :add(inc(528, {{256}, {160,320}, {32,128}, {3,128}})) -- 16,17 / 4(e)
101 |    :add(SMP(3, 3, 2, 2):ceil())
102 |    :add(inc(832, {{256}, {160,320}, {32,128}, {3,128}})) -- 18,19 / 5(a)
103 |    :add(inc(832, {{384}, {192,384}, {48,128}, {3,128}})) -- 20,21 / 5(b)
104 | 
105 | local sftMx0 = nn.Sequential() -- softMax0: auxiliary classifier fed by main0
106 | sftMx0:add(cudnn.SpatialAveragePooling(5, 5, 3, 3)) -- 5x5 average pooling, stride 3
107 | sftMx0:add(SC(512, 128, 1, 1)):add(RLU(true)) -- 1x1 convolution, 512 -> 128 planes
108 | sftMx0:add(nn.View(128*4*4):setNumInputDims(3)) -- flatten the 128 x 4 x 4 maps
109 | sftMx0:add(nn.Linear(128*4*4, 1024)):add(nn.ReLU())
110 | sftMx0:add(nn.Dropout(0.7))
111 | sftMx0:add(nn.Linear(1024, nClasses)) -- raw class scores: a ReLU here would zero negative scores
112 | sftMx0:add(nn.LogSoftMax())
113 | 
114 | local sftMx1 = nn.Sequential() -- softMax1: auxiliary classifier fed by main1
115 | sftMx1:add(cudnn.SpatialAveragePooling(5, 5, 3, 3)) -- 5x5 average pooling, stride 3
116 | sftMx1:add(SC(528, 128, 1, 1)):add(RLU(true)) -- 1x1 convolution, 528 -> 128 planes
117 | sftMx1:add(nn.View(128*4*4):setNumInputDims(3)) -- flatten the 128 x 4 x 4 maps
118 | sftMx1:add(nn.Linear(128*4*4, 1024)):add(nn.ReLU())
119 | sftMx1:add(nn.Dropout(0.7))
120 | sftMx1:add(nn.Linear(1024, nClasses)) -- raw class scores: a ReLU here would zero negative scores
121 | sftMx1:add(nn.LogSoftMax())
122 | 
123 | local sftMx2 = nn.Sequential() -- softMax2: final classifier fed by main2
124 | sftMx2:add(cudnn.SpatialAveragePooling(7, 7, 1, 1)) -- 7x7 average pooling, stride 1
125 | sftMx2:add(nn.View(1024):setNumInputDims(3)) -- flatten to 1024 features
126 | sftMx2:add(nn.Dropout(0.4))
127 | sftMx2:add(nn.Linear(1024, nClasses)) -- 22; raw class scores, no ReLU before LogSoftMax
128 | sftMx2:add(nn.LogSoftMax())
129 | 
130 | -- Macro blocks -------------------------------------------------------------
131 | -- Innermost stage: main2 followed by the final classifier
132 | local block2 = nn.Sequential():add(main2):add(sftMx2)
133 | 
134 | -- Fork after main1: block2 and sftMx1 receive the same input; their outputs
135 | -- are concatenated along dimension 2, block2's first
136 | local split1 = nn.Concat(2):add(block2):add(sftMx1)
137 | 
138 | -- Middle stage: main1 followed by that fork
139 | local block1 = nn.Sequential():add(main1):add(split1)
140 | 
141 | -- Fork after main0: block1 and sftMx0 receive the same input; their outputs
142 | -- are concatenated along dimension 2, block1's first
143 | local split0 = nn.Concat(2):add(block1):add(sftMx0)
144 | 
145 | -- Outermost stage: main0 followed by that fork. The network output therefore
146 | -- holds, side by side along dimension 2, the results of sftMx2, sftMx1 and
147 | -- sftMx0, in that order
148 | local block0 = nn.Sequential()
149 | block0:add(main0):add(split0)
150 | 
151 | -- Main model definition ----------------------------------------------------
152 | local model = block0 -- the complete network is the outermost macro block
153 | 
154 | -- Play safe with GPUs (disabled: kept for the commented-out createModel) ----
155 | --   model:cuda()
156 | --   model = makeDataParallel(model, nGPU) -- defined in util.lua
157 | --   model.imageSize = 256
158 | --   model.imageCrop = 224
159 | --   model.auxClassifiers = 2
160 | --   model.auxWeights = {0.3, 0.3}
161 | 
162 | return model -- the file's chunk returns the network to whoever loads it
163 | --end
164 | 


--------------------------------------------------------------------------------