├── test
    ├── CMakeLists.txt
    └── test_firemodule.lua
├── tutorials
    ├── lena.jpg
    ├── srd1.jpg
    ├── srd2.jpg
    ├── ladder.md
    └── ladder_network
    │   ├── ladder_help_funcs.lua
    │   └── ladder.lua
├── Criterion.lua
├── SpatialConvolutionMM.lua
├── SpatialMaxPooling.lua
├── Dictionary.lua
├── SpatialConvolution.lua
├── SpatialBatchNormalization.lua
├── CMakeLists.txt
├── LookupTable.lua
├── BatchNormalization.lua
├── Collapse.lua
├── rocks
    └── dpnn-scm-1.rockspec
├── PrintSize.lua
├── ZipTable.lua
├── ReverseTable.lua
├── WhiteNoise.lua
├── SoftMaxTree.lua
├── ZipTableOneToMany.lua
├── Clip.lua
├── TotalDropout.lua
├── CAddTensorTable.lua
├── Constant.lua
├── Container.lua
├── Serial.lua
├── LICENSE.txt
├── ModuleCriterion.lua
├── FireModule.lua
├── Decorator.lua
├── ParallelTable.lua
├── SoftMaxForest.lua
├── ReinforceBernoulli.lua
├── Reinforce.lua
├── OneHot.lua
├── ArgMax.lua
├── init.lua
├── ReinforceCategorical.lua
├── CategoricalEntropy.lua
├── NaN.lua
├── SpatialFeatNormalization.lua
├── SpatialBinaryLogisticRegression.lua
├── SpatialRegionDropout.lua
├── BinaryClassReward.lua
├── BinaryLogisticRegression.lua
├── NCECriterion.lua
├── SimpleColorTransform.lua
├── VRClassReward.lua
├── DontCast.lua
├── Sequential.lua
├── ReinforceNormal.lua
├── SpatialUniformCrop.lua
├── ReinforceGamma.lua
├── PCAColorTransform.lua
├── SpatialBinaryConvolution.lua
├── Kmeans.lua
├── SpatialGlimpse.lua
├── Inception.lua
├── Convert.lua
├── NCEModule.lua
└── Module.lua


/test/CMakeLists.txt:
--------------------------------------------------------------------------------

# Install the test script test.lua into ${INSTALL_PREFIX}.
install_files(${INSTALL_PREFIX} test.lua)



--------------------------------------------------------------------------------
/tutorials/lena.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicholas-leonard/dpnn/HEAD/tutorials/lena.jpg


--------------------------------------------------------------------------------
/tutorials/srd1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicholas-leonard/dpnn/HEAD/tutorials/srd1.jpg


--------------------------------------------------------------------------------
/tutorials/srd2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicholas-leonard/dpnn/HEAD/tutorials/srd2.jpg


--------------------------------------------------------------------------------
/Criterion.lua:
--------------------------------------------------------------------------------
-- Expose nn.Module's toBatch/fromBatch on nn.Criterion by aliasing the
-- very same function values (no copies, no wrappers).
local Module = nn.Module
local Criterion = nn.Criterion

Criterion.toBatch = Module.toBatch
Criterion.fromBatch = Module.fromBatch


--------------------------------------------------------------------------------
/SpatialConvolutionMM.lua:
--------------------------------------------------------------------------------
-- nn.SpatialConvolutionMM uses the same dpnn_mediumEmpty list object as
-- nn.SpatialConvolution. Whatever nn.SpatialConvolution.dpnn_mediumEmpty
-- holds at the moment this file is loaded is what gets aliased here.
local SpatialConvolutionMM, parent = nn.SpatialConvolutionMM, nn.Module

local sharedList = nn.SpatialConvolution.dpnn_mediumEmpty
SpatialConvolutionMM.dpnn_mediumEmpty = sharedList


--------------------------------------------------------------------------------
/SpatialMaxPooling.lua:
--------------------------------------------------------------------------------
local _ = require 'moses'
local SpatialMaxPooling, parent = nn.SpatialMaxPooling, nn.Module

-- dpnn_mediumEmpty for max pooling: a clone of nn.Module's list with the
-- 'indices' field appended.
local mediumEmpty = _.clone(parent.dpnn_mediumEmpty)
mediumEmpty[#mediumEmpty + 1] = 'indices'

SpatialMaxPooling.dpnn_mediumEmpty = mediumEmpty


--------------------------------------------------------------------------------
/Dictionary.lua:
--------------------------------------------------------------------------------
local Dictionary, parent = torch.class("nn.Dictionary", "nn.LookupTable")

-- Deprecated class kept only so that the name still resolves.
-- Constructing it always raises; use nn.LookupTable instead
-- (nn.Dictionary was useless together with optim).
function Dictionary:__init(dictSize, embeddingSize, accUpdate)
   error("DEPRECATED Jan 14, 2016")
end


--------------------------------------------------------------------------------
/SpatialConvolution.lua:
--------------------------------------------------------------------------------
 1 | local SpatialConvolution, parent = nn.SpatialConvolution, nn.Module
 2 | local _ = require 'moses'
 3 | 
 4 | local empty = _.clone(parent.dpnn_mediumEmpty)
 5 | table.insert(empty, 'finput')
 6 | table.insert(empty, 'fgradinput')
 7 | table.insert(empty, '_input')
 8 | table.insert(empty, '_gradOutput')
 9 | SpatialConvolution.dpnn_mediumEmpty = empty
10 | 


--------------------------------------------------------------------------------
/SpatialBatchNormalization.lua:
--------------------------------------------------------------------------------
 1 | local BN, parent = nn.SpatialBatchNormalization, nn.Module
 2 | local _ = require 'moses'
 3 | 
 4 | local empty = _.clone(parent.dpnn_mediumEmpty)
 5 | table.insert(empty, 'buffer')
 6 | table.insert(empty, 'buffer2')
 7 | table.insert(empty, 'centered')
 8 | table.insert(empty, 'std')
 9 | table.insert(empty, 'normalized')
10 | table.insert(empty, 'output')
11 | table.insert(empty, 'gradInput')
12 | BN.dpnn_mediumEmpty = empty
13 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------

# Build/install script for the "dpnn" Torch package.
CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
CMAKE_POLICY(VERSION 2.6)
# When LUAROCKS_PREFIX is set, derive CMAKE_INSTALL_PREFIX from it by keeping
# everything that precedes "lib/luarocks/rocks" in that path.
IF(LUAROCKS_PREFIX)
    MESSAGE(STATUS "Installing Torch through Luarocks")
    STRING(REGEX REPLACE "(.*)lib/luarocks/rocks.*" "\\1" CMAKE_INSTALL_PREFIX  "${LUAROCKS_PREFIX}")
    MESSAGE(STATUS "Prefix inferred from Luarocks: ${CMAKE_INSTALL_PREFIX}")
ENDIF()
FIND_PACKAGE(Torch REQUIRED)

# No compiled sources: src is left empty; the package consists of the
# top-level *.lua files.
SET(src)
FILE(GLOB luasrc *.lua)

# test/test.lua is added explicitly because the glob above only matches
# files in this directory.
SET(luasrc ${luasrc} test/test.lua)
ADD_TORCH_PACKAGE(dpnn "${src}" "${luasrc}" "Deep Neural Networks")



--------------------------------------------------------------------------------
/LookupTable.lua:
--------------------------------------------------------------------------------
 1 | local LookupTable, parent = nn.LookupTable, nn.Module
 2 | 
 3 | function LookupTable:maxParamNorm(maxOutNorm, maxInNorm)
 4 |    maxOutNorm = self.maxOutNorm or maxOutNorm or self.maxInNorm or maxInNorm
 5 |    if not (maxOutNorm or maxInNorm) then
 6 |       return
 7 |    end
 8 |    
 9 |    if maxOutNorm and maxOutNorm > 0 then
10 |       -- cols feed into output neurons 
11 |       self.weight:renorm(2, 2, maxOutNorm)
12 |    end
13 |    if maxInNorm and maxInNorm > 0 then
14 |       -- rows feed out from input neurons
15 |       self.weight:renorm(2, 1, maxInNorm)
16 |    end
17 | end
18 | 


--------------------------------------------------------------------------------
/BatchNormalization.lua:
--------------------------------------------------------------------------------
 1 | local _ = require 'moses'
 2 | local BN, parent = nn.BatchNormalization, nn.Module
 3 | 
 4 | local empty = _.clone(parent.dpnn_mediumEmpty)
 5 | table.insert(empty, 'buffer')
 6 | table.insert(empty, 'buffer2')
 7 | table.insert(empty, 'centered')
 8 | table.insert(empty, 'std')
 9 | table.insert(empty, 'normalized')
10 | table.insert(empty, 'output')
11 | table.insert(empty, 'gradInput')
12 | BN.dpnn_mediumEmpty = empty
13 | 
14 | -- for sharedClone
15 | local params = _.clone(parent.dpnn_parameters)
16 | table.insert(params, 'running_mean')
17 | table.insert(params, 'running_var')
18 | BN.dpnn_parameters = params
19 | 


--------------------------------------------------------------------------------
/Collapse.lua:
--------------------------------------------------------------------------------
 1 | local Collapse, parent = torch.class('nn.Collapse', 'nn.Module')
 2 | 
 3 | -- collapses non-batch dims
 4 | function Collapse:__init(nInputDim)
 5 |    parent.__init(self)
 6 |    self.nInputDim = nInputDim
 7 | end
 8 | 
 9 | function Collapse:updateOutput(input)
10 |    if not input:isContiguous() then
11 |       self._input = self._input or input.new()
12 |       self._input:resize(input:size()):copy(input)
13 |       input = self._input
14 |    end
15 |    if input:dim() > self.nInputDim then
16 |       self.output:view(input,input:size(1),-1)
17 |    else
18 |       self.output:view(input,-1)
19 |    end
20 |    return self.output
21 | end
22 | 
23 | function Collapse:updateGradInput(input, gradOutput)
24 |    self.gradInput:view(gradOutput, input:size())
25 |    return self.gradInput
26 | end
27 | 


--------------------------------------------------------------------------------
/rocks/dpnn-scm-1.rockspec:
--------------------------------------------------------------------------------
 1 | package = "dpnn"
 2 | version = "scm-1"
 3 | 
 4 | source = {
 5 |    url = "git://github.com/Element-Research/dpnn",
 6 |    tag = "master"
 7 | }
 8 | 
 9 | description = {
10 |    summary = "deep extensions to nn Modules and Criterions",
11 |    detailed = [[sharedClone, type, outside, updateGradParameters, Serial, Inception, etc.]],
12 |    homepage = "https://github.com/Element-Research/dpnn",
13 |    license = "BSD"
14 | }
15 | 
16 | dependencies = {
17 |    "torch >= 7.0",
18 |    "torchx",
19 |    "nn >= 1.0",
20 |    "nnx >= 0.1",
21 |    "moses >= 1.3.1"
22 | }
23 | 
24 | build = {
25 |    type = "command",
26 |    build_command = [[
27 | cmake -E make_directory build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUAROCKS_PREFIX)" -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE)
28 |    ]],
29 |    install_command = "cd build && $(MAKE) install"
30 | }
31 | 


--------------------------------------------------------------------------------
/PrintSize.lua:
--------------------------------------------------------------------------------
 1 | local PrintSize, parent = torch.class('nn.PrintSize', 'nn.Module')
 2 | 
 3 | function PrintSize:__init(prefix)
 4 |    parent.__init(self)
 5 |    self.prefix = prefix or "PrintSize"
 6 | end
 7 | 
 8 | function PrintSize:updateOutput(input)
 9 |    self.output = input
10 |    local size
11 |    if torch.type(input) == 'table' then
12 |       size = input
13 |    elseif torch.type(input) == 'nil' then
14 |       size = 'missing size'
15 |    else
16 |       size = input:size()
17 |    end
18 |    print(self.prefix..":input\n", size)
19 |    return self.output
20 | end
21 | 
22 | 
23 | function PrintSize:updateGradInput(input, gradOutput)
24 |    local size 
25 |    if torch.type(gradOutput) == 'table' then
26 |       size = gradOutput
27 |    elseif torch.type(gradOutput) == 'nil' then
28 |       size = 'missing size'
29 |    else
30 |       size = gradOutput:size()
31 |    end
32 |    print(self.prefix..":gradOutput\n", size)
33 |    self.gradInput = gradOutput
34 |    return self.gradInput
35 | end
36 | 
37 | 


--------------------------------------------------------------------------------
/ZipTable.lua:
--------------------------------------------------------------------------------
 1 | local ZipTable, parent = torch.class('nn.ZipTable', 'nn.Container')
 2 | 
 3 | -- input : { {a1,a2}, {b1,b2}, {c1,c2} }
 4 | -- output : { {a1,b1,c1}, {a2,b2,c2} }
 5 | function ZipTable:__init()
 6 |    parent.__init(self)
 7 |    self.output = {}
 8 |    self.gradInput = {}
 9 | end
10 | 
11 | function ZipTable:updateOutput(inputTable)
12 |    self.output = {}
13 |    for i,inTable in ipairs(inputTable) do
14 |       for j,input in ipairs(inTable) do
15 |          local output = self.output[j] or {}
16 |          output[i] = input
17 |          self.output[j] = output 
18 |       end 
19 |    end
20 |    return self.output
21 | end
22 | 
23 | function ZipTable:updateGradInput(inputTable, gradOutputTable)
24 |    self.gradInput = {}
25 |    for i,gradOutTable in ipairs(gradOutputTable) do
26 |       for j,gradOutput in ipairs(gradOutTable) do
27 |          local gradInput = self.gradInput[j] or {}
28 |          gradInput[i] = gradOutput
29 |          self.gradInput[j] = gradInput 
30 |       end 
31 |    end
32 |    return self.gradInput
33 | end
34 | 
35 | 


--------------------------------------------------------------------------------
/test/test_firemodule.lua:
--------------------------------------------------------------------------------
 1 | require 'nn'
 2 | require 'dpnn'
 3 | require 'cunn'
 4 | require 'cutorch'
 5 | 
 6 | --torch.setdefaulttensortype('torch.FloatTensor')
 7 | 
 8 | -- FireModule issue 45
 9 | --[[
10 | m = nn.Sequential()
11 | m:add(nn.FireModule(1,1,1,1))
12 | _, p = m:getParameters()
13 | print(p:sum())
14 | 
15 | m = m:cuda()
16 | _, p = m:getParameters()
17 | print(p:sum())
18 | 
19 | m:zeroGradParameters()
20 | print(p:sum())--]]
21 | 
22 | 
23 | -- Testing FireModule
24 | input = torch.rand(1, 3, 6, 6)
25 | model = nn.FireModule(3, 1, 1, 1, 'Tanh')
26 | print(model)
27 | print(model.module)
28 | parameters, gradParameters = model:getParameters()
29 | output = model:forward(input)
30 | grads = torch.rand(output:size())
31 | gi = model:backward(input, grads)
32 | print(gi:mean(), gi:std(), gi:min(), gi:max())
33 | 
34 | cutorch.setDevice(1)
35 | model:cuda()
36 | print(model.module.modules[1].finput)
37 | cinput = input:cuda()
38 | output = model:forward(cinput)
39 | gi = model:backward(input:cuda(), grads:cuda())
40 | print(gi:mean(), gi:std(), gi:min(), gi:max())
41 | 


--------------------------------------------------------------------------------
/ReverseTable.lua:
--------------------------------------------------------------------------------
 1 | local ReverseTable, parent = torch.class("nn.ReverseTable", "nn.Module")
 2 | 
 3 | function ReverseTable:__init()
 4 |    parent.__init(self)
 5 |    self.output = {}
 6 |    self.gradInput = {}
 7 | end
 8 | 
 9 | function ReverseTable:updateOutput(inputTable)
10 |    assert(torch.type(inputTable) == 'table', "Expecting table at arg 1")
11 |    
12 |    -- empty output table
13 |    for k,v in ipairs(self.output) do
14 |       self.output[k] = nil
15 |    end
16 |    
17 |    -- reverse input
18 |    local k = 1
19 |    for i=#inputTable,1,-1 do
20 |       self.output[k] = inputTable[i]
21 |       k = k + 1
22 |    end
23 |    return self.output
24 | end
25 | 
26 | function ReverseTable:updateGradInput(inputTable, gradOutputTable)
27 |    -- empty gradInput table
28 |    for k,v in ipairs(self.gradInput) do
29 |       self.gradInput[k] = nil
30 |    end
31 |    
32 |    -- reverse gradOutput
33 |    local k = 1
34 |    for i=#gradOutputTable,1,-1 do
35 |       self.gradInput[k] = gradOutputTable[i]
36 |       k = k + 1
37 |    end
38 |    return self.gradInput
39 | end
40 | 


--------------------------------------------------------------------------------
/WhiteNoise.lua:
--------------------------------------------------------------------------------
 1 | local WhiteNoise, Parent = torch.class('nn.WhiteNoise', 'nn.Module')
 2 | 
 3 | function WhiteNoise:__init(mean, std)
 4 |    Parent.__init(self)
 5 |    -- std corresponds to 50% for MNIST training data std.
 6 |    self.mean = mean or 0
 7 |    self.std = std or 0.1
 8 |    self.noise = torch.Tensor()
 9 | end
10 | 
11 | function WhiteNoise:updateOutput(input)
12 |    self.output:resizeAs(input):copy(input)
13 |    if self.train ~= false then
14 |       self.noise:resizeAs(input)
15 |       self.noise:normal(self.mean, self.std)
16 |       self.output:add(self.noise)
17 |    else
18 |       if self.mean ~= 0 then
19 |          self.output:add(self.mean)
20 |       end
21 |    end
22 |    return self.output
23 | end
24 | 
25 | function WhiteNoise:updateGradInput(input, gradOutput)
26 |    if self.train ~= false then
27 |       -- Simply return the gradients.
28 |       self.gradInput:resizeAs(gradOutput):copy(gradOutput)
29 |    else
30 |       error('backprop only defined while training')
31 |    end
32 |    return self.gradInput
33 | end
34 | 
35 | function WhiteNoise:__tostring__()
36 |   return string.format('%s mean: %f, std: %f', 
37 |                         torch.type(self), self.mean, self.std)
38 | end
39 | 


--------------------------------------------------------------------------------
/SoftMaxTree.lua:
--------------------------------------------------------------------------------
 1 | local SoftMaxTree, parent = nn.SoftMaxTree, nn.Module
 2 | local _ = require 'moses'
 3 | 
 4 | function SoftMaxTree:momentumGradParameters()
 5 |    -- get dense view of momGradParams
 6 |    if not self.momGradParams or _.isEmpty(self.momGradParams) then
 7 |       assert(not self.accUpdate, "cannot use momentum with accUpdate")
 8 |       self.momGradParams = {self.gradWeight:clone():zero(), self.gradBias:clone():zero()}
 9 |    end
10 |    local momGradParams = self.momGradParams
11 |    if self.static and not _.isEmpty(self.updates) then      
12 |       local momGradWeight = momGradParams[1]
13 |       local momGradBias = momGradParams[2]
14 |       momGradParams = {}
15 |       -- only return the parameters affected by the forward/backward
16 |       for parentId, scale in pairs(self.updates) do
17 |          local node = self.parentChildren:select(1, parentId)
18 |          local parentIdx = node[1]
19 |          local nChildren = node[2]
20 |          momGradParams[parentId] = momGradWeight:narrow(1, parentIdx, nChildren)
21 |          local biasId = parentId+self.maxParentId
22 |          momGradParams[biasId] = momGradBias:narrow(1, parentIdx, nChildren)
23 |       end
24 |    end
25 |    return momGradParams
26 | end
27 | 


--------------------------------------------------------------------------------
/ZipTableOneToMany.lua:
--------------------------------------------------------------------------------
 1 | local ZipTableOneToMany, parent = torch.class('nn.ZipTableOneToMany', 'nn.Container')
 2 | 
 3 | -- based on ZipTable in dpnn
 4 | 
 5 | -- input : { v, {a, b, c} } 
 6 | -- output : { {v,a}, {v,b}, {v,c} }
 7 | function ZipTableOneToMany:__init()
 8 |    parent.__init(self)
 9 |    self.output = {}
10 |    self.gradInput = {}
11 |    -- make buffer to update during forward/backward
12 |    self.gradInputEl = torch.Tensor()
13 | end
14 | 
15 | function ZipTableOneToMany:updateOutput(input)
16 |    assert(#input == 2, "input must be table of element and table")
17 |    local inputEl, inputTable = input[1], input[2]
18 |    self.output = {}
19 |    for i,v in ipairs(inputTable) do
20 |       self.output[i] = {inputEl, v}
21 |    end
22 |    return self.output
23 | end
24 | 
25 | function ZipTableOneToMany:updateGradInput(input, gradOutput)
26 |    assert(#input == 2, "input must be table of element and table")
27 |    local inputEl, inputTable = input[1], input[2]
28 |    self.gradInputEl:resizeAs(inputEl):zero()
29 |    local gradInputTable = {}
30 |    for i,gradV in ipairs(gradOutput) do
31 |       self.gradInputEl:add(gradV[1])
32 |       gradInputTable[i] = gradV[2]
33 |    end
34 |    self.gradInput = {self.gradInputEl, gradInputTable}
35 |    return self.gradInput
36 | end
37 | 
38 | 


--------------------------------------------------------------------------------
/Clip.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ Clip ]]--
 3 | -- clips values within minval and maxval
 4 | ------------------------------------------------------------------------
 5 | local Clip, parent = torch.class("nn.Clip", "nn.Module")
 6 | 
 7 | function Clip:__init(minval, maxval)
 8 |    assert(torch.type(minval) == 'number')
 9 |    assert(torch.type(maxval) == 'number')
10 |    self.minval = minval
11 |    self.maxval = maxval
12 |    parent.__init(self)
13 | end
14 | 
15 | function Clip:updateOutput(input)
16 |    -- bound results within height and width
17 |    self._mask = self._mask or input.new()
18 |    self._byte = self._byte or torch.ByteTensor()
19 |    self.output:resizeAs(input):copy(input)
20 |    self._mask:gt(self.output, self.maxval)
21 |    local byte = torch.type(self.output) == 'torch.CudaTensor' and self._mask 
22 |       or self._byte:resize(self._mask:size()):copy(self._mask)
23 |    self.output[byte] = self.maxval
24 |    self._mask:lt(self.output, self.minval)
25 |    byte = torch.type(self.output) == 'torch.CudaTensor' and self._mask 
26 |       or self._byte:resize(self._mask:size()):copy(self._mask)
27 |    self.output[byte] = self.minval
28 |    return self.output
29 | end
30 | 
31 | function Clip:updateGradInput(input, gradOutput)
32 |    self.gradInput:set(gradOutput)
33 |    return self.gradInput
34 | end
35 | 
36 | 


--------------------------------------------------------------------------------
/TotalDropout.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ TotalDropout ]]--
 3 | -- Like vanilla Dropout, but on the entire inputs.
 4 | -- So either the input is entirely forwarded or entirely zeroed.
 5 | ------------------------------------------------------------------------
 6 | local TotalDropout, parent = torch.class("nn.TotalDropout", "nn.Module")
 7 | 
 8 | function TotalDropout:__init(p)
 9 |    self.p = p or 0.5
10 |    self.train = true
11 |    if self.p >= 1 or self.p < 0 then
12 |       error('<TotalDropout> illegal percentage, must be 0 <= p < 1')
13 |    end
14 |    parent.__init(self)
15 | end
16 | 
17 | function TotalDropout:updateOutput(input)
18 |    self.output:resizeAs(input):copy(input)
19 |    if self.train then
20 |       self.noise = torch.bernoulli(1-self.p)
21 |       self.output:mul(self.noise)
22 |    end
23 |    return self.output
24 | end
25 | 
26 | function TotalDropout:updateGradInput(input, gradOutput)
27 |    if self.train then
28 |       self.gradInput:resizeAs(gradOutput):copy(gradOutput)
29 |       self.gradInput:mul(self.noise) -- simply mask the gradients with the noise vector
30 |    else
31 |       error('backprop only defined while training')
32 |    end
33 |    return self.gradInput
34 | end
35 | 
36 | function TotalDropout:__tostring__()
37 |   return string.format('%s(%f)', torch.type(self), self.p)
38 | end
39 | 


--------------------------------------------------------------------------------
/CAddTensorTable.lua:
--------------------------------------------------------------------------------
 1 | 
 2 | local CAddTensorTable, parent = torch.class('nn.CAddTensorTable', 'nn.Module')
 3 | 
 4 | function CAddTensorTable:__init()
 5 |    parent.__init(self)
 6 |    self.gradInput = {}
 7 | end
 8 | 
 9 | -- input is a table with 2 entries. input[1] is the vector to be added.
10 | -- input[2] is the table to which we add the vector
11 | function CAddTensorTable:updateOutput(input)
12 |   local currentOutput = {}
13 |   for i=1,#input[2] do
14 |     currentOutput[i] = currentOutput[i] or input[1].new()
15 |     currentOutput[i]:resizeAs(input[1])
16 |     currentOutput[i]:copy(input[2][i])
17 |     currentOutput[i]:add(input[1])
18 |   end
19 |   for i = #input[2]+1, #currentOutput do
20 |     currentOutput[i] = nil
21 |   end
22 |   self.output = currentOutput
23 |   return self.output
24 | end
25 | 
26 | function CAddTensorTable:updateGradInput(input, gradOutput)
27 |   self.gradInput[1] = self.gradInput[1] or input[1].new()
28 |   self.gradInput[1]:resizeAs(input[1])
29 |   self.gradInput[1]:copy(gradOutput[1])
30 |   for i=2, #input[2] do
31 |     self.gradInput[1]:add(gradOutput[i])
32 |   end
33 |   self.gradInput[2] = self.gradInput[2] or {}
34 |   for i=1,#input[2] do
35 |     self.gradInput[2][i] = self.gradInput[2][i] or input[1].new()
36 |     self.gradInput[2][i]:resizeAs(input[1])
37 |     self.gradInput[2][i]:copy(gradOutput[i])
38 |   end
39 |   for i=#input[2]+1, #self.gradInput[2] do
40 |      self.gradInput[2][i] = nil
41 |   end
42 |   return self.gradInput
43 | end


--------------------------------------------------------------------------------
/Constant.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ Constant ]]--
 3 | -- Outputs a constant value given an input.
 4 | -- If nInputDim is specified, uses the input to determine the size of 
 5 | -- the batch. The value is then replicated over the batch.
 6 | -- You can use this with nn.ConcatTable() to append constant inputs to
 7 | -- an input : nn.ConcatTable():add(nn.Constant(v)):add(nn.Identity()) .
 8 | ------------------------------------------------------------------------
 9 | local Constant, parent = torch.class("nn.Constant", "nn.Module")
10 | 
11 | function Constant:__init(value, nInputDim)
12 |    self.value = value
13 |    if torch.type(self.value) == 'number' then
14 |       self.value = torch.Tensor{self.value}
15 |    end
16 |    assert(torch.isTensor(self.value), "Expecting number or tensor at arg 1")
17 |    self.nInputDim = nInputDim
18 |    parent.__init(self)
19 | end
20 | 
21 | function Constant:updateOutput(input)
22 |    if self.nInputDim and input:dim() > self.nInputDim then
23 |       local vsize = self.value:size():totable()
24 |       self.output:resize(input:size(1), table.unpack(vsize))
25 |       local value = self.value:view(1, table.unpack(vsize))
26 |       self.output:copy(value:expand(self.output:size())) 
27 |    else
28 |       self.output:resize(self.value:size()):copy(self.value)
29 |    end
30 |    return self.output
31 | end
32 | 
33 | function Constant:updateGradInput(input, gradOutput)
34 |    self.gradInput:resizeAs(input):zero()
35 |    return self.gradInput
36 | end
37 | 


--------------------------------------------------------------------------------
/Container.lua:
--------------------------------------------------------------------------------
 1 | local Container = nn.Container
 2 | 
 3 | -- multi-add
 4 | function Container:extend(...)
 5 |    for i,module in ipairs{...} do
 6 |       self:add(module)
 7 |    end
 8 |    return self
 9 | end
10 | 
11 | function Container:sparseParameters()
12 |     local params = {}
13 |     local gradParams = {}
14 |     local scales = {}
15 |     local size = 0
16 |     for i=1,#self.modules do
17 |         local mParams, mGradParams, mScales, mSize = self.modules[i]:sparseParameters()
18 |         if mParams then
19 |             for k,param in pairs(mParams) do
20 |                assert(torch.type(param) ~= 'table')
21 |                params[size+k] = param
22 |                gradParams[size+k] = mGradParams[k]
23 |                scales[size+k] = mScales and mScales[k]
24 |             end
25 |             size = size + (mSize or #mParams)
26 |         end
27 |     end
28 |     return params, gradParams, scales, size
29 | end
30 | 
31 | function Container:parameters()
32 |     local function tinsert(to, from)
33 |         if torch.type(from) == 'table' then -- we change this line so that it works with torch.MultiCudaTensor
34 |             for i=1,#from do
35 |                 tinsert(to,from[i])
36 |             end
37 |         else
38 |             table.insert(to,from)
39 |         end
40 |     end
41 |     local w = {}
42 |     local gw = {}
43 |     for i=1,#self.modules do
44 |         local mw,mgw = self.modules[i]:parameters()
45 |         if mw then
46 |             tinsert(w,mw)
47 |             tinsert(gw,mgw)
48 |         end
49 |     end
50 |     return w,gw
51 | end
52 | 


--------------------------------------------------------------------------------
/Serial.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ Serial ]]--
 3 | -- Decorator that modifies the serialization/deserialization 
 4 | -- behaviour of encapsulated module.
 5 | ------------------------------------------------------------------------
 6 | local _ = require 'moses'
 7 | local Serial, parent = torch.class("nn.Serial", "nn.Decorator")
 8 | 
 9 | function Serial:__init(module, tensortype)
10 |    parent.__init(self, module)
11 |    self.tensortype = tensortype
12 |    if self.tensortype then
13 |       assert(tensortype:find('torch.*Tensor'), "Expecting tensortype (e.g. torch.LongTensor) at arg1")
14 |    end
15 | end
16 | 
17 | function Serial:write(file)
18 |    local state = self:getSerialState()
19 |    
20 |    local function recursiveSetMetaTable(state)
21 |       for k,v in pairs(state) do
22 |          if torch.type(v) == 'table' then
23 |             recursiveSetMetaTable(v)
24 |          end
25 |       end
26 |       
27 |       if state.dpnn_typename then
28 |          torch.setmetatable(state, state.dpnn_typename)
29 |       end
30 |    end
31 |    
32 |    -- typecast before serialization (useful for cuda)
33 |    recursiveSetMetaTable(state)
34 |    
35 |    if self.tensortype then
36 |       state:type(self.tensortype)
37 |    end
38 |    
39 |    -- removes self's metatable
40 |    state = _.map(state, function(k,v) return v end)
41 |    
42 |    file:writeObject(state)
43 | end
44 | 
45 | function Serial:read(file)
46 |    local state = file:readObject()
47 |    for k,v in pairs(state) do
48 |       self[k] = v
49 |    end
50 | end
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014-2016 Element Inc (Nicholas Leonard)
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright
 9 |    notice, this list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright
12 |    notice, this list of conditions and the following disclaimer in the
13 |    documentation and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the names of Element Inc. nor the names of its contributors may be 
16 |    used to endorse or promote products derived from this software without 
17 |    specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 | POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/ModuleCriterion.lua:
--------------------------------------------------------------------------------
 1 | local ModuleCriterion, parent = torch.class("nn.ModuleCriterion", "nn.Criterion")
 2 | -- Wraps a criterion so that input and/or target can first be passed through optional modules.
 3 | function ModuleCriterion:__init(criterion, inputModule, targetModule, castTarget)
 4 |    self.inputModule = inputModule
 5 |    self.targetModule = targetModule
 6 |    self.castTarget = (castTarget == nil) and true or castTarget -- defaults to true when omitted
 7 |    if self.inputModule then
 8 |       local params = self.inputModule:parameters()
 9 |       if params and #params > 0 then
10 |          print"Warning: nn.ModuleCriterion doesn't support parameter updates"
11 |       end
12 |    end
13 |    self.criterion = criterion
14 | end
15 | -- Runs the optional input/target modules, then scores their results with the wrapped criterion.
16 | function ModuleCriterion:updateOutput(input, target)
17 |    local inputModule, targetModule = self.inputModule, self.targetModule
18 |    if inputModule then self.input = inputModule:forward(input) end
19 |    if targetModule then self.target = targetModule:forward(target) end
20 |    -- fall back to the raw arguments when no transformed value has been stored
21 |    local criterionInput = self.input or input
22 |    local criterionTarget = self.target or target
23 |    self.output = self.criterion:forward(criterionInput, criterionTarget)
24 |    return self.output
25 | end
26 | -- Backpropagates through the criterion and then, when present, through the input module.
27 | function ModuleCriterion:updateGradInput(input, target)
28 |    self.gradInput = self.criterion:backward(self.input or input, self.target or target)
29 |    if self.inputModule then
30 |       self.gradInput = self.inputModule:backward(input, self.gradInput) -- targetModule is not backpropagated
31 |    end
32 |    return self.gradInput
33 | end
34 | -- Casts the wrapped modules and criterion, then the criterion wrapper itself.
35 | function ModuleCriterion:type(type, typecache)
36 |    if self.inputModule then
37 |       self.inputModule:type(type, typecache)
38 |    end
39 |    if self.castTarget and self.targetModule then -- the target module is only cast when castTarget is set
40 |       self.targetModule:type(type, typecache)
41 |    end
42 |    self.criterion:type(type, typecache)
43 |    return parent.type(self, type, typecache)
44 | end
45 | 


--------------------------------------------------------------------------------
/FireModule.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 |   Fire module as explained in SqueezeNet http://arxiv.org/pdf/1602.07360v1.pdf.
 3 | --]]
 4 | --FIXME works only for batches.
 5 | 
 6 | local FireModule, Parent = torch.class('nn.FireModule', 'nn.Decorator')
 7 | -- Builds squeeze (1x1 conv) -> activation -> expand (1x1 and 3x3 convs concatenated) -> activation.
 8 | function FireModule:__init(nInputPlane, s1x1, e1x1, e3x3, activation)
 9 |    self.nInputPlane = nInputPlane
10 |    self.s1x1 = s1x1
11 |    self.e1x1 = e1x1
12 |    self.e3x3 = e3x3
13 |    self.activation = activation or 'ReLU' -- name of an nn module class, looked up as nn[name] below
14 | 
15 |    if self.s1x1 > (self.e1x1 + self.e3x3) then
16 |       print('Warning: <FireModule> s1x1 is recommended to be smaller'..
17 |             ' then e1x1+e3x3')
18 |    end
19 |    
20 |    self.module = nn.Sequential()
21 |    self.squeeze = nn.SpatialConvolution(nInputPlane, s1x1, 1, 1)
22 |    self.expand = nn.Concat(2) -- joins both expand branches along dimension 2
23 |    self.expand:add(nn.SpatialConvolution(s1x1, e1x1, 1, 1))
24 |    self.expand:add(nn.SpatialConvolution(s1x1, e3x3, 3, 3, 1, 1, 1, 1)) -- 3x3 kernel, stride 1, padding 1
25 | 
26 |    -- Fire Module
27 |    self.module:add(self.squeeze)
28 |    self.module:add(nn[self.activation]())
29 |    self.module:add(self.expand)
30 |    self.module:add(nn[self.activation]())
31 |    
32 |    Parent.__init(self, self.module)
33 | end
34 | 
35 | --[[
36 | function FireModule:type(type, tensorCache)
37 |    assert(type, 'Module: must provide a type to convert to')
38 |    self.module = nn.utils.recursiveType(self.module, type, tensorCache)
39 | end
40 | --]]
41 | -- One-line description listing the plane counts and the activation name.
42 | function FireModule:__tostring__()
43 |    local fmt = '%s inputPlanes: %d -> Squeeze Planes: %d -> '..
44 |                'Expand: %d(1x1) + %d(3x3), activation: %s'
45 |    return string.format(fmt, torch.type(self), self.nInputPlane, self.s1x1,
46 |                         self.e1x1, self.e3x3, self.activation)
47 | end
48 | 


--------------------------------------------------------------------------------
/Decorator.lua:
--------------------------------------------------------------------------------
 1 | local Decorator, parent = torch.class("nn.Decorator", "nn.Container")
 2 | -- Stores the decorated module both as self.module and as the single entry of self.modules.
 3 | function Decorator:__init(module)
 4 |    parent.__init(self)
 5 |    self.module = module
 6 |    -- so that it can be handled like a Container
 7 |    self.modules[1] = module
 8 | end
 9 | -- Forwards to the decorated module and exposes its output as this module's output.
10 | function Decorator:updateOutput(input)
11 |    self.output = self.module:updateOutput(input)
12 |    return self.output
13 | end
14 | -- Forwards to the decorated module and exposes its gradInput as this module's gradInput.
15 | function Decorator:updateGradInput(input, gradOutput)
16 |    self.gradInput = self.module:updateGradInput(input, gradOutput)
17 |    return self.gradInput
18 | end
19 | -- Delegates gradient accumulation to the decorated module; returns nothing.
20 | function Decorator:accGradParameters(input, gradOutput, scale) 
21 |    self.module:accGradParameters(input, gradOutput, scale)
22 | end
23 | -- Delegates the accumulate-and-update step to the decorated module; returns nothing.
24 | function Decorator:accUpdateGradParameters(input, gradOutput, lr)
25 |    self.module:accUpdateGradParameters(input, gradOutput, lr)
26 | end
27 | -- Delegates the shared accumulate-and-update step to the decorated module; returns nothing.
28 | function Decorator:sharedAccUpdateGradParameters(input, gradOutput, lr)
29 |    self.module:sharedAccUpdateGradParameters(input, gradOutput, lr)
30 | end
31 | -- Renders as "<decorator type> @ <description of the decorated module>".
32 | function Decorator:__tostring__()
33 |    local inner = self.module
34 |    -- prefer the decorated module's own description; otherwise fall back to its type name
35 |    local describe = inner.__tostring__ or torch.type
36 |    return torch.type(self) .. ' @ ' .. describe(inner)
37 | end
38 | 
39 | -- Copies Decorator's forwarding methods and __tostring__ onto the given class table.
40 | -- useful for multiple-inheritance
41 | function Decorator.decorate(class)
42 |    class.updateOutput = nn.Decorator.updateOutput
43 |    class.updateGradInput = nn.Decorator.updateGradInput
44 |    class.accGradParameters = nn.Decorator.accGradParameters
45 |    class.accUpdateGradParameters = nn.Decorator.accUpdateGradParameters
46 |    class.sharedAccUpdateGradParameters = nn.Decorator.sharedAccUpdateGradParameters
47 |    class.__tostring__ =  nn.Decorator.__tostring__
48 | end
49 | 


--------------------------------------------------------------------------------
/ParallelTable.lua:
--------------------------------------------------------------------------------
 1 | local ParallelTable, parent = nn.ParallelTable, nn.Container
 2 | -- Swaps the nn.ParallelTable methods (on the class table, so for all instances) for timed versions.
 3 | function ParallelTable:profile()
 4 |    function ParallelTable:updateOutput(input)
 5 |       for i=1,#self.modules do
 6 |          local start = sys.clock()
 7 |          self.output[i] = self.modules[i]:updateOutput(input[i])
 8 |          if cutorch then cutorch.synchronize() end -- synchronize before reading the clock when cutorch is loaded
 9 |          print(torch.type(self.modules[i])..' updateOutput: '..sys.clock() - start.." s")
10 |       end
11 |       return self.output
12 |    end
13 | 
14 |    function ParallelTable:updateGradInput(input, gradOutput)
15 |       for i,module in ipairs(self.modules) do
16 |          local start = sys.clock()
17 |          self.gradInput[i]= module:updateGradInput(input[i], gradOutput[i])
18 |          if cutorch then cutorch.synchronize() end
19 |          print(torch.type(module)..' updateGradInput: '..sys.clock() - start.." s")
20 |       end
21 |       return self.gradInput
22 |    end
23 | 
24 |    function ParallelTable:accGradParameters(input, gradOutput, scale)
25 |       scale = scale or 1
26 |       for i,module in ipairs(self.modules) do
27 |          local start = sys.clock()
28 |          module:accGradParameters(input[i], gradOutput[i], scale)
29 |          if cutorch then cutorch.synchronize() end
30 |          print(torch.type(module)..' accGradParameters: '..sys.clock() - start.." s")
31 |       end
32 |    end
33 | 
34 |    function ParallelTable:accUpdateGradParameters(input, gradOutput, lr)
35 |       lr = lr or 1
36 |       for i,module in ipairs(self.modules) do
37 |          local start = sys.clock()
38 |          module:accUpdateGradParameters(input[i], gradOutput[i], lr)
39 |          if cutorch then cutorch.synchronize() end
40 |          print(torch.type(module)..' accUpdateGradParameters: '..sys.clock() - start.." s")
41 |       end
42 |    end
43 |    parent.profile(self) -- then hand over to the Container-level profile
44 | end
45 | 


--------------------------------------------------------------------------------
/SoftMaxForest.lua:
--------------------------------------------------------------------------------
 1 | local SoftMaxForest, parent = torch.class("nn.SoftMaxForest", "nn.Container")
 2 | -- Mixture of SoftMaxTree experts (one per tree) weighted by a softmax gater network.
 3 | function SoftMaxForest:__init(inputSize, trees, rootIds, gaterSize, gaterAct, accUpdate)
 4 |    local gaterAct = gaterAct or nn.Tanh() -- activation cloned after each hidden gater layer
 5 |    local gaterSize = gaterSize or {} -- hidden layer sizes of the gater (none by default)
 6 |    
 7 |    -- experts : one SoftMaxTree per (tree, rootId) pair
 8 |    self.experts = nn.ConcatTable()
 9 |    self.smts = {}
10 |    for i,tree in ipairs(trees) do
11 |       local smt = nn.SoftMaxTree(inputSize, tree, rootIds[i], accUpdate)
12 |       table.insert(self.smts, smt) -- fixed: self._smts was never created
13 |       self.experts:add(smt)
14 |    end
15 |    
16 |    -- gater
17 |    self.gater = nn.Sequential()
18 |    self.gater:add(nn.SelectTable(1)) -- ignore targets
19 |    for i,hiddenSize in ipairs(gaterSize) do 
20 |       self.gater:add(nn.Linear(inputSize, hiddenSize))
21 |       self.gater:add(gaterAct:clone())
22 |       inputSize = hiddenSize -- the next layer consumes this layer's output
23 |    end
24 |    self.gater:add(nn.Linear(inputSize, self.experts:size())) -- one gate per expert
25 |    self.gater:add(nn.SoftMax())
26 |    
27 |    -- mixture
28 |    self.trunk = nn.ConcatTable()
29 |    self.trunk:add(self.gater) -- fixed: self._gater was nil
30 |    self.trunk:add(self.experts) -- fixed: self._experts was nil
31 |    self.mixture = nn.MixtureTable()
32 |    self.module = nn.Sequential()
33 |    self.module:add(self.trunk)
34 |    self.module:add(self.mixture)
35 |    parent.__init(self)
36 |    self.modules[1] = self.module
37 | end
38 | -- Forwards through the internal trunk + mixture module.
39 | function SoftMaxForest:updateOutput(input)
40 |    self.output = self.module:updateOutput(input)
41 |    return self.output
42 | end
43 | -- Backpropagates through the internal module and exposes its gradInput.
44 | function SoftMaxForest:updateGradInput(input, gradOutput)
45 |    self.gradInput = self.module:updateGradInput(input, gradOutput)
46 |    return self.gradInput
47 | end
48 | -- Delegates gradient accumulation to the internal module; returns nothing.
49 | function SoftMaxForest:accGradParameters(input, gradOutput, scale)
50 |    self.module:accGradParameters(input, gradOutput, scale)
51 | end
52 | -- Delegates the accumulate-and-update step to the internal module; returns nothing.
53 | function SoftMaxForest:accUpdateGradParameters(input, gradOutput, lr)
54 |    self.module:accUpdateGradParameters(input, gradOutput, lr)
55 | end
56 | 


--------------------------------------------------------------------------------
/ReinforceBernoulli.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ ReinforceBernoulli ]]-- 
 3 | -- Ref A. http://incompleteideas.net/sutton/williams-92.pdf
 4 | -- Inputs are bernoulli probabilities (p) 
 5 | -- Outputs are samples drawn from this distribution.
 6 | -- Uses the REINFORCE algorithm (ref. A p.230-236) which is 
 7 | -- implemented through the nn.Module:reinforce(reward) interface.
 8 | -- gradOutputs are ignored (REINFORCE algorithm).
 9 | ------------------------------------------------------------------------
10 | local ReinforceBernoulli, parent = torch.class("nn.ReinforceBernoulli", "nn.Reinforce")
11 | -- Samples 0/1 outputs from the input probabilities, or copies the probabilities when not sampling.
12 | function ReinforceBernoulli:updateOutput(input)
13 |    self.output:resizeAs(input)
14 |    if self.stochastic or self.train ~= false then -- sample unless evaluating with stochastic unset
15 |       -- sample from bernoulli with P(output=1) = input
16 |       self._uniform = self._uniform or input.new()
17 |       self._uniform:resizeAs(input):uniform(0,1)
18 |       self.output:lt(self._uniform, input) -- 1 where the uniform draw falls below p, 0 elsewhere
19 |    else
20 |       -- use p for evaluation
21 |       self.output:copy(input)
22 |    end
23 |    return self.output
24 | end
25 | -- REINFORCE gradient w.r.t. the bernoulli probabilities: -reward * (x - p) / (p(1 - p)).
26 | function ReinforceBernoulli:updateGradInput(input, gradOutput)
27 |    -- Note that gradOutput is ignored
28 |    -- f : bernoulli probability mass function
29 |    -- x : the sampled values (0 or 1) (self.output)
30 |    -- p : probability of sampling a 1
31 |    -- derivative of log bernoulli w.r.t. p
32 |    -- d ln(f(x,p))    (x - p)
33 |    -- ------------ = ---------
34 |    --     d p         p(1 - p)
35 |    self.gradInput:resizeAs(input)
36 |    -- (x - p)
37 |    self.gradInput:copy(self.output):add(-1, input)
38 |    -- divide by p(1 - p)
39 |    self._div = self._div or input.new()
40 |    self._div:resizeAs(input)
41 |    self._div:fill(1):add(-1, input):cmul(input) -- no epsilon is added: the divisor is 0 when p is 0 or 1
42 |    self.gradInput:cdiv(self._div)
43 |    
44 |    -- multiply by reward 
45 |    self.gradInput:cmul(self:rewardAs(input))
46 |    -- multiply by -1 ( gradient descent on input )
47 |    self.gradInput:mul(-1)
48 |    return self.gradInput
49 | end
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/Reinforce.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ Reinforce ]]--
 3 | -- Ref A. http://incompleteideas.net/sutton/williams-92.pdf
 4 | -- Abstract class for modules that use the REINFORCE algorithm (ref A).
 5 | -- The reinforce(reward) method is called by a special Reward Criterion.
 6 | -- After which, when backward is called, the reward will be used to 
 7 | -- generate gradInputs. The gradOutput is usually ignored.
 8 | ------------------------------------------------------------------------
 9 | local Reinforce, parent = torch.class("nn.Reinforce", "nn.Module")
10 | -- Stores the stochastic flag that subclasses read in updateOutput.
11 | function Reinforce:__init(stochastic)
12 |    parent.__init(self)
13 |    -- true makes it stochastic during evaluation and training
14 |    -- false makes it stochastic only during training
15 |    self.stochastic = stochastic
16 | end
17 | -- Calls the parent reinforce and keeps the reward for the next backward pass.
18 | -- a Reward Criterion will call this
19 | function Reinforce:reinforce(reward)
20 |    parent.reinforce(self, reward)
21 |    self.reward = reward -- read back by rewardAs()
22 | end
23 | -- Default forward pass: the output tensor is set to share the input (subclasses override this).
24 | function Reinforce:updateOutput(input)
25 |    self.output:set(input)
26 |    return self.output -- previously nothing was returned, so callers received nil
27 | end
28 | function Reinforce:updateGradInput(input, gradOutput) -- default: gradInput is the reward shaped like input
29 |    local reward = self:rewardAs(input) -- gradOutput is ignored
30 |    self.gradInput:resizeAs(reward):copy(reward)
31 |    return self.gradInput -- previously nothing was returned, so direct callers received nil
32 | end
33 | -- Returns the 1D reward expanded to the size of input; this can be called by updateGradInput.
34 | function Reinforce:rewardAs(input)
35 |    assert(self.reward and self.reward:dim() == 1, "expecting a 1D reward set through reinforce(reward)")
36 |    if input:isSameSizeAs(self.reward) then
37 |       return self.reward
38 |    end
39 |    if self.reward:size(1) ~= input:size(1) then
40 |       -- assume input is in online-mode
41 |       input = self:toBatch(input, input:dim())
42 |       assert(self.reward:size(1) == input:size(1), self.reward:size(1).." ~= "..input:size(1))
43 |    end
44 |    self._reward = self._reward or self.reward.new()
45 |    self.__reward = self.__reward or self.reward.new()
46 |    -- view the reward as (n x 1 x ... x 1) so that it can be expanded over every other dimension
47 |    local size = input:size():fill(1):totable()
48 |    size[1] = self.reward:size(1)
49 |    local unpack = unpack or table.unpack -- table.unpack does not exist on plain Lua 5.1
50 |    self._reward:view(self.reward, unpack(size))
51 |    self.__reward:expandAs(self._reward, input)
52 |    return self.__reward
53 | end
53 | 


--------------------------------------------------------------------------------
/OneHot.lua:
--------------------------------------------------------------------------------
 1 | local OneHot, parent = torch.class('nn.OneHot', 'nn.Module')
 2 | 
 3 | -- adapted from https://github.com/karpathy/char-rnn
 4 | -- and https://github.com/hughperkins/char-lstm
 5 | -- outputSize is the length of the one-hot dimension appended by updateOutput.
 6 | function OneHot:__init(outputSize)
 7 |    parent.__init(self)
 8 |    self.outputSize = outputSize
 9 | end
10 | -- Writes a 1 at each input index along a new last dimension of size outputSize (zeros elsewhere).
11 | function OneHot:updateOutput(input)
12 |    local size
13 |    if type(input) == 'number' then
14 |       if self:type() == 'torch.CudaTensor' then
15 |          self._single = self._single or torch.CudaTensor():resize(1);
16 |       else
17 |          self._single = self._single or torch.LongTensor():resize(1);
18 |       end
19 |       self._single[1] = input -- wrap the number in a reusable 1-element tensor
20 |       input = self._single;
21 |       size = {} -- the output for a number is a single vector of outputSize
22 |    else
23 |       size = input:size():totable()
24 |    end
25 |    table.insert(size, self.outputSize)
26 |    
27 |    self.output:resize(unpack(size)):zero()
28 |    
29 |    size[#size] = 1 -- the indices are viewed with a trailing singleton dimension for scatter
30 |    local input_ = input:view(unpack(size))
31 |    
32 |    if torch.type(input) == 'torch.CudaTensor' or torch.type(input) == 'torch.ClTensor' then
33 |       self.output:scatter(self.output:dim(), input_, 1)
34 |    else
35 |       if torch.type(self.output) == 'torch.CudaTensor' then 
36 |          -- input is not cuda, module is, cast input to cuda
37 |          self._input = self._input or torch.CudaTensor()
38 |          self._input:resize(input_:size()):copy(input_)
39 |          input_ = self._input
40 |       elseif torch.type(input) ~= 'torch.LongTensor' then 
41 |          -- input is not long, module is not cuda, cast input to long
42 |          self._input = self._input or torch.LongTensor()
43 |          self._input:resize(input_:size()):copy(input_)
44 |          input_ = self._input
45 |       end
46 |       self.output:scatter(self.output:dim(), input_, 1)
47 |    end
48 |    
49 |    return self.output
50 | end
51 | -- Returns a zero gradient: the number 0 for a number input, a zero tensor sized like input otherwise.
52 | function OneHot:updateGradInput(input, gradOutput)
53 |    if type(input) == 'number' then
54 |       return 0
55 |    else
56 |       self.gradInput:resize(input:size()):zero()
57 |       return self.gradInput
58 |    end
59 | end
60 | -- Drops the index buffers before casting; updateOutput recreates them on demand.
61 | function OneHot:type(type, typecache)
62 |    self._single = nil
63 |    self._input = nil
64 |    return parent.type(self, type, typecache)
65 | end
66 | 


--------------------------------------------------------------------------------
/ArgMax.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ ArgMax ]]--
 3 | -- Returns the index of the maxima for dimension dim.
 4 | -- Cannot backpropagate through this module.
 5 | -- Created for use with ReinforceCategorical.
 6 | ------------------------------------------------------------------------
 7 | local ArgMax, parent = torch.class("nn.ArgMax", "nn.Module")
 8 | -- dim: dimension to reduce (default 1); nInputDim: see updateOutput; asLong: LongTensor output (default true).
 9 | function ArgMax:__init(dim, nInputDim, asLong)
10 |    parent.__init(self)
11 |    self.dim = dim or 1
12 |    self.nInputDim = nInputDim or 9999 -- default is large enough that dim is never shifted in updateOutput
13 |    self.asLong = (asLong == nil) and true or asLong
14 |    if self.asLong then
15 |       self.output = torch.LongTensor()
16 |    end
17 | end
18 | -- Copies the indices of the maxima along the chosen dimension into self.output.
19 | function ArgMax:updateOutput(input)
20 |    self._value = self._value or input.new()
21 |    self._indices = self._indices or
22 |       (torch.type(input) == 'torch.CudaTensor' and (torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()) or torch.LongTensor())
23 |    local dim = (input:dim() > self.nInputDim) and (self.dim + 1) or self.dim -- shift by one when input has more dims than nInputDim
24 |    
25 |    torch.max(self._value, self._indices, input, dim)
26 |    if input:dim() > 1 then
27 |       local idx = self._indices:select(dim, 1) -- drop the reduced dimension
28 |       self.output:resize(idx:size()):copy(idx)
29 |    else
30 |       self.output:resize(self._indices:size()):copy(self._indices)
31 |    end
32 |    return self.output
33 | end
34 | -- Returns a zero tensor sized like input; gradOutput is ignored.
35 | function ArgMax:updateGradInput(input, gradOutput)
36 |    -- cannot backprop from an index so just return a dummy zero tensor
37 |    self.gradInput:resizeAs(input):zero()
38 |    return self.gradInput
39 | end
40 | -- torch.max expects a LongTensor as indices, whereas cutorch.max expects a CudaTensor.
41 | function ArgMax:type(type, tensorCache)
42 |    if not type then return parent.type(self) end -- getter form: report the current type, change nothing
43 |    if type == 'torch.CudaTensor' then
44 |       parent.type(self, type, tensorCache)
45 |    else
46 |       -- self._indices must be a LongTensor. Setting it to nil temporarily avoids
47 |       -- unnecessary memory allocations.
48 |       local indices
49 |       indices, self._indices = self._indices, nil
50 |       parent.type(self, type, tensorCache)
51 |       self._indices = indices and indices:long() or nil
52 |    end
53 |    if self.asLong then
54 |       self.output = torch.LongTensor() -- the cast above converted it; restore the LongTensor output
55 |    end
56 |    return self
57 | end
58 | 


--------------------------------------------------------------------------------
/init.lua:
--------------------------------------------------------------------------------
 1 | require 'torch'
 2 | require 'nn'
 3 | require 'nnx'
 4 | local _ = require 'moses'
 5 | 
 6 | -- create global dpnn table
 7 | dpnn = {}
 8 | dpnn.version = 2
 9 | 
10 | unpack = unpack or table.unpack -- lua 5.2 compat
11 | 
12 | -- for testing:
13 | require('dpnn.test')
14 | 
15 | -- extensions to existing modules
16 | require('dpnn.Module')
17 | require('dpnn.Container')
18 | require('dpnn.Sequential')
19 | require('dpnn.ParallelTable')
20 | require('dpnn.LookupTable')
21 | require('dpnn.SpatialBinaryConvolution')
22 | require('dpnn.SimpleColorTransform')
23 | require('dpnn.PCAColorTransform')
24 | 
25 | -- extensions to existing criterions
26 | require('dpnn.Criterion')
27 | 
28 | -- extensions to make serialization more efficient
29 | require('dpnn.SpatialMaxPooling')
30 | require('dpnn.SpatialConvolution')
31 | require('dpnn.SpatialConvolutionMM')
32 | require('dpnn.SpatialBatchNormalization')
33 | require('dpnn.BatchNormalization')
34 | 
35 | -- decorator modules
36 | require('dpnn.Decorator')
37 | require('dpnn.Serial')
38 | require('dpnn.DontCast')
39 | require('dpnn.NaN')
40 | 
41 | -- modules
42 | require('dpnn.PrintSize')
43 | require('dpnn.Convert')
44 | require('dpnn.Constant')
45 | require('dpnn.Collapse')
46 | require('dpnn.ZipTable')
47 | require('dpnn.ZipTableOneToMany')
48 | require('dpnn.CAddTensorTable')
49 | require('dpnn.ReverseTable')
50 | require('dpnn.Dictionary')
51 | require('dpnn.Inception')
52 | require('dpnn.SoftMaxTree')
53 | require('dpnn.SoftMaxForest')
54 | require('dpnn.Clip')
55 | require('dpnn.SpatialUniformCrop')
56 | require('dpnn.SpatialGlimpse')
57 | require('dpnn.WhiteNoise')
58 | require('dpnn.ArgMax')
59 | require('dpnn.CategoricalEntropy')
60 | require('dpnn.TotalDropout')
61 | require('dpnn.Kmeans')
62 | require('dpnn.OneHot')
63 | require('dpnn.SpatialRegionDropout')
64 | require('dpnn.FireModule')
65 | require('dpnn.SpatialFeatNormalization')
66 | 
67 | -- Noise Contrastive Estimation
68 | require('dpnn.NCEModule')
69 | require('dpnn.NCECriterion')
70 | 
71 | -- REINFORCE
72 | require('dpnn.Reinforce')
73 | require('dpnn.ReinforceGamma')
74 | require('dpnn.ReinforceBernoulli')
75 | require('dpnn.ReinforceNormal')
76 | require('dpnn.ReinforceCategorical')
77 | 
78 | -- REINFORCE criterions
79 | require('dpnn.VRClassReward')
80 | require('dpnn.BinaryClassReward')
81 | 
82 | -- criterions
83 | require('dpnn.ModuleCriterion')
84 | require('dpnn.BinaryLogisticRegression')
85 | require('dpnn.SpatialBinaryLogisticRegression')
86 | 
87 | return dpnn
88 | 


--------------------------------------------------------------------------------
/ReinforceCategorical.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ ReinforceCategorical ]]-- 
 3 | -- Ref A. http://incompleteideas.net/sutton/williams-92.pdf
 4 | -- Inputs are a vector of categorical prob : (p[1], p[2], ..., p[k]) 
 5 | -- Outputs are samples drawn from this distribution.
 6 | -- Uses the REINFORCE algorithm (ref. A sec 6. p.230-236) which is 
 7 | -- implemented through the nn.Module:reinforce(r,b) interface.
 8 | -- gradOutputs are ignored (REINFORCE algorithm).
 9 | ------------------------------------------------------------------------
10 | local ReinforceCategorical, parent = torch.class("nn.ReinforceCategorical", "nn.Reinforce")
11 | -- Samples a one-hot row per input row, or copies the probabilities when not sampling.
12 | function ReinforceCategorical:updateOutput(input)
13 |    self.output:resizeAs(input)
14 |    self._index = self._index or ((torch.type(input) == 'torch.CudaTensor') and torch.CudaTensor() or torch.LongTensor())
15 |    if self.stochastic or self.train ~= false then
16 |       -- sample from categorical with p = input
17 |       self._input = self._input or input.new()
18 |       -- prevent division by zero error (see updateGradInput)
19 |       self._input:resizeAs(input):copy(input):add(0.00000001) 
20 |       input.multinomial(self._index, input, 1) -- NOTE(review): samples from input, not self._input; confirm intent
21 |       -- one hot encoding
22 |       self.output:zero()
23 |       self.output:scatter(2, self._index, 1) -- scatters along dimension 2; presumably expects 2D input, TODO confirm
24 |    else
25 |       -- use p for evaluation
26 |       self.output:copy(input)
27 |    end
28 |    return self.output
29 | end
30 | -- REINFORCE gradient w.r.t. the probabilities: -reward * x / (p + epsilon).
31 | function ReinforceCategorical:updateGradInput(input, gradOutput)
32 |    -- Note that gradOutput is ignored
33 |    -- f : categorical probability mass function
34 |    -- x : the sampled indices (one per sample) (self.output)
35 |    -- p : probability vector (p[1], p[2], ..., p[k]) 
36 |    -- derivative of log categorical w.r.t. p
37 |    -- d ln(f(x,p))     1/p[i]    if i = x  
38 |    -- ------------ =   
39 |    --     d p          0         otherwise
40 |    self.gradInput:resizeAs(input):zero()
41 |    self.gradInput:copy(self.output) -- start from the output of the last forward pass
42 |    self._input = self._input or input.new()
43 |    -- prevent division by zero error
44 |    self._input:resizeAs(input):copy(input):add(0.00000001) 
45 |    self.gradInput:cdiv(self._input)
46 |    
47 |    -- multiply by reward 
48 |    self.gradInput:cmul(self:rewardAs(input))
49 |    -- multiply by -1 ( gradient descent on input )
50 |    self.gradInput:mul(-1)
51 |    return self.gradInput
52 | end
53 | -- Drops the index buffer before casting; updateOutput recreates it for the new input type.
54 | function ReinforceCategorical:type(type, tc)
55 |    self._index = nil
56 |    return parent.type(self, type, tc)
57 | end
58 | 


--------------------------------------------------------------------------------
/CategoricalEntropy.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ CategoricalEntropy ]]--
 3 | -- Maximize the entropy of a categorical distribution (e.g. softmax ).
 4 | -- H(X) = E(-log(p(X))) = -sum(p(X)log(p(X)))
 5 | -- where X = 1,...,N and N is the number of categories.
 6 | -- A batch with an entropy below minEntropy will be maximized.
 7 | -- d H(X=x)     p(x)
 8 | -- -------- = - ---- - log(p(x)) = -1 - log(p(x))
 9 | --   d p        p(x)
10 | ------------------------------------------------------------------------
11 | local CE, parent = torch.class("nn.CategoricalEntropy", "nn.Module")
12 | -- scale: weight of the entropy gradient (default 1); minEntropy: optional threshold read by updateGradInput.
13 | function CE:__init(scale, minEntropy)
14 |    parent.__init(self)
15 |    self.scale = scale or 1
16 |    self.minEntropy = minEntropy
17 |    
18 |    -- get the P(X) using the batch as a prior
19 |    self.module = nn.Sequential()
20 |    self.module:add(nn.Sum(1)) -- sum categorical probabilities over batch
21 |    self._mul = nn.MulConstant(1) -- the constant is overwritten with 1/input:sum() in updateGradInput
22 |    self.module:add(self._mul) -- make them sum to one (i.e. probabilities)
23 |    
24 |    -- get entropy H(X)
25 |    local concat = nn.ConcatTable()
26 |    concat:add(nn.Identity()) -- p(X)
27 |    local seq = nn.Sequential()
28 |    seq:add(nn.AddConstant(0.000001)) -- prevent log(0) = nan errors
29 |    seq:add(nn.Log())
30 |    concat:add(seq)
31 |    self.module:add(concat) -- log(p(x))
32 |    self.module:add(nn.CMulTable()) -- p(x)log(p(x))
33 |    self.module:add(nn.Sum()) -- sum(p(x)log(p(x)))
34 |    self.module:add(nn.MulConstant(-1)) -- H(x)
35 |    
36 |    self.modules = {self.module}
37 |    
38 |    self.minusOne = torch.Tensor{-self.scale} -- gradient descent on maximization
39 |    self.sizeAverage = true -- when set, the entropy gradient is divided by input:size(1)
40 | end
41 | -- Passes the input through unchanged; the entropy is only evaluated in updateGradInput.
42 | function CE:updateOutput(input)
43 |    assert(input:dim() == 2, "CategoricalEntropy only works with batches")
44 |    self.output:set(input)
45 |    return self.output
46 | end
47 | -- Copies gradOutput and, when the batch entropy is below minEntropy (or none is set), adds the entropy gradient.
48 | function CE:updateGradInput(input, gradOutput, scale)
49 |    assert(input:dim() == 2, "CategoricalEntropy only works with batches")
50 |    self.gradInput:resizeAs(input):copy(gradOutput)
51 |    
52 |    self._mul.constant_scalar = 1/input:sum() -- sum to one
53 |    self.entropy = self.module:updateOutput(input)[1] -- entropy of the batch, stored on the module
54 |    if (not self.minEntropy) or (self.entropy < self.minEntropy) then
55 |       local gradEntropy = self.module:updateGradInput(input,  self.minusOne, scale)
56 |       if self.sizeAverage then
57 |          gradEntropy:div(input:size(1)) -- in place on the tensor returned by the internal module
58 |       end
59 |       self.gradInput:add(gradEntropy)
60 |    end
61 |    
62 |    return self.gradInput
63 | end
64 | 


--------------------------------------------------------------------------------
/NaN.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ NaN ]]--
 3 | -- Asserts that outputs and gradInputs do not contain NaNs.
 4 | -- Useful for locating the source of NaN errors.
 5 | ------------------------------------------------------------------------
 6 | local NaN, parent = torch.class("nn.NaN", "nn.Decorator")
 7 | -- File-local counter behind newId(), which returns 1, 2, 3, ... on successive calls.
 8 | local idseq = 0
 9 | function NaN.newId()
10 |    idseq = idseq + 1
11 |    return idseq
12 | end
13 | -- Decorates module; id appears in error messages and defaults to a fresh NaN.newId().
14 | function NaN:__init(module, id)
15 |    parent.__init(self, module)
16 |    self.id = id or NaN.newId()
17 | end
18 | -- True when the sum of a tensor, or of any tensor nested in a table, is NaN.
19 | function NaN:recursiveIsNaN(tensor)
20 |    if torch.type(tensor) ~= 'table' then
21 |       local moses = require 'moses'
22 |       return moses.isNaN(tensor:sum())
23 |    end
24 |    -- tables are searched recursively; the first NaN found ends the search
25 |    for _, element in pairs(tensor) do
26 |       if self:recursiveIsNaN(element) then
27 |          return true
28 |       end
29 |    end
30 |    return false
31 | end
32 | -- Forwards through the decorated module and raises an error when the output contains NaN.
33 | function NaN:updateOutput(input)
34 |    self.output = self.module:updateOutput(input)
35 |    if self:recursiveIsNaN(self.output) then
36 |       if self:recursiveIsNaN(input) then -- blame the input first, then the parameters, then the output
37 |          error(string.format("NaN found in input of module :\n%s", self:__tostring__()))
38 |       elseif self:recursiveIsNaN(self:parameters()) then
39 |          error(string.format("NaN found in parameters of module :\n%s", self:__tostring__()))
40 |       end
41 |       error(string.format("NaN found in output of module :\n%s", self:__tostring__()))
42 |    end
43 |    return self.output
44 | end
45 | -- Backpropagates through the decorated module and raises an error when gradInput contains NaN.
46 | function NaN:updateGradInput(input, gradOutput)
47 |    self.gradInput = self.module:updateGradInput(input, gradOutput)
48 |    if self:recursiveIsNaN(self.gradInput) then
49 |       if self:recursiveIsNaN(gradOutput) then -- the NaN was already present in gradOutput
50 |          error(string.format("NaN found in gradOutput of module :\n%s", self:__tostring__()))
51 |       end
52 |       error(string.format("NaN found in gradInput of module :\n%s", self:__tostring__()))
53 |    end
54 |    return self.gradInput
55 | end
56 | -- Accumulates gradients in the decorated module, then raises an error when gradParameters contain NaN.
57 | function NaN:accGradParameters(input, gradOutput, scale) 
58 |    self.module:accGradParameters(input, gradOutput, scale)
59 |    local params, gradParams = self:parameters()
60 |    if self:recursiveIsNaN(gradParams) then
61 |       error(string.format("NaN found in gradParameters of module :\n%s", self:__tostring__()))
62 |    end
63 | end
64 | -- Renders as "<type>(<id>) @ <description of the decorated module>".
65 | function NaN:__tostring__()
66 |    local inner = self.module
67 |    local selfstring = torch.type(self) .. '(' .. self.id .. ')'
68 |    -- prefer the decorated module's own description; otherwise fall back to its type name
69 |    local describe = inner.__tostring__ or torch.type
70 |    return selfstring .. ' @ ' .. describe(inner)
71 | end
72 | 
73 | 


--------------------------------------------------------------------------------
/SpatialFeatNormalization.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 |    Color normalization (mean zeroing and dividing by standard deviation).
 3 |    Basic preprocessing step widely used in training classifier with images.
 4 | --]]
 5 | 
 6 | local SpatialFeatNormalization, Parent = torch.class('nn.SpatialFeatNormalization', 'nn.Module')
 7 | -- mean: 1D tensor with one entry per feature; std: optional per-feature values (filled with 1 when nil).
 8 | function SpatialFeatNormalization:__init(mean, std)
 9 |    Parent.__init(self)
10 |    if mean:dim() ~= 1 then -- only the dimensionality of mean is checked here
11 |       error('<SpatialFeatNormalization> Mean/Std should be 1D.')
12 |    end
13 |    self.mean = torch.Tensor()
14 |    self.mean:resizeAs(mean):copy(mean)
15 |    self.std = torch.Tensor()
16 |    self.std:resizeAs(mean)
17 |    if std ~= nil then self.std:copy(std) else self.std:fill(1) end
18 |    self.noOfFeats = mean:size(1)
19 | end
20 | -- Subtracts the per-feature mean and divides by the per-feature std of a 3D or 4D input.
21 | function SpatialFeatNormalization:updateOutput(input)
22 |    self.output:resizeAs(input):copy(input)
23 |    local nDim = input:dim()
24 |    if nDim == 4 then
25 |       -- Batch of image/s : features are indexed along dimension 2
26 |       if input:size(2) ~= self.noOfFeats then
27 |          error('<SpatialFeatNormalization> No. of Feats dont match.')
28 |       end
29 |       for feat = 1, self.noOfFeats do
30 |          local plane = self.output[{{}, feat, {}, {}}]
31 |          plane:add(-self.mean[feat]):div(self.std[feat])
32 |       end
33 |    elseif nDim == 3 then
34 |       -- single image : features are indexed along dimension 1
35 |       if input:size(1) ~= self.noOfFeats then
36 |          error('<SpatialFeatNormalization> No. of Feats dont match.')
37 |       end
38 |       for feat = 1, self.noOfFeats do
39 |          local plane = self.output[{feat, {}, {}}]
40 |          plane:add(-self.mean[feat]):div(self.std[feat])
41 |       end
42 |    else
43 |       error('<SpatialFeatNormalization> invalid input dims.')
44 |    end
45 |    -- the normalization above was applied in place on the copy held in self.output
46 |    return self.output
47 | end
48 | 
-- Backward pass: the mean shift has unit derivative, so the gradient of each
-- feature map is simply gradOutput divided by that feature's std.
-- The layout (4D batch or 3D single image) is read from gradOutput's rank.
function SpatialFeatNormalization:updateGradInput(input, gradOutput)
   self.gradInput:resizeAs(gradOutput):copy(gradOutput)

   local rank = self.gradInput:dim()
   local featDim
   if rank == 4 then
      featDim = 2 -- batch of image/s
   elseif rank == 3 then
      featDim = 1 -- single image
   else
      error('<SpatialFeatNormalization> invalid self.gradInput dims.')
   end

   if self.gradInput:size(featDim) ~= self.noOfFeats then
      error('<SpatialFeatNormalization> No. of Feats dont match.')
   end

   -- select() returns a view, so div() scales self.gradInput in place.
   for feat = 1, self.noOfFeats do
      self.gradInput:select(featDim, feat):div(self.std[feat])
   end
   return self.gradInput
end
74 | 


--------------------------------------------------------------------------------
/SpatialBinaryLogisticRegression.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ SpatialBinaryLogisticRegression ]]--
 3 | -- Takes an image of size batchSize x nChannel x width x height as input.
 4 | -- Computes Binary Logistic Regression Cost.
 5 | -- Useful for 2 class pixel classification.
 6 | ------------------------------------------------------------------------
 7 | 
 8 | local SpatialBinaryLogisticRegression, parent = torch.class('nn.SpatialBinaryLogisticRegression', 'nn.Criterion')
 9 | 
-- Constructor. Takes no arguments; the loss is normalised over the batch
-- by default (sizeAverage = true).
function SpatialBinaryLogisticRegression:__init()
   parent.__init(self)
   -- When true, updateOutput/updateGradInput also divide by the batch size.
   self.sizeAverage = true
end
14 | 
-- Computes the binary logistic regression cost
--    sum(log(1 + exp(-target .* input))) / (2 * [k *] h * w)
-- where k is the batch size (only used when sizeAverage is true) and h, w are
-- the spatial sizes of the input.
-- input, target : tensors of identical size, either 3D with a leading size of
--                 1 or 4D with size 1 along dim 2.
-- Returns the scalar loss, which is also stored in self.output.
-- Side effects: caches _baseExponents (exp(-target.*input)) and _coeff
-- (1 + exp(-target.*input)) for reuse by updateGradInput.
function SpatialBinaryLogisticRegression:updateOutput(input, target)
   local inputDim = input:nDimension()
   local targetDim = target:nDimension()

   -- Check dimensions of input and target
   assert(inputDim == targetDim, "nDimension of input and target don't match.")
   assert(inputDim == 4 or inputDim == 3, "Expecting image or batch on images")

   for i = 1, inputDim do
      assert(input:size(i) == target:size(i),
             "Input and target dimensions don't match.")
   end

   -- Check batch or single image
   if inputDim == 4 then
      self._isBatch = true
      assert(input:size(2) == 1, "No. of channels should be 1.")
      self._k = input:size(1)
      self._h = input:size(3)
      self._w = input:size(4)
   else
      self._isBatch = false
      assert(input:size(1) == 1, "No. of channels should be 1.")
      self._k = 1
      self._h = input:size(2)
      self._w = input:size(3)
   end

   self._baseExponents = self._baseExponents or input.new()
   self._coeff = self._coeff or input.new()
   self._logCoeff = self._logCoeff or input.new()

   -- exp(-target .* input)
   self._baseExponents:resize(input:size()):copy(input)
   self._baseExponents:cmul(target):mul(-1):exp()

   -- 1 + exp(-target .* input)
   self._coeff:resize(input:size()):copy(self._baseExponents):add(1)

   -- log(1 + exp(-target .* input))
   self._logCoeff:resize(input:size()):copy(self._coeff):log()

   local denom = 2 * self._h * self._w
   if self.sizeAverage then
      denom = denom * self._k
   end

   -- Store the loss on the criterion so code reading criterion.output after
   -- forward() sees the current value, as with the other criteria.
   self.output = self._logCoeff:sum() / denom
   return self.output
end
66 | 
-- Gradient of the cost with respect to input:
--    -target .* exp(-target.*input) ./ (1 + exp(-target.*input)) / normaliser
-- Relies on _baseExponents, _coeff, _k, _h and _w cached by the preceding
-- updateOutput call.
function SpatialBinaryLogisticRegression:updateGradInput(input, target)
   self.gradInput = self.gradInput or input.new()
   local grad = self.gradInput

   grad:resize(target:size()):copy(target):mul(-1)
   grad:cmul(self._baseExponents):cdiv(self._coeff)

   -- Same normaliser as the forward pass.
   local normaliser
   if self.sizeAverage then
      normaliser = 2 * self._k * self._h * self._w
   else
      normaliser = 2 * self._h * self._w
   end
   grad:div(normaliser)
   return grad
end
81 | 


--------------------------------------------------------------------------------
/SpatialRegionDropout.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 |    Drops out rows or columns at the edges of the input to simulate imperfect bounding boxes.
 3 | --]]
 4 | 
 5 | local SpatialRegionDropout, Parent = torch.class('nn.SpatialRegionDropout', 'nn.Module')
 6 | 
 7 | function SpatialRegionDropout:__init(p)
 8 |    Parent.__init(self)
 9 |    self.p = p or 0.2 -- ratio of total number of rows or cols
10 |    self.train = true
11 |    self.noise = torch.Tensor()
12 |    if self.p >= 1 or self.p < 0 then
13 |       error('<SpatialRegionDropout> illegal percentage, must be 0 <= p < 1')
14 |    end
15 | end
16 | 
-- Changes the ratio of rows/columns that are dropped.
-- p : new ratio, must satisfy 0 <= p < 1 (same constraint as the
--     constructor). An out-of-range value raises an error, because p >= 1
--     would make updateOutput divide the mask by zero (or a negative number).
function SpatialRegionDropout:setp(p)
   if p >= 1 or p < 0 then
      error('<SpatialRegionDropout> illegal percentage, must be 0 <= p < 1')
   end
   self.p = p
end
20 | 
21 | -- Region Types
22 | -- 1: Dropout p ratio of top rows
23 | -- 2: Dropout p ratio of bottom rows
24 | -- 3: Dropout p ratio of leftmost cols
25 | -- 4: Dropout p ratio of rightmost cols
-- Forward pass. In evaluation mode the input is copied through unchanged.
-- In training mode one of four regions is picked uniformly at random
-- (stored in self.regionType) and zeroed out:
--    1: top rows, 2: bottom rows, 3: leftmost cols, 4: rightmost cols
-- The number of rows/cols dropped is floor(size * p); the surviving values
-- are scaled by 1 / (1 - p).
-- input : 4D (nbatch, nfeat, h, w) or 3D (nfeat, h, w) tensor.
function SpatialRegionDropout:updateOutput(input)
   self.output:resizeAs(input):copy(input)
   if self.train then
      self.noise:resizeAs(input):fill(1)
      self.regionType = torch.random(4)

      local rank = input:dim()
      if rank ~= 4 and rank ~= 3 then
         error('Input must be 4D (nbatch, nfeat, h, w) or 3D (nfeat, h, w)')
      end

      -- Height is always the second-to-last dimension and width the last,
      -- so the 3D and 4D layouts can share one code path.
      local dropRows = (self.regionType == 1 or self.regionType == 2)
      local dim = dropRows and (rank - 1) or rank
      local extent = input:size(dim)
      local nDrop = math.floor(extent * self.p)

      -- Nothing to drop when p is 0 or the input is too small; an empty
      -- range is not a valid tensor index, so skip the masking entirely.
      if nDrop > 0 then
         local fromStart = (self.regionType == 1 or self.regionType == 3)
         local first = fromStart and 1 or (extent - nDrop + 1)
         self.noise:narrow(dim, first, nDrop):fill(0)
      end

      self.noise:div(1 - self.p)
      self.output:cmul(self.noise)
   end
   return self.output
end
67 | 
-- Backward pass: masks gradOutput with the noise generated by the last
-- training-mode forward pass. Raises an error outside training mode.
function SpatialRegionDropout:updateGradInput(input, gradOutput)
   if not self.train then
      error('Backpropagation is only defined for training.')
   end
   self.gradInput:resizeAs(gradOutput):copy(gradOutput)
   self.gradInput:cmul(self.noise)
   return self.gradInput
end
77 | 
-- Human-readable description: the torch type name followed by the ratio p.
function SpatialRegionDropout:__tostring__()
   local typeName = torch.type(self)
   return ('%s p: %f'):format(typeName, self.p)
end
81 | 


--------------------------------------------------------------------------------
/BinaryClassReward.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ BinaryClassReward ]]--
 3 | -- Variance reduced binary classification reinforcement criterion.
 4 | -- The binary class version of VRClassReward.
 5 | -- input : {class prediction, baseline reward}
 6 | -- Reward is 1 for success, Reward is 0 otherwise.
 7 | -- reward = scale*(Reward - baseline) where baseline is 2nd input element
 8 | -- Note : for RNNs with R = 1 for last step in sequence, encapsulate it
 9 | -- in nn.ModuleCriterion(BinaryClassReward, nn.SelectTable(-1))
10 | ------------------------------------------------------------------------
11 | local BinaryClassReward, parent = torch.class("nn.BinaryClassReward", "nn.Criterion")
12 | 
-- Constructor.
-- module    : object whose reinforce(reward) method is called during backward
-- scale     : multiplier applied to the reward (defaults to 1)
-- criterion : criterion used to train the baseline (defaults to MSECriterion)
function BinaryClassReward:__init(module, scale, criterion)
   parent.__init(self)
   self.sizeAverage = true
   -- gradInput is a table: [1] class prediction, [2] baseline (set in backward)
   self.gradInput = {torch.Tensor()}
   self.module = module
   self.scale = scale or 1
   self.criterion = criterion or nn.MSECriterion()
end
21 | 
-- Forward pass.
-- input  : table whose first element is a 1D tensor of predictions
-- target : 1D tensor of binary labels
-- A prediction counts as class 1 when it is greater than 0.5. The reward is
-- `scale` for every correctly classified sample and 0 otherwise; the returned
-- loss is -sum(reward), divided by the batch size when sizeAverage is set.
function BinaryClassReward:updateOutput(input, target)
   assert(torch.type(input) == 'table')
   local pred = input[1]
   assert(pred:dim() == 1)
   assert(target:dim() == 1)

   -- threshold the predictions into hard 0/1 decisions
   self._binary = self._binary or pred.new()
   self._binary:gt(pred, 0.5)

   -- bring the target to the same tensor type as the decisions if needed
   local decisionType = torch.type(self._binary)
   if decisionType ~= torch.type(target) then
      self._target = self._target or self._binary.new()
      self._target:resize(target:size()):copy(target)
      target = self._target
   end

   -- 1 where decision equals target, then scaled
   self._reward = self._reward or pred.new()
   self._reward:eq(self._binary, target)
   self.reward = self.reward or pred.new()
   self.reward:resize(self._reward:size(1)):copy(self._reward):mul(self.scale)

   -- loss = -sum(reward)
   local loss = -self.reward:sum()
   if self.sizeAverage then
      loss = loss / pred:size(1)
   end
   self.output = loss
   return self.output
end
51 | 
-- Backward pass.
-- inputTable : {class prediction, baseline reward}
-- Broadcasts the variance-reduced reward (reward - baseline, divided by the
-- batch size when sizeAverage is set) to self.module via reinforce(), and
-- returns a table of gradients: zeros for the class prediction and the
-- baseline criterion's gradient for the baseline.
-- Uses self.reward computed by the preceding updateOutput call.
function BinaryClassReward:updateGradInput(inputTable, target)
   local input, baseline = inputTable[1], inputTable[2]

   -- reduce variance of reward using baseline
   self.vrReward = self.vrReward or self.reward.new()
   self.vrReward:resizeAs(self.reward):copy(self.reward)
   self.vrReward:add(-1, baseline)
   if self.sizeAverage then
      self.vrReward:div(input:size(1))
   end
   -- broadcast reward to modules
   self.module:reinforce(self.vrReward)

   -- zero gradInput (this criterion has no gradInput for class pred)
   self.gradInput[1]:resizeAs(input):zero()

   -- learn the baseline reward; run forward first because a user-supplied
   -- criterion may depend on state computed there (VRClassReward does the same)
   self.criterion:forward(baseline, self.reward)
   self.gradInput[2] = self.criterion:backward(baseline, self.reward)

   return self.gradInput
end
73 | 
-- Casts the criterion to the given tensor type.
-- The cached _binary/_target buffers are dropped so they get re-created with
-- the new type, and self.module is detached during the parent cast so that
-- the referenced module itself is left untouched.
function BinaryClassReward:type(type)
   self._binary, self._target = nil, nil

   local detached = self.module
   self.module = nil
   local result = parent.type(self, type)
   self.module = detached

   return result
end
83 | 


--------------------------------------------------------------------------------
/BinaryLogisticRegression.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ BinaryLogisticRegression ]]--
 3 | -- Takes an image of size batchSize x 1 or  just batchSize as input.
 4 | -- Computes Binary Logistic Regression Cost.
 5 | -- Useful for 2 class classification.
 6 | ------------------------------------------------------------------------
 7 | 
 8 | local BinaryLogisticRegression, parent = torch.class('nn.BinaryLogisticRegression', 'nn.Criterion')
 9 | 
-- Constructor.
-- sizeAverage : when nil (omitted) defaults to true; otherwise stored as given.
--               Controls whether the loss and gradient are divided by the
--               number of elements.
function BinaryLogisticRegression:__init(sizeAverage)
   parent.__init(self)
   self.sizeAverage = true
   if sizeAverage ~= nil then
      self.sizeAverage = sizeAverage
   end
end
18 | 
-- Computes the binary logistic regression cost
--    sum(log(1 + exp(-target .* input))) [/ number of elements]
-- input, target : 1D tensors, or 2D tensors with one dimension of size 1;
--                 both must hold the same number of elements.
-- Returns the scalar loss, which is also stored in self.output.
-- Side effects: caches _baseExponents (exp(-target.*input)), _coeff
-- (1 + exp(-target.*input)) and _k (element count) for updateGradInput.
function BinaryLogisticRegression:updateOutput(input, target)
   local inputDim = input:nDimension()
   local targetDim = target:nDimension()

   -- Check dimensions of input and target
   assert(inputDim == 1 or inputDim == 2,
          "Input:Expecting batchSize or batchSize x 1")
   assert(targetDim == 1 or targetDim == 2,
          "Target:Expecting batchSize or batchSize x 1")
   if inputDim == 2 then
      assert(input:size(1)==1 or input:size(2)==1,
             "Input: Expecting batchSize x 1.")
   end
   if targetDim == 2 then
      assert(target:size(1)==1 or target:size(2)==1,
             "Target: Expecting batchSize x 1.")
   end

   local inputElements = input:nElement()
   local targetElements = target:nElement()

   assert(inputElements == targetElements,
          "No of input and target elements should be same.")

   self._k = inputElements
   -- Work on flat views so both layouts share the same code path.
   local flatInput = input:view(-1)
   local flatTarget = target:view(-1)

   self._baseExponents = self._baseExponents or flatInput.new()
   self._coeff = self._coeff or flatInput.new()
   self._logCoeff = self._logCoeff or flatInput.new()

   -- exp(-target .* input)
   self._baseExponents:resize(flatInput:size()):copy(flatInput)
   self._baseExponents:cmul(flatTarget):mul(-1):exp()

   -- 1 + exp(-target .* input)
   self._coeff:resize(flatInput:size()):copy(self._baseExponents):add(1)

   -- log(1 + exp(-target .* input))
   self._logCoeff:resize(flatInput:size()):copy(self._coeff):log()

   -- Store the loss on the criterion so code reading criterion.output after
   -- forward() sees the current value, as with the other criteria.
   local loss = self._logCoeff:sum()
   if self.sizeAverage then
      loss = loss / (self._k)
   end
   self.output = loss
   return self.output
end
70 | 
-- Gradient of the cost with respect to input:
--    -target .* exp(-target.*input) ./ (1 + exp(-target.*input)) [/ k]
-- Relies on _baseExponents, _coeff and _k cached by the preceding
-- updateOutput call. The result has the same size as input.
function BinaryLogisticRegression:updateGradInput(input, target)
   self.gradInput = self.gradInput or input.new()
   local grad = self.gradInput

   grad:resize(input:size()):copy(target):mul(-1)
   grad:cmul(self._baseExponents):cdiv(self._coeff)

   if self.sizeAverage then
      grad:div(self._k)
   end
   return grad
end
83 | 
-- Casts the criterion. When an actual target type is given, the cached
-- work buffers are discarded first so updateOutput re-creates them with the
-- type of its next input.
function BinaryLogisticRegression:type(type, tensorCache)
   if type then
      for _, field in ipairs({'_baseExponents', '_coeff', '_logCoeff'}) do
         self[field] = nil
      end
   end
   return parent.type(self, type, tensorCache)
end
92 | 


--------------------------------------------------------------------------------
/NCECriterion.lua:
--------------------------------------------------------------------------------
  1 | ------------------------------------------------------------------------
  2 | --[[ Noise Contrast Estimation Criterion ]]--
  3 | -- Ref.: A. http://mi.eng.cam.ac.uk/~xc257/papers/ICASSP2015-rnnlm-nce.pdf
  4 | --       B. https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf
  5 | ------------------------------------------------------------------------
  6 | local NCECriterion, parent = torch.class("nn.NCECriterion", "nn.Criterion")
  7 | local eps = 0.0000001
  8 | 
  9 | function NCECriterion:__init()
 10 |    parent.__init(self)  
 11 |    self.sizeAverage = true
 12 |    
 13 |    self.gradInput = {torch.Tensor(), torch.Tensor(), torch.Tensor(), torch.Tensor()}   
 14 | end
 15 | 
 16 | function NCECriterion:updateOutput(inputTable, target)
 17 |    -- P_model(target), P_model(sample), P_noise(target), P_noise(sample)
 18 |    local Pmt, Pms, Pnt, Pns = unpack(inputTable)
 19 |    local k = Pms:size(2)
 20 |    
 21 |    assert(Pmt:dim() == 1)
 22 |    assert(Pms:dim() == 2)
 23 |    assert(Pnt:dim() == 1)
 24 |    assert(Pns:dim() == 2)
 25 |    
 26 |    -- equation 5 in ref. A
 27 |    
 28 |    -- eq 5.1 : P(origin=model) = Pmt / (Pmt + k*Pnt) 
 29 |    self._Pom = self._Pom or Pmt.new()
 30 |    self._Pom:resizeAs(Pmt):copy(Pmt)
 31 |    self._Pomdiv = self._Pomdiv or Pmt.new()
 32 |    self._Pomdiv:resizeAs(Pmt):copy(Pmt)
 33 |    self._Pomdiv:add(k, Pnt):add(eps)
 34 |    self._Pom:cdiv(self._Pomdiv)
 35 |    
 36 |    -- eq 5.2 : P(origin=noise) = k*Pns / (Pms + k*Pns)
 37 |    self._Pon = self._Pon or Pns.new()
 38 |    self._Pon:resizeAs(Pns):copy(Pns):mul(k)
 39 |    self._Pondiv = self._Pondiv or Pms.new()
 40 |    self._Pondiv:resizeAs(Pms):copy(Pms)
 41 |    self._Pondiv:add(k, Pns):add(eps)
 42 |    self._Pon:cdiv(self._Pondiv)
 43 |    
 44 |    -- equation 6 in ref. A
 45 |    
 46 |    self._lnPom = self._lnPom or self._Pom.new()
 47 |    self._lnPom:log(self._Pom)
 48 |    
 49 |    self._lnPon = self._lnPon or self._Pon.new()
 50 |    self._lnPon:log(self._Pon)
 51 |    
 52 |    local lnPomsum = self._lnPom:sum()
 53 |    local lnPonsum = self._lnPon:sum()
 54 |    
 55 |    self.output = - (lnPomsum + lnPonsum)
 56 |    
 57 |    if self.sizeAverage then
 58 |       self.output = self.output / Pmt:size(1)
 59 |    end
 60 |    
 61 |    return self.output
 62 | end
 63 | 
 64 | function NCECriterion:updateGradInput(inputTable, target)
 65 |    assert(#self.gradInput == 4)
 66 |    local Pmt, Pms, Pnt, Pns = unpack(inputTable)
 67 |    local k = Pms:size(2)
 68 |    
 69 |    -- equation 7 in ref. A
 70 |    
 71 |    -- d ln(Pom) / d input = -k*Pnt / ( Pmt * (Pmt + k*Pnt) )
 72 |    local dlnPom = self.gradInput[1]
 73 |    dlnPom = dlnPom or Pnt.new()
 74 |    dlnPom:resizeAs(Pnt):copy(Pnt):mul(-k)
 75 |    dlnPom:cdiv(self._Pomdiv)
 76 |    Pmt:add(eps)
 77 |    dlnPom:cdiv(Pmt) -- d ln(Pmt) / d Pmt = 1 / d Pmt
 78 |    Pmt:add(-eps)
 79 |    
 80 |    -- d ln(Pon) / d input = Pms / ( Pms * (Pms + k*Pns) )
 81 |    local dlnPon = self.gradInput[2]
 82 |    dlnPon = dlnPon or Pms.new()
 83 |    dlnPon:resizeAs(Pms):copy(Pms)
 84 |    dlnPon:cdiv(self._Pondiv)
 85 |    Pms:add(eps)
 86 |    dlnPon:cdiv(Pms) -- d ln(Pms) / d Pms = 1 / d Pms
 87 |    Pms:add(-eps)
 88 |    
 89 |    if self.gradInput[3]:nElement() ~= Pnt:nElement() then
 90 |       self.gradInput[3]:resizeAs(Pnt):zero()
 91 |    end
 92 |    if self.gradInput[4]:nElement() ~= Pns:nElement() then
 93 |       self.gradInput[4]:resizeAs(Pns):zero()
 94 |    end
 95 |    
 96 |    if self.sizeAverage then
 97 |       dlnPom:div(Pmt:size(1))
 98 |       dlnPon:div(Pmt:size(1))
 99 |    end
100 |    
101 |    return self.gradInput   
102 | end
103 | 


--------------------------------------------------------------------------------
/SimpleColorTransform.lua:
--------------------------------------------------------------------------------
 1 | --[[
 2 |    Simple Color transformation module: This module implements a simple data
 3 |    augmentation technique of changing the pixel values of input image by adding
 4 |    small, randomly sampled per-channel offsets.
 5 |    Works only with 3D (single image) or 4D (batch of images) inputs.
 6 | --]]
 7 | 
 8 | local SimpleColorTransform, Parent = torch.class('nn.SimpleColorTransform', 'nn.Module')
 9 | 
-- Constructor.
-- inputChannels : number of channels of the images fed to this module
-- range         : tensor with one entry per channel; channel i is shifted by
--                 a value sampled uniformly from [-range[i], range[i]]
-- Asserts that range has exactly inputChannels elements.
function SimpleColorTransform:__init(inputChannels, range)
   Parent.__init(self)

   self.train = true
   self.inputChannels = inputChannels

   local nRange = range:nElement()
   assert(inputChannels == nRange,
          "Number of input channels and number of range values don't match.")
   self.range = range
end
19 | 
-- Forward pass. Outside training mode the input is copied through unchanged.
-- In training mode a single offset per channel (and per sample for batches)
-- is drawn uniformly from [-range[c], range[c]] and added to every pixel of
-- that channel.
-- input : 4D (batch, channels, height, width) or 3D (channels, height, width)
function SimpleColorTransform:updateOutput(input)
   local out = self.output
   out:resizeAs(input):copy(input)
   if not self.train then
      return self.output
   end

   -- lazily created work buffers, typed like the output
   self.noise = self.noise or out.new()
   self._tempNoise = self._tempNoise or out.new()
   self._tempNoiseExpanded = self._tempNoiseExpanded or out.new()
   self._tempNoiseSamples = self._tempNoiseSamples or out.new()

   local rank = out:nDimension()
   if rank == 4 then
      local nBatch, nChan = out:size(1), out:size(2)
      local h, w = out:size(3), out:size(4)
      assert(nChan == self.inputChannels)

      -- one offset per (sample, channel): fill column c of the noise matrix
      self.noise:resize(nBatch, nChan)
      for c = 1, nChan do
         local bound = self.range[c]
         self.noise:narrow(2, c, 1):uniform(-bound, bound)
      end

      -- broadcast each offset over the whole spatial plane
      self._tempNoise = self.noise:view(nBatch, self.inputChannels, 1, 1)
      self._tempNoiseExpanded:expand(self._tempNoise, nBatch, nChan, h, w)
      self._tempNoiseSamples:resizeAs(self._tempNoiseExpanded)
      self._tempNoiseSamples:copy(self._tempNoiseExpanded)
      out:add(self._tempNoiseSamples)

   elseif rank == 3 then
      local nChan = out:size(1)
      local h, w = out:size(2), out:size(3)
      assert(nChan == self.inputChannels)

      -- one offset per channel
      self.noise:resize(nChan)
      for c = 1, nChan do
         local bound = self.range[c]
         self.noise[c] = torch.uniform(-bound, bound)
      end

      -- broadcast each offset over the whole spatial plane
      self._tempNoise = self.noise:view(self.inputChannels, 1, 1)
      self._tempNoiseExpanded:expand(self._tempNoise, nChan, h, w)
      self._tempNoiseSamples:resizeAs(self._tempNoiseExpanded)
      self._tempNoiseSamples:copy(self._tempNoiseExpanded)
      out:add(self._tempNoiseSamples)
   else
      error("Invalid input dimensionality.")
   end
   return self.output
end
70 | 
-- Backward pass. Adding a constant offset has unit derivative, so the
-- gradient is a plain copy of gradOutput. Raises an error outside training.
function SimpleColorTransform:updateGradInput(input, gradOutput)
   if not self.train then
      error('backprop only defined while training')
   end
   self.gradInput:resizeAs(gradOutput)
   self.gradInput:copy(gradOutput)
   return self.gradInput
end
79 | 
-- Casts the module. The noise work buffers are dropped first so that
-- updateOutput re-creates them with the type of the new output tensor.
-- Returns whatever the parent implementation returns, so that calls such as
-- module:type(...) can be chained or used as getters like on other modules
-- (previously the result was discarded and nil was returned).
function SimpleColorTransform:type(type, tensorCache)
   self.noise = nil
   self._tempNoise = nil
   self._tempNoiseExpanded = nil
   self._tempNoiseSamples = nil
   return Parent.type(self, type, tensorCache)
end
87 | 
-- Human-readable description: the torch type name of this module.
-- The format string previously had no placeholder, so the type argument
-- passed to string.format was silently ignored.
function SimpleColorTransform:__tostring__()
  return string.format('%s', torch.type(self))
end
91 | 


--------------------------------------------------------------------------------
/VRClassReward.lua:
--------------------------------------------------------------------------------
 1 | ------------------------------------------------------------------------
 2 | --[[ VRClassReward ]]--
 3 | -- Variance reduced classification reinforcement criterion.
 4 | -- input : {class prediction, baseline reward}
 5 | -- Reward is 1 for success, Reward is 0 otherwise.
 6 | -- reward = scale*(Reward - baseline) where baseline is 2nd input element
 7 | -- Note : for RNNs with R = 1 for last step in sequence, encapsulate it
 8 | -- in nn.ModuleCriterion(VRClassReward, nn.SelectTable(-1))
 9 | ------------------------------------------------------------------------
10 | local VRClassReward, parent = torch.class("nn.VRClassReward", "nn.Criterion")
11 | 
-- Constructor.
-- module    : object whose reinforce(reward) method is called during backward
-- scale     : multiplier applied to the reward (defaults to 1)
-- criterion : criterion used to train the baseline (defaults to MSECriterion)
function VRClassReward:__init(module, scale, criterion)
   parent.__init(self)
   self.sizeAverage = true
   -- gradInput is a table: [1] class prediction, [2] baseline (set in backward)
   self.gradInput = {torch.Tensor()}
   self.module = module
   self.scale = scale or 1
   self.criterion = criterion or nn.MSECriterion()
end
20 | 
-- Forward pass.
-- input  : table whose first element holds the class scores
-- target : tensor of class indices
-- The predicted class is the index of the maximum score along dim 2. The
-- reward is `scale` for every correctly classified sample and 0 otherwise;
-- the returned loss is -sum(reward), divided by the batch size when
-- sizeAverage is set.
function VRClassReward:updateOutput(input, target)
   assert(torch.type(input) == 'table')
   local scores = self:toBatch(input[1], 1)

   self._maxVal = self._maxVal or scores.new()
   if not self._maxIdx then
      -- the index buffer must be a (Cuda)LongTensor matching the device
      if torch.type(scores) == 'torch.CudaTensor' then
         self._maxIdx = torch.CudaLongTensor()
      else
         self._maxIdx = torch.LongTensor()
      end
   end

   -- max class value is class prediction
   self._maxVal:max(self._maxIdx, scores, 2)

   -- on CUDA, copy the indices into a CudaTensor before comparing
   local predicted = self._maxIdx
   if torch.type(predicted) == 'torch.CudaLongTensor' then
      self.__maxIdx = self.__maxIdx or torch.CudaTensor()
      self.__maxIdx:resize(predicted:size()):copy(predicted)
      predicted = self.__maxIdx
   end

   -- bring the target to the same tensor type as the predictions if needed
   if torch.type(predicted) ~= torch.type(target) then
      self._target = self._target or predicted.new()
      self._target:resize(target:size()):copy(target)
      target = self._target
   end

   -- reward = scale when correctly classified
   self._reward = self._reward or predicted.new()
   self._reward:eq(predicted, target)
   self.reward = self.reward or scores.new()
   self.reward:resize(self._reward:size(1)):copy(self._reward):mul(self.scale)

   -- loss = -sum(reward)
   local loss = -self.reward:sum()
   if self.sizeAverage then
      loss = loss / scores:size(1)
   end
   self.output = loss
   return self.output
end
58 | 
-- Backward pass.
-- inputTable : {class prediction, baseline reward}
-- Broadcasts the variance-reduced reward (reward - baseline, divided by the
-- batch size when sizeAverage is set) to self.module via reinforce(), and
-- returns a table of gradients: zeros for the class prediction and the
-- baseline criterion's gradient for the baseline.
-- Uses self.reward computed by the preceding updateOutput call.
function VRClassReward:updateGradInput(inputTable, target)
   local scores = self:toBatch(inputTable[1], 1)
   local baseline = self:toBatch(inputTable[2], 1)

   -- reduce variance of reward using baseline
   self.vrReward = self.vrReward or self.reward.new()
   local vr = self.vrReward
   vr:resizeAs(self.reward):copy(self.reward)
   vr:add(-1, baseline)
   if self.sizeAverage then
      vr:div(scores:size(1))
   end

   -- broadcast reward to modules
   self.module:reinforce(vr)

   -- zero gradInput (this criterion has no gradInput for class pred)
   local grads = self.gradInput
   grads[1]:resizeAs(scores):zero()
   grads[1] = self:fromBatch(grads[1], 1)

   -- learn the baseline reward
   self.criterion:forward(baseline, self.reward)
   grads[2] = self.criterion:backward(baseline, self.reward)
   grads[2] = self:fromBatch(grads[2], 1)
   return self.gradInput
end
83 | 
-- Casts the criterion to the given tensor type.
-- The cached max/index/target buffers are dropped so they get re-created
-- with the new type, and self.module is detached during the parent cast so
-- that the referenced module itself is left untouched.
function VRClassReward:type(type)
   for _, field in ipairs({'_maxVal', '_maxIdx', '__maxIdx', '_target'}) do
      self[field] = nil
   end

   local detached = self.module
   self.module = nil
   local result = parent.type(self, type)
   self.module = detached

   return result
end
95 | 


--------------------------------------------------------------------------------
/DontCast.lua:
--------------------------------------------------------------------------------
  1 | local DontCast, parent = torch.class("nn.DontCast", "nn.Decorator")
  2 | 
  3 | -- utility functions 
  4 | 
  5 | local function recursiveTypeCopy(dst, src, type_str)
  6 |    if torch.type(src) == 'table' then
  7 |       dst = (torch.type(dst) == 'table') and dst or {}
  8 |       for k, v in pairs(src) do
  9 |          dst[k] = recursiveTypeCopy(dst[k], v, type_str)
 10 |       end
 11 |    elseif torch.isTensor(src) then
 12 |       dst = (torch.type(dst) == type_str) and dst or torch.getmetatable(type_str).new()
 13 |       dst:resize(src:size())
 14 |       if src:nElement() > 0 then
 15 |          dst:copy(src)
 16 |       end
 17 |    end
 18 |    return dst
 19 | end
 20 | 
 21 | local function tableTensorType(src)
 22 |    if type(src) == 'table' then
 23 |       local type_str, found
 24 |       for k,v in pairs(src) do
 25 |          type_str, found = tableTensorType(v)
 26 |          if found then
 27 |             return type_str, true
 28 |          end
 29 |       end
 30 |       return type_str, found
 31 |    else
 32 |       return torch.type(src), torch.isTensor(src)
 33 |    end
 34 | end
 35 | 
 36 | -- DontCast methods and constructor
 37 | 
 38 | function DontCast:__init(module, castin, castout, moduleType)
 39 |    parent.__init(self, module)
 40 |    self.castin = castin
 41 |    self.castout = (castout == nil) and castin or castout
 42 |    self.moduleType = moduleType
 43 |    if (self.castin or self.castout) and not self.moduleType then 
 44 |       local moduleType, found = tableTensorType(module.output)
 45 |       if found then
 46 |          self.moduleType = moduleType
 47 |       else
 48 |          moduleType, found = tableTensorType(module:parameters())
 49 |          if found then
 50 |             self.moduleType = moduleType
 51 |          else
 52 |             error"Cannot extrapolate moduleType. Provide constructor argument 4"
 53 |          end
 54 |       end
 55 |    end
 56 | end
 57 | 
 58 | function DontCast:updateOutput(input)
 59 |    if self.castin and tableTensorType(input) ~= self.moduleType then
 60 |       self._input = recursiveTypeCopy(self._input, input, self.moduleType)
 61 |       input = self._input
 62 |    end
 63 |    
 64 |    local output = self.module:updateOutput(input)
 65 |    
 66 |    if self.castout then
 67 |       self.output = recursiveTypeCopy(self.output, output, tableTensorType(self.output))
 68 |    else
 69 |       self.output = output
 70 |    end
 71 |    return self.output
 72 | end
 73 | 
 74 | function DontCast:updateGradInput(input, gradOutput)
 75 |    if self.castin and tableTensorType(input) ~= self.moduleType then
 76 |       input = self._input
 77 |    end
 78 |    if self.castout and tableTensorType(gradOutput) ~= self.moduleType then
 79 |       self._gradOutput = recursiveTypeCopy(self._gradOutput, gradOutput, self.moduleType)
 80 |       gradOutput = self._gradOutput
 81 |    end
 82 |    
 83 |    local gradInput = self.module:updateGradInput(input, gradOutput)
 84 |    
 85 |    if self.castin then
 86 |       self.gradInput = recursiveTypeCopy(self.gradInput, gradInput, tableTensorType(self.gradInput))
 87 |    else
 88 |       self.gradInput = gradInput
 89 |    end
 90 |    return self.gradInput
 91 | end
 92 | 
 93 | function DontCast:accGradParameters(input, gradOutput, scale)
 94 |    if self.castin and tableTensorType(input) ~= self.moduleType then
 95 |       input = self._input
 96 |    end
 97 |    if self.castout and tableTensorType(gradOutput) ~= self.moduleType then
 98 |       gradOutput = self._gradOutput
 99 |    end
100 |    
101 |    self.module:accGradParameters(input, gradOutput, scale)
102 | end
103 | 
-- Forwards accUpdateGradParameters to the decorated module.
-- Foreign-typed input/gradOutput are replaced by the converted copies
-- (self._input, self._gradOutput) cached by updateOutput/updateGradInput.
function DontCast:accUpdateGradParameters(input, gradOutput, lr)
   if self.castin then
      if tableTensorType(input) ~= self.moduleType then
         input = self._input
      end
   end
   if self.castout then
      if tableTensorType(gradOutput) ~= self.moduleType then
         gradOutput = self._gradOutput
      end
   end

   self.module:accUpdateGradParameters(input, gradOutput, lr)
end
114 | 
115 | -- dont cast (the essence thereof)
-- dont cast (the essence thereof): the decorated module is never converted.
-- Only this decorator's own output (with castout) and gradInput (with castin)
-- are re-created in the requested type when their current tensor type
-- differs from it. Always returns self.
function DontCast:type(type)
   if self.castout then
      if tableTensorType(self.output) ~= type then
         self.output = recursiveTypeCopy(nil, self.output, type)
      end
   end
   if self.castin then
      if tableTensorType(self.gradInput) ~= type then
         self.gradInput = recursiveTypeCopy(nil, self.gradInput, type)
      end
   end
   return self
end
125 | 


--------------------------------------------------------------------------------
/Sequential.lua:
--------------------------------------------------------------------------------
 1 | local Sequential, parent = nn.Sequential, nn.Container
 2 | -- Profiling aid: swaps the pass methods stored on nn.Sequential for variants that print per-module timings.
 3 | function Sequential:profile()
 4 | 
 5 |    function Sequential:updateOutput(input) -- assigned on the Sequential table itself, not on self
 6 |       local currentOutput = input
 7 |       for i=1,#self.modules do
 8 |          local start = torch.Timer() -- one timer per module call
 9 |          currentOutput = self.modules[i]:updateOutput(currentOutput)
10 |          if cutorch then cutorch.synchronize() end -- only when the cutorch global exists; done before the timer is read
11 |          print(torch.type(self.modules[i])..' updateOutput: '..start:time().real.." s")
12 |       end
13 |       self.output = currentOutput
14 |       return currentOutput
15 |    end
16 | 
17 |    function Sequential:updateGradInput(input, gradOutput)
18 |       local currentGradOutput = gradOutput
19 |       local currentModule = self.modules[#self.modules]
20 |       for i=#self.modules-1,1,-1 do -- walk backwards; each module's input is the output stored on the module before it
21 |          local previousModule = self.modules[i]
22 |          local start = torch.Timer()
23 |          currentGradOutput = currentModule:updateGradInput(previousModule.output, currentGradOutput)
24 |          if cutorch then cutorch.synchronize() end
25 |          print(torch.type(currentModule)..' updateGradInput: '..start:time().real.." s")
26 |          currentModule = previousModule
27 |       end
28 |       local start = torch.Timer() -- the first module is handled last and receives the original input
29 |       currentGradOutput = currentModule:updateGradInput(input, currentGradOutput)
30 |       if cutorch then cutorch.synchronize() end
31 |       print(torch.type(currentModule)..' updateGradInput: '..start:time().real.." s")
32 |       self.gradInput = currentGradOutput
33 |       return currentGradOutput
34 |    end
35 | 
36 |    function Sequential:accGradParameters(input, gradOutput, scale)
37 |       scale = scale or 1
38 | 
39 |       local currentGradOutput = gradOutput
40 |       local currentModule = self.modules[#self.modules]
41 |       for i=#self.modules-1,1,-1 do
42 |          local previousModule = self.modules[i]
43 |          local start = torch.Timer()
44 |          currentModule:accGradParameters(previousModule.output, currentGradOutput, scale)
45 |          if cutorch then cutorch.synchronize() end
46 |          print(torch.type(currentModule)..' accGradParameters: '..start:time().real.." s")
47 |          currentGradOutput = currentModule.gradInput -- reads the gradInput already stored on the module; nothing is recomputed here
48 |          currentModule = previousModule
49 |       end
50 |       
51 |       local start = torch.Timer()
52 |       currentModule:accGradParameters(input, currentGradOutput, scale)
53 |       if cutorch then cutorch.synchronize() end
54 |       print(torch.type(currentModule)..' accGradParameters: '..start:time().real.." s")
55 |    end
56 | 
57 |    function Sequential:backward(input, gradOutput, scale)
58 |       scale = scale or 1
59 |       local currentGradOutput = gradOutput
60 |       local currentModule = self.modules[#self.modules]
61 |       for i=#self.modules-1,1,-1 do
62 |          local previousModule = self.modules[i]
63 |          local start = torch.Timer()
64 |          currentGradOutput = currentModule:backward(previousModule.output, currentGradOutput, scale)
65 |          if cutorch then cutorch.synchronize() end
66 |          print(torch.type(currentModule)..' backward: '..start:time().real.." s")
67 |          currentModule.gradInput = currentGradOutput -- stores the returned gradient on the module that produced it
68 |          currentModule = previousModule
69 |       end
70 |       local start = torch.Timer()
71 |       currentGradOutput = currentModule:backward(input, currentGradOutput, scale)
72 |       if cutorch then cutorch.synchronize() end
73 |       print(torch.type(currentModule)..' backward: '..start:time().real.." s")
74 |       self.gradInput = currentGradOutput
75 |       return currentGradOutput
76 |    end
77 | 
78 |    function Sequential:accUpdateGradParameters(input, gradOutput, lr)
79 |       local currentGradOutput = gradOutput
80 |       local currentModule = self.modules[#self.modules]
81 |       for i=#self.modules-1,1,-1 do
82 |          local previousModule = self.modules[i]
83 |          local start = torch.Timer()
84 |          currentModule:accUpdateGradParameters(previousModule.output, currentGradOutput, lr)
85 |          if cutorch then cutorch.synchronize() end
86 |          print(torch.type(currentModule)..' accUpdateGradParameters: '..start:time().real.." s")
87 |          currentGradOutput = currentModule.gradInput
88 |          currentModule = previousModule
89 |       end
90 | 
91 |       local start = torch.Timer()
92 |       currentModule:accUpdateGradParameters(input, currentGradOutput, lr)
93 |       if cutorch then cutorch.synchronize() end
94 |       print(torch.type(currentModule)..' accUpdateGradParameters: '..start:time().real.." s")
95 |    end
96 | 
97 |    parent.profile(self) -- NOTE(review): nn.Container.profile is defined outside this file; presumably forwards to the child modules - confirm
98 | end
99 | 


--------------------------------------------------------------------------------
/ReinforceNormal.lua:
--------------------------------------------------------------------------------
  1 | ------------------------------------------------------------------------
  2 | --[[ ReinforceNormal ]]-- 
  3 | -- Ref A. http://incompleteideas.net/sutton/williams-92.pdf
  4 | -- Inputs are the means (mu) of a multivariate normal distribution. 
  5 | -- Outputs are samples drawn from these distributions.
  6 | -- Standard deviation is provided as constructor argument, or as the second element of a {mean, stdev} input table.
  7 | -- Uses the REINFORCE algorithm (ref. A sec 6. p.237-239) which is 
  8 | -- implemented through the nn.Module:reinforce(r,b) interface.
  9 | -- gradOutputs are ignored (REINFORCE algorithm).
 10 | ------------------------------------------------------------------------
 11 | local ReinforceNormal, parent = torch.class("nn.ReinforceNormal", "nn.Reinforce")
 12 | -- stdev : fixed standard deviation (number or tensor); omit it to feed {mean, stdev} tables as input.
 13 | function ReinforceNormal:__init(stdev, stochastic)
 14 |    parent.__init(self, stochastic)
 15 |    self.stdev = stdev
 16 |    if stdev then return end
 17 |    -- stdev arrives with the input : gradInput needs one tensor per table entry
 18 |    self.gradInput = {torch.Tensor(), torch.Tensor()}
 19 | end
 20 | -- Samples output ~ Normal(mean, stdev) while stochastic or training; otherwise copies the mean to the output.
 21 | function ReinforceNormal:updateOutput(input)
 22 |    local mean, stdev = input, self.stdev
 23 |    if torch.type(input) == 'table' then
 24 |       -- input is {mean, stdev}
 25 |       assert(#input == 2)
 26 |       mean, stdev = unpack(input)
 27 |    end
 28 |    assert(stdev)
 29 |    -- stdev must have come from the constructor or from the input table
 30 |    self.output:resizeAs(mean)
 31 |    
 32 |    if self.stochastic or self.train ~= false then
 33 |       self.output:normal()
 34 |       -- multiply by standard deviations
 35 |       if torch.type(stdev) == 'number' then
 36 |          self.output:mul(stdev)
 37 |       elseif torch.isTensor(stdev) then
 38 |          if stdev:dim() == mean:dim() then
 39 |             assert(stdev:isSameSizeAs(mean))
 40 |             self.output:cmul(stdev)
 41 |          else
 42 |             assert(stdev:dim()+1 == mean:dim())
 43 |             self._stdev = self._stdev or stdev.new()
 44 |             self._stdev:view(stdev,1,table.unpack(stdev:size():totable())) -- prepend a singleton dimension
 45 |             self.__stdev = self.__stdev or stdev.new()
 46 |             self.__stdev:expandAs(self._stdev, mean) -- expanded view, also read by updateGradInput
 47 |             self.output:cmul(self.__stdev)
 48 |          end
 49 |       else
 50 |          error"unsupported stdev type" -- this branch is about stdev; the message used to blame the mean
 51 |       end
 52 |       
 53 |       -- re-center the means to the mean
 54 |       self.output:add(mean)
 55 |    else
 56 |       -- use maximum a posteriori (MAP) estimate
 57 |       self.output:copy(mean)
 58 |    end
 59 |    return self.output
 60 | end
 61 | -- Fills gradInput with the negated, reward-weighted derivative of the log-density w.r.t. mean (and stdev for table inputs).
 62 | function ReinforceNormal:updateGradInput(input, gradOutput)
 63 |    -- Note that gradOutput is ignored
 64 |    -- f : normal probability density function
 65 |    -- x : the sampled values (self.output)
 66 |    -- u : mean (mu) (mean)
 67 |    -- s : standard deviation (sigma) (stdev)
 68 |    
 69 |    local mean, stdev = input, self.stdev
 70 |    local gradMean, gradStdev = self.gradInput, nil -- tensor input : only a gradient for the mean is produced
 71 |    if torch.type(input) == 'table' then
 72 |       mean, stdev = unpack(input)
 73 |       gradMean, gradStdev = unpack(self.gradInput) -- table input : gradInput is a {gradMean, gradStdev} pair
 74 |    end
 75 |    assert(stdev)   
 76 |     
 77 |    -- Derivative of log normal w.r.t. mean :
 78 |    -- d ln(f(x,u,s))   (x - u)
 79 |    -- -------------- = -------
 80 |    --      d u           s^2
 81 |    
 82 |    gradMean:resizeAs(mean)
 83 |    -- (x - u)
 84 |    gradMean:copy(self.output):add(-1, mean)
 85 |    
 86 |    -- divide by squared standard deviations
 87 |    if torch.type(stdev) == 'number' then
 88 |       gradMean:div(stdev^2)
 89 |    else
 90 |       if stdev:dim() == mean:dim() then
 91 |          gradMean:cdiv(stdev):cdiv(stdev)
 92 |       else
 93 |          gradMean:cdiv(self.__stdev):cdiv(self.__stdev) -- expanded view that updateOutput builds in its sampling branch
 94 |       end
 95 |    end
 96 |    -- multiply by reward
 97 |    gradMean:cmul(self:rewardAs(mean) ) -- NOTE(review): rewardAs is defined elsewhere; presumably shapes the reward like its argument - confirm
 98 |    -- multiply by -1 ( gradient descent on mean )
 99 |    gradMean:mul(-1)
100 |    
101 |    -- Derivative of log normal w.r.t. stdev :
102 |    -- d ln(f(x,u,s))   (x - u)^2 - s^2
103 |    -- -------------- = ---------------
104 |    --      d s              s^3
105 |    
106 |    if gradStdev then
107 |       gradStdev:resizeAs(stdev)
108 |       -- (x - u)^2
109 |       gradStdev:copy(self.output):add(-1, mean):pow(2)
110 |       -- subtract s^2
111 |       self._stdev2 = self._stdev2 or stdev.new()
112 |       self._stdev2:resizeAs(stdev):copy(stdev):cmul(stdev)
113 |       gradStdev:add(-1, self._stdev2)
114 |       -- divide by s^3
115 |       self._stdev2:cmul(stdev):add(0.00000001) -- s^2 * s, plus a small constant so the division below never hits zero
116 |       gradStdev:cdiv(self._stdev2)
117 |       -- multiply by reward
118 |       gradStdev:cmul(self:rewardAs(stdev))
119 |        -- multiply by -1 ( gradient descent on stdev )
120 |       gradStdev:mul(-1)
121 |    end
122 |    
123 |    return self.gradInput
124 | end
125 | 


--------------------------------------------------------------------------------
/SpatialUniformCrop.lua:
--------------------------------------------------------------------------------
  1 | local SpatialUniformCrop, parent = torch.class("nn.SpatialUniformCrop", "nn.Module")
  2 | -- oheight, owidth : crop size (owidth defaults to oheight); scale : optional {min=,max=} or {min,max} table of scale bounds.
  3 | function SpatialUniformCrop:__init(oheight, owidth, scale)
  4 |    parent.__init(self)
  5 |    self.oheight = oheight
  6 |    self.owidth = owidth or oheight
  7 |    self.scale = scale or nil
  8 |    if self.scale ~= nil then
  9 |       assert(torch.type(scale)=='table')
 10 |       self.scaler = nn.SpatialReSampling{owidth=self.owidth, oheight=self.oheight} -- built after the default, so a nil owidth no longer reaches the scaler
 11 |    end
 12 | end
 13 | -- Crops every sample to oheight x owidth: random offsets (optionally at a random scale) while training, a centre crop otherwise.
 14 | function SpatialUniformCrop:updateOutput(input)
 15 |    input = self:toBatch(input, 3)
 16 |    
 17 |    self.output:resize(input:size(1), input:size(2), self.oheight, self.owidth)
 18 |    self.coord = self.coord or torch.IntTensor()
 19 |    self.coord:resize(input:size(1), 2)
 20 | 
 21 |    if self.scale ~= nil then
 22 |       self.scales = self.scales or torch.FloatTensor()
 23 |       self.scales:resize(input:size(1))
 24 |    end
 25 |   
 26 |    local iH, iW = input:size(3), input:size(4)
 27 |    if self.train ~= false then
 28 |       if self.scale ~= nil then
 29 |          for i=1,input:size(1) do
 30 |             -- do random crop
 31 |             local s = torch.uniform(self.scale['min'] or self.scale[1], self.scale['max'] or self.scale[2])
 32 |             local soheight = math.ceil(s*self.oheight)
 33 |             local sowidth = math.ceil(s*self.owidth)
 34 | 
 35 |             local h = math.ceil(torch.uniform(1e-2, iH-soheight))
 36 |             local w = math.ceil(torch.uniform(1e-2, iW-sowidth))
 37 |             -- the width centre must be derived from iW (it used iH, shifting crops on non-square inputs)
 38 |             local ch = math.ceil(iH/2 - (iH-soheight)/2 + h)
 39 |             local cw = math.ceil(iW/2 - (iW-sowidth)/2 + w)
 40 | 
 41 |             local h1 = ch - math.ceil(soheight/2)
 42 |             local w1 = cw - math.ceil(sowidth/2)
 43 |             if h1 < 1 then h1 = 1 end
 44 |             if w1 < 1 then w1 = 1 end
 45 | 
 46 |             local crop = input[i]:narrow(2, h1, soheight):narrow(3, w1, sowidth)
 47 |             -- the scaler resamples the crop back to oheight x owidth
 48 |             self.output[i]:copy(self.scaler:forward(crop))
 49 |             -- save crop coordinates and scale for backward
 50 |             self.scales[i] = s
 51 |             self.coord[{i,1}] = h
 52 |             self.coord[{i,2}] = w
 53 |          end
 54 |       else
 55 |          for i=1,input:size(1) do
 56 |             -- do random crop
 57 |             local h1 = math.ceil(torch.uniform(1e-2, iH-self.oheight))
 58 |             local w1 = math.ceil(torch.uniform(1e-2, iW-self.owidth))
 59 |             local crop = input[i]:narrow(2,h1,self.oheight):narrow(3,w1,self.owidth)
 60 |             self.output[i]:copy(crop)
 61 |             -- save crop coordinates for backward
 62 |             self.coord[{i,1}] = h1
 63 |             self.coord[{i,2}] = w1
 64 |          end
 65 |       end
 66 |    else
 67 |       -- use center crop (offsets are 1-based : clamp so an input already of the crop size starts at 1, not 0)
 68 |       local h1 = math.max(1, math.ceil((iH-self.oheight)/2))
 69 |       local w1 = math.max(1, math.ceil((iW-self.owidth)/2))
 70 |       local crop = input:narrow(3,h1,self.oheight):narrow(4,w1,self.owidth)
 71 |       self.output:copy(crop)
 72 |    end
 73 |    
 74 |    self.output = self:fromBatch(self.output, 1)
 75 |    return self.output
 76 | end
 77 | -- Routes gradOutput back into the crop window recorded in self.coord / self.scales; everything else stays zero.
 78 | function SpatialUniformCrop:updateGradInput(input, gradOutput)
 79 |    input = self:toBatch(input, 3)
 80 |    gradOutput = self:toBatch(gradOutput, 3)
 81 |    -- NOTE(review): the centre-crop branch of updateOutput does not write self.coord, so a training-mode forward is assumed - confirm
 82 |    self.gradInput:resizeAs(input):zero()
 83 |    if self.scale ~= nil then
 84 |       local iH, iW = input:size(3), input:size(4)
 85 |       for i=1,input:size(1) do
 86 |          local s = self.scales[i]
 87 |          local soheight = math.ceil(s*self.oheight)
 88 |          local sowidth = math.ceil(s*self.owidth)
 89 | 
 90 |          local h, w = self.coord[{i,1}], self.coord[{i,2}]
 91 |          -- must mirror the window arithmetic of updateOutput exactly (iW for the width, not iH)
 92 |          local ch = math.ceil(iH/2 - (iH-soheight)/2 + h)
 93 |          local cw = math.ceil(iW/2 - (iW-sowidth)/2 + w)
 94 | 
 95 |          local h1 = ch - math.ceil(soheight/2)
 96 |          local w1 = cw - math.ceil(sowidth/2)
 97 |          if h1 < 1 then h1 = 1 end
 98 |          if w1 < 1 then w1 = 1 end
 99 | 
100 |          local crop = input[i]:narrow(2, h1, soheight):narrow(3, w1, sowidth)
101 |          local samplerGradInput = self.scaler:updateGradInput(crop, gradOutput[i])
102 | 
103 |          self.gradInput[i]:narrow(2, h1, soheight):narrow(3, w1, sowidth):copy(samplerGradInput)
104 |       end
105 |    else
106 |       for i=1,input:size(1) do
107 |          local h1, w1 = self.coord[{i,1}], self.coord[{i,2}]
108 |          self.gradInput[i]:narrow(2,h1,self.oheight):narrow(3,w1,self.owidth):copy(gradOutput[i])
109 |       end
110 |    end
111 |    
112 |    self.gradInput = self:fromBatch(self.gradInput, 1)
113 |    return self.gradInput
114 | end
115 | -- Drops self.coord before the cast; updateOutput re-creates it as a torch.IntTensor on its next call.
116 | function SpatialUniformCrop:type(type, cache)
117 |    self.coord = nil
118 |    return parent.type(self, type, cache)
119 | end
120 | 


--------------------------------------------------------------------------------
/ReinforceGamma.lua:
--------------------------------------------------------------------------------
  1 | ------------------------------------------------------------------------
  2 | --[[ ReinforceGamma ]]-- 
  3 | -- Ref A. http://incompleteideas.net/sutton/williams-92.pdf
  4 | -- Input is the shape (k) of a multivariate Gamma distribution, or a {shape, scale (theta)} table. 
  5 | -- Outputs are samples drawn from these distributions.
  6 | -- Scale is provided as constructor argument when the input is the shape alone.
  7 | -- Uses the REINFORCE algorithm (ref. A sec 6. p.237-239) which is 
  8 | -- implemented through the nn.Module:reinforce(r,b) interface.
  9 | -- gradOutputs are ignored (REINFORCE algorithm).
 10 | ------------------------------------------------------------------------
 11 | 
 12 | 
 13 | local ReinforceGamma, parent = torch.class("nn.ReinforceGamma", "nn.Reinforce")
 14 | -- scale : fixed scale (theta) as number or tensor; omit it to feed {shape, scale} tables as input.
 15 | function ReinforceGamma:__init(scale, stochastic)
 16 |    require('randomkit') -- needed to sample gamma dist : luarocks install randomkit
 17 |    require('cephes') -- needed to compute digamma for gradient : luarocks install cephes
 18 |    parent.__init(self, stochastic)
 19 |    self.scale = scale
 20 |    if scale then return end
 21 |    -- scale arrives with the input : gradInput needs one tensor per table entry
 22 |    self.gradInput = {torch.Tensor(), torch.Tensor()}
 23 | end
 24 | -- Samples output ~ Gamma(shape, scale) while stochastic or training; otherwise outputs shape * scale.
 25 | function ReinforceGamma:updateOutput(input)
 26 |    local shape, scale = input, self.scale
 27 |    if torch.type(input) == 'table' then
 28 |       -- input is {shape, scale}
 29 |       assert(#input == 2)
 30 |       shape, scale = unpack(input)
 31 |    end
 32 |    assert(scale)
 33 |    
 34 |    self.output:resizeAs(shape)
 35 |    -- bring scale to a tensor with as many elements as shape
 36 |    if torch.type(scale) == 'number' then
 37 |      scale = shape.new():resizeAs(shape):fill(scale)
 38 |    elseif torch.isTensor(scale) then
 39 |       if scale:dim() == shape:dim() then
 40 |          assert(scale:isSameSizeAs(shape))
 41 |       else
 42 |          assert(scale:dim()+1 == shape:dim())
 43 |          self._scale = self._scale or scale.new()
 44 |          self._scale:view(scale,1,table.unpack(scale:size():totable())) -- prepend a singleton dimension
 45 |          self.__scale = self.__scale or scale.new()
 46 |          self.__scale:expandAs(self._scale, shape) -- expanded view, also read by updateGradInput
 47 |          scale = self.__scale
 48 |       end
 49 |    else
 50 |       error"unsupported scale type" -- this branch is about scale; the message used to blame the shape
 51 |    end
 52 | 
 53 |    if self.stochastic or self.train ~= false then
 54 |       self.output:copy(randomkit.gamma(shape:squeeze():float(),scale:squeeze():float()))
 55 |    else
 56 |       -- deterministic output : shape * scale, i.e. the mean of the distribution
 57 |       self.output:copy(shape):cmul(scale)
 58 |    end
 59 | 
 60 |    return self.output
 61 | end
 62 | -- Fills gradInput with the negated, reward-weighted derivative of the log-density w.r.t. shape (and scale for table inputs).
 63 | function ReinforceGamma:updateGradInput(input, gradOutput)
 64 |    -- Note that gradOutput is ignored
 65 |    -- f : Gamma probability density function
 66 |    -- g : digamma function (derivative of the log-gamma function)
 67 |    -- x : the sampled values (self.output)
 68 |    -- shape : shape parameter of gamma dist
 69 |    -- scale: scale parameter of gamma dist
 70 | 
 71 |    local shape, scale = input, self.scale
 72 |    local gradShape, gradScale = self.gradInput, nil
 73 |    if torch.type(input) == 'table' then
 74 |       shape, scale = unpack(input)
 75 |       gradShape, gradScale = unpack(self.gradInput)
 76 |    end
 77 |    assert(scale)
 78 |     
 79 |    -- Derivative of log gamma w.r.t. shape :
 80 |    -- d ln(f(x,shape,scale))
 81 |    -- ---------------------- = ln(x) - g(shape) - ln(scale)
 82 |    --         d shape
 83 |    gradShape:resizeAs(shape)
 84 | 
 85 |    if torch.type(scale) == 'number' then
 86 |       scale = shape.new():resizeAs(shape):fill(scale)
 87 |    elseif scale:dim() ~= shape:dim() then
 88 |       -- lower-dimensional scale : use the view expanded to shape's size by
 89 |       -- updateOutput (the old test "not a == b" parsed as "(not a) == b", never true)
 90 |       scale = self.__scale
 91 |    end
 92 |    gradShape:copy(cephes.digamma(shape:float()))
 93 |    gradShape:mul(-1)
 94 | 
 95 |    self._logOutput = self._logOutput or self.output.new()
 96 |    self._logOutput:log( self.output )
 97 |    
 98 |    self._logScale = self._logScale or scale.new()
 99 |    self._logScale:log( scale )
100 | 
101 |    gradShape:add( self._logOutput )
102 |    gradShape:add(-1, self._logScale )
103 | 
104 |    -- multiply by variance reduced reward
105 |    gradShape:cmul(self:rewardAs(shape) )
106 |    -- multiply by -1 ( gradient descent on shape )
107 |    gradShape:mul(-1)
108 |    
109 |    -- Derivative of log Gamma w.r.t. scale :
110 |    -- d ln(f(x,shape,scale))      x      shape
111 |    -- ---------------------- = ------- - -----
112 |    --         d scale          scale^2   scale
113 |    -- NOTE(review): for a lower-dimensional scale input, gradScale takes the expanded size - confirm that suits callers
114 |    if gradScale then
115 |       gradScale:resizeAs(scale)
116 |       gradScale:copy( torch.cdiv(self.output, torch.pow(scale,2)) )
117 |       gradScale:add(-1, torch.cdiv(shape, scale) )
118 |       gradScale:cmul( self:rewardAs(scale) )
119 |       gradScale:mul(-1)
120 |    end
121 | 
122 |    return self.gradInput
123 | end
124 | -- Drops the cached log buffers (updateGradInput re-creates them on demand), then defers to the parent class.
125 | function ReinforceGamma:type(type,cache)
126 |    self._logOutput = nil
127 |    self._logScale = nil
128 |    return parent.type(self,type,cache)
129 | end
130 | 


--------------------------------------------------------------------------------
/PCAColorTransform.lua:
--------------------------------------------------------------------------------
  1 | --[[
  2 |    Color transformation module: Commonly used data augmentation technique.
  3 |    Random color noise is added to input image/images based on the Principal
  4 |    Component Analysis (PCA) of pixel values.
  5 | 
  6 |    Arguments
  7 |    -> eigenVectors: Each row represent an eigen vector.
  8 |    -> eigenValues: Corresponding eigen values.
  9 |    -> std: std of gaussian distribution for augmentation (default 0.1).
 10 | --]]
 11 | 
 12 | local PCAColorTransform, Parent = torch.class('nn.PCAColorTransform', 'nn.Module')
 13 | -- Checks that eigenVectors is inputChannels x inputChannels and eigenValues has inputChannels elements, then stores them.
 14 | function PCAColorTransform:__init(inputChannels, eigenVectors, eigenValues, std)
 15 |    Parent.__init(self)
 16 | 
 17 |    local nVectors = eigenVectors:size(1)
 18 |    assert(inputChannels == nVectors,
 19 |           "Number of input channels do not match number of eigen vectors.")
 20 |    assert(eigenVectors:size(2) == nVectors,
 21 |           "Invalid dimensionality: eigen vectors.")
 22 |    assert(inputChannels == eigenValues:nElement(),
 23 |           "Number of input channels do not match number of eigen values.")
 24 | 
 25 |    -- the module starts with self.train set, i.e. updateOutput adds noise
 26 |    self.train = true
 27 |    self.inputChannels, self.std = inputChannels, std or 0.1
 28 |    self.eigenVectors, self.eigenValues = eigenVectors, eigenValues
 29 | end
 30 | -- Copies input to output and, while self.train is set, adds one randomly drawn per-channel offset to each image.
 31 | function PCAColorTransform:updateOutput(input)
 32 |    self.output:resizeAs(input):copy(input)
 33 |    if self.train then
 34 |       self.noise = self.noise or self.output.new() -- scratch buffers are created lazily with the output's tensor type
 35 |       self.alphas = self.alphas or self.output.new()
 36 |       self._tempNoise = self._tempNoise or self.output.new()
 37 |       self._tempNoiseExpanded = self._tempNoiseExpanded or self.output.new()
 38 |       self._tempNoiseSamples = self._tempNoiseSamples or self.output.new() -- allocated here but not used further in this function
 39 |       self._tempLambda = self._tempLambda or self.output.new()
 40 |       self._tempLambdaExpanded = self._tempLambdaExpanded or self.output.new()
 41 | 
 42 |       if self.output:nDimension() == 4 then -- batch of images : batch x channels x height x width
 43 |          local batchSize = self.output:size(1)
 44 |          local channels = self.output:size(2)
 45 |          local height = self.output:size(3)
 46 |          local width = self.output:size(4)
 47 |          assert(channels == self.inputChannels)
 48 |          
 49 |          -- Randomly sample noise for each channel and scale by eigen values
 50 |          self.alphas:resize(channels, batchSize)
 51 |          self.alphas:normal(0, self.std)
 52 |          self._tempLambda = self.eigenValues:view(self.inputChannels, 1) -- rebinds the field to a view of eigenValues
 53 |          self._tempLambdaExpanded = self._tempLambda:expand(channels, batchSize)
 54 |          self.alphas:cmul(self._tempLambdaExpanded)
 55 | 
 56 |          -- Scale by eigen vectors 
 57 |          self.noise:resize(batchSize, self.inputChannels):zero()
 58 |          self.noise:t():addmm(self.eigenVectors, self.alphas) -- NOTE(review): eigenVectors * alphas weights the columns of eigenVectors; the file header says rows hold the vectors - confirm
 59 | 
 60 |          -- Add noise to the input
 61 |          self._tempNoise = self.noise:view(batchSize, self.inputChannels, 1, 1)
 62 |          self._tempNoiseExpanded:expand(self._tempNoise, batchSize,
 63 |                                         channels, height, width)
 64 |          self.output:add(self._tempNoiseExpanded) -- the same offset is added at every pixel of a channel
 65 | 
 66 |       elseif self.output:nDimension() == 3 then -- single image : channels x height x width
 67 |          local channels = self.output:size(1)
 68 |          local height = self.output:size(2)
 69 |          local width = self.output:size(3)
 70 |          assert(channels == self.inputChannels)
 71 | 
 72 |          -- Randomly sample noise for each channel and scale by eigen values
 73 |          self.alphas:resize(channels, 1)
 74 |          self.alphas:normal(0, self.std)
 75 |          self._tempLambda = self.eigenValues:view(self.inputChannels, 1)
 76 |          self._tempLambdaExpanded = self._tempLambda:expand(channels, 1)
 77 |          self.alphas:cmul(self._tempLambdaExpanded)
 78 | 
 79 |          -- Scale by eigen vectors 
 80 |          self.noise:resize(1, self.inputChannels):zero()
 81 |          self.noise:t():addmm(self.eigenVectors, self.alphas)
 82 | 
 83 |          -- Add noise to the input
 84 |          self._tempNoise = self.noise:view(self.inputChannels, 1, 1)
 85 |          self._tempNoiseExpanded:expand(self._tempNoise, channels,
 86 |                                         height, width)
 87 |          self.output:add(self._tempNoiseExpanded)
 88 |       else
 89 |          error("Invalid input dimensionality.")
 90 |       end
 91 |    end
 92 |    return self.output
 93 | end
 94 | -- Copies gradOutput into gradInput unchanged; raises an error when the module is not in training mode.
 95 | function PCAColorTransform:updateGradInput(input, gradOutput)
 96 |    if not self.train then
 97 |       error('backprop only defined while training')
 98 |    end
 99 |    local gradInput = self.gradInput
100 |    gradInput:resizeAs(gradOutput):copy(gradOutput)
101 |    return gradInput
102 | end
103 | -- Drops the scratch buffers (updateOutput re-creates them) and returns whatever the parent's type() returns.
104 | function PCAColorTransform:type(type, tensorCache)
105 |    self.noise = nil
106 |    self.alphas = nil
107 |    self._tempLambda = nil
108 |    self._tempLambdaExpanded = nil
109 |    self._tempNoise = nil
110 |    self._tempNoiseExpanded = nil
111 |    return Parent.type(self, type, tensorCache) -- the result was dropped before, so chained calls received nil
112 | end
113 | -- Printable summary: class name, channel count and noise std.
114 | function PCAColorTransform:__tostring__()
115 |    local template = '%s channels: %d, std: %f'
116 |    return template:format(torch.type(self), self.inputChannels, self.std)
117 | end
118 | 


--------------------------------------------------------------------------------
/tutorials/ladder.md:
--------------------------------------------------------------------------------
  1 | # Lateral Connections in Denoising Autoencoders Support Supervised Learning
  2 | 
  3 | In this tutorial we will see how to implement a ladder network as explained in [[1](http://arxiv.org/pdf/1504.08215.pdf)]. In this paper the authors show how unsupervised learning using a denoising autoencoder with lateral connections helps improve the classification accuracy in supervised learning.
  4 | 
  5 | To reproduce the results reported in the paper, run the following command (the best test error we got was **`0.6%`**). To run this script you will need the following torch packages: [`nn`](https://github.com/torch/nn), [`nngraph`](https://github.com/torch/nngraph), [`dp`](https://github.com/nicholas-leonard/dp), [`dpnn`](https://github.com/Element-Research/dpnn), [`optim`](https://github.com/torch/optim) and [`cunn`](https://github.com/torch/cunn) & [`cutorch`](https://github.com/torch/cutorch) if using cuda (```--useCuda``` flag).
  6 | ```
  7 |    th tutorials/ladder_network/ladder.lua --verbose --eta 500 --epochs 100 --learningRate 0.002 --linearDecay --endLearningRate 0 --startEpoch 50 --useCuda --deviceId 1 --noiseSigma 0.3 --useBatchNorm --batchSize 100 --adam --noValidation --attempts 10
  8 | ```
  9 | 
 10 | The unsupervised learning (denoising) task supplements the supervised learning task (classification in this case). As in autoencoders this network has an encoder and a decoder. The output of encoder is also used for classification. The output of encoder is **`N`** dimensional where **`N`** is number of classes. This **`N`** dimensional vector is used for computing classification cost as well as feeds into the decoder.
 11 | 
 12 | ## Classification
 13 | Encoder/classifier units are defined as
 14 | ```lua
 15 |    Z = nn.BatchNormalization(hidden_units)(nn.Linear(inputDims, hidden_units)(previous_H))
 16 | ```
 17 | where
 18 | ```lua
 19 |    H = nn.ReLU()(nn.CMul()(nn.Add()(Z)))
 20 | ```
 21 | For first layer **`previous_H`** is the corrupted input.
 22 | ```lua
 23 |    input = nn.WhiteNoise(mean, sigma)
 24 | ```
 25 | 
 26 | **`H`** for last encoder unit is defined as
 27 | ```lua
 28 |    H = nn.LogSoftMax()(nn.CMul()(nn.Add()(Z)))
 29 | ```
 30 | Last **`H`** feeds into the negative log likelihood criterion.
 31 | 
 32 | ## Denoising
 33 | Typically in denoising autoencoder the input samples are corrupted using Dropout [```nn.Dropout```](https://github.com/torch/nn/blob/master/Dropout.lua) but in this paper the authors use isotropic Gaussian noise [```nn.WhiteNoise```](https://github.com/Element-Research/dpnn/blob/master/WhiteNoise.lua) with zero mean.
 34 | 
 35 | ### Lateral Connections in Autoencoder
 36 | **`Z`** units in the encoder are laterally connected to the corresponding units in the decoder. The output of the decoder unit for neuron `i` is defined by
 37 | ```
 38 |    z^_i = a_i1 * z_i + a_i2 * sigmoid(a_i3 * z_i + a_i4) + a_i5
 39 | ```
 40 | where 
 41 | ```
 42 |    a_ij = c_ij * u_i + d_ij
 43 | ```
 44 | **`U`** is the output of the decoder unit's ```nn.Linear()```. For the topmost layer **`U`** is zero. **`Z`** is the output of the corresponding encoder unit (this is the lateral connection: the decoder takes the output from its previous unit through **`U`** as well as from the corresponding encoder unit). For the lowest layer of the decoder **`Z`** is the corrupted input signal. **`c_ij`** and **`d_ij`** are trainable weight vectors. This forms the crux of the ladder network. It can be easily implemented using **`nngraph`** as follows.
 45 | 
 46 | For the topmost layer **`U`**`= 0` and **`Z`** is the batch normalized output from the corresponding (in this case last) encoder/classifier unit. **`Z^`** for topmost layer is defined as
 47 | ```lua
 48 |    z_hat1 = nn.CMul(hiddens[i])(Z)
 49 |    z_hat2 = nn.CMul(hiddens[i])(Z)
 50 |    z_hat3 = nn.CMul(hiddens[i])(Z)
 51 |    z_hat34 = nn.Add(hiddens[i])(z_hat3)
 52 |    z_hatSigmoid34 = nn.Sigmoid()(z_hat34)
 53 |    z_hat234 = nn.CMulTable()({z_hat2, z_hatSigmoid34})
 54 |    z_hat5 = nn.CMul(hiddens[i])(Z)
 55 | 
 56 |    -- Z_hat = z^
 57 |    Z_hat = nn.CAddTable()({z_hat1, z_hat234, z_hat5})
 58 | ```
 59 | 
 60 | For lower decoder units **`Z^`** is defined as
 61 | ```lua
 62 |    
 63 |       u = nn.Linear()(previous_Z_hat)
 64 | 
 65 |       cu1 = nn.CMul(hidden_units)(u)
 66 |       du1 = nn.Add(hidden_units)(u)
 67 |       a1 = nn.CAddTable()({cu1, du1})
 68 |       cu2 = nn.CMul(hidden_units)(u)
 69 |       du2 = nn.Add(hidden_units)(u)
 70 |       a2 = nn.CAddTable()({cu2, du2})
 71 |       cu3 = nn.CMul(hidden_units)(u)
 72 |       du3 = nn.Add(hidden_units)(u)
 73 |       a3 = nn.CAddTable()({cu3, du3})
 74 |       cu4 = nn.CMul(hidden_units)(u)
 75 |       du4 = nn.Add(hidden_units)(u)
 76 |       a4 = nn.CAddTable()({cu4, du4})
 77 |       cu5 = nn.CMul(hidden_units)(u)
 78 |       du5 = nn.Add(hidden_units)(u)
 79 |       a5 = nn.CAddTable()({cu5, du5})
 80 | 
 81 |       z_hat1 = nn.CMulTable()({a1, z})
 82 |       z_hat2 = nn.CMulTable()({a3, z})
 83 |       z_hat3 = nn.Sigmoid()(nn.CAddTable()({z_hat2, a4}))
 84 |       z_hat4 = nn.CMulTable()({a2, z_hat3})
 85 |       Z_hat = nn.CAddTable()({z_hat1, z_hat4, a5})
 86 | ```
 87 | `Z_hat` is `z^`. Final `Z_hat` is the output of decoder and feeds into the mean squared error criterion.
 88 | 
 89 | ## Criterions
 90 | Negative log likelihood criterion is used for classification task.
 91 | ```lua
 92 |    nll = nn.ClassNLLCriterion()
 93 | ```
 94 | Mean squared error is used for the auxiliary task.
 95 | ```lua
 96 |    mse = nn.MSECriterion()
 97 | ```
 98 | These two training criterions are combined using `eta`, which determines the weight of the auxiliary task. If `eta` is zero then the model is trained for classification only.
 99 | Combined criterion
100 | ```lua
101 |    criterions = nn.ParallelCriterion()
102 |    criterions:add(nll)
103 |    criterions:add(mse, eta)
104 | ```
105 | 
106 | ## References
107 | [1] Rasmus, Antti, Harri Valpola, and Tapani Raiko. "Lateral Connections in Denoising Autoencoders Support Supervised Learning." arXiv preprint arXiv:1504.08215 (2015).
108 | 


--------------------------------------------------------------------------------
/SpatialBinaryConvolution.lua:
--------------------------------------------------------------------------------
  1 | -- Reference: http://arxiv.org/abs/1603.05279
  2 | -- We use floating point Matrix-Matrix multiplication as in SpatialConvolution.
  3 | -- Filters are made binary {-1, +1} using Sign.
  4 | -- Convolution output is scaled by L1-norm of the filters.
  5 | 
  6 | -- Inheriting nn/SpatialConvolution.
  7 | 
  8 | local SpatialBinaryConvolution, parent = torch.class(
  9 |    'nn.SpatialBinaryConvolution', 'nn.SpatialConvolution')
 10 | 
 11 | -- Builds the layer with the same arguments as nn.SpatialConvolution, removes
 12 | -- the bias through the parent's noBias and caches the element counts that
 13 | -- binarizeWeight divides the L1 norms by.
 14 | function SpatialBinaryConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
 15 |    parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
 16 |    parent.noBias(self)
 17 | 
 18 |    local kernelArea = self.kW * self.kH
 19 |    self.iwh = self.nInputPlane * kernelArea  -- divisor for self.alphas
 20 |    self.owh = self.nOutputPlane * kernelArea -- divisor for self.gradInputAlphas
 21 |    self.train = true
 22 | end
 18 | 
 19 | -- Training mode: updateOutput leaves the binarized weights in self.weight;
 20 | -- accGradParameters copies the floating point weights back afterwards.
 21 | function SpatialBinaryConvolution:training()
 22 |    self.train = true
 23 | end
 22 | 
 23 | -- Evaluation mode: updateOutput restores the floating point weights itself
 24 | -- right after the forward pass.
 25 | function SpatialBinaryConvolution:evaluate()
 26 |    self.train = false
 27 | end
 26 | 
 27 | -- Replaces self.weight by its sign pattern {-1, +1} and refreshes the scaling
 28 | -- vectors used by the forward and backward passes:
 29 | --   self.alphas          : L1 norm of each output filter divided by iwh
 30 | --   self.gradInputAlphas : L1 norm of the weights attached to each input
 31 | --                          plane divided by owh
 32 | -- The floating point weights are saved in self.tempWeight so that they can be
 33 | -- copied back later.
 34 | function SpatialBinaryConvolution:binarizeWeight()
 35 |    local weight = self.weight
 36 |    self.tempWeight = self.tempWeight or weight.new()
 37 |    self.alphas = self.alphas or weight.new()
 38 |    self.gradInputAlphas = self.gradInputAlphas or weight.new()
 39 | 
 40 |    -- Scale per output filter.
 41 |    self.alphas:resize(self.nOutputPlane)
 42 |    self.alphas:norm(weight:view(self.nOutputPlane, -1), 1, 2)
 43 |    self.alphas:div(self.iwh) -- 1/iwh
 44 | 
 45 |    -- Scale per input plane: swap the first two dimensions and copy the
 46 |    -- result into tempWeight so that it can be flattened with view().
 47 |    local swapped = weight:transpose(1, 2)
 48 |    self.tempWeight:resizeAs(swapped):copy(swapped)
 49 |    self.gradInputAlphas:resize(self.nInputPlane)
 50 |    self.gradInputAlphas:norm(self.tempWeight:view(self.nInputPlane, -1), 1, 2)
 51 |    self.gradInputAlphas:div(self.owh) -- 1/owh
 52 | 
 53 |    -- Back up the floating point weights before they are overwritten.
 54 |    self.tempWeight:resizeAs(weight):copy(weight)
 55 | 
 56 |    -- The mask is a CudaTensor for CUDA weights and a ByteTensor otherwise.
 57 |    if not self.wmask then
 58 |       local onGpu = torch.type(weight) == 'torch.CudaTensor'
 59 |       self.wmask = onGpu and torch.CudaTensor() or torch.ByteTensor()
 60 |    end
 61 | 
 62 |    -- Entries >= 0 become +1, then entries < 0 become -1.
 63 |    weight.ge(self.wmask, weight, 0)
 64 |    weight[self.wmask] = 1
 65 |    weight.lt(self.wmask, weight, 0)
 66 |    weight[self.wmask] = -1
 67 | end
 62 | 
 63 | -- Forward pass: binarize the weights, run the parent convolution and multiply
 64 | -- every output plane by its entry of self.alphas. A 4D output is handled as a
 65 | -- batch; any other output is handled as planes x height x width.
 66 | -- In evaluate mode the floating point weights are restored before returning.
 67 | function SpatialBinaryConvolution:updateOutput(input)
 68 |    self:binarizeWeight()
 69 | 
 70 |    local output = parent.updateOutput(self, input)
 71 |    self.output = output
 72 | 
 73 |    self._tempAlphasExpanded = self._tempAlphasExpanded or output.new()
 74 |    self._tempAlphasSamples = self._tempAlphasSamples or output.new()
 75 | 
 76 |    -- View alphas with singleton dimensions, then expand it to the output size.
 77 |    local nPlane = self.nOutputPlane
 78 |    if output:nDimension() == 4 then
 79 |       self._tempAlphas = self.alphas:view(1, nPlane, 1, 1)
 80 |       self._tempAlphasExpanded:expand(self._tempAlphas, output:size(1),
 81 |                                       nPlane, output:size(3), output:size(4))
 82 |    else
 83 |       self._tempAlphas = self.alphas:view(nPlane, 1, 1)
 84 |       self._tempAlphasExpanded:expand(self._tempAlphas, nPlane,
 85 |                                       output:size(2), output:size(3))
 86 |    end
 87 | 
 88 |    -- Copy the expanded view into a tensor of its own and scale the output.
 89 |    self._tempAlphasSamples:resizeAs(self._tempAlphasExpanded)
 90 |    self._tempAlphasSamples:copy(self._tempAlphasExpanded)
 91 |    output:cmul(self._tempAlphasSamples)
 92 | 
 93 |    -- No backward pass will restore the weights in evaluate mode.
 94 |    if not self.train then
 95 |       self.weight:copy(self.tempWeight)
 96 |    end
 97 | 
 98 |    return self.output
 99 | end
102 | 
103 | -- Backward pass with respect to the input: runs the parent implementation
104 | -- and multiplies every input plane of the result by its entry of
105 | -- self.gradInputAlphas (computed by binarizeWeight during the forward pass).
106 | -- A 4D gradInput is handled as a batch; any other gradInput is handled as
107 | -- planes x height x width.
108 | -- Returns self.gradInput.
109 | function SpatialBinaryConvolution:updateGradInput(input, gradOutput)
110 |    local gradInput = parent.updateGradInput(self, input, gradOutput)
111 |    self.gradInput = gradInput
112 | 
113 |    -- Scratch tensors are cached under their own field names so that they are
114 |    -- reused across calls instead of being reallocated every time.
115 |    self._tempGradAlphasExpanded = self._tempGradAlphasExpanded
116 |                                   or gradInput.new()
117 |    self._tempGradAlphasSamples = self._tempGradAlphasSamples
118 |                                  or gradInput.new()
119 | 
120 |    -- View the scales with singleton dimensions, then expand to gradInput's size.
121 |    local nPlane = self.nInputPlane
122 |    if gradInput:nDimension() == 4 then
123 |       self._tempGradAlphas = self.gradInputAlphas:view(1, nPlane, 1, 1)
124 |       self._tempGradAlphasExpanded:expand(self._tempGradAlphas,
125 |                                           gradInput:size(1), nPlane,
126 |                                           gradInput:size(3), gradInput:size(4))
127 |    else
128 |       self._tempGradAlphas = self.gradInputAlphas:view(nPlane, 1, 1)
129 |       self._tempGradAlphasExpanded:expand(self._tempGradAlphas, nPlane,
130 |                                           gradInput:size(2), gradInput:size(3))
131 |    end
132 | 
133 |    -- Copy the expanded view into a tensor of its own and scale gradInput.
134 |    self._tempGradAlphasSamples:resizeAs(self._tempGradAlphasExpanded)
135 |    self._tempGradAlphasSamples:copy(self._tempGradAlphasExpanded)
136 |    gradInput:cmul(self._tempGradAlphasSamples)
137 | 
138 |    return self.gradInput
139 | end
140 | 
141 | -- Accumulates the parameter gradients through the parent implementation and
142 | -- then copies the floating point weights saved in self.tempWeight back into
143 | -- self.weight, so that the following weight update acts on them rather than
144 | -- on the binarized values. Restoring once here, at the end of backward,
145 | -- avoids a separate copy after forward and after backward.
146 | function SpatialBinaryConvolution:accGradParameters(input, gradOutput, scale)
147 |    parent.accGradParameters(self, input, gradOutput, scale)
148 | 
149 |    local savedWeight = self.tempWeight
150 |    self.weight:copy(savedWeight)
151 | end
153 | 
154 | -- Casts the module to tensor type `type`.
155 | -- When a type is given, all lazily created buffers are dropped first so that
156 | -- they are rebuilt with the new type on the next forward/backward pass.
157 | -- When `type` is nil the buffers are left untouched.
158 | -- Returns whatever the parent's type() returns, so the call can be chained.
159 | function SpatialBinaryConvolution:type(type, tensorCache)
160 |    if type then
161 |       local buffers = {
162 |          'tempWeight', 'alphas', 'gradInputAlphas', 'wmask',
163 |          '_tempAlphas', '_tempAlphasExpanded', '_tempAlphasSamples',
164 |          '_tempGradAlphas', '_tempGradAlphasExpanded', '_tempGradAlphasSamples',
165 |       }
166 |       for _, name in ipairs(buffers) do
167 |          self[name] = nil
168 |       end
169 |    end
170 | 
171 |    return parent.type(self, type, tensorCache)
172 | end
170 | 
171 | -- Description string: the parent's description with a "Binary Convolution: " prefix.
172 | function SpatialBinaryConvolution:__tostring__()
173 |    local description = parent.__tostring__(self)
174 |    return "Binary Convolution: " .. description
175 | end
174 | 


--------------------------------------------------------------------------------
/tutorials/ladder_network/ladder_help_funcs.lua:
--------------------------------------------------------------------------------
  1 | require 'csvigo'
  2 | require 'string'
  3 | require 'xlua'
  4 | require 'lfs'
  5 | 
  6 | -- Trains `model` for one epoch, processing one mini-batch per optimizer step.
  7 | -- Every batch is scored by `criterions` against the target table
  8 | -- {labels, inputs}: the second criterion target is the input batch itself.
  9 | --
 10 | -- Arguments:
 11 | --   model, criterions : network and combined criterion (expected to be in
 12 | --                       CUDA already when useCuda is set)
 13 | --   parameters, gradParameters : parameter and gradient tensors of the model
 14 | --   trainData       : table with `data`, `labels` and a `size()` function
 15 | --   optimMethod     : optimizer, called as optimMethod(feval, parameters,
 16 | --                     optimState)
 17 | --   optimState      : optimizer state; its optional coefL1 / coefL2 fields
 18 | --                     add L1 / L2 penalties (a missing field counts as 0)
 19 | --   batchSize       : number of samples per mini-batch
 20 | --   epoch           : epoch number shown in the log line (default 1)
 21 | --   confusion       : confusion matrix; zeroed, then fed every batch
 22 | --   trainLogger     : optional logger receiving the training accuracy
 23 | --   useCuda         : stage batches in CudaTensors instead of FloatTensors
 24 | --   displayProgress : show an xlua progress bar (default false)
 25 | --   classifierIndx  : index of the classifier output in the model's output
 26 | --                     table (default 1)
 27 | --
 28 | -- Returns the sum of the per-batch criterion values divided by the number of
 29 | -- training samples. Raises an error for samples with more than 3 dimensions.
 30 | function model_train_multi_criterion(model, criterions, parameters,
 31 |                                      gradParameters, trainData,
 32 |                                      optimMethod, optimState, batchSize,
 33 |                                      epoch, confusion, trainLogger,
 34 |                                      useCuda, displayProgress, classifierIndx)
 35 | 
 36 |    model:training()
 37 |    confusion:zero()
 38 |    local displayProgress = displayProgress or false
 39 |    local classifierIndx = classifierIndx or 1
 40 | 
 41 |    -- epoch tracker
 42 |    local epoch = epoch or 1
 43 | 
 44 |    -- Regularization strengths; a missing coefficient disables that penalty.
 45 |    local coefL1 = optimState.coefL1 or 0
 46 |    local coefL2 = optimState.coefL2 or 0
 47 | 
 48 |    local totalLoss = 0
 49 |    local nSamples = trainData.size()
 50 | 
 51 |    -- shuffle at each epoch
 52 |    local shuffle = torch.randperm(nSamples)
 53 | 
 54 |    print("Doing epoch on training data:")
 55 |    print("Online epoch # " .. epoch .. " [batchSize = " .. batchSize .. "]")
 56 | 
 57 |    local time = sys.clock()
 58 | 
 59 |    -- Staging buffer for labels: 1D for scalar labels, 2D otherwise.
 60 |    local targets
 61 |    if trainData.labels:size():size() == 1 then
 62 |       targets = torch.Tensor(batchSize)
 63 |    else
 64 |       targets = torch.Tensor(batchSize, trainData.labels[1]:size()[1])
 65 |    end
 66 | 
 67 |    -- Staging buffer for samples; 1 to 3 sample dimensions are supported.
 68 |    local sampleSize = trainData.data[1]:size()
 69 |    local sizeLen = sampleSize:size()
 70 |    local inputs
 71 |    if sizeLen == 1 then
 72 |       inputs = torch.Tensor(batchSize, sampleSize[1])
 73 |    elseif sizeLen == 2 then
 74 |       inputs = torch.Tensor(batchSize, sampleSize[1], sampleSize[2])
 75 |    elseif sizeLen == 3 then
 76 |       inputs = torch.Tensor(batchSize, sampleSize[1], sampleSize[2],
 77 |                                        sampleSize[3])
 78 |    else
 79 |       -- Fail here instead of crashing later on a nil buffer.
 80 |       error("Invalid Sample Size")
 81 |    end
 82 | 
 83 |    -- Buffers handed to the model and the criterion.
 84 |    local trainInputs = useCuda and torch.CudaTensor() or torch.FloatTensor()
 85 |    local trainTargets = useCuda and torch.CudaTensor() or torch.FloatTensor()
 86 | 
 87 |    local t = 1
 88 |    while t <= nSamples do
 89 |       if displayProgress then xlua.progress(t, nSamples) end
 90 | 
 91 |       -- create mini batch (the last one may hold fewer than batchSize samples)
 92 |       local last = math.min(t + batchSize - 1, nSamples)
 93 |       local count = last - t + 1
 94 |       for i = t, last do
 95 |          local slot = i - t + 1
 96 |          inputs[slot] = trainData.data[shuffle[i]]
 97 |          targets[slot] = trainData.labels[shuffle[i]]
 98 |       end
 99 | 
100 |       local inputs_ = inputs[{{1, count}}]
101 |       trainInputs:resize(inputs_:size()):copy(inputs_)
102 | 
103 |       local targets_ = targets[{{1, count}}]
104 |       trainTargets:resize(targets_:size()):copy(targets_)
105 | 
106 |       -- one target per criterion: labels first, then the inputs themselves
107 |       local criterionTargets = {trainTargets, trainInputs}
108 | 
109 |       t = t + batchSize
110 | 
111 |       -- create closure to evaluate F(X) and df/dX
112 |       local feval = function(x)
113 |          -- Get new parameters
114 |          if x ~= parameters then
115 |             parameters:copy(x)
116 |          end
117 | 
118 |          -- reset gradients
119 |          gradParameters:zero()
120 | 
121 |          -- evaluate function for complete mini batch
122 |          local outputs = model:forward(trainInputs)
123 |          local f = criterions:forward(outputs, criterionTargets)
124 |          totalLoss = totalLoss + f
125 | 
126 |          local df_do = criterions:backward(outputs, criterionTargets)
127 |          model:backward(trainInputs, df_do)
128 | 
129 |          -- in CUDA mode the confusion matrix is fed float copies
130 |          local conOutputs, conTargets = outputs[classifierIndx], trainTargets
131 |          if useCuda then
132 |             conOutputs, conTargets = conOutputs:float(), conTargets:float()
133 |          end
134 |          confusion:batchAdd(conOutputs, conTargets)
135 | 
136 |          -- Normalize gradients and loss by the number of samples in the batch
137 |          gradParameters:div(count)
138 |          f = f / count
139 | 
140 |          -- L1/L2 Regularization
141 |          if coefL1 ~= 0 or coefL2 ~= 0 then
142 |             -- Update loss with regularizer
143 |             f = f + coefL1 * torch.norm(parameters, 1)
144 |             f = f + coefL2 * torch.norm(parameters, 2)^2/2
145 | 
146 |             -- Gradients: coefL1 * sign(w) + coefL2 * w
147 |             gradParameters:add(coefL1, torch.sign(parameters))
148 |             gradParameters:add(coefL2, parameters)
149 |          end
150 | 
151 |          -- return f and df/dX
152 |          return f, gradParameters
153 |       end
154 | 
155 |       -- optimize on current mini batch # Using SGD/adam
156 |       optimMethod(feval, parameters, optimState)
157 |    end
158 | 
159 |    -- time taken
160 |    time = sys.clock() - time
161 |    time = time/nSamples
162 |    print("\n==> time to learn 1 sample = " .. (time*1000) .. "ms")
163 | 
164 |    -- Total loss
165 |    totalLoss = totalLoss/nSamples
166 | 
167 |    -- update logger
168 |    if trainLogger ~= nil then
169 |       trainLogger:add{["% mean class accuracy (train set)"] =
170 |                       confusion.totalValid * 100}
171 |    end
172 |    return totalLoss
173 | end
162 | 
163 | -- Evaluates `model` on `testData` and fills `confusion` with the predictions
164 | -- found at index `classifierIndx` (default 1) of the model's output table.
165 | -- The criterion targets are {labels, inputs}. With useCuda the data is
166 | -- processed in chunks of 64 samples copied into CudaTensors; otherwise the
167 | -- whole set goes through one forward pass.
168 | -- Returns the accumulated criterion value divided by the number of samples.
169 | function model_test_multi_criterion(model, criterions, testData, confusion, 
170 |                                     useCuda, classifierIndx)
171 |    local time = sys.clock()
172 |    model:evaluate()
173 |    confusion:zero()
174 |    local classifierIndx = classifierIndx or 1
175 |    local totalLoss = 0
176 | 
177 |    if not useCuda then
178 |       -- CPU: a single forward pass over the complete test set
179 |       local data, labels = testData.data, testData.labels
180 |       local outputs = model:forward(data)
181 |       totalLoss = criterions:forward(outputs, {labels, data})
182 |       confusion:batchAdd(outputs[classifierIndx], labels)
183 |    else
184 |       -- GPU: reuse two CUDA buffers for successive chunks
185 |       local batchSize = 64
186 |       local inputs = torch.CudaTensor()
187 |       local gpu_targets = torch.CudaTensor()
188 |       local first = 1
189 |       while first <= testData.size() do
190 |          local last = math.min(first + batchSize - 1, testData.size())
191 | 
192 |          -- Copy input and targets to cuda
193 |          local chunk = testData.data[{{first, last}}]
194 |          inputs:resize(chunk:size()):copy(chunk)
195 |          local cpu_targets = testData.labels[{{first, last}}]
196 |          gpu_targets:resize(cpu_targets:size()):copy(cpu_targets)
197 | 
198 |          local gpu_preds = model:forward(inputs)
199 |          local chunkLoss = criterions:forward(gpu_preds, {gpu_targets, inputs})
200 |          totalLoss = totalLoss + chunkLoss
201 | 
202 |          -- the confusion matrix is fed float predictions and CPU targets
203 |          confusion:batchAdd(gpu_preds[classifierIndx]:float(), cpu_targets)
204 |          first = first + batchSize
205 |       end
206 |    end
207 | 
208 |    -- time taken
209 |    local elapsed = sys.clock() - time
210 |    local perSample = elapsed/testData.size()
211 |    print("\n==> time to test 1 sample = " .. (perSample*1000) .. "ms")
212 | 
213 |    -- Total loss
214 |    return totalLoss/testData.size()
215 | end
221 | 


--------------------------------------------------------------------------------
/Kmeans.lua:
--------------------------------------------------------------------------------
  1 | -- Online (Hard) Kmeans layer.
  2 | local Kmeans, parent = torch.class('nn.Kmeans', 'nn.Module')
  3 | 
  4 | -- k     : number of clusters (must be positive)
  5 | -- dim   : dimensionality of the samples and of the centers (must be positive)
  6 | -- scale : optional step size of the online update; when set it takes
  7 | --         precedence over the scale passed to accGradParameters
  8 | function Kmeans:__init(k, dim, scale)
  9 |    parent.__init(self)
 10 |    self.k, self.dim = k, dim
 11 |    self.scale = scale
 12 | 
 13 |    assert(k > 0, "Clusters cannot be 0 or negative.")
 14 |    assert(dim > 0, "Dimensionality cannot be 0 or negative.")
 15 | 
 16 |    -- The cluster centers are stored as the module's weight, one per row.
 17 |    self.weight = torch.Tensor(k, dim)
 18 |    self.gradWeight = torch.Tensor(k, dim)
 19 | 
 20 |    -- within cluster error of the last forward
 21 |    self.loss = 0
 22 | 
 23 |    self.clusterSampleCount = torch.Tensor(k)
 24 | 
 25 |    self:reset()
 26 | end
 25 | 
 26 | -- Re-draws every center coordinate uniformly from [-stdev, stdev]
 27 | -- (stdev defaults to 1).
 28 | function Kmeans:reset(stdev)
 29 |    local bound = stdev or 1
 30 |    self.weight:uniform(-bound, bound)
 31 | end
 31 | 
 32 | -- Initialize the Kmeans centers with k distinct samples picked at random.
 33 | -- input : 2D tensor (samples x dim) with at least k rows; its second
 34 | --         dimension must equal self.dim.
 35 | -- Raises an assertion error when one of these conditions does not hold.
 36 | function Kmeans:initRandom(input)
 37 |    local inputDim = input:nDimension()
 38 |    assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.")
 39 | 
 40 |    local noOfSamples = input:size(1)
 41 |    local dim = input:size(2)
 42 |    assert(dim == self.dim, "Dimensionality of input and weight don't match.")
 43 |    assert(noOfSamples >= self.k, "Need atleast k samples for initialization.")
 44 | 
 45 |    -- The first k entries of a random permutation are k different row
 46 |    -- indices. Independent draws could pick the same row twice and start two
 47 |    -- clusters on the same point.
 48 |    local indices = torch.randperm(noOfSamples)
 49 | 
 50 |    for i=1, self.k do
 51 |       self.weight[i]:copy(input[indices[i]])
 52 |    end
 53 | end
 49 | 
 50 | -- Initialize the centers in the manner of Kmeans++.
 51 | -- The first center is a sample drawn at random. Every further center is
 52 | -- drawn at random among the `pcount` samples that are currently farthest
 53 | -- (squared Euclidean distance) from their nearest already chosen center,
 54 | -- where pcount = ceil((1-p) * number of samples), at least 1.
 55 | -- input : 2D tensor (samples x dim)
 56 | -- p     : value in [0, 1]; defaults to self.p, then to 0.95; stored in self.p
 57 | function Kmeans:initKmeansPlus(input, p)
 58 |    self.p = p or self.p or 0.95
 59 |    assert(self.p>=0 and self.p<=1, "P value should be between 0-1.")
 60 |    assert(input:nDimension() == 2, "Incorrect input dimensionality. Expecting 2D.")
 61 | 
 62 |    local noOfSamples = input:size(1)
 63 |    local pcount = math.max(1, math.ceil((1-self.p)*noOfSamples))
 64 | 
 65 |    -- first center
 66 |    self.weight[1]:copy(input[torch.random(noOfSamples)])
 67 | 
 68 |    local rowView = self.weight.new()
 69 |    local offset = self.weight.new()
 70 |    local sqDist = self.weight.new()
 71 | 
 72 |    -- distance of every sample to its nearest center chosen so far
 73 |    local distances = self.weight.new()
 74 |    distances:resize(noOfSamples):fill(math.huge)
 75 | 
 76 |    for nextK = 2, self.k do
 77 |       -- only the newest center can lower the running minimum
 78 |       local newest = self.weight[{{nextK-1, nextK-1}}]
 79 |       for i = 1, noOfSamples do
 80 |          rowView:expand(input[{{i}}], 1, self.dim)
 81 |          offset:resize(rowView:size()):copy(rowView)
 82 | 
 83 |          -- Squared Euclidean distance
 84 |          offset:add(-1, newest)
 85 |          sqDist:norm(offset, 2, 2)
 86 |          sqDist:pow(2)
 87 |          distances[i] = math.min(sqDist:min(), distances[i])
 88 |       end
 89 | 
 90 |       -- sort descending and pick one of the pcount farthest samples
 91 |       local _, farthestFirst = distances:sort(true)
 92 |       local pick = farthestFirst[torch.random(pcount)]
 93 |       self.weight[nextK]:copy(input[pick])
 94 |    end
 95 | end
 94 | 
 95 | -- Kmeans updateOutput (forward)
 96 | -- input : contiguous 2D tensor (batchSize x dim) with dim == self.dim
 97 | -- Sets self.output to a tensor of size batchSize holding, for every sample,
 98 | -- the index of the center with the smallest squared Euclidean distance, and
 99 | -- self.loss to the sum of those smallest squared distances.
100 | -- The indices are also kept in self._minIndx for accGradParameters.
101 | -- Returns self.output.
102 | function Kmeans:updateOutput(input)
103 |    local inputDim = input:nDimension()
104 |    assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.")
105 | 
106 |    local batchSize = input:size(1)
107 |    local dim = input:size(2)
108 |    assert(dim == self.dim, "Dimensionality of input and weight don't match.")
109 | 
110 |    assert(input:isContiguous(), "Input is not contiguous.")
111 | 
112 |    -- a sample copied k times to compute distance between sample and weight
113 |    self._expandedSamples = self._expandedSamples or self.weight.new()
114 | 
115 |    -- distance between a sample and weight
116 |    self._clusterDistances = self._clusterDistances or self.weight.new()
117 | 
118 |    self._temp = self._temp or input.new()
119 |    self._tempExpanded = self._tempExpanded or input.new()
120 | 
121 |    -- Expanding inputs to k x batchSize x dim
122 |    self._temp:view(input, 1, batchSize, self.dim)
123 |    self._tempExpanded:expand(self._temp, self.k, batchSize, self.dim)
124 |    self._expandedSamples:resize(self.k, batchSize, self.dim)
125 |                         :copy(self._tempExpanded)
126 | 
127 |    -- Expanding weights to k x batchSize x dim
128 |    self._tempWeight = self._tempWeight or self.weight.new()
129 |    self._tempWeightExp = self._tempWeightExp or self.weight.new()
130 |    -- cached under its own name so the buffer is reused between calls
131 |    self._expandedWeight = self._expandedWeight or self.weight.new()
132 |    self._tempWeight:view(self.weight, self.k, 1, self.dim)
133 |    self._tempWeightExp:expand(self._tempWeight, self._expandedSamples:size())
134 |    self._expandedWeight:resize(self.k, batchSize, self.dim)
135 |                        :copy(self._tempWeightExp)
136 | 
137 |    -- x-c
138 |    self._expandedSamples:add(-1, self._expandedWeight)
139 |    -- Squared Euclidean distance
140 |    self._clusterDistances:norm(self._expandedSamples, 2, 3)
141 |    self._clusterDistances:pow(2)
142 |    self._clusterDistances:resize(self.k, batchSize)
143 | 
144 |    -- nearest center of every sample: minimum over the cluster dimension
145 |    self._minScore = self._minScore or self.weight.new()
146 |    self._minIndx = self._minIndx or torch.LongTensor()
147 |    self._minScore:min(self._minIndx, self._clusterDistances, 1)
148 |    self._minIndx:resize(batchSize)
149 | 
150 |    self.output:resize(batchSize):copy(self._minIndx)
151 |    self.loss = self._minScore:sum()
152 | 
153 |    return self.output
154 | end
147 | 
148 | -- Kmeans carries its own criterion, so nothing flows back to the input:
149 | -- gradInput is a zero tensor of the input's size.
150 | function Kmeans:updateGradInput(input, gradOuput)
151 |    local gradInput = self.gradInput
152 |    gradInput:resize(input:size())
153 |    gradInput:zero()
154 | 
155 |    return gradInput
156 | end
154 | 
155 | -- The kmeans update rule is c -> c + scale * 1/n * sum_i (x_i - c), where n
156 | -- is the number of samples x_i assigned to center c. The negative of that
157 | -- step is accumulated into gradWeight so that gradient descent applies it.
158 | -- self.scale, when set, takes precedence over the `scale` argument.
159 | -- Uses the assignments stored in self._minIndx by updateOutput.
160 | function Kmeans:accGradParameters(input, gradOutput, scale)
161 |    local scale = self.scale or scale or 1
162 |    assert(scale > 0 , " Scale has to be positive.")
163 | 
164 |    local nearest = self._minIndx
165 |    local batchSize = input:size(1)
166 | 
167 |    -- Count how many samples of this batch fall into each cluster
168 |    self._cscAdder = self._cscAdder or self.weight.new()
169 |    self._cscAdder:resize(batchSize):fill(1)
170 |    self.clusterSampleCount:zero()
171 |    self.clusterSampleCount:indexAdd(1, nearest, self._cscAdder)
172 | 
173 |    -- Per sample: -scale * (x - c), c being the sample's nearest center
174 |    self._gradWeight = self._gradWeight or self.gradWeight.new()
175 |    local perSample = self._gradWeight
176 |    perSample:index(self.weight, 1, nearest)
177 |    perSample:mul(-1)
178 |    perSample:add(input)
179 |    perSample:mul(-scale)
180 | 
181 |    -- Per cluster: sum of the per-sample terms
182 |    self._gradWeight2 = self._gradWeight2 or self.gradWeight.new()
183 |    local perCluster = self._gradWeight2
184 |    perCluster:resizeAs(self.gradWeight):zero()
185 |    perCluster:indexAdd(1, nearest, perSample)
186 | 
187 |    -- Divide every row by its sample count
188 |    self._ccounts = self._ccounts or self.clusterSampleCount.new()
189 |    self._ccounts:resize(self.k):copy(self.clusterSampleCount)
190 |    self._ccounts:add(0.0000001) -- prevent division by zero errors
191 |    local divisor = self._ccounts:view(self.k,1):expandAs(self.gradWeight)
192 |    perCluster:cdiv(divisor)
193 | 
194 |    self.gradWeight:add(perCluster)
195 | end
189 | 
190 | -- Casts the module to `type`. When a type is given, the scratch tensors
191 | -- listed below are dropped beforehand (prevents premature memory
192 | -- allocations); they are created again on demand.
193 | -- Returns the result of the parent's type().
194 | function Kmeans:type(type, tensorCache)
195 |    if type then
196 |       local scratch = {
197 |          '_expandedSamples', '_clusterDistances', '_temp', '_tempExpanded',
198 |          '_tempWeight', '_tempWeightExp', '_expandedWeight',
199 |          '_minScore', '_minIndx', '_cscAdder',
200 |       }
201 |       for _, name in ipairs(scratch) do
202 |          self[name] = nil
203 |       end
204 |    end
205 |    return parent.type(self, type, tensorCache)
206 | end
206 | 


--------------------------------------------------------------------------------
/SpatialGlimpse.lua:
--------------------------------------------------------------------------------
  1 | ------------------------------------------------------------------------
  2 | --[[ SpatialGlimpse ]]--
  3 | -- Ref A.: http://papers.nips.cc/paper/5542-recurrent-models-of-visual-attention.pdf
  4 | -- a glimpse is the concatenation of down-scaled cropped images of
  5 | -- increasing scale around a given location in a given image.
  6 | -- input is a pair of Tensors: {image, location}
  7 | -- locations are x,y coordinates of the center of cropped patches.
  8 | -- Coordinates are between -1,-1 (top-left) and 1,1 (bottom right)
  9 | -- output is a batch of glimpses taken in image at location (x,y)
 10 | -- glimpse size is {height, width}, or width only if square-shaped
 11 | -- depth is number of patches to crop per glimpse (one patch per scale)
 12 | -- Each successive patch is scale x size of the previous patch
 13 | ------------------------------------------------------------------------
 14 | local SpatialGlimpse, parent = torch.class("nn.SpatialGlimpse", "nn.Module")
 15 | 
 16 | -- size  : glimpse size, a table {height, width} or one number for a square
 17 | -- depth : number of patches per glimpse (default 3)
 18 | -- scale : size ratio between successive patches (default 2)
 19 | function SpatialGlimpse:__init(size, depth, scale)
 20 |    require 'nnx'
 21 |    if torch.type(size) ~= 'table' then
 22 |       self.height, self.width = size, size
 23 |    else
 24 |       self.height, self.width = size[1], size[2]
 25 |    end
 26 |    self.depth = depth or 3
 27 |    self.scale = scale or 2
 28 | 
 29 |    for _, field in ipairs{'width', 'height', 'depth', 'scale'} do
 30 |       assert(torch.type(self[field]) == 'number')
 31 |    end
 32 |    parent.__init(self)
 33 |    self.gradInput = {torch.Tensor(), torch.Tensor()}
 34 | 
 35 |    -- Down-scaling module: average pooling for scale 2, otherwise
 36 |    -- re-sampling to the glimpse size.
 37 |    if self.scale == 2 then
 38 |       self.module = nn.SpatialAveragePooling(2,2,2,2)
 39 |    else
 40 |       self.module = nn.SpatialReSampling{oheight=self.height,owidth=self.width}
 41 |    end
 42 |    self.modules = {self.module}
 43 | end
 41 | 
 42 | -- A bandwidth limited sensor which focuses on a location.
 43 | -- inputTable is {image, location}; the two entries of a location are read as
 44 | -- (y, x) in [-1, 1] and give the center of the glimpse.
 45 | -- For every sample and every depth the image is zero padded, a patch is
 46 | -- cropped around the location and, for depth > 1, scaled down to
 47 | -- height x width by self.module.
 48 | -- The output has size batch x (depth * channels) x height x width, passed
 49 | -- through fromBatch before it is returned.
 50 | function SpatialGlimpse:updateOutput(inputTable)
 51 |    assert(torch.type(inputTable) == 'table')
 52 |    assert(#inputTable >= 2)
 53 |    local input, location = unpack(inputTable)
 54 |    input, location = self:toBatch(input, 3), self:toBatch(location, 1)
 55 |    assert(input:dim() == 4 and location:dim() == 2)
 56 | 
 57 |    local nSample, nChannel = input:size(1), input:size(2)
 58 |    local imgHeight, imgWidth = input:size(3), input:size(4)
 59 | 
 60 |    self.output:resize(nSample, self.depth, nChannel, self.height, self.width)
 61 | 
 62 |    self._crop = self._crop or self.output.new()
 63 |    self._pad = self._pad or input.new()
 64 | 
 65 |    local usesPooling = torch.type(self.module) == 'nn.SpatialAveragePooling'
 66 | 
 67 |    for sampleIdx = 1, nSample do
 68 |       local glimpses = self.output[sampleIdx]
 69 |       local image = input[sampleIdx]
 70 |       local yx = location[sampleIdx]
 71 |       -- map (-1,-1) top left .. (1,1) bottom right onto (0,0) .. (1,1)
 72 |       local relY = (yx:select(1,1)+1)/2
 73 |       local relX = (yx:select(1,2)+1)/2
 74 | 
 75 |       -- for each depth of glimpse : pad, crop, downscale
 76 |       local patchWidth = math.floor(self.width)
 77 |       local patchHeight = math.floor(self.height)
 78 |       for depth = 1, self.depth do
 79 |          local dst = glimpses[depth]
 80 |          if depth > 1 then
 81 |             patchWidth = math.floor(patchWidth*self.scale)
 82 |             patchHeight = math.floor(patchHeight*self.scale)
 83 |          end
 84 | 
 85 |          -- add zero padding (glimpse could be partially out of bounds)
 86 |          local padWidth = math.floor((patchWidth-1)/2)
 87 |          local padHeight = math.floor((patchHeight-1)/2)
 88 |          self._pad:resize(nChannel, imgHeight+padHeight*2, imgWidth+padWidth*2):zero()
 89 |          self._pad:narrow(2, padHeight+1, imgHeight)
 90 |                   :narrow(3, padWidth+1, imgWidth)
 91 |                   :copy(image)
 92 | 
 93 |          -- top-left corner of the patch, clamped to the padded image
 94 |          local maxTop = self._pad:size(2)-patchHeight
 95 |          local maxLeft = self._pad:size(3)-patchWidth
 96 |          local top = math.floor(math.min(maxTop, math.max(0, relY*maxTop)))
 97 |          local left = math.floor(math.min(maxLeft, math.max(0, relX*maxLeft)))
 98 |          local window = self._pad:narrow(2, top+1, patchHeight)
 99 |                                  :narrow(3, left+1, patchWidth)
100 | 
101 |          if depth == 1 then
102 |             -- the first patch already has the glimpse size
103 |             dst:copy(window)
104 |          else
105 |             self._crop:resize(nChannel, patchHeight, patchWidth)
106 |             self._crop:copy(window)
107 | 
108 |             if usesPooling then
109 |                -- pool with a kernel and stride equal to the size ratio
110 |                local poolWidth = patchWidth/self.width
111 |                assert(poolWidth % 2 == 0)
112 |                local poolHeight = patchHeight/self.height
113 |                assert(poolHeight % 2 == 0)
114 |                self.module.kW, self.module.kH = poolWidth, poolHeight
115 |                self.module.dW, self.module.dH = poolWidth, poolHeight
116 |             end
117 |             dst:copy(self.module:updateOutput(self._crop))
118 |          end
119 |       end
120 |    end
121 | 
122 |    -- merge the depth and channel dimensions
123 |    self.output:resize(nSample, self.depth*nChannel, self.height, self.width)
124 |    self.output = self:fromBatch(self.output, 1)
125 |    return self.output
126 | end
111 | 
112 | -- Backward pass of the glimpse sensor.
113 | -- For every sample and depth the gradient of the patch is written into a
114 | -- zeroed padded buffer at the position the patch was cropped from (scaled up
115 | -- through self.module for depth > 1); the non-padded region of the buffer is
116 | -- then added to the image gradient. The location gradient is all zeros.
117 | -- Returns self.gradInput = {gradient of image, gradient of location}.
118 | function SpatialGlimpse:updateGradInput(inputTable, gradOutput)
119 |    local input, location = unpack(inputTable)
120 |    if #self.gradInput ~= 2 then
121 |       self.gradInput = {input.new(), input.new()}
122 |    end
123 |    local gradInput, gradLocation = unpack(self.gradInput)
124 |    input, location = self:toBatch(input, 3), self:toBatch(location, 1)
125 |    gradOutput = self:toBatch(gradOutput, 3)
126 | 
127 |    local nSample, nChannel = input:size(1), input:size(2)
128 |    local imgHeight, imgWidth = input:size(3), input:size(4)
129 | 
130 |    gradInput:resizeAs(input):zero()
131 |    gradLocation:resizeAs(location):zero() -- no backprop through location
132 | 
133 |    -- split the merged depth/channel dimension again
134 |    gradOutput = gradOutput:view(nSample, self.depth, nChannel, self.height, self.width)
135 | 
136 |    local usesPooling = torch.type(self.module) == 'nn.SpatialAveragePooling'
137 | 
138 |    for sampleIdx = 1, nSample do
139 |       local gradGlimpses = gradOutput[sampleIdx]
140 |       local gradImage = gradInput[sampleIdx]
141 |       local yx = location[sampleIdx] -- height, width
142 |       -- map (-1,-1) top left .. (1,1) bottom right onto (0,0) .. (1,1)
143 |       local relY = (yx:select(1,1)+1)/2
144 |       local relX = (yx:select(1,2)+1)/2
145 | 
146 |       -- for each depth of glimpse : pad, crop, downscale
147 |       local patchWidth = math.floor(self.width)
148 |       local patchHeight = math.floor(self.height)
149 |       for depth = 1, self.depth do
150 |          local src = gradGlimpses[depth]
151 |          if depth > 1 then
152 |             patchWidth = math.floor(patchWidth*self.scale)
153 |             patchHeight = math.floor(patchHeight*self.scale)
154 |          end
155 | 
156 |          -- add zero padding (glimpse could be partially out of bounds)
157 |          local padWidth = math.floor((patchWidth-1)/2)
158 |          local padHeight = math.floor((patchHeight-1)/2)
159 |          self._pad:resize(nChannel, imgHeight+padHeight*2, imgWidth+padWidth*2):zero()
160 | 
161 |          -- region of the padded buffer the patch was cropped from
162 |          local maxTop = self._pad:size(2)-patchHeight
163 |          local maxLeft = self._pad:size(3)-patchWidth
164 |          local top = math.floor(math.min(maxTop, math.max(0, relY*maxTop)))
165 |          local left = math.floor(math.min(maxLeft, math.max(0, relX*maxLeft)))
166 |          local window = self._pad:narrow(2, top+1, patchHeight)
167 |                                  :narrow(3, left+1, patchWidth)
168 | 
169 |          -- upscale glimpse for different depths
170 |          if depth == 1 then
171 |             window:copy(src)
172 |          else
173 |             self._crop:resize(nChannel, patchHeight, patchWidth)
174 | 
175 |             if usesPooling then
176 |                -- same kernel and stride as in the forward pass
177 |                local poolWidth = patchWidth/self.width
178 |                assert(poolWidth % 2 == 0)
179 |                local poolHeight = patchHeight/self.height
180 |                assert(poolHeight % 2 == 0)
181 |                self.module.kW, self.module.kH = poolWidth, poolHeight
182 |                self.module.dW, self.module.dH = poolWidth, poolHeight
183 |             end
184 | 
185 |             window:copy(self.module:updateGradInput(self._crop, src))
186 |          end
187 | 
188 |          -- copy into gradInput tensor (excluding padding)
189 |          gradImage:add(self._pad:narrow(2, padHeight+1, imgHeight)
190 |                                 :narrow(3, padWidth+1, imgWidth))
191 |       end
192 |    end
193 | 
194 |    self.gradInput[1] = self:fromBatch(gradInput, 1)
195 |    self.gradInput[2] = self:fromBatch(gradLocation, 1)
196 | 
197 |    return self.gradInput
198 | end
184 | 


--------------------------------------------------------------------------------
/Inception.lua:
--------------------------------------------------------------------------------
  1 | ------------------------------------------------------------------------
  2 | -- [[ Inception ]]--
  3 | -- Uses n+2 parallel "columns". The original paper uses 2+2 where
  4 | -- the first two are (but there could be more than two):
  5 | -- 1x1 conv (reduce) -> relu -> 5x5 conv -> relu
  6 | -- 1x1 conv (reduce) -> relu -> 3x3 conv -> relu
  7 | -- and where the other two are :
  8 | -- 3x3 maxpool -> 1x1 conv (reduce/project) -> relu
  9 | -- 1x1 conv (reduce) -> relu.
 10 | -- This Model allows the first group of columns to be of any
 11 | -- number while the last group consists of exactly two columns.
 12 | -- The 1x1 conv are used to reduce the number of input channels
 13 | -- (or filters) such that the capacity of the network doesn't
 14 | -- explode. We refer to these here as "reduce". Since each
 15 | -- column seems to have one and only one reduce, their initial
 16 | -- configuration options are specified in lists of n+2 elements.
 17 | ------------------------------------------------------------------------
 18 | local Inception, parent = torch.class("nn.Inception", "nn.Decorator") -- nn.Inception is declared with nn.Decorator as its parent class
 19 | 
 20 | function Inception:__init(config)
 21 |    -- Builds one nn.Sequential per column, gathers them in an nn.DepthConcat
 22 |    -- and passes it to the parent constructor. A warning is printed when the
 23 |    -- columns would not yield the same spatial output size for a sample input.
 24 |    --[[ Required Arguments ]]--
 25 |    -- Number of input channels or colors
 26 |    self.inputSize = config.inputSize
 27 |    -- Number of filters in the non-1x1 convolution kernel sizes, e.g. {32,48}
 28 |    self.outputSize = config.outputSize
 29 |    -- Number of filters in the 1x1 convolutions (reduction)
 30 |    -- used in each column, e.g. {48,64,32,32}. The last 2 are
 31 |    -- used respectively for the max pooling (projection) column
 32 |    -- (the last column in the paper) and the column that has
 33 |    -- nothing but a 1x1 conv (the first column in the paper).
 34 |    -- This table should have two elements more than the outputSize
 35 |    self.reduceSize = config.reduceSize
 36 | 
 37 |    --[[ Optional Arguments ]]--
 38 |    -- The strides of the 1x1 (reduction) convolutions. Defaults to {1,1,...}
 39 |    self.reduceStride = config.reduceStride or {}
 40 |    -- A transfer function like nn.Tanh, nn.Sigmoid, nn.ReLU, nn.Identity, etc.
 41 |    -- It is used after each reduction (1x1 convolution) and convolution
 42 |    self.transfer = config.transfer or nn.ReLU()
 43 |    -- batch normalization can be awesome
 44 |    self.batchNorm = config.batchNorm
 45 |    -- Adding padding to the input of the convolutions such that
 46 |    -- input width and height are same as that of output.
 47 |    self.padding = true
 48 |    if config.padding ~= nil then
 49 |       self.padding = config.padding
 50 |    end
 51 |    -- The size (height=width) of the non-1x1 convolution kernels.
 52 |    self.kernelSize = config.kernelSize or {5,3}
 53 |    -- The stride (height=width) of the convolution (1 where an entry is missing).
 54 |    self.kernelStride = config.kernelStride or {1,1}
 55 |    -- The size (height=width) of the spatial max pooling used
 56 |    -- in the next-to-last column.
 57 |    self.poolSize = config.poolSize or 3
 58 |    -- The stride (height=width) of the spatial max pooling.
 59 |    self.poolStride = config.poolStride or 1
 60 |    -- The pooling layer.
 61 |    self.pool = config.pool or nn.SpatialMaxPooling(self.poolSize, self.poolSize, self.poolStride, self.poolStride)
 62 | 
 63 |    -- Output size of a conv/pool along one axis (floor mode); only used
 64 |    -- by the consistency checks below.
 65 |    local function outSize(size, kernel, stride, pad)
 66 |       return math.floor((size + 2*pad - kernel)/stride + 1)
 67 |    end
 68 | 
 69 |    -- Sample input size used to check that all columns produce the same
 70 |    -- spatial output size (a mismatch only prints a warning).
 71 |    local iWidth, iHeight = 100, 200
 72 |    local oWidth, oHeight
 73 | 
 74 |    -- [[ Module Construction ]]--
 75 |    local depthConcat = nn.DepthConcat(2) -- concat on 'c' dimension
 76 |    -- 1x1 conv (reduce) -> nxn conv, one column per entry of kernelSize
 77 |    for i=1,#self.kernelSize do
 78 |       local mlp = nn.Sequential()
 79 |       -- 1x1 conv
 80 |       local reduceStride = self.reduceStride[i] or 1
 81 |       mlp:add(nn.SpatialConvolution(
 82 |          self.inputSize, self.reduceSize[i], 1, 1, reduceStride, reduceStride
 83 |       ))
 84 |       if self.batchNorm then
 85 |          mlp:add(nn.SpatialBatchNormalization(self.reduceSize[i]))
 86 |       end
 87 |       mlp:add(self.transfer:clone())
 88 | 
 89 |       -- nxn conv
 90 |       local pad = self.padding and math.floor(self.kernelSize[i]/2) or 0
 91 |       -- kernelStride may be shorter than kernelSize (its default is {1,1})
 92 |       local stride = self.kernelStride[i] or 1
 93 |       mlp:add(nn.SpatialConvolution(
 94 |          self.reduceSize[i], self.outputSize[i],
 95 |          self.kernelSize[i], self.kernelSize[i],
 96 |          stride, stride, pad
 97 |       ))
 98 |       if self.batchNorm then
 99 |          mlp:add(nn.SpatialBatchNormalization(self.outputSize[i]))
100 |       end
101 |       mlp:add(self.transfer:clone())
102 |       depthConcat:add(mlp)
103 | 
104 |       -- Check the output sizes: the (possibly strided) 1x1 reduce comes first.
105 |       local oWidth_i = outSize(outSize(iWidth, 1, reduceStride, 0), self.kernelSize[i], stride, pad)
106 |       local oHeight_i = outSize(outSize(iHeight, 1, reduceStride, 0), self.kernelSize[i], stride, pad)
107 |       if oWidth == nil then
108 |          oWidth = oWidth_i
109 |          oHeight = oHeight_i
110 |       elseif oWidth ~= oWidth_i or oHeight ~= oHeight_i then
111 |          print("dpnn.Inception: Warning: Inconsistent output sizes.")
112 |       end
113 |    end
114 | 
115 |    -- pool -> 1x1 conv
116 |    local mlp = nn.Sequential()
117 |    mlp:add(self.pool)
118 |    -- not sure if transfer should go here? mlp:add(transfer:clone())
119 |    local i = #(self.kernelSize) + 1
120 |    local poolReduceStride = 1 -- stays 1 when this column has no reduce
121 |    if self.reduceSize[i] then
122 |       poolReduceStride = self.reduceStride[i] or 1
123 |       mlp:add(nn.SpatialConvolution(
124 |          self.inputSize, self.reduceSize[i], 1, 1, poolReduceStride, poolReduceStride
125 |       ))
126 |       if self.batchNorm then
127 |          mlp:add(nn.SpatialBatchNormalization(self.reduceSize[i]))
128 |       end
129 |       mlp:add(self.transfer:clone())
130 |    end
131 |    depthConcat:add(mlp)
132 | 
133 |    -- Check the output sizes. Infer the operation of the pooling layer.
134 |    local pool = self.pool
135 |    if pool.kW ~= nil and pool.dW ~= nil and pool.padW ~= nil then
136 |       assert(oWidth ~= nil)
137 |       assert(oHeight ~= nil)
138 |       local oWidth_pool = outSize(outSize(iWidth, pool.kW, pool.dW, pool.padW), 1, poolReduceStride, 0)
139 |       local oHeight_pool = outSize(outSize(iHeight, pool.kH, pool.dH, pool.padH), 1, poolReduceStride, 0)
140 |       if oWidth ~= oWidth_pool or oHeight ~= oHeight_pool then
141 |          print("dpnn.Inception: Warning: Inconsistent output sizes in pooling.")
142 |       end
143 |    end
144 | 
145 |    -- reduce: 1x1 conv (channel-wise pooling)
146 |    i = i + 1
147 |    if self.reduceSize[i] then
148 |       local mlp = nn.Sequential()
149 |       local reduceStride = self.reduceStride[i] or 1
150 |       mlp:add(nn.SpatialConvolution(
151 |          self.inputSize, self.reduceSize[i], 1, 1, reduceStride, reduceStride
152 |       ))
153 |       if self.batchNorm then
154 |          mlp:add(nn.SpatialBatchNormalization(self.reduceSize[i]))
155 |       end
156 |       mlp:add(self.transfer:clone())
157 |       depthConcat:add(mlp)
158 | 
159 |       -- Check the output sizes.
160 |       local oWidth_conv = outSize(iWidth, 1, reduceStride, 0)
161 |       local oHeight_conv = outSize(iHeight, 1, reduceStride, 0)
162 |       if oWidth ~= oWidth_conv or oHeight ~= oHeight_conv then
163 |          print("dpnn.Inception: Warning: Inconsistent output sizes in 1x1 conv.")
164 |       end
165 |    end
166 | 
167 |    parent.__init(self, depthConcat)
168 | end
169 | 
170 | function Inception:updateOutput(input)
171 |    -- Forward pass: the wrapped module's call is bracketed by toBatch/fromBatch (argument 3).
172 |    local batchInput = self:toBatch(input, 3)
173 |    self.output = self:fromBatch(self.module:updateOutput(batchInput), 3)
174 |    return self.output
175 | end
176 | 
177 | function Inception:updateGradInput(input, gradOutput)
178 |    -- Backward pass w.r.t. the input, bracketed by toBatch/fromBatch like updateOutput.
179 |    local batchInput, batchGradOutput = self:toBatch(input, 3), self:toBatch(gradOutput, 3)
180 |    self.gradInput = self:fromBatch(self.module:updateGradInput(batchInput, batchGradOutput), 3)
181 |    return self.gradInput
182 | end
183 | 
184 | function Inception:accGradParameters(input, gradOutput, scale)
185 |    -- Both tensors go through toBatch before the wrapped module accumulates its gradients.
186 |    self.module:accGradParameters(self:toBatch(input, 3), self:toBatch(gradOutput, 3), scale)
187 | end
188 | 
189 | function Inception:accUpdateGradParameters(input, gradOutput, lr)
190 |    -- Both tensors go through toBatch before the wrapped module applies its update with lr.
191 |    self.module:accUpdateGradParameters(self:toBatch(input, 3), self:toBatch(gradOutput, 3), lr)
192 | end
193 | 


--------------------------------------------------------------------------------
/Convert.lua:
--------------------------------------------------------------------------------
  1 | ------------------------------------------------------------------------
  2 | --[ nn.Convert ]--
  3 | -- Module to convert between different data formats
  4 | -- nn.Convert('bchw', 'bf') or nn.Convert('chw', 'f')
  5 | -- Automatically converts input to same type as self.output
  6 | -- Simplest use is for automatic input type conversions : nn.Convert()
  7 | ------------------------------------------------------------------------
  8 | local _ = require 'moses'
  9 | local Convert, parent = torch.class("nn.Convert", "nn.Container") -- nn.Convert is declared with nn.Container as its parent class
 10 | 
 11 | function Convert:__init(inputShape, outputShape)
 12 |    -- Shapes are strings with one character per axis; 'b' is prepended when
 13 |    -- it is missing. Omitting both arguments sets both shapes to 'b*'.
 14 |    if outputShape and not inputShape then
 15 |       error"Expecting non-nil arg 1 when arg 2 is provided"
 16 |    end
 17 |    inputShape = inputShape or 'b*'
 18 |    outputShape = outputShape or inputShape
 19 |    if not inputShape:find('b') then inputShape = 'b'..inputShape end
 20 |    if not outputShape:find('b') then outputShape = 'b'..outputShape end
 21 |    self.inputShape, self.outputShape = inputShape, outputShape
 22 |    self.inputBatchDim = inputShape:find('b')
 23 |    self.outputBatchDim = outputShape:find('b')
 24 |    local inputAny, outputAny = inputShape == 'b*', outputShape == 'b*'
 25 |    if inputAny or outputAny then
 26 |       assert(inputAny and outputAny, 'Both or neither shapes must be b*')
 27 |       self.nInputDim, self.nOutputDim = -1, -1
 28 |       self.transposition = true
 29 |    else
 30 |       -- number of dims in batch mode
 31 |       self.nInputDim, self.nOutputDim = #inputShape, #outputShape
 32 |       -- same rank and every input axis found in the output => transposition
 33 |       if self.nInputDim == self.nOutputDim then
 34 |          local isPermutation = true
 35 |          for axis in inputShape:gmatch('.') do
 36 |             if not outputShape:find(axis) then isPermutation = false; break end
 37 |          end
 38 |          self.transposition = isPermutation
 39 |       end
 40 |    end
 41 |    parent.__init(self)
 42 | end
 43 | 
 44 | -- post-initialization: creates self.converter (updateOutput calls this when it is still unset)
 45 | function Convert:buildConverter(input)
 46 |    if self.transposition then
 47 |       self.converter = self:transpose(self.outputShape)
 48 |    else
 49 |       local factory = self[self.outputShape] -- conversion method named after the output shape
 50 |       if torch.type(factory) ~= 'function' then
 51 |          error(string.format("Unrecognized conversion of shape %s to %s", self.inputShape, self.outputShape))
 52 |       end
 53 |       self.converter = factory(self, input)
 54 |    end
 55 |    assert(torch.isTensor(self.output), "Expecting Tensor output")
 56 |    local converter = self.converter
 57 |    converter:type(torch.type(self.output))
 58 |    converter:serialMode(self.dpnn_serialEmpty, self.dpnn_serialType)
 59 |    self.modules[1] = converter
 60 | end
 61 | 
 62 | function Convert:updateOutput(input)
 63 |    assert(torch.isTensor(input), "expecting Tensor")
 64 |    local moduleType = torch.type(self.output)
 65 |    if not torch.isTypeOf(input, moduleType) then
 66 |       -- copy the input into a buffer created from self.output (the module's tensor type)
 67 |       if not self._input then self._input = self.output.new() end
 68 |       self._input:resize(input:size()):copy(input)
 69 |       input = self._input
 70 |    end
 71 |    -- fewer dims than the input shape string describes: treated as non-batch input
 72 |    self.batchMode = not (input:dim() < self.nInputDim)
 73 |    if not self.batchMode then
 74 |       -- re-wrap the sample with a size-1 axis inserted at inputBatchDim
 75 |       local batchSize = input:size():totable()
 76 |       table.insert(batchSize, self.inputBatchDim, 1)
 77 |       if not self.__input then self.__input = input.new() end
 78 |       self.__input:set(input):resize(unpack(batchSize))
 79 |       input = self.__input
 80 |    end
 81 |    if not self.converter then
 82 |       self:buildConverter(input)
 83 |    end
 84 |    self.output = self.converter:updateOutput(input)
 85 |    if not self.batchMode then
 86 |       -- remove the axis at outputBatchDim again before returning
 87 |       local sampleSize = self.output:size():totable()
 88 |       table.remove(sampleSize, self.outputBatchDim)
 89 |       if not self.__output then self.__output = self.output.new() end
 90 |       self.__output:set(self.output):resize(unpack(sampleSize))
 91 |       self.output = self.__output
 92 |    end
 93 |    return self.output
 94 | end
 95 | 
 96 | function Convert:updateGradInput(input, gradOutput)
 97 |    -- input_ stays the caller's tensor; input becomes what updateOutput fed to the converter
 98 |    local input_ = input
 99 |    input = self._input or input
100 |    if not self.batchMode then
101 |       input = self.__input
102 |       -- reshape gradOutput to the size of the converter's own output
103 |       self.__gradOutput = self.__gradOutput or gradOutput.new()
104 |       self.__gradOutput:set(gradOutput):resize(self.converter.output:size())
105 |       gradOutput = self.__gradOutput
106 |    end
107 |    local gradInput = self.converter:updateGradInput(input, gradOutput)
108 |    if not self.batchMode then
109 |       self.__gradInput = self.__gradInput or gradInput.new()
110 |       self.__gradInput:set(gradInput):resize(input_:size())
111 |       gradInput = self.__gradInput
112 |    end
113 |    if self._input then
114 |       -- cast back: the gradient takes the caller's tensor type and size, not the internal buffer's
115 |       self._gradInput = self._gradInput or input_.new()
116 |       self._gradInput:resize(input_:size()):copy(gradInput)
117 |       self.gradInput = self._gradInput
118 |    else
119 |       self.gradInput = gradInput
120 |    end
121 |    return self.gradInput
122 | end
123 | 
124 | function Convert:accGradParameters(input, gradOutput, scale)
125 |    input = (not self.batchMode) and self.__input or self._input or input -- __input is only built for non-batch input
126 |    gradOutput = (not self.batchMode) and self.__gradOutput or gradOutput -- same test as in updateGradInput
127 |    self.converter:accGradParameters(input, gradOutput, scale)
128 | end
129 | 
130 | function Convert:accUpdateGradParameters(input, gradOutput, lr)
131 |    input = (not self.batchMode) and self.__input or self._input or input -- __input is only built for non-batch input
132 |    gradOutput = (not self.batchMode) and self.__gradOutput or gradOutput -- same test as in updateGradInput
133 |    self.converter:accUpdateGradParameters(input, gradOutput, lr)
134 | end
135 | 
136 | -- batch feature: builds the module that converts self.inputShape to 'bf'.
137 | -- `input` is only inspected when there are more than 2 axes, to size the Reshape.
138 | function Convert:bf(input)
139 |    local shape = self.inputShape
140 |    local batchAxis = self:findAxis('b', shape)
141 |    if shape == 'bt' then
142 |       error"Conversion of shape bt to bf not supported: open an issue on github"
143 |    end
144 | 
145 |    local rank = #shape
146 |    if rank == 1 then
147 |       -- was b
148 |       return nn.Reshape(1)
149 |    end
150 |    -- was b...: a transpose with axis 1 is needed when 'b' is not the first axis
151 |    local toFront = (batchAxis ~= 1) and nn.Transpose({1, batchAxis}) or nil
152 |    if rank <= 2 then
153 |       return toFront or nn.Identity()
154 |    end
155 |    -- more than 2 axes: reshape to the element count of one slice along 'b'
156 |    local sampleSize = input:select(self:findAxis('b'), 1):nElement()
157 |    local flatten = nn.Reshape(sampleSize)
158 |    if not toFront then
159 |       return flatten
160 |    end
161 |    local chain = nn.Sequential()
162 |    chain:add(toFront)
163 |    chain:add(flatten)
164 |    return chain
165 | end
166 | 
167 | -- each example is a scalar; batch is a vector (input shapes bt, tb, bf and fb only)
168 | function Convert:b(input)
169 |    local shape = self.inputShape
170 |    self:findAxis('b') -- lookup kept from the original code; its result is not used
171 |    if shape == 'bt' or shape == 'tb' then
172 |       -- select first set of classes
173 |       return nn.Select(self:findAxis('t'), 1)
174 |    end
175 |    if shape ~= 'bf' and shape ~= 'fb' then
176 |       error("Cannot convert shape "..self.inputShape.." to shape b")
177 |    end
178 |    -- the feature axis may only be selected away when it holds a single element
179 |    local featureAxis = self:findAxis('f')
180 |    if input:size(featureAxis) > 1 then
181 |       error("Cannot convert shape "..self.inputShape.." to b when feature > 1")
182 |    end
183 |    return nn.Select(featureAxis, 1)
184 | end
185 | 
186 | function Convert:default() -- returns the current shape of the data (an nn.Identity converter)
187 |    local passThrough = nn.Identity()
188 |    return passThrough
189 | end
190 | 
191 | -- multi-class (batch target)
192 | -- Only the input shape 'b' can be converted: nn.Reshape(1) is returned for it.
193 | function Convert:bt()
194 |    self:findAxis('b') -- lookup kept from the original code; its result is not used
195 |    local shape = self.inputShape
196 |    if shape ~= 'b' then
197 |       -- every other input shape is rejected
198 |       error("cannot convert shape '"..shape.."' to bt")
199 |    end
200 |    return nn.Reshape(1)
201 | end
202 | 
203 | -- a generic function for transposing shape axes: returns the module that
204 | -- reorders the axes of self.inputShape into the order given by newShape
205 | function Convert:transpose(newShape)
206 |    if newShape == self.inputShape then
207 |       return nn.Identity()
208 |    end
209 |    local axes = {} -- working copy of the input axis order, one character per entry
210 |    for axis in self.inputShape:gmatch('.') do
211 |       axes[#axes + 1] = axis
212 |    end
213 |    -- move each wanted axis into position i by a swap and record every swap made
214 |    local swaps = {}
215 |    for i=1,#newShape do
216 |       local j = _.indexOf(axes, newShape:sub(i,i))
217 |       if i ~= j then
218 |          axes[i], axes[j] = axes[j], axes[i]
219 |          swaps[#swaps + 1] = {j, i}
220 |       end
221 |    end
222 |    return nn.Transpose(unpack(swaps))
223 | end
224 | 
225 | function Convert:findAxis(axis_char, shape, silent) -- index of axis_char in shape (default: self.inputShape)
226 |    shape = shape or self.inputShape
227 |    local axis_pos = shape:find(axis_char, 1, true) -- plain search: axis_char is a literal, not a Lua pattern
228 |    if (not silent) and (not axis_pos) then
229 |       error("Provided shape '"..shape.."' has no axis '"..axis_char.."'", 2) -- level 2: reported at the caller
230 |    end
231 |    return axis_pos -- nil when the axis is absent and silent is set
232 | end
233 | 
234 | function Convert:type(type)
235 |    -- The cached buffers below are created from tensors of the current type, so
236 |    -- they are dropped whenever self.output is not already of the requested type.
237 |    if not torch.isTypeOf(self.output, type) then
238 |       local stale = {'_input', '_gradInput', '__input', '__output', '__gradInput', '__gradOutput'}
239 |       for i=1,#stale do
240 |          self[stale[i]] = nil
241 |       end
242 |    end
243 |    return parent.type(self, type) -- the rest is delegated to the parent class
244 | end
245 | 


--------------------------------------------------------------------------------
/tutorials/ladder_network/ladder.lua:
--------------------------------------------------------------------------------
  1 | --[[!
  2 |    Implementation of ladder as mentioned in http://arxiv.org/pdf/1504.08215.pdf
  3 | --]]
  4 | 
  5 | require 'nn'
  6 | require 'dp'
  7 | require 'dpnn'
  8 | require 'math'
  9 | require 'xlua'
 10 | require 'optim'
 11 | require 'nngraph'
 12 | 
 13 | -- Cuda
 14 | require 'cutorch'
 15 | require 'cunn'
 16 | 
 17 | -- Help functions
 18 | require 'ladder_help_funcs'
 19 | 
 20 | torch.setdefaulttensortype("torch.FloatTensor")
 21 | op = xlua.OptionParser('%prog [options]')
 22 | 
 23 | -- Data
 24 | op:option{'--noValidation', action='store_true', dest='noValidation',
 25 |           help='Use validation data for training as well.', default=false}
 26 | op:option{'--best', action='store_true', dest='best',
 27 |           help='Use best training or validation model.', default=false}
 28 | 
 29 | -- Model parameters
 30 | op:option{'--noOfClasses', action='store', dest='noOfClasses',
 31 |           help='Number of classes.', default=10} -- MNIST data
 32 | op:option{'--noiseSigma', action='store', dest='noiseSigma',
 33 |           help='Stdev for noise for denoising autoencoder (Mean is zero).',
 34 |           default=0}
 35 | op:option{'--hiddens', action='store', dest='hiddens',
 36 |           help='Hiddens units', default='{1000, 500, 250, 250, 250}'}
 37 | op:option{'--useBatchNorm', action='store_true', dest='useBatchNorm',
 38 |           help='Use batch normalization.', default=false}
 39 | op:option{'--weightTied', action='store_true', dest='weightTied',
 40 |           help='Tie weights of decoder with encoder.', default=false}
 41 | 
 42 | -- Criterion and learning
 43 | op:option{'--attempts', action='store', dest='attempts',
 44 |           help='Run attempts independent experiments.', default=1}
 45 | op:option{'--eta', action='store', dest='eta',
 46 |           help='If zero then only classifier cost is considered.', default=0}
 47 | op:option{'--batchSize', action='store', dest='batchSize',
 48 |           help='Batch Size.',default=32}
 49 | op:option{'--epochs', action='store', dest='epochs',
 50 |           help='Number of epochs.',default=100}
 51 | op:option{'--maxTries', action='store', dest='maxTries',
 52 |           help='Number of tries for stopping.',default=0}
 53 | op:option{'--learningRate', action='store', dest='learningRate',
 54 |           help='Learning rate',default=0.002}
 55 | op:option{'--learningRateDecay', action='store', dest='learningRateDecay',
 56 |           help='Learning rate decay',default=1e-7}
 57 | op:option{'--linearDecay', action='store_true', dest='linearDecay',
 58 |           help='Linearly reduce learning rate', default=false}
 59 | op:option{'--startEpoch', action='store', dest='startEpoch',
 60 |           help='Epoch number when to start linear decay.',default=1}
 61 | op:option{'--endLearningRate', action='store', dest='endLearningRate',
 62 |           help='Learning rate at last epoch',default=0.0}
 63 | op:option{'--momentum', action='store', dest='momentum',
 64 |           help='Learning Momemtum',default=0}
 65 | op:option{'--loss', action='store_true', dest='loss',
 66 |           help='If true use loss for early stopping else confusion matrix.',
 67 |           default=false}
 68 | op:option{'--adam', action='store_true', dest='adam',
 69 |           help='Use adaptive moment estimation optimizer.', default=false}
 70 | 
 71 | -- Use Cuda
 72 | op:option{'--useCuda', action='store_true', dest='useCuda', help='Use GPU',
 73 |           default=false}
 74 | op:option{'--deviceId', action='store', dest='deviceId', help='GPU device Id',
 75 |           default=2}
 76 | 
 77 | -- Print debug messages
 78 | op:option{'--verbose', action='store_true', dest='verbose',
 79 |           help='Print apppropriate debug messages.', default=false}
 80 | 
 81 | -- Command line arguments
 82 | opt = op:parse()
 83 | op:summarize()
 84 | 
 85 | -- Data
 86 | noValidation = opt.noValidation
 87 | best = opt.best
 88 | verbose = opt.verbose
 89 | 
 90 |    -- Cuda
 91 | useCuda = opt.useCuda
 92 | deviceId = tonumber(opt.deviceId)
 93 | 
 94 | -- MNIST Data source
 95 | ds = dp.Mnist{}
 96 | 
 97 | attempts = tonumber(opt.attempts)
 98 | testAccus = torch.zeros(attempts)
 99 | trData = {}
100 | tvData = {}
101 | tsData = {}
102 | for attempt=1,attempts do
103 | 
104 |    local t1, t2
105 | 
106 |    trData.data, t1, t2 = ds:get('train', 'input', 'bchw', 'float')
107 |    trData.labels, t1, t2 = ds:get('train', 'target')
108 |    trData.size = function() return trData.data:size()[1] end
109 | 
110 |    tvData.data, t1, t2 = ds:get('valid', 'input', 'bchw', 'float')
111 |    tvData.labels, t1, t2 = ds:get('valid', 'target')
112 |    tvData.size = function() return tvData.data:size()[1] end
113 | 
114 |    tsData.data, t1, t2 = ds:get('test', 'input', 'bchw', 'float')
115 |    tsData.labels, t1, t2 = ds:get('test', 'target')
116 |    tsData.size = function() return tsData.data:size()[1] end
117 |    collectgarbage()
118 | 
119 |    local tempSample = trData.data[1]
120 |    local channels = tempSample:size(1)
121 |    local width = tempSample:size(2)
122 |    local height = tempSample:size(3)
123 |    local linFeats = channels * height * width
124 | 
125 |    -- MNIST
126 |    local classes = {'1', '2', '3', '4', '5', '6', '7', '8', '9', '10'}
127 |    local confusion = optim.ConfusionMatrix(classes)
128 | 
129 |    -- Model
130 |    local noOfClasses = tonumber(opt.noOfClasses)
131 |    local noiseSigma = tonumber(opt.noiseSigma)
132 |    local inputHiddens = dp.returnString(opt.hiddens)
133 |    local useBatchNorm = opt.useBatchNorm
134 |    local weightTied = opt.weightTied
135 | 
136 | 
137 |    hiddens = {linFeats}
138 |    for i=1,#inputHiddens do
139 |       hiddens[#hiddens+1] = inputHiddens[i]
140 |    end
141 |    hiddens[#hiddens+1] = noOfClasses
142 | 
143 |    -- encoder input
144 |    local input = nil
145 |    if noiseSigma ~= 0 then
146 |       if verbose then print("Add noise to the samples.") end
147 |       input = nn.WhiteNoise(0, noiseSigma)()
148 |    else
149 |       input = nn.Identity()()
150 |    end
151 | 
152 |    -- encoder model
153 |    local encoderLayers = {}
154 |    local Zs = {}
155 |    Zs[1] = input
156 |    local Hs = {}
157 |    Hs[1] = input
158 |    for i=2,#hiddens do
159 |       -- Zs
160 |       encoderLayers[i] = nn.Linear(hiddens[i-1], hiddens[i])
161 |       if useBatchNorm then
162 |          Zs[i] = nn.BatchNormalization(hiddens[i])
163 |                                       (encoderLayers[i](Hs[i-1]))
164 |       else
165 |          Zs[i] = encoderLayers[i](Hs[i-1])
166 |       end
167 |      
168 |       -- Hs
169 |       if i==#hiddens then
170 |          Hs[i] = nn.CMul(hiddens[i])(nn.Add(hiddens[i])(Zs[i]))
171 |       else
172 |          Hs[i] = nn.ReLU()(nn.CMul(hiddens[i])(nn.Add(hiddens[i])(Zs[i])))
173 |       end
174 |    end
175 | 
176 |    -- classifier
177 |    local classifier = nn.LogSoftMax()(Hs[#Hs])
178 | 
179 |    -- Decoder
180 |    local decoderLayers = {}
181 |    local Z_hats = {}
182 |    for i=#hiddens,1,-1 do
183 | 
184 |       -- u = 0 hence no cij
185 |       if i==#hiddens then
186 |          z_hat1 = nn.CMul(hiddens[i])(Zs[i])
187 |          z_hat2 = nn.CMul(hiddens[i])(Zs[i])
188 |          z_hat3 = nn.CMul(hiddens[i])(Zs[i])
189 |          z_hat34 = nn.Add(hiddens[i])(z_hat3)
190 |          z_hatSigmoid34 = nn.Sigmoid()(z_hat34)
191 |          z_hat234 = nn.CMulTable()({z_hat2, z_hatSigmoid34})
192 |          z_hat5 = nn.CMul(hiddens[i])(Zs[i])
193 |          Z_hats[i] = nn.CAddTable()({z_hat1, z_hat234, z_hat5})
194 |       else
195 |          decoderLayers[i] = nn.Linear(hiddens[i+1], hiddens[i])
196 |          if weightTied then
197 |             if verbose then print("Tying encoder-decoder weights.") end
198 |             decoderLayers[i].weight:set(encoderLayers[i+1].weight:t())
199 |             decoderLayers[i].gradWeight:set(encoderLayers[i+1].gradWeight:t())
200 |          end
201 | 
202 |          u = decoderLayers[i](Z_hats[i+1])
203 | 
204 |          cu1 = nn.CMul(hiddens[i])(u)
205 |          du1 = nn.Add(hiddens[i])(u)
206 |          a1 = nn.CAddTable()({cu1, du1})
207 |          cu2 = nn.CMul(hiddens[i])(u)
208 |          du2 = nn.Add(hiddens[i])(u)
209 |          a2 = nn.CAddTable()({cu2, du2})
210 |          cu3 = nn.CMul(hiddens[i])(u)
211 |          du3 = nn.Add(hiddens[i])(u)
212 |          a3 = nn.CAddTable()({cu3, du3})
213 |          cu4 = nn.CMul(hiddens[i])(u)
214 |          du4 = nn.Add(hiddens[i])(u)
215 |          a4 = nn.CAddTable()({cu4, du4})
216 |          cu5 = nn.CMul(hiddens[i])(u)
217 |          du5 = nn.Add(hiddens[i])(u)
218 |          a5 = nn.CAddTable()({cu5, du5})
219 | 
220 |          z_hat1 = nn.CMulTable()({a1, Zs[i]})
221 |          z_hat2 = nn.CMulTable()({a3, Zs[i]})
222 |          z_hat3 = nn.Sigmoid()(nn.CAddTable()({z_hat2, a4}))
223 |          z_hat4 = nn.CMulTable()({a2, z_hat3})
224 |          Z_hats[i] = nn.CAddTable()({z_hat1, z_hat4, a5})
225 |       end
226 |    end
227 |    local model = nn.gModule({input}, {classifier, Z_hats[1]--[[Decoder--]]})
228 |    if verbose then print(model) end
229 | 
230 |    -- Criterion and learning
231 |    -- Criterion
232 |    local eta = tonumber(opt.eta)
233 |    local criterions = nn.ParallelCriterion()
234 |    local nll = nn.ClassNLLCriterion()
235 |    local mse = nn.MSECriterion()
236 |    criterions:add(nll)
237 |    criterions:add(mse, eta)
238 | 
239 |    -- Learning
240 |    local batchSize = tonumber(opt.batchSize)
241 |    local epochs = tonumber(opt.epochs)
242 |    local maxTries = tonumber(opt.maxTries)
243 |    local learningRate = tonumber(opt.learningRate)
244 |    local learningRateDecay = tonumber(opt.learningRateDecay)
245 |    local linearDecay = opt.linearDecay
246 |    local startEpoch = tonumber(opt.startEpoch)
247 |    local endLearningRate = tonumber(opt.endLearningRate)
248 |    assert(epochs > startEpoch, "startEpoch should be smaller than epochs.")   
249 | 
250 |    if linearDecay then
251 |       if verbose then print("Using linear decay.") end
252 |       learningRates = torch.zeros(startEpoch):fill(learningRate)
253 |       local temp = torch.range(learningRate, endLearningRate,
254 |                                -learningRate/(epochs-startEpoch))
255 |       learningRates = torch.cat(learningRates, temp)
256 |    end
257 | 
258 |    local momentum = tonumber(opt.momentum)
259 |    local loss = opt.loss
260 |    local adam = opt.adam
261 | 
262 |    -- Optimizer
263 |    local optimState = {
264 |                        coefL1 = 0,
265 |                        coefL2 = 0,
266 |                        learningRate = learningRate,
267 |                        weightDecay = 0.0,
268 |                        momentum = momentum,
269 |                        learningRateDecay = learningRateDecay
270 |                       }
271 | 
272 |    -- If true use Adaptive moment estimation else SGD.
273 |    if adam then
274 |       if verbose then print("Using Adaptive moment estimation optimizer.") end
275 |       optimMethod = optim.adam
276 |    else
277 |       if verbose then print("Using Stocastic gradient descent optimizer.") end
278 |       optimMethod = optim.sgd
279 |    end
280 |    if verbose then
281 |       print(optimMethod)
282 |       print(optimState)
283 |    end
284 | 
285 | 
286 |    if useCuda then
287 |       if verbose then print("Using GPU: "..deviceId) end
288 |       cutorch.setDevice(deviceId)
289 |       if verbose then print("GPU set") end
290 |       model:cuda()
291 |       if verbose then print("Model copied to GPU.") end
292 |       criterions:cuda()
293 |       if verbose then print("Criterion copied to GPU.") end
294 |    else
295 |       if verbose then print("Not using GPU.") end
296 |    end
297 | 
298 |    -- Retrieve parameters and gradients
299 |    parameters, gradParameters = model:getParameters()
300 | 
301 |    -- Reshape samples from images to vectors
302 |    trData.data = trData.data:reshape(trData.size(1), linFeats)
303 |    tvData.data = tvData.data:reshape(tvData.size(1), linFeats)
304 |    tsData.data = tsData.data:reshape(tsData.size(1), linFeats)
305 |    collectgarbage()
306 | 
307 |    if noValidation then
308 |       trData.data = torch.cat(trData.data, tvData.data, 1)
309 |       trData.labels = torch.cat(trData.labels, tvData.labels, 1)
310 |       tvData.data = nil
311 |       tvData.labels = nil
312 |       collectgarbage()
313 |    end
314 | 
315 |    if verbose then
316 |       print(trData)
317 |       print(tvData)
318 |       print(tsData)
319 |    end
320 | 
321 |    -- Training
322 |    local displayProgress = verbose
323 |    local classifierIndx = 1
324 |    local trainAccu = 0
325 |    local validAccu = 0
326 |    local bestTrainAccu = 0
327 |    local bestValidAccu = 0
328 |    local trainLoss = 0
329 |    local validLoss = 0
330 |    local bestTrainLoss = math.huge
331 |    local bestValidLoss = math.huge
332 |    local bestTrainModel = nn.Sequential()
333 |    local bestValidModel = nn.Sequential()
334 |    local earlyStopCount = 0
335 |    for i=1, epochs do
336 |       if linearDecay then
337 |          optimState.learningRate = learningRates[i]
338 |       end
339 |       -- Training
340 |       trainLoss = model_train_multi_criterion(model, criterions,
341 |                                               parameters, gradParameters, trData,
342 |                                               optimMethod, optimState, batchSize,
343 |                                               i, confusion, trainLogger,
344 |                                               useCuda, displayProgress,
345 |                                               classiferIndx)
346 |       confusion:updateValids()
347 |       if loss then
348 |          if verbose then
349 |             print("Current train loss: ".. trainLoss
350 |                      ..", best train loss: " .. bestTrainLoss)
351 |          end
352 |          if trainLoss < bestTrainLoss then
353 |             bestTrainLoss = trainLoss
354 |             bestTrainModel = model:clone()
355 |             print(confusion)
356 |          end
357 |       else -- Using classification accuracy for saving best train model
358 |          trainAccu = confusion.totalValid * 100
359 |          if bestTrainAccu < trainAccu then
360 |             bestTrainAccu = trainAccu
361 |             bestTrainModel = model:clone()
362 |             bestTrainLoss = trainLoss
363 |          end
364 |          if verbose then
365 |             print("Current train accu: ".. trainAccu
366 |                      ..", best train accu: " .. bestTrainAccu
367 |                      ..", best train loss: " .. bestTrainLoss)
368 |          end
369 |       end
370 | 
371 |       -- Validating
372 |       if not noValidation then
373 |          validLoss = model_test_multi_criterion(model, criterions,
374 |                                                 tvData, confusion,
375 |                                                 useCuda, classifierIndx)
376 |          confusion:updateValids()
377 |          if loss then
378 |             if verbose then
379 |                print("Current valid loss: ".. validLoss
380 |                         ..", best valid loss: " .. bestValidLoss)
381 |             end
382 |             if validLoss < bestValidLoss then
383 |                earlyStopCount = 0
384 |                bestValidLoss = validLoss
385 |                bestValidModel = model:clone()
386 |                print(confusion)
387 |             else
388 |                earlyStopCount = earlyStopCount + 1
389 |             end
390 |          else
391 |             validAccu = confusion.totalValid * 100
392 |             if bestValidAccu < validAccu then
393 |                earlyStopCount = 0
394 |                bestValidAccu = validAccu
395 |                bestValidModel = model:clone()
396 |                bestValidLoss = validLoss
397 |             else
398 |                earlyStopCount = earlyStopCount + 1
399 |             end
400 |             if verbose then
401 |                print("Current valid accu: ".. validAccu
402 |                      ..", best valid accu: " .. bestValidAccu
403 |                      ..", best valid loss: " .. bestValidLoss)
404 |             end
405 |          end
406 |          if verbose then
407 |             print(noiseSigma, weightTied, useBatchNorm, eta, earlyStopCount)
408 |          end
409 |       end
410 | 
411 |       if maxTries ~= 0 then
412 |          if earlyStopCount >= maxTries then
413 |             if verbose then print("Early stopping at epoch: " .. i) end
414 |             break
415 |          end
416 |       end
417 |    end
418 | 
419 |    -- Testing
420 |    if best then
421 |       if noValidation then
422 |          testLoss = model_test_multi_criterion(bestTrainModel, criterions,
423 |                                                tsData, confusion,
424 |                                                useCuda, classifierIndx)
425 |       else
426 |          testLoss = model_test_multi_criterion(bestValidModel, criterions,
427 |                                                tsData, confusion,
428 |                                                useCuda, classifierIndx)
429 |       end
430 |    else
431 |       testLoss = model_test_multi_criterion(model, criterions,
432 |                                             tsData, confusion,
433 |                                             useCuda, classifierIndx)
434 |    end
435 |    confusion:updateValids()
436 |    testAccu = confusion.totalValid * 100
437 |    testAccus[attempt] = testAccu
438 |    if verbose then
439 |       print("Attempt: " .. tostring(attempt) .. " Test Accu: " .. testAccu)
440 |    end
441 | end
-- Summary over all attempts. testAccus holds one test accuracy (in percent)
-- per attempt, so 100 - max(accuracy) is the smallest test error reached;
-- the label used to call it the "Max" error.
print("Test accuracies.")
print(testAccus)
print("Min Test Error is: " .. tostring(100 - testAccus:max()) .. "%")
445 | 


--------------------------------------------------------------------------------
/NCEModule.lua:
--------------------------------------------------------------------------------
  1 | ------------------------------------------------------------------------
  2 | --[[ Noise Contrast Estimation Module]]--
  3 | -- Ref.: A. https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf
  4 | ------------------------------------------------------------------------
  5 | local _ = require 'moses'
  6 | local NCEModule, parent = torch.class("nn.NCEModule", "nn.Linear")
  7 | NCEModule.version = 6 -- better bias init
  8 | 
  9 | -- for efficient serialization using nn.Serial
 10 | local empty = _.clone(parent.dpnn_mediumEmpty)
 11 | table.insert(empty, 'sampleidx')
 12 | table.insert(empty, 'sampleprob')
 13 | table.insert(empty, '_noiseidx')
 14 | table.insert(empty, '_noiseprob')
 15 | table.insert(empty, '_weight')
 16 | table.insert(empty, '_gradWeight')
 17 | table.insert(empty, '_gradOutput')
 18 | table.insert(empty, '_tgradOutput')
 19 | NCEModule.dpnn_mediumEmpty = empty
 20 | 
 21 | -- for sharedClone
 22 | local params = _.clone(parent.dpnn_parameters)
 23 | table.insert(params, 'unigrams')
 24 | table.insert(params, 'Z')
 25 | NCEModule.dpnn_parameters = params
 26 | 
 27 | function NCEModule:__init(inputSize, outputSize, k, unigrams, Z)
 28 |    parent.__init(self, inputSize, outputSize)
 29 |    assert(torch.type(k) == 'number')
 30 |    assert(torch.isTensor(unigrams))
 31 |    self.k = k
 32 |    self.unigrams = unigrams
 33 |    self.Z = torch.Tensor{Z or -1}
 34 |    
 35 |    self.batchnoise = true
 36 |    
 37 |    self:fastNoise()
 38 |    
 39 |    -- output is {P_linear(target|input), P_linear(samples|input), P_noise(target), P_noise(samples)}
 40 |    self.output = {torch.Tensor(), torch.Tensor(), torch.Tensor(), torch.Tensor()}
 41 |    self.gradInput = {torch.Tensor(), torch.Tensor()}
 42 | end
 43 | 
 44 | function NCEModule:reset(stdv)
 45 |    if stdv then
 46 |       self.weight:uniform(-stdv, stdv)
 47 |       self.bias:uniform(-stdv, stdv)
 48 |    else
 49 |       stdv = stdv or 1./math.sqrt(self.weight:size(2))
 50 |       self.weight:uniform(-stdv, stdv)
 51 |       -- this is useful for Z = 1
 52 |       self.bias:fill(-math.log(self.bias:size(1)))
 53 |    end
 54 |    return self
 55 | end
 56 | 
 57 | function NCEModule:fastNoise()
 58 |    -- we use alias to speedup multinomial sampling (see noiseSample method)
 59 |    require 'torchx'
 60 |    assert(torch.AliasMultinomial, "update torchx : luarocks install torchx")
 61 |    self.unigrams:div(self.unigrams:sum())
 62 |    self.aliasmultinomial = torch.AliasMultinomial(self.unigrams)
 63 |    self.aliasmultinomial.dpnn_parameters = {'J', 'q'}
 64 | end
 65 | 
-- Forward pass.
-- inputTable is {input, target}: input must be 2D (one row per example) and
-- target must be 1D; both are asserted.
-- Three modes:
--   * self.train == false and self.normalized: full linear layer over all
--     outputs followed by SoftMax (or LogSoftMax when self.logsoftmax is set);
--     self.output becomes a single tensor.
--   * self.batchnoise: the same k noise samples are shared by the whole batch.
--   * otherwise: k noise samples are drawn for each example.
-- In the two sampling modes self.output is the table
--   {Pmt, Pms, Pnt, Pns} = {P_model(target), P_model(samples),
--                           P_noise(target), P_noise(samples)}.
-- Returns self.output.
function NCEModule:updateOutput(inputTable)
   local input, target = unpack(inputTable)
   assert(input:dim() == 2)
   assert(target:dim() == 1)
   local batchsize = input:size(1)
   local inputsize = self.weight:size(2)
   
   if self.train == false and self.normalized then
      self.linout = self.linout or input.new()
      -- full linear + softmax
      local nElement = self.linout:nElement()
      self.linout:resize(batchsize, self.weight:size(1))
      -- zero the buffer only when its size changed
      if self.linout:nElement() ~= nElement then
         self.linout:zero()
      end
      -- vector of ones used to add the bias to every row through addr
      self.addBuffer = self.addBuffer or input.new()
      if self.addBuffer:nElement() ~= batchsize then
         self.addBuffer:resize(batchsize):fill(1)
      end
      -- linout = input * weight^T (the leading 0 discards the previous content)
      self.weight.addmm(self.linout, 0, self.linout, 1, input, self.weight:t())
      if self.bias then self.linout:addr(1, self.addBuffer, self.bias) end
      -- replace the 4-tensor training output table by a single tensor
      self.output = torch.type(self.output) == 'table' and input.new() or self.output
      if self.logsoftmax then
         input.THNN.LogSoftMax_updateOutput(
            self.linout:cdata(),
            self.output:cdata()
         )
      else
         input.THNN.SoftMax_updateOutput(
            self.linout:cdata(),
            self.output:cdata()
         )
      end
   elseif self.batchnoise then
      -- make sure output is a table of 4 tensors (it is a tensor after evaluation)
      self.output = (torch.type(self.output) == 'table' and #self.output == 4) and self.output
         or {input.new(), input.new(), input.new(), input.new()}
      assert(torch.type(target) == 'torch.CudaTensor' or torch.type(target) == 'torch.LongTensor')
      self.sampleidx = self.sampleidx or target.new()
      
      -- the last elements contain the target indices
      self.sampleidx:resize(self.k + batchsize)
      self.sampleidx:narrow(1,self.k+1,batchsize):copy(target)
      
      -- sample k noise samples
      -- noiseSample resizes sampleidx to (1, k); the resize below restores the
      -- (k + batchsize) shape.
      -- NOTE(review): this appears to rely on the shrinking resize keeping the
      -- storage, so that the targets copied above survive -- confirm.
      self:noiseSample(self.sampleidx, 1, self.k)
      self.sampleidx:resize(self.k + batchsize)
      
      -- build (batchsize+k, inputsize) weight tensor
      self._weight = self._weight or self.bias.new()
      self.weight.index(self._weight, self.weight, 1, self.sampleidx)
      assert(self._weight:nElement() == (self.k+batchsize)*inputsize)
      self._weight:resize(self.k+batchsize, inputsize)
      
      -- build (batchsize+k,) bias tensor
      self._bias = self._bias or self.bias.new()
      self._bias:index(self.bias, 1, self.sampleidx)
      assert(self._bias:nElement() == (self.k+batchsize))
      self._bias:resize(self.k+batchsize)
      
      -- separate sample and target weight matrices and bias vectors
      local sweight = self._weight:narrow(1, 1, self.k)
      local tweight = self._weight:narrow(1, self.k+1, batchsize)
      local sbias = self._bias:narrow(1, 1, self.k)
      local tbias = self._bias:narrow(1, self.k+1, batchsize)
      
      -- get model probability of targets (batchsize,)
      -- exp(row-wise dot product of input and target weights + target bias)
      local Pmt = self.output[1]
      self._pm = self._pm or input.new()
      self._pm:cmul(input, tweight)
      Pmt:sum(self._pm, 2):resize(batchsize)
      Pmt:add(tbias)
      Pmt:exp()
      
      -- get model probability of samples (batchsize x k) samples
      -- exp(input * sweight^T + sbias), the bias being broadcast over the rows
      local Pms = self.output[2]
      Pms:resize(batchsize, self.k)
      Pms:copy(sbias:view(1,self.k):expand(batchsize, self.k))
      Pms:addmm(1, Pms, 1, input, sweight:t())
      Pms:exp()
      
      if self.Z[1] <= 0 then
         -- approximate Z using current batch
         self.Z[1] = Pms:mean()*self.weight:size(1)
         print("normalization constant Z approximated to "..self.Z[1])
      end
      
      -- divide by normalization constant
      Pms:div(self.Z[1]) 
      Pmt:div(self.Z[1])
      
      -- get noise probability (pn) for all samples
      
      self.sampleprob = self.sampleprob or Pms.new()
      self.sampleprob = self:noiseProb(self.sampleprob, self.sampleidx)
      
      -- targets are the last batchsize entries, noise samples the first k
      local Pnt = self.sampleprob:narrow(1,self.k+1,target:size(1))
      local Pns = self.sampleprob:narrow(1,1,self.k)
      Pns = Pns:resize(1, self.k):expand(batchsize, self.k)
      
      self.output[3]:set(Pnt)
      self.output[4]:set(Pns)
   else
      -- make sure output is a table of 4 tensors (it is a tensor after evaluation)
      self.output = (torch.type(self.output) == 'table' and #self.output == 4) and self.output
         or {input.new(), input.new(), input.new(), input.new()}
      self.sampleidx = self.sampleidx or target.new()
      
      -- the first column will contain the target indices
      self.sampleidx:resize(batchsize, self.k+1)
      self.sampleidx:select(2,1):copy(target)
      
      self._sampleidx = self._sampleidx or self.sampleidx.new()
      self._sampleidx:resize(batchsize, self.k)
      
      -- sample (batchsize x k) noise samples
      self:noiseSample(self._sampleidx, batchsize, self.k)
      
      -- the noise samples fill columns 2..k+1
      self.sampleidx:narrow(2,2,self.k):copy(self._sampleidx)
      
      -- make sure that targets are still first column of sampleidx
      -- (sanity check on at most 3 rows, performed once per instance)
      if not self.testedtargets then
         for i=1,math.min(target:size(1),3) do
            assert(self.sampleidx[{i,1}] == target[i])
         end
         self.testedtargets = true
      end
      
      -- build (batchsize x k+1 x inputsize) weight tensor
      self._weight = self._weight or self.bias.new()
      self.weight.index(self._weight, self.weight, 1, self.sampleidx:view(-1))
      assert(self._weight:nElement() == batchsize*(self.k+1)*inputsize)
      self._weight:resize(batchsize, self.k+1, inputsize)
      
      -- build (batchsize x k+1) bias tensor
      self._bias = self._bias or self.bias.new()
      self._bias:index(self.bias, 1, self.sampleidx:view(-1))
      assert(self._bias:nElement() == batchsize*(self.k+1))
      self._bias:resize(batchsize, self.k+1)
      
      -- get model probability (pm) of sample and target (batchsize x k+1) samples
      -- _pm starts as the bias, then the batched product input * weight^T is added
      self._pm = self._pm or input.new()
      self._pm:resizeAs(self._bias):copy(self._bias)
      self._pm:resize(batchsize, 1, self.k+1)
      local _input = input:view(batchsize, 1, inputsize)
      self._pm:baddbmm(1, self._pm, 1, _input, self._weight:transpose(2,3))
      self._pm:resize(batchsize, self.k+1)
      self._pm:exp()
      
      if self.Z[1] <= 0 then
         -- approximate Z using current batch
         self.Z[1] = self._pm:mean()*self.weight:size(1)
         print("normalization constant Z approximated to "..self.Z[1])
      end
      
      self._pm:div(self.Z[1]) -- divide by normalization constant
      
      -- separate target from sample model probabilities
      local Pmt = self._pm:select(2,1)
      local Pms = self._pm:narrow(2,2,self.k)
      
      self.output[1]:set(Pmt)
      self.output[2]:set(Pms)
      
      -- get noise probability (pn) for all samples
      
      self.sampleprob = self.sampleprob or self._pm.new()
      self.sampleprob = self:noiseProb(self.sampleprob, self.sampleidx)
      
      local Pnt = self.sampleprob:select(2,1)
      local Pns = self.sampleprob:narrow(2,2,self.k)
      
      self.output[3]:set(Pnt)
      self.output[4]:set(Pns)
   end
   
   return self.output
end
242 | 
-- Backward pass with respect to the input.
-- inputTable is {input, target} (2D and 1D, both asserted). Only gradOutput[1]
-- (dPmt) and gradOutput[2] (dPms), the gradients w.r.t. the model probabilities
-- of the targets and of the noise samples, are read.
-- Uses the buffers left by the last updateOutput call (self._weight,
-- self.output, self._pm) and stores self._gradOutput (and self._tgradOutput in
-- batch-noise mode), which accGradParameters reuses.
-- Returns self.gradInput = {gradient w.r.t. input, tensor sized like target}.
function NCEModule:updateGradInput(inputTable, gradOutput)
   local input, target = unpack(inputTable)
   assert(input:dim() == 2)
   assert(target:dim() == 1)
   local dPmt, dPms = gradOutput[1], gradOutput[2]
   local batchsize = input:size(1)
   local inputsize = self.weight:size(2)
   
   if self.batchnoise then
      local Pmt, Pms = self.output[1], self.output[2]
      
      -- separate sample and target weight matrices
      local sweight = self._weight:narrow(1, 1, self.k)
      local tweight = self._weight:narrow(1, self.k+1, batchsize)
      
      -- the rest of equation 7
      -- d Pm / d linear = exp(linear)/z
      self._gradOutput = self._gradOutput or dPms.new()
      self._tgradOutput = self._tgradOutput or dPmt.new()
      self._gradOutput:cmul(dPms, Pms)
      self._tgradOutput:cmul(dPmt, Pmt)
      
      -- gradient of linear
      -- target part: each target weight row scaled by its gradient;
      -- sample part: accumulated on top through addmm
      self.gradInput[1] = self.gradInput[1] or input.new()
      self.gradInput[1]:cmul(self._tgradOutput:view(batchsize, 1):expandAs(tweight), tweight)
      self.gradInput[1]:addmm(1, 1, self._gradOutput, sweight)
   else
      -- the rest of equation 7 (combine both sides of + sign into one tensor)
      -- column 1 holds the target gradient, columns 2..k+1 the sample gradients
      self._gradOutput = self._gradOutput or dPmt.new()
      self._gradOutput:resize(batchsize, self.k+1)
      self._gradOutput:select(2,1):copy(dPmt)
      self._gradOutput:narrow(2,2,self.k):copy(dPms)
      self._gradOutput:resize(batchsize, 1, self.k+1)
      -- d Pm / d linear = exp(linear)/z
      self._gradOutput:cmul(self._pm)
      
      -- gradient of linear
      -- batched (1 x k+1) * (k+1 x inputsize) product for every example
      self.gradInput[1] = self.gradInput[1] or input.new()
      self.gradInput[1]:resize(batchsize, 1, inputsize):zero()
      self.gradInput[1]:baddbmm(0, 1, self._gradOutput, self._weight)
      self.gradInput[1]:resizeAs(input)
   end
   
   -- gradient slot for target: resized to target's size and zeroed when its
   -- element count changes
   self.gradInput[2] = self.gradInput[2] or input.new()
   if self.gradInput[2]:nElement() ~= target:nElement() then
      self.gradInput[2]:resize(target:size()):zero()
   end
   
   return self.gradInput
end
293 | 
-- Accumulates the gradients w.r.t. weight and bias for the rows that were
-- sampled (targets and noise samples) during the last forward pass.
--   inputTable : {input, target} (2D and 1D, both asserted)
--   gradOutput : unused here; the products computed by updateGradInput
--                (self._gradOutput / self._tgradOutput) are reused instead
--   scale      : multiplier applied to the accumulated gradients (default 1)
-- Must be called after updateGradInput. Note that self._gradOutput (and
-- self._tgradOutput) are scaled in place.
function NCEModule:accGradParameters(inputTable, gradOutput, scale)
   -- FIX: default the scale as nn modules conventionally do; a missing scale
   -- used to fail on mul(nil) below.
   scale = scale or 1
   local input, target = unpack(inputTable)
   assert(input:dim() == 2)
   assert(target:dim() == 1)
   local batchsize = input:size(1)
   local inputsize = self.weight:size(2)
   
   if self.batchnoise then
      self._gradWeight = self._gradWeight or self.bias.new()
      self._gradWeight:resizeAs(self._weight):zero() -- (batchsize + k) x inputsize
      
      -- first k rows belong to the noise samples, the last batchsize rows to the targets
      local sgradWeight = self._gradWeight:narrow(1, 1, self.k)
      local tgradWeight = self._gradWeight:narrow(1, self.k+1, batchsize)
      
      self._gradOutput:mul(scale)
      self._tgradOutput:mul(scale)
      
      -- samples: (k x batchsize) * (batchsize x inputsize)
      sgradWeight:addmm(0, sgradWeight, 1, self._gradOutput:t(), input)
      -- targets: each input row scaled by the gradient of its own target
      tgradWeight:cmul(self._tgradOutput:view(batchsize, 1):expandAs(self.gradInput[1]), input)
      
      -- scatter-add the gathered rows back into the full gradient tensors
      self.gradWeight:indexAdd(1, self.sampleidx, self._gradWeight)
      self.gradBias:indexAdd(1, self.sampleidx:narrow(1,self.k+1,batchsize), self._tgradOutput)
      self._tgradOutput:sum(self._gradOutput, 1) -- reuse buffer
      self.gradBias:indexAdd(1, self.sampleidx:sub(1,self.k), self._tgradOutput:view(-1))
      
   else
      self._gradWeight = self._gradWeight or self.bias.new()
      self._gradWeight:resizeAs(self._weight):zero() -- batchsize x k+1 x inputsize
      self._gradOutput:resize(batchsize, self.k+1, 1)
      self._gradOutput:mul(scale)
      local _input = input:view(batchsize, 1, inputsize)
      -- batched outer product: (k+1 x 1) * (1 x inputsize) for every example
      self._gradWeight:baddbmm(0, self._gradWeight, 1, self._gradOutput, _input)
      
      -- flatten and scatter-add into the full gradient tensors
      local sampleidx = self.sampleidx:view(batchsize * (self.k+1))
      local _gradWeight = self._gradWeight:view(batchsize * (self.k+1), inputsize)
      self.gradWeight:indexAdd(1, sampleidx, _gradWeight)
      
      local _gradOutput = self._gradOutput:view(batchsize * (self.k+1))
      self.gradBias:indexAdd(1, sampleidx, _gradOutput)
   end
end
335 | 
-- Type conversion.
-- Drops the temporary buffers when a target type is given, hides
-- self.unigrams from the parent's conversion, and puts back the original
-- unigrams and aliasmultinomial objects afterwards. When the weight is a
-- torch.MultiCudaTensor, weight and gradWeight are hidden from the parent too,
-- and the only accepted target type is 'torch.CudaTensor'.
-- Returns whatever the parent's type() returns.
function NCEModule:type(type, cache)
   if type then
      -- temporary buffers are rebuilt lazily with the new type
      for _i, buffer in ipairs{
         'sampleidx', 'sampleprob', '_noiseidx', '_noiseprob', '_metaidx',
         '_gradOutput', '_tgradOutput', '_gradWeight', '_weight'
      } do
         self[buffer] = nil
      end
   end

   local savedUnigrams = self.unigrams
   self.unigrams = nil
   local savedSampler = self.aliasmultinomial

   local isMultiCuda = type and torch.type(self.weight) == 'torch.MultiCudaTensor'
   local savedWeight, savedGradWeight
   if isMultiCuda then
      assert(type == 'torch.CudaTensor', "Cannot convert a multicuda NCEModule to anything other than cuda")
      savedWeight, savedGradWeight = self.weight, self.gradWeight
      self.weight = nil
      self.gradWeight = nil
   end

   local rtn = parent.type(self, type, cache)

   if isMultiCuda then
      assert(torch.type(self.aliasmultinomial.J) ~= 'torch.CudaTensor')
      self.weight = savedWeight
      self.gradWeight = savedGradWeight
   end

   self.unigrams = savedUnigrams
   self.aliasmultinomial = savedSampler
   return rtn
end
373 | 
-- Looks up the unigram (noise) probability of every index in sampleidx.
-- sampleprob is resized to the size of sampleidx, filled with the
-- probabilities and returned. The lookup goes through a LongTensor copy of
-- the indices (self._noiseidx) and a buffer of the unigrams' type
-- (self._noiseprob).
function NCEModule:noiseProb(sampleprob, sampleidx)
   assert(sampleprob)
   assert(sampleidx)

   if not self._noiseprob then
      self._noiseprob = self.unigrams.new()
   end
   if not self._noiseidx then
      self._noiseidx = torch.LongTensor()
   end

   local longidx = self._noiseidx
   longidx:resize(sampleidx:size())
   longidx:copy(sampleidx)

   local probs = self._noiseprob
   probs:index(self.unigrams, 1, longidx:view(-1))

   sampleprob:resize(sampleidx:size())
   sampleprob:copy(probs)
   return sampleprob
end
386 | 
-- Fills sampleidx (resized to batchsize x k) with indices drawn from the
-- alias-method sampler and returns it. A LongTensor is drawn into directly;
-- any other tensor type is filled through the LongTensor buffer
-- self._noiseidx.
function NCEModule:noiseSample(sampleidx, batchsize, k)
   if torch.type(sampleidx) == 'torch.LongTensor' then
      sampleidx:resize(batchsize, k)
      self.aliasmultinomial:batchdraw(sampleidx)
      return sampleidx
   end

   local drawn = self._noiseidx or torch.LongTensor()
   self._noiseidx = drawn
   drawn:resize(batchsize, k)
   self.aliasmultinomial:batchdraw(drawn)
   sampleidx:resize(batchsize, k):copy(drawn)
   return sampleidx
end
399 | 
-- Releases the temporary buffers and empties output / gradInput so that the
-- module can be stored or serialized compactly. A forward pass is needed
-- before the next backward pass.
-- Returns self so that calls can be chained (e.g. m:clearState():float());
-- the method previously returned nothing.
function NCEModule:clearState()
   self.sampleidx = nil
   self.sampleprob = nil
   self._noiseidx = nil
   self._noiseprob = nil
   self._tgradOutput = nil
   self._gradOutput = nil
   -- the gathered weight rows and their gradients are the largest buffers;
   -- updateOutput / accGradParameters recreate them on demand (type() already
   -- discards them in the same way)
   self._weight = nil
   self._gradWeight = nil
   -- output is a tensor after a normalized evaluation pass, a table of tensors otherwise
   if torch.isTensor(self.output) then
      self.output:set()
   else
      for i,output in ipairs(self.output) do
         output:set()
      end
   end
   for i,gradInput in ipairs(self.gradInput) do
      gradInput:set()
   end
   return self
end
418 | 
-- Splits weight and gradWeight column-wise (along the input dimension) into
-- two halves, copies the first half to device1 and the second to device2, and
-- wraps each pair in a torch.MultiCudaTensor. The module is converted to
-- float before the split and to cuda afterwards.
function NCEModule:multicuda(device1, device2)
   assert(device1 and device2, "specify two devices as arguments")
   require 'torchx'
   assert(torchx.version and torchx.version >= 1, "update torchx: luarocks install torchx")

   self:float()

   local isize = self.weight:size(2)
   local half = torch.round(isize/2)

   -- returns {columns 1..half on device1, columns half+1..isize on device2}
   local function splitColumns(tensor)
      local first = cutorch.withDevice(device1, function()
         return tensor[{{}, {1, half}}]:cuda()
      end)
      local second = cutorch.withDevice(device2, function()
         return tensor[{{}, {half+1, isize}}]:cuda()
      end)
      return {first, second}
   end

   self.weight = torch.MultiCudaTensor(2, splitColumns(self.weight))
   self.gradWeight = torch.MultiCudaTensor(2, splitColumns(self.gradWeight))

   self:cuda()
end
440 | 


--------------------------------------------------------------------------------
/Module.lua:
--------------------------------------------------------------------------------
  1 | local _ = require 'moses'
  2 | 
  3 | local Module = nn.Module
  4 | 
-- Default implementation: forwards every value returned by self:parameters().
-- Callers in this file (updateParameters, zeroGradParameters) read up to three
-- results: params, gradParams and an optional table of per-parameter scales.
function Module:sparseParameters()
   return self:parameters()
end
  8 | 
  9 | function Module:updateParameters(learningRate)
 10 |    -- sparse params can have different learningRate scales per param
 11 |    local params, gradParams, scales = self:sparseParameters()
 12 |    if params then
 13 |       for i,param in pairs(params) do -- pairs for sparse params
 14 |          local scale = scales and scales[i] or 1
 15 |          param:add(-learningRate*scale, gradParams[i])
 16 |       end
 17 |    end
 18 | end
 19 | 
 20 | function Module:zeroGradParameters()
 21 |    local _,gradParams = self:sparseParameters()
 22 |    if gradParams then
 23 |       for i,gradParam in pairs(gradParams) do -- pairs for sparse params
 24 |          gradParam:zero()
 25 |       end
 26 |    end
 27 | end
 28 | 
 29 | ------------------------ clone and type --------------------------------
 30 | 
-- Attribute names that sharedClone treats as parameters (shared when
-- shareParams is set) and as gradient parameters (shared when shareGradParams
-- is set). NCEModule, for instance, extends the first list with its own entries.
Module.dpnn_parameters = {'weight', 'bias'}
Module.dpnn_gradParameters = {'gradWeight', 'gradBias'}
 33 | 
 34 | -- efficient version of :
 35 | -- clone = self:clone()
 36 | -- clone:share(self, paramNames, gradParamNames)
 37 | -- Note that this method is the very bane of my existence. 
 38 | -- I have worked on it too many times...
 39 | function Module:sharedClone(shareParams, shareGradParams, stepClone)  
 40 |    shareParams = (shareParams == nil) and true or shareParams
 41 |    shareGradParams = (shareGradParams == nil) and true or shareGradParams
 42 |    
 43 |    if stepClone and self.dpnn_stepclone then
 44 |       -- this is for AbstractRecurrent modules (in rnn)
 45 |       return self
 46 |    end
 47 |    
 48 |    local pointers = {} -- to params/gradParams (dont clone params/gradParams)
 49 |    local scdone = {}
 50 |    
 51 |    -- 1. remove all params/gradParams 
 52 |    local function recursiveRemove(obj) -- remove modules
 53 |       local moduleTree
 54 |       local isTable = type(obj) == 'table' 
 55 |       if torch.isTypeOf(obj, 'nn.Module') then
 56 |          assert(isTable)
 57 |          if stepClone and obj.dpnn_stepclone then
 58 |             -- this is for AbstractRecurrent modules (in rnn)
 59 |             moduleTree = obj
 60 |             obj = nil
 61 |             isTable = false
 62 |          elseif obj.dpnn_sharedClone then
 63 |             -- allow to use a custom sharedClone method on one module
 64 |             moduleTree = obj
 65 |             obj = nil
 66 |             isTable = false
 67 |          elseif scdone[torch.pointer(obj)] then
 68 |             moduleTree = scdone[torch.pointer(obj)]
 69 |          else
 70 |             -- remove the params, gradParams. Save for later.
 71 |             local params = {}
 72 |             
 73 |             if shareParams then
 74 |                for i,paramName in ipairs(obj.dpnn_parameters) do
 75 |                   local param = obj[paramName]
 76 |                   if param then
 77 |                      params[paramName] = param
 78 |                      obj[paramName] = nil
 79 |                      if torch.isTensor(param) and param.storage and param:storage() then
 80 |                         pointers[torch.pointer(param:storage():data())] = true
 81 |                      end
 82 |                   end
 83 |                end
 84 |             end
 85 |             
 86 |             if shareGradParams then
 87 |                for i,paramName in ipairs(obj.dpnn_gradParameters) do
 88 |                   local gradParam = obj[paramName]
 89 |                   if gradParam then
 90 |                      params[paramName] = gradParam
 91 |                      obj[paramName] = nil
 92 |                      if torch.isTensor(gradParam) and gradParam.storage and gradParam:storage() then
 93 |                         pointers[torch.pointer(gradParam:storage():data())] = true
 94 |                      end
 95 |                   end
 96 |                end
 97 |             end
 98 |             
 99 |             -- find all obj.attribute tensors that share storage with the shared params
100 |             for paramName, param in pairs(obj) do
101 |                if torch.isTensor(param) and param:storage() then
102 |                   if pointers[torch.pointer(param:storage():data())] then
103 |                      params[paramName] = param
104 |                      obj[paramName] = nil
105 |                   end
106 |                end
107 |             end
108 |             
109 |             moduleTree = params
110 |             
111 |             scdone[torch.pointer(obj)] = moduleTree
112 |             
113 |             for k,v in pairs(obj) do
114 |                moduleTree[k], obj[k] = recursiveRemove(v)
115 |             end
116 |             
117 |          end
118 |       elseif isTable then
119 |          if scdone[torch.pointer(obj)] then
120 |             moduleTree = scdone[torch.pointer(obj)]
121 |          else
122 |             assert(not moduleTree)
123 |             moduleTree = {}
124 |             for k,v in pairs(obj) do
125 |                moduleTree[k], obj[k] = recursiveRemove(v)
126 |             end 
127 |             scdone[torch.pointer(obj)] = moduleTree
128 |          end
129 |             
130 |       end
131 |       
132 |       return moduleTree, obj
133 |    end
134 |    
135 |    local moduleTree, original = recursiveRemove(self)
136 |    assert(original)
137 |    
138 |    -- 2. clone everything but parameters, gradients and modules (removed above)
139 |    
140 |    local clone = self:clone()
141 |  
142 |    -- 3. add back to self/clone everything that was removed in step 1
143 |    
144 |    local function recursiveSet(clone, original, moduleTree)
145 |       assert(clone)
146 |       assert(original)
147 |       if scdone[torch.pointer(original)] then
148 |          for k,param in pairs(moduleTree) do
149 |             if torch.isTypeOf(param,'nn.Module') then
150 |                if param.dpnn_sharedClone then
151 |                   -- Call the custom sharedClone
152 |                   clone[k] = param:dpnn_sharedClone()
153 |                else
154 |                   -- AbstractRecurrent instances branch here with stepClone = true
155 |                   clone[k] = param
156 |                end
157 |                original[k] = param
158 |             elseif torch.isTensor(param) then
159 |                if param.storage then
160 |                   clone[k] = param.new():set(param)
161 |                   original[k] = param
162 |                else -- for torch.MultiCudaTensor
163 |                   clone[k] = param
164 |                   original[k] = param
165 |                end
166 |             elseif type(param) == 'table' then
167 |                recursiveSet(clone[k], original[k], param)
168 |             end
169 |          end 
170 |          scdone[torch.pointer(original)] = nil
171 |       end
172 |          
173 |    end
174 |    
175 |    recursiveSet(clone, self, moduleTree)
176 |    
177 |    return clone
178 | end      
179 | 
-- Overridden so that "hidden" modules are included in getParameters().
-- Hidden modules are modules referenced from attributes of self (directly
-- or through nested plain tables) rather than only through self.modules;
-- they are common in recurrent modules that keep internal references to
-- modules sharing parameters with the main modules.
-- They must take part in the flattening in order to maintain shared
-- storage for tensors.
-- Returns the flattened parameters and the flattened gradParameters.
function Module:getParameters()

   -- throw-away container that accumulates every module that is found
   local container = nn.Container()
   container:add(self)

   -- depth-first walk over attributes : modules are added to the container
   -- (once each, guarded by a temporary flag), plain tables are descended
   local function collect(node)
      for key, value in pairs(node) do
         local isModule = torch.isTypeOf(value, 'nn.Module')
         if isModule and not value.dpnn_getParameters_found then
            container:add(value)
            value.dpnn_getParameters_found = true
            collect(value)
         elseif (not isModule) and torch.type(value) == 'table' then
            collect(value)
         end
      end
   end

   collect(self)

   -- remove the temporary flags set during the walk
   local found = container.modules
   for i = 1, #found do
      found[i].dpnn_getParameters_found = nil
   end

   -- get ALL parameters
   local parameters, gradParameters = container:parameters()
   return Module.flatten(parameters), Module.flatten(gradParameters)
end
217 | 
218 | ----------------- serialization (see nn.Serial) -------------------
219 | 
-- attribute names emptied by mediumSerial() (and lightSerial())
Module.dpnn_mediumEmpty = {
   'output',
   'gradInput',
   'momGradParams',
   'dpnn_input'
}
-- attribute names additionally emptied by lightSerial()
Module.dpnn_lightEmpty = Module.dpnn_gradParameters
-- nothing is emptied by default, i.e. defaults to heavy serialization
Module.dpnn_serialEmpty = {}
224 | 
-- Sets the serialization behavior of the entire module structure.
-- empty : table of attribute names; stored as self.dpnn_serialEmpty,
--         which getSerialState() consults to decide what to empty.
-- The same table is propagated to every module reachable through the
-- attributes of self (including modules nested inside plain tables).
-- Returns self.
function Module:serialMode(empty)
   assert(torch.type(empty) == 'table', "Expecting table at arg 1")
   self.dpnn_serialEmpty = empty

   -- modules handle their own sub-structure; plain tables are descended
   local function propagate(node)
      for key, value in pairs(node) do
         local isModule = torch.isTypeOf(value, 'nn.Module')
         if isModule then
            value:serialMode(empty)
         elseif torch.type(value) == 'table' then
            propagate(value)
         end
      end
   end

   propagate(self)
   return self
end
242 | 
-- serialMode : serialize everything (no attribute is emptied).
-- Returns self.
function Module:heavySerial()
   local nothingEmptied = {}
   return self:serialMode(nothingEmptied)
end
247 | 
-- serialMode : serialize everything except dpnn_mediumEmpty attributes.
-- Each module uses its own dpnn_mediumEmpty list, which is why this does
-- not simply forward a single table through serialMode().
-- Returns self.
function Module:mediumSerial()

   self.dpnn_serialEmpty = self.dpnn_mediumEmpty

   -- apply the same mode to all encapsulated modules;
   -- plain tables are descended in search of modules
   local function propagate(node)
      for key, value in pairs(node) do
         local isModule = torch.isTypeOf(value, 'nn.Module')
         if isModule then
            value:mediumSerial()
         elseif torch.type(value) == 'table' then
            propagate(value)
         end
      end
   end

   propagate(self)
   return self
end
266 | 
-- serialMode : serialize everything except dpnn_mediumEmpty and
-- dpnn_lightEmpty attributes.
-- Returns self.
function Module:lightSerial()

   -- work on a copy so that dpnn_mediumEmpty itself is left untouched
   local empties = _.clone(self.dpnn_mediumEmpty)
   self.dpnn_serialEmpty = empties
   for i, name in ipairs(self.dpnn_lightEmpty) do
      empties[#empties + 1] = name
   end

   -- apply the same mode to all encapsulated modules;
   -- plain tables are descended in search of modules
   local function propagate(node)
      for key, value in pairs(node) do
         local isModule = torch.isTypeOf(value, 'nn.Module')
         if isModule then
            value:lightSerial()
         elseif torch.type(value) == 'table' then
            propagate(value)
         end
      end
   end

   propagate(self)
   return self
end
289 | 
-- Returns the structure of the module as a table built with _.map
-- (i.e. without the module's metatable), where the attributes listed
-- in dpnn_serialEmpty are replaced by empty tables/tensors.
-- states : optional table mapping already visited modules/tables to their
--          state, so that shared objects yield one shared state.
-- The returned state has an extra dpnn_typename field holding
-- torch.type(self).
function Module:getSerialState(states)
   states = states or {}

   -- dont get the serial state of the same module twice (reuse existing)
   local cached = states[self]
   if cached then
      return cached
   end

   local function recursiveState(tbl)
      -- only modules have attributes that can be "emptied"
      local isModule = torch.isTypeOf(tbl, 'nn.Module')

      local function convert(k, v)
         if isModule and _.contains(tbl.dpnn_serialEmpty, k) then
            if torch.type(v) == 'table' then
               -- empty table
               return {}
            end
            if torch.isTensor(v) then
               -- empty tensor of the same type
               return v.new()
            end
            -- not table nor tensor? then serialize as is
            return v
         end

         if torch.isTypeOf(v, 'nn.Module') then
            -- delegated to the module so that it can be overwritten
            return v:getSerialState(states)
         end

         if torch.type(v) == 'table' then
            -- in case it is a table of modules
            local known = states[v]
            if not known then
               known = recursiveState(v)
               states[v] = known
            end
            return known
         end

         return v
      end

      local state = _.map(tbl, convert)
      return state
   end

   local state = recursiveState(self)

   -- include typename so that module can be reconstructed from the state
   state.dpnn_typename = torch.type(self)
   states[self] = state

   return state
end
339 | 
-- Decorates self with nn.Serial.
-- tensortype : forwarded as is to the nn.Serial constructor
-- Returns the nn.Serial instance wrapping self.
function Module:Serial(tensortype)
   local decorated = nn.Serial(self, tensortype)
   return decorated
end
344 | 
345 | ----------------------- for training -----------------------------
346 | 
-- Useful to get the output size.
-- I chose this method name because it is less likely to be overridden.
-- insize : a table of dimension sizes (unpacked into torch.randn),
--          or any single argument accepted by torch.randn.
-- A random input of that size is forwarded through updateOutput and the
-- size() of the resulting output is returned.
function Module:outside(insize)
   local sizeIsTable = torch.type(insize) == 'table'
   local input = sizeIsTable
      and torch.randn(table.unpack(insize))
      or torch.randn(insize)
   return self:updateOutput(input):size()
end
359 | 
-- For those interested in implementing the visitor design pattern :
-- hands self over to the visitor's visit method.
function Module:accept(visitor)
   visitor.visit(visitor, self)
end
364 | 
-- Can be used as a regularizer instead of weight decay.
-- Assumes that parameters are arranged (output dim x ... x input dim).
-- maxOutNorm : max L2 norm enforced along dimension 1 of each param
-- maxInNorm  : max L2 norm enforced along the last dimension of each param
-- Both are optional; a module attribute of the same name takes precedence
-- over the argument. Values that are nil or <= 0 disable that constraint.
-- Containers (modules having self.modules) delegate to their children.
function Module:maxParamNorm(maxOutNorm, maxInNorm)
   -- this allows each module to set its own max[Out,In]Norm
   maxOutNorm = self.maxOutNorm or maxOutNorm
   maxInNorm = self.maxInNorm or maxInNorm
   if not (maxOutNorm or maxInNorm) then
      return
   end

   if self.modules then
      for i,module in ipairs(self.modules) do
         module:maxParamNorm(maxOutNorm, maxInNorm)
      end
      return
   end

   -- only the params are needed here. The previous guard also tested an
   -- undeclared global named gradParams, which silently disabled the
   -- constraint whenever a script happened to define such a global.
   local params = self:parameters()
   if not params then
      return
   end

   for k,param in pairs(params) do -- pairs for sparse params
      -- By default, only affects non-1D params.
      if param:dim() > 1 then
         if maxOutNorm and maxOutNorm > 0 then
            -- rows feed into output neurons
            param:renorm(2, 1, maxOutNorm)
         end
         if maxInNorm and maxInNorm > 0 then
            -- cols feed out from input neurons
            param:renorm(2, param:dim(), maxInNorm)
         end
      end
   end
end
399 | 
-- Similar to maxParamNorm, but norm is global to Module for which
-- this is called. Unless moduleLocal is true, in which case, the
-- norm constraint is applied to the norm of all parameters in each
-- component (non-container) module.
-- cutoffNorm  : gradParams are rescaled when their norm exceeds this value
--               (self.cutoffNorm takes precedence over the argument)
-- moduleLocal : see above (self.moduleLocal takes precedence when not nil)
-- Returns the norm of the gradParams as measured before rescaling,
-- or nothing when cutoffNorm <= 0.
function Module:gradParamClip(cutoffNorm, moduleLocal)
   -- this allows each module to set its own cutoffNorm
   cutoffNorm = self.cutoffNorm or cutoffNorm
   if cutoffNorm <= 0 then
      return
   end
   if self.moduleLocal ~= nil then
      moduleLocal = self.moduleLocal
   end

   -- runs fn on the device of a CudaTensor (support multi-device models),
   -- or directly for any other tensor type
   local function onDevice(tensor, fn)
      if torch.type(tensor) == 'torch.CudaTensor' then
         cutorch.withDevice(tensor:getDevice(), fn)
      else
         fn()
      end
   end

   local norm = 0
   if moduleLocal and self.modules then
      for i,module in ipairs(self.modules) do
         -- a child whose own cutoffNorm is <= 0 returns nothing :
         -- count it as a zero contribution instead of raising an error
         local childNorm = module:gradParamClip(cutoffNorm, moduleLocal) or 0
         norm = norm + math.pow(childNorm, 2)
      end
      return math.sqrt(norm)
   end

   local params, gradParams = self:parameters()
   if not (params and gradParams) then
      return norm
   end

   for k,gradParam in pairs(gradParams) do -- pairs for sparse params
      onDevice(gradParam, function()
         norm = norm + math.pow(gradParam:norm(),2)
      end)
   end
   norm = math.sqrt(norm)

   if norm > cutoffNorm then
      -- rescale gradParams to obtain desired cutoffNorm
      local scale = cutoffNorm/norm
      for k,gradParam in pairs(gradParams) do
         onDevice(gradParam, function()
            gradParam:mul(scale)
         end)
      end
   end
   return norm
end
450 | 
-- Adds a weight decay term (wdFactor * param) to the gradParams of every
-- param having at least wdMinDim dimensions (default 2).
-- wdFactor : decay coefficient; nothing is done when it is <= 0
-- wdMinDim : minimum param:dim() for the decay to apply
-- Module attributes of the same names take precedence over the arguments.
-- Containers (modules having self.modules) delegate to their children.
-- TODO : allow inplace weightDecay (before calling accUpdateGradParameters)
function Module:weightDecay(wdFactor, wdMinDim)
   -- this allows each module to set its own hyper-parameters
   wdFactor = self.wdFactor or wdFactor
   if wdFactor <= 0 then
      return
   end
   wdMinDim = self.wdMinDim or wdMinDim or 2

   local children = self.modules
   if children then
      for i, child in ipairs(children) do
         child:weightDecay(wdFactor, wdMinDim)
      end
      return
   end

   local params, gradParams = self:parameters()
   if (not params) or (not gradParams) then
      return
   end

   for index, weight in pairs(params) do -- pairs for sparse params
      if weight:dim() >= wdMinDim then
         gradParams[index]:add(wdFactor, weight)
      end
   end
end
478 | 
-- Returns self.momGradParams, the table of momentum buffers.
-- When that table is missing or empty, it is first created with one
-- buffer per gradParam, each initialized as a copy of that gradParam.
-- Returns nothing when the buffers need to be created but the module
-- has no gradParams.
function Module:momentumGradParameters()
   local buffers = self.momGradParams
   if buffers and not _.isEmpty(buffers) then
      return buffers
   end

   local params, gradParams = self:parameters()
   if not gradParams or _.isEmpty(gradParams) then
      return
   end

   self.momGradParams = {}
   for index, gradParam in pairs(gradParams) do
      local function snapshot()
         self.momGradParams[index] = gradParam.new():resizeAs(gradParam):copy(gradParam)
      end
      if torch.type(gradParam) == 'torch.CudaTensor' then
         -- support multi-device models
         cutorch.withDevice(gradParam:getDevice(), snapshot)
      else
         snapshot()
      end
   end
   return self.momGradParams
end
498 | 
-- Uses momentum learning to update gradParams.
-- momFactor   : momentum coefficient; nothing is done when it is <= 0
-- momDamp     : dampening, defaults to momFactor
-- momNesterov : when true, momFactor * momentum is added to the gradParams
--               instead of the gradParams being replaced by the momentum
-- Module attributes of the same names take precedence over the arguments.
-- Containers (modules having self.modules) delegate to their children.
function Module:updateGradParameters(momFactor, momDamp, momNesterov)
   -- this allows each module to set its own hyper-parameters
   momFactor = self.momFactor or momFactor
   if momFactor <= 0 then
      return
   end
   momDamp = self.momDamp or momDamp or momFactor
   if self.momNesterov ~= nil then
      momNesterov = self.momNesterov
   end

   local children = self.modules
   if children then
      for i, child in ipairs(children) do
         child:updateGradParameters(momFactor, momDamp, momNesterov)
      end
      return
   end

   local params, gradParams = self:parameters()
   if (not params) or _.isEmpty(params) then
      return
   end
   local momGradParams = self:momentumGradParameters()

   -- first pass : mom = momFactor * mom + (1 - momDamp) * grad
   for index, gradParam in pairs(gradParams) do
      local velocity = momGradParams[index]
      velocity:mul(momFactor)
      velocity:add(1-momDamp, gradParam)
   end

   -- second pass (kept separate from the first one on purpose) :
   -- fold the momentum back into the gradParams
   for index, gradParam in pairs(gradParams) do
      if momNesterov then
         gradParam:add(momFactor, momGradParams[index])
      else
         gradParam:copy(momGradParams[index])
      end
   end
end
537 | 
-- Raises an error for the first param (in pairs order) whose sum is NaN.
-- Does nothing for modules without params.
function Module:checkParameters()
   local params = self:parameters()
   if not params then
      return
   end
   for index, param in pairs(params) do
      local total = param:sum()
      if _.isNaN(total) then
         error("NaN Error for param at index" ..index)
      end
   end
end
546 | 
-- Disables the backward pass of this instance by overriding, on self,
-- each of the methods below with a function that does nothing.
-- Returns self.
function Module:dontBackward()
   local disabled = {
      'backward',
      'updateGradInput',
      'accGradParameters',
      'accUpdateGradParameters'
   }
   for i, methodName in ipairs(disabled) do
      self[methodName] = function() end
   end
   return self
end
554 | 
-- Returns a contiguous version of input.
-- input    : a tensor
-- backward : when true, no copy is made : self.dpnn_cinput is returned
--            if it exists, else input itself
-- Otherwise, a contiguous input is returned as is, while a non-contiguous
-- one is copied into the self.dpnn_cinput buffer (created on first use),
-- which is then returned.
function Module:contiguousInput(input, backward)
   if backward then
      return self.dpnn_cinput or input
   end
   if input:isContiguous() then
      return input
   end
   local buffer = self.dpnn_cinput or input.new()
   self.dpnn_cinput = buffer
   buffer:resizeAs(input):copy(input)
   return buffer
end
566 | 
-- Views a tensor having nDim dimensions as a batch of one sample.
-- tensor   : the tensor to convert
-- nDim     : number of dimensions of a non-batch tensor
-- batchDim : where the singleton batch dimension is inserted (default 1)
-- Sets self.dpnn_online to whether the tensor had exactly nDim dimensions
-- (read back by fromBatch). Tensors with any other number of dimensions
-- are returned unchanged.
function Module:toBatch(tensor, nDim, batchDim)
   batchDim = batchDim or 1
   self.dpnn_online = (tensor:dim() == nDim)
   if not self.dpnn_online then
      return tensor
   end
   local size = tensor:size():totable()
   table.insert(size, batchDim, 1)
   return tensor:view(table.unpack(size))
end
579 | 
-- Undoes toBatch : when self.dpnn_online is set, the singleton dimension
-- found at batchDim is removed from the tensor (through a view).
-- tensor   : the tensor to convert
-- batchDim : position of the singleton batch dimension. Defaults to 1,
--            like in toBatch (without a default, table.remove would
--            remove the last dimension instead).
-- Raises an error if the dimension to remove is not of size 1.
-- Returns the tensor unchanged when self.dpnn_online is not set.
function Module:fromBatch(tensor, batchDim)
   batchDim = batchDim or 1
   if self.dpnn_online then
      local size = tensor:size():totable()
      assert(table.remove(size, batchDim) == 1)
      tensor = tensor:view(table.unpack(size))
   end
   return tensor
end
588 | 
-- Extrapolates the tensor type of the module from its parameters.
-- Returns the torch.type that is the most frequent among the params
-- (ties are broken arbitrarily, following pairs order), or nil when the
-- module has no params.
function Module:extrapolateType()
   -- was module:parameters(), which indexed an undefined global
   local params = self:parameters()
   if not params then
      return nil --unknown otherwise
   end

   -- count how many params there are of each tensor type
   local counts = {}
   for k, param in pairs(params) do -- pairs for sparse params
      local tensorType = torch.type(param)
      counts[tensorType] = (counts[tensorType] or 0) + 1
   end

   local maxCount = 0
   local maxType
   for tensorType, count in pairs(counts) do
      if count > maxCount then
         -- was assigned to a misspelled global (maxtype), such that
         -- nil was always returned
         maxType = tensorType
         maxCount = count
      end
   end
   return maxType
end
610 | 
-- Sets the dpnn_profile flag of this module, after having done the same
-- for the modules listed in self.modules (if any), recursively.
function Module:profile()
   local children = self.modules or {}
   for i, child in ipairs(children) do
      child:profile()
   end
   self.dpnn_profile = true
end
619 | 
-- Forwards the reward to the modules listed in self.modules (if any).
-- This base implementation does nothing else with the reward.
function Module:reinforce(reward)
   local children = self.modules
   if not children then
      return
   end
   for i, child in ipairs(children) do
      child:reinforce(reward)
   end
end
627 | 


--------------------------------------------------------------------------------