├── test ├── CMakeLists.txt └── test_firemodule.lua ├── tutorials ├── lena.jpg ├── srd1.jpg ├── srd2.jpg ├── ladder.md └── ladder_network │ ├── ladder_help_funcs.lua │ └── ladder.lua ├── Criterion.lua ├── SpatialConvolutionMM.lua ├── SpatialMaxPooling.lua ├── Dictionary.lua ├── SpatialConvolution.lua ├── SpatialBatchNormalization.lua ├── CMakeLists.txt ├── LookupTable.lua ├── BatchNormalization.lua ├── Collapse.lua ├── rocks └── dpnn-scm-1.rockspec ├── PrintSize.lua ├── ZipTable.lua ├── ReverseTable.lua ├── WhiteNoise.lua ├── SoftMaxTree.lua ├── ZipTableOneToMany.lua ├── Clip.lua ├── TotalDropout.lua ├── CAddTensorTable.lua ├── Constant.lua ├── Container.lua ├── Serial.lua ├── LICENSE.txt ├── ModuleCriterion.lua ├── FireModule.lua ├── Decorator.lua ├── ParallelTable.lua ├── SoftMaxForest.lua ├── ReinforceBernoulli.lua ├── Reinforce.lua ├── OneHot.lua ├── ArgMax.lua ├── init.lua ├── ReinforceCategorical.lua ├── CategoricalEntropy.lua ├── NaN.lua ├── SpatialFeatNormalization.lua ├── SpatialBinaryLogisticRegression.lua ├── SpatialRegionDropout.lua ├── BinaryClassReward.lua ├── BinaryLogisticRegression.lua ├── NCECriterion.lua ├── SimpleColorTransform.lua ├── VRClassReward.lua ├── DontCast.lua ├── Sequential.lua ├── ReinforceNormal.lua ├── SpatialUniformCrop.lua ├── ReinforceGamma.lua ├── PCAColorTransform.lua ├── SpatialBinaryConvolution.lua ├── Kmeans.lua ├── SpatialGlimpse.lua ├── Inception.lua ├── Convert.lua ├── NCEModule.lua └── Module.lua /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | install_files(${INSTALL_PREFIX} test.lua) 3 | -------------------------------------------------------------------------------- /tutorials/lena.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholas-leonard/dpnn/HEAD/tutorials/lena.jpg -------------------------------------------------------------------------------- /tutorials/srd1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholas-leonard/dpnn/HEAD/tutorials/srd1.jpg -------------------------------------------------------------------------------- /tutorials/srd2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicholas-leonard/dpnn/HEAD/tutorials/srd2.jpg -------------------------------------------------------------------------------- /Criterion.lua: -------------------------------------------------------------------------------- 1 | local Criterion = nn.Criterion 2 | 3 | Criterion.toBatch = nn.Module.toBatch 4 | Criterion.fromBatch = nn.Module.fromBatch 5 | -------------------------------------------------------------------------------- /SpatialConvolutionMM.lua: -------------------------------------------------------------------------------- 1 | local SpatialConvolutionMM, parent = nn.SpatialConvolutionMM, nn.Module 2 | 3 | SpatialConvolutionMM.dpnn_mediumEmpty = nn.SpatialConvolution.dpnn_mediumEmpty 4 | -------------------------------------------------------------------------------- /SpatialMaxPooling.lua: -------------------------------------------------------------------------------- 1 | local SpatialMaxPooling, parent = nn.SpatialMaxPooling, nn.Module 2 | local _ = require 'moses' 3 | 4 | local empty = _.clone(parent.dpnn_mediumEmpty) 5 | table.insert(empty, 'indices') 6 | SpatialMaxPooling.dpnn_mediumEmpty = empty 7 | -------------------------------------------------------------------------------- /Dictionary.lua: -------------------------------------------------------------------------------- 1 | local Dictionary, parent = torch.class("nn.Dictionary", "nn.LookupTable") 2 | 3 | -- don't use this with optim (useless), use nn.LookupTable instead 4 | function Dictionary:__init(dictSize, embeddingSize, accUpdate) 5 | error"DEPRECATED Jan 14, 2016" 6 | end 7 | -------------------------------------------------------------------------------- /SpatialConvolution.lua: -------------------------------------------------------------------------------- 1 | local SpatialConvolution, parent = nn.SpatialConvolution, nn.Module 2 | local _ = require 'moses' 3 | 4 | local empty = _.clone(parent.dpnn_mediumEmpty) 5 | table.insert(empty, 'finput') 6 | table.insert(empty, 'fgradinput') 7 | table.insert(empty, '_input') 8 | table.insert(empty, '_gradOutput') 9 | SpatialConvolution.dpnn_mediumEmpty = empty 10 | -------------------------------------------------------------------------------- /SpatialBatchNormalization.lua: -------------------------------------------------------------------------------- 1 | local BN, parent = nn.SpatialBatchNormalization, nn.Module 2 | local _ = require 'moses' 3 | 4 | local empty = _.clone(parent.dpnn_mediumEmpty) 5 | table.insert(empty, 'buffer') 6 | table.insert(empty, 'buffer2') 7 | table.insert(empty, 'centered') 8 | table.insert(empty, 'std') 9 | table.insert(empty, 'normalized') 10 | table.insert(empty, 'output') 11 | table.insert(empty, 'gradInput') 12 | BN.dpnn_mediumEmpty = empty 13 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) 3 | CMAKE_POLICY(VERSION 2.6) 4 | IF(LUAROCKS_PREFIX) 5 | MESSAGE(STATUS "Installing Torch through Luarocks") 6 | STRING(REGEX REPLACE "(.*)lib/luarocks/rocks.*" "\\1" CMAKE_INSTALL_PREFIX "${LUAROCKS_PREFIX}") 7 | MESSAGE(STATUS "Prefix inferred from Luarocks: ${CMAKE_INSTALL_PREFIX}") 8 | ENDIF() 9 | FIND_PACKAGE(Torch REQUIRED) 10 | 11 | SET(src) 12 | FILE(GLOB luasrc *.lua) 13 | 14 | SET(luasrc ${luasrc} test/test.lua) 15 | ADD_TORCH_PACKAGE(dpnn "${src}" "${luasrc}" "Deep Neural Networks") 16 | -------------------------------------------------------------------------------- /LookupTable.lua: -------------------------------------------------------------------------------- 1 | local LookupTable, parent = nn.LookupTable, nn.Module 2 | 3 | function LookupTable:maxParamNorm(maxOutNorm, maxInNorm) 4 | maxOutNorm = self.maxOutNorm or maxOutNorm or self.maxInNorm or maxInNorm 5 | if not (maxOutNorm or maxInNorm) then 6 | return 7 | end 8 | 9 | if maxOutNorm and maxOutNorm > 0 then 10 | -- cols feed into output neurons 11 | self.weight:renorm(2, 2, maxOutNorm) 12 | end 13 | if maxInNorm and maxInNorm > 0 then 14 | -- rows feed out from input neurons 15 | self.weight:renorm(2, 1, maxInNorm) 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /BatchNormalization.lua: -------------------------------------------------------------------------------- 1 | local _ = require 'moses' 2 | local BN, parent = nn.BatchNormalization, nn.Module 3 | 4 | local empty = _.clone(parent.dpnn_mediumEmpty) 5 | table.insert(empty, 'buffer') 6 | table.insert(empty, 'buffer2') 7 | table.insert(empty, 'centered') 8 | table.insert(empty, 'std') 9 | table.insert(empty, 'normalized') 10 | table.insert(empty, 'output') 11 | table.insert(empty, 'gradInput') 12 | BN.dpnn_mediumEmpty = empty 13 | 14 | -- for sharedClone 15 | local params = _.clone(parent.dpnn_parameters) 16 | table.insert(params, 'running_mean') 17 | table.insert(params, 'running_var') 18 | BN.dpnn_parameters = params 19 | -------------------------------------------------------------------------------- /Collapse.lua: -------------------------------------------------------------------------------- 1 | local Collapse, parent = torch.class('nn.Collapse', 'nn.Module') 2 | 3 | -- collapses non-batch dims 4 | function Collapse:__init(nInputDim) 5 | parent.__init(self) 6 | self.nInputDim = nInputDim 7 | end 8 | 9 | function Collapse:updateOutput(input) 10 | if not input:isContiguous() then 11 | self._input = self._input or input.new() 12 | self._input:resize(input:size()):copy(input) 13 | input = self._input 14 | end 15 | if input:dim() > self.nInputDim then 16 | self.output:view(input,input:size(1),-1) 17 | else 18 | self.output:view(input,-1) 19 | end 20 | return self.output 21 | end 22 | 23 | function Collapse:updateGradInput(input, gradOutput) 24 | self.gradInput:view(gradOutput, input:size()) 25 | return self.gradInput 26 | end 27 | -------------------------------------------------------------------------------- /rocks/dpnn-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "dpnn" 2 | version = "scm-1" 3 | 4 | source = { 5 | url = "git://github.com/Element-Research/dpnn", 6 | tag = "master" 7 | } 8 | 9 | description = { 10 | summary = "deep extensions to nn Modules and Criterions", 11 | detailed = [[sharedClone, type, outside, updateGradParameters, Serial, Inception, etc.]], 12 | homepage = "https://github.com/Element-Research/dpnn", 13 | license = "BSD" 14 | } 15 | 16 | dependencies = { 17 | "torch >= 7.0", 18 | "torchx", 19 | "nn >= 1.0", 20 | "nnx >= 0.1", 21 | "moses >= 1.3.1" 22 | } 23 | 24 | build = { 25 | type = "command", 26 | build_command = [[ 27 | cmake -E make_directory build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUAROCKS_PREFIX)" -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE) 28 | ]], 29 | install_command = "cd build && $(MAKE) install" 30 | } 31 | -------------------------------------------------------------------------------- /PrintSize.lua: -------------------------------------------------------------------------------- 1 | local PrintSize, parent = torch.class('nn.PrintSize', 'nn.Module') 2 | 3 | function PrintSize:__init(prefix) 4 | parent.__init(self) 5 | self.prefix = prefix or "PrintSize" 6 | end 7 | 8 | function PrintSize:updateOutput(input) 9 | self.output = input 10 | local size 11 | if torch.type(input) == 'table' then 12 | size = input 13 | elseif torch.type(input) == 'nil' then 14 | size = 'missing size' 15 | else 16 | size = input:size() 17 | end 18 | print(self.prefix..":input\n", size) 19 | return self.output 20 | end 21 | 22 | 23 | function PrintSize:updateGradInput(input, gradOutput) 24 | local size 25 | if torch.type(gradOutput) == 'table' then 26 | size = gradOutput 27 | elseif torch.type(gradOutput) == 'nil' then 28 | size = 'missing size' 29 | else 30 | size = gradOutput:size() 31 | end 32 | print(self.prefix..":gradOutput\n", size) 33 | self.gradInput = gradOutput 34 | return self.gradInput 35 | end 36 | 37 | -------------------------------------------------------------------------------- /ZipTable.lua: -------------------------------------------------------------------------------- 1 | local ZipTable, parent = torch.class('nn.ZipTable', 'nn.Container') 2 | 3 | -- input : { {a1,a2}, {b1,b2}, {c1,c2} } 4 | -- output : { {a1,b1,c1}, {a2,b2,c2} } 5 | function ZipTable:__init() 6 | parent.__init(self) 7 | self.output = {} 8 | self.gradInput = {} 9 | end 10 | 11 | function ZipTable:updateOutput(inputTable) 12 | self.output = {} 13 | for i,inTable in ipairs(inputTable) do 14 | for j,input in ipairs(inTable) do 15 | local output = self.output[j] or {} 16 | output[i] = input 17 | self.output[j] = output 18 | end 19 | end 20 | return self.output 21 | end 22 | 23 | function ZipTable:updateGradInput(inputTable, gradOutputTable) 24 | self.gradInput = {} 25 | for i,gradOutTable in ipairs(gradOutputTable) do 26 | for j,gradOutput in ipairs(gradOutTable) do 27 | local gradInput = self.gradInput[j] or {} 28 | gradInput[i] = gradOutput 29 | self.gradInput[j] = gradInput 30 | end 31 | end 32 | return self.gradInput 33 | end 34 | 35 | -------------------------------------------------------------------------------- /test/test_firemodule.lua: -------------------------------------------------------------------------------- 1 | require 'nn' 2 | require 'dpnn' 3 | require 'cunn' 4 | require 'cutorch' 5 | 6 | --torch.setdefaulttensortype('torch.FloatTensor') 7 | 8 | -- FireModule issue 45 9 | --[[ 10 | m = nn.Sequential() 11 | m:add(nn.FireModule(1,1,1,1)) 12 | _, p = m:getParameters() 13 | print(p:sum()) 14 | 15 | m = m:cuda() 16 | _, p = m:getParameters() 17 | print(p:sum()) 18 | 19 | m:zeroGradParameters() 20 | print(p:sum())--]] 21 | 22 | 23 | -- Testing FireModule 24 | input = torch.rand(1, 3, 6, 6) 25 | model = nn.FireModule(3, 1, 1, 1, 'Tanh') 26 | print(model) 27 | print(model.module) 28 | parameters, gradParameters = model:getParameters() 29 | output = model:forward(input) 30 | grads = torch.rand(output:size()) 31 | gi = model:backward(input, grads) 32 | print(gi:mean(), gi:std(), gi:min(), gi:max()) 33 | 34 | cutorch.setDevice(1) 35 | model:cuda() 36 | print(model.module.modules[1].finput) 37 | cinput = input:cuda() 38 | output = model:forward(cinput) 39 | gi = model:backward(input:cuda(), grads:cuda()) 40 | print(gi:mean(), gi:std(), gi:min(), gi:max()) 41 | -------------------------------------------------------------------------------- /ReverseTable.lua: -------------------------------------------------------------------------------- 1 | local ReverseTable, parent = torch.class("nn.ReverseTable", "nn.Module") 2 | 3 | function ReverseTable:__init() 4 | parent.__init(self) 5 | self.output = {} 6 | self.gradInput = {} 7 | end 8 | 9 | function ReverseTable:updateOutput(inputTable) 10 | assert(torch.type(inputTable) == 'table', "Expecting table at arg 1") 11 | 12 | -- empty output table 13 | for k,v in ipairs(self.output) do 14 | self.output[k] = nil 15 | end 16 | 17 | -- reverse input 18 | local k = 1 19 | for i=#inputTable,1,-1 do 20 | self.output[k] = inputTable[i] 21 | k = k + 1 22 | end 23 | return self.output 24 | end 25 | 26 | function ReverseTable:updateGradInput(inputTable, gradOutputTable) 27 | -- empty gradInput table 28 | for k,v in ipairs(self.gradInput) do 29 | self.gradInput[k] = nil 30 | end 31 | 32 | -- reverse gradOutput 33 | local k = 1 34 | for i=#gradOutputTable,1,-1 do 35 | self.gradInput[k] = gradOutputTable[i] 36 | k = k + 1 37 | end 38 | return self.gradInput 39 | end 40 | -------------------------------------------------------------------------------- /WhiteNoise.lua: -------------------------------------------------------------------------------- 1 | local WhiteNoise, Parent = torch.class('nn.WhiteNoise', 'nn.Module') 2 | 3 | function WhiteNoise:__init(mean, std) 4 | Parent.__init(self) 5 | -- std corresponds to 50% for MNIST training data std. 6 | self.mean = mean or 0 7 | self.std = std or 0.1 8 | self.noise = torch.Tensor() 9 | end 10 | 11 | function WhiteNoise:updateOutput(input) 12 | self.output:resizeAs(input):copy(input) 13 | if self.train ~= false then 14 | self.noise:resizeAs(input) 15 | self.noise:normal(self.mean, self.std) 16 | self.output:add(self.noise) 17 | else 18 | if self.mean ~= 0 then 19 | self.output:add(self.mean) 20 | end 21 | end 22 | return self.output 23 | end 24 | 25 | function WhiteNoise:updateGradInput(input, gradOutput) 26 | if self.train ~= false then 27 | -- Simply return the gradients. 28 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 29 | else 30 | error('backprop only defined while training') 31 | end 32 | return self.gradInput 33 | end 34 | 35 | function WhiteNoise:__tostring__() 36 | return string.format('%s mean: %f, std: %f', 37 | torch.type(self), self.mean, self.std) 38 | end 39 | -------------------------------------------------------------------------------- /SoftMaxTree.lua: -------------------------------------------------------------------------------- 1 | local SoftMaxTree, parent = nn.SoftMaxTree, nn.Module 2 | local _ = require 'moses' 3 | 4 | function SoftMaxTree:momentumGradParameters() 5 | -- get dense view of momGradParams 6 | if not self.momGradParams or _.isEmpty(self.momGradParams) then 7 | assert(not self.accUpdate, "cannot use momentum with accUpdate") 8 | self.momGradParams = {self.gradWeight:clone():zero(), self.gradBias:clone():zero()} 9 | end 10 | local momGradParams = self.momGradParams 11 | if self.static and not _.isEmpty(self.updates) then 12 | local momGradWeight = momGradParams[1] 13 | local momGradBias = momGradParams[2] 14 | momGradParams = {} 15 | -- only return the parameters affected by the forward/backward 16 | for parentId, scale in pairs(self.updates) do 17 | local node = self.parentChildren:select(1, parentId) 18 | local parentIdx = node[1] 19 | local nChildren = node[2] 20 | momGradParams[parentId] = momGradWeight:narrow(1, parentIdx, nChildren) 21 | local biasId = parentId+self.maxParentId 22 | momGradParams[biasId] = momGradBias:narrow(1, parentIdx, nChildren) 23 | end 24 | end 25 | return momGradParams 26 | end 27 | -------------------------------------------------------------------------------- /ZipTableOneToMany.lua: -------------------------------------------------------------------------------- 1 | local ZipTableOneToMany, parent = torch.class('nn.ZipTableOneToMany', 'nn.Container') 2 | 3 | -- based on ZipTable in dpnn 4 | 5 | -- input : { v, {a, b, c} } 6 | -- output : { {v,a}, {v,b}, {v,c} } 7 | function ZipTableOneToMany:__init() 8 | parent.__init(self) 9 | self.output = {} 10 | self.gradInput = {} 11 | -- make buffer to update during forward/backward 12 | self.gradInputEl = torch.Tensor() 13 | end 14 | 15 | function ZipTableOneToMany:updateOutput(input) 16 | assert(#input == 2, "input must be table of element and table") 17 | local inputEl, inputTable = input[1], input[2] 18 | self.output = {} 19 | for i,v in ipairs(inputTable) do 20 | self.output[i] = {inputEl, v} 21 | end 22 | return self.output 23 | end 24 | 25 | function ZipTableOneToMany:updateGradInput(input, gradOutput) 26 | assert(#input == 2, "input must be table of element and table") 27 | local inputEl, inputTable = input[1], input[2] 28 | self.gradInputEl:resizeAs(inputEl):zero() 29 | local gradInputTable = {} 30 | for i,gradV in ipairs(gradOutput) do 31 | self.gradInputEl:add(gradV[1]) 32 | gradInputTable[i] = gradV[2] 33 | end 34 | self.gradInput = {self.gradInputEl, gradInputTable} 35 | return self.gradInput 36 | end 37 | 38 | -------------------------------------------------------------------------------- /Clip.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Clip ]]-- 3 | -- clips values within minval and maxval 4 | ------------------------------------------------------------------------ 5 | local Clip, parent = torch.class("nn.Clip", "nn.Module") 6 | 7 | function Clip:__init(minval, maxval) 8 | assert(torch.type(minval) == 'number') 9 | assert(torch.type(maxval) == 'number') 10 | self.minval = minval 11 | self.maxval = maxval 12 | parent.__init(self) 13 | end 14 | 15 | function Clip:updateOutput(input) 16 | -- bound results within height and width 17 | self._mask = self._mask or input.new() 18 | self._byte = self._byte or torch.ByteTensor() 19 | self.output:resizeAs(input):copy(input) 20 | self._mask:gt(self.output, self.maxval) 21 | local byte = torch.type(self.output) == 'torch.CudaTensor' and self._mask 22 | or self._byte:resize(self._mask:size()):copy(self._mask) 23 | self.output[byte] = self.maxval 24 | self._mask:lt(self.output, self.minval) 25 | byte = torch.type(self.output) == 'torch.CudaTensor' and self._mask 26 | or self._byte:resize(self._mask:size()):copy(self._mask) 27 | self.output[byte] = self.minval 28 | return self.output 29 | end 30 | 31 | function Clip:updateGradInput(input, gradOutput) 32 | self.gradInput:set(gradOutput) 33 | return self.gradInput 34 | end 35 | 36 | -------------------------------------------------------------------------------- /TotalDropout.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ TotalDropout ]]-- 3 | -- Like vanilla Dropout, but on the entire inputs. 4 | -- So either the input is entirely forwarded or entirely zeroed. 5 | ------------------------------------------------------------------------ 6 | local TotalDropout, parent = torch.class("nn.TotalDropout", "nn.Module") 7 | 8 | function TotalDropout:__init(p) 9 | self.p = p or 0.5 10 | self.train = true 11 | if self.p >= 1 or self.p < 0 then 12 | error(' illegal percentage, must be 0 <= p < 1') 13 | end 14 | parent.__init(self) 15 | end 16 | 17 | function TotalDropout:updateOutput(input) 18 | self.output:resizeAs(input):copy(input) 19 | if self.train then 20 | self.noise = torch.bernoulli(1-self.p) 21 | self.output:mul(self.noise) 22 | end 23 | return self.output 24 | end 25 | 26 | function TotalDropout:updateGradInput(input, gradOutput) 27 | if self.train then 28 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 29 | self.gradInput:mul(self.noise) -- simply mask the gradients with the noise vector 30 | else 31 | error('backprop only defined while training') 32 | end 33 | return self.gradInput 34 | end 35 | 36 | function TotalDropout:__tostring__() 37 | return string.format('%s(%f)', torch.type(self), self.p) 38 | end 39 | -------------------------------------------------------------------------------- /CAddTensorTable.lua: -------------------------------------------------------------------------------- 1 | 2 | local CAddTensorTable, parent = torch.class('nn.CAddTensorTable', 'nn.Module') 3 | 4 | function CAddTensorTable:__init() 5 | parent.__init(self) 6 | self.gradInput = {} 7 | end 8 | 9 | -- input is a table with 2 entries. input[1] is the vector to be added. 10 | -- input[2] is the table to which we add the vector 11 | function CAddTensorTable:updateOutput(input) 12 | local currentOutput = {} 13 | for i=1,#input[2] do 14 | currentOutput[i] = currentOutput[i] or input[1].new() 15 | currentOutput[i]:resizeAs(input[1]) 16 | currentOutput[i]:copy(input[2][i]) 17 | currentOutput[i]:add(input[1]) 18 | end 19 | for i = #input[2]+1, #currentOutput do 20 | currentOutput[i] = nil 21 | end 22 | self.output = currentOutput 23 | return self.output 24 | end 25 | 26 | function CAddTensorTable:updateGradInput(input, gradOutput) 27 | self.gradInput[1] = self.gradInput[1] or input[1].new() 28 | self.gradInput[1]:resizeAs(input[1]) 29 | self.gradInput[1]:copy(gradOutput[1]) 30 | for i=2, #input[2] do 31 | self.gradInput[1]:add(gradOutput[i]) 32 | end 33 | self.gradInput[2] = self.gradInput[2] or {} 34 | for i=1,#input[2] do 35 | self.gradInput[2][i] = self.gradInput[2][i] or input[1].new() 36 | self.gradInput[2][i]:resizeAs(input[1]) 37 | self.gradInput[2][i]:copy(gradOutput[i]) 38 | end 39 | for i=#input[2]+1, #self.gradInput[2] do 40 | self.gradInput[2][i] = nil 41 | end 42 | return self.gradInput 43 | end -------------------------------------------------------------------------------- /Constant.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Constant ]]-- 3 | -- Outputs a constant value given an input. 4 | -- If nInputDim is specified, uses the input to determine the size of 5 | -- the batch. The value is then replicated over the batch. 6 | -- You can use this with nn.ConcatTable() to append constant inputs to 7 | -- an input : nn.ConcatTable():add(nn.Constant(v)):add(nn.Identity()) . 8 | ------------------------------------------------------------------------ 9 | local Constant, parent = torch.class("nn.Constant", "nn.Module") 10 | 11 | function Constant:__init(value, nInputDim) 12 | self.value = value 13 | if torch.type(self.value) == 'number' then 14 | self.value = torch.Tensor{self.value} 15 | end 16 | assert(torch.isTensor(self.value), "Expecting number or tensor at arg 1") 17 | self.nInputDim = nInputDim 18 | parent.__init(self) 19 | end 20 | 21 | function Constant:updateOutput(input) 22 | if self.nInputDim and input:dim() > self.nInputDim then 23 | local vsize = self.value:size():totable() 24 | self.output:resize(input:size(1), table.unpack(vsize)) 25 | local value = self.value:view(1, table.unpack(vsize)) 26 | self.output:copy(value:expand(self.output:size())) 27 | else 28 | self.output:resize(self.value:size()):copy(self.value) 29 | end 30 | return self.output 31 | end 32 | 33 | function Constant:updateGradInput(input, gradOutput) 34 | self.gradInput:resizeAs(input):zero() 35 | return self.gradInput 36 | end 37 | -------------------------------------------------------------------------------- /Container.lua: -------------------------------------------------------------------------------- 1 | local Container = nn.Container 2 | 3 | -- multi-add 4 | function Container:extend(...) 5 | for i,module in ipairs{...} do 6 | self:add(module) 7 | end 8 | return self 9 | end 10 | 11 | function Container:sparseParameters() 12 | local params = {} 13 | local gradParams = {} 14 | local scales = {} 15 | local size = 0 16 | for i=1,#self.modules do 17 | local mParams, mGradParams, mScales, mSize = self.modules[i]:sparseParameters() 18 | if mParams then 19 | for k,param in pairs(mParams) do 20 | assert(torch.type(param) ~= 'table') 21 | params[size+k] = param 22 | gradParams[size+k] = mGradParams[k] 23 | scales[size+k] = mScales and mScales[k] 24 | end 25 | size = size + (mSize or #mParams) 26 | end 27 | end 28 | return params, gradParams, scales, size 29 | end 30 | 31 | function Container:parameters() 32 | local function tinsert(to, from) 33 | if torch.type(from) == 'table' then -- we change this line so that it works with torch.MultiCudaTensor 34 | for i=1,#from do 35 | tinsert(to,from[i]) 36 | end 37 | else 38 | table.insert(to,from) 39 | end 40 | end 41 | local w = {} 42 | local gw = {} 43 | for i=1,#self.modules do 44 | local mw,mgw = self.modules[i]:parameters() 45 | if mw then 46 | tinsert(w,mw) 47 | tinsert(gw,mgw) 48 | end 49 | end 50 | return w,gw 51 | end 52 | -------------------------------------------------------------------------------- /Serial.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Serial ]]-- 3 | -- Decorator that modifies the serialization/deserialization 4 | -- behaviour of encapsulated module. 5 | ------------------------------------------------------------------------ 6 | local _ = require 'moses' 7 | local Serial, parent = torch.class("nn.Serial", "nn.Decorator") 8 | 9 | function Serial:__init(module, tensortype) 10 | parent.__init(self, module) 11 | self.tensortype = tensortype 12 | if self.tensortype then 13 | assert(tensortype:find('torch.*Tensor'), "Expecting tensortype (e.g. torch.LongTensor) at arg1") 14 | end 15 | end 16 | 17 | function Serial:write(file) 18 | local state = self:getSerialState() 19 | 20 | local function recursiveSetMetaTable(state) 21 | for k,v in pairs(state) do 22 | if torch.type(v) == 'table' then 23 | recursiveSetMetaTable(v) 24 | end 25 | end 26 | 27 | if state.dpnn_typename then 28 | torch.setmetatable(state, state.dpnn_typename) 29 | end 30 | end 31 | 32 | -- typecast before serialization (useful for cuda) 33 | recursiveSetMetaTable(state) 34 | 35 | if self.tensortype then 36 | state:type(self.tensortype) 37 | end 38 | 39 | -- removes self's metatable 40 | state = _.map(state, function(k,v) return v end) 41 | 42 | file:writeObject(state) 43 | end 44 | 45 | function Serial:read(file) 46 | local state = file:readObject() 47 | for k,v in pairs(state) do 48 | self[k] = v 49 | end 50 | end 51 | 52 | 53 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2016 Element Inc (Nicholas Leonard) 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | 3. Neither the names of Element Inc. nor the names of its contributors may be 16 | used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /ModuleCriterion.lua: -------------------------------------------------------------------------------- 1 | local ModuleCriterion, parent = torch.class("nn.ModuleCriterion", "nn.Criterion") 2 | 3 | function ModuleCriterion:__init(criterion, inputModule, targetModule, castTarget) 4 | self.inputModule = inputModule 5 | self.targetModule = targetModule 6 | self.castTarget = (castTarget == nil) and true or castTarget 7 | if self.inputModule then 8 | local params = self.inputModule:parameters() 9 | if params and #params > 0 then 10 | print"Warning: nn.ModuleCriterion doesn't support parameter updates" 11 | end 12 | end 13 | self.criterion = criterion 14 | end 15 | 16 | function ModuleCriterion:updateOutput(input, target) 17 | if self.inputModule then 18 | self.input = self.inputModule:forward(input) 19 | end 20 | if self.targetModule then 21 | self.target = self.targetModule:forward(target) 22 | end 23 | self.output = self.criterion:forward(self.input or input, self.target or target) 24 | return self.output 25 | end 26 | 27 | function ModuleCriterion:updateGradInput(input, target) 28 | self.gradInput = self.criterion:backward(self.input or input, self.target or target) 29 | if self.inputModule then 30 | self.gradInput = self.inputModule:backward(input, self.gradInput) 31 | end 32 | return self.gradInput 33 | end 34 | 35 | function ModuleCriterion:type(type, typecache) 36 | if self.inputModule then 37 | self.inputModule:type(type, typecache) 38 | end 39 | if self.castTarget and self.targetModule then 40 | self.targetModule:type(type, typecache) 41 | end 42 | self.criterion:type(type, typecache) 43 | return parent.type(self, type, typecache) 44 | end 45 | -------------------------------------------------------------------------------- /FireModule.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Fire module as explained in SqueezeNet http://arxiv.org/pdf/1602.07360v1.pdf. 3 | --]] 4 | --FIXME works only for batches. 5 | 6 | local FireModule, Parent = torch.class('nn.FireModule', 'nn.Decorator') 7 | 8 | function FireModule:__init(nInputPlane, s1x1, e1x1, e3x3, activation) 9 | self.nInputPlane = nInputPlane 10 | self.s1x1 = s1x1 11 | self.e1x1 = e1x1 12 | self.e3x3 = e3x3 13 | self.activation = activation or 'ReLU' 14 | 15 | if self.s1x1 > (self.e1x1 + self.e3x3) then 16 | print('Warning: s1x1 is recommended to be smaller'.. 17 | ' then e1x1+e3x3') 18 | end 19 | 20 | self.module = nn.Sequential() 21 | self.squeeze = nn.SpatialConvolution(nInputPlane, s1x1, 1, 1) 22 | self.expand = nn.Concat(2) 23 | self.expand:add(nn.SpatialConvolution(s1x1, e1x1, 1, 1)) 24 | self.expand:add(nn.SpatialConvolution(s1x1, e3x3, 3, 3, 1, 1, 1, 1)) 25 | 26 | -- Fire Module 27 | self.module:add(self.squeeze) 28 | self.module:add(nn[self.activation]()) 29 | self.module:add(self.expand) 30 | self.module:add(nn[self.activation]()) 31 | 32 | Parent.__init(self, self.module) 33 | end 34 | 35 | --[[ 36 | function FireModule:type(type, tensorCache) 37 | assert(type, 'Module: must provide a type to convert to') 38 | self.module = nn.utils.recursiveType(self.module, type, tensorCache) 39 | end 40 | --]] 41 | 42 | function FireModule:__tostring__() 43 | return string.format('%s inputPlanes: %d -> Squeeze Planes: %d -> '.. 44 | 'Expand: %d(1x1) + %d(3x3), activation: %s', 45 | torch.type(self), self.nInputPlane, self.s1x1, 46 | self.e1x1, self.e3x3, self.activation) 47 | end 48 | -------------------------------------------------------------------------------- /Decorator.lua: -------------------------------------------------------------------------------- 1 | local Decorator, parent = torch.class("nn.Decorator", "nn.Container") 2 | 3 | function Decorator:__init(module) 4 | parent.__init(self) 5 | self.module = module 6 | -- so that it can be handled like a Container 7 | self.modules[1] = module 8 | end 9 | 10 | function Decorator:updateOutput(input) 11 | self.output = self.module:updateOutput(input) 12 | return self.output 13 | end 14 | 15 | function Decorator:updateGradInput(input, gradOutput) 16 | self.gradInput = self.module:updateGradInput(input, gradOutput) 17 | return self.gradInput 18 | end 19 | 20 | function Decorator:accGradParameters(input, gradOutput, scale) 21 | self.module:accGradParameters(input, gradOutput, scale) 22 | end 23 | 24 | function Decorator:accUpdateGradParameters(input, gradOutput, lr) 25 | self.module:accUpdateGradParameters(input, gradOutput, lr) 26 | end 27 | 28 | function Decorator:sharedAccUpdateGradParameters(input, gradOutput, lr) 29 | self.module:sharedAccUpdateGradParameters(input, gradOutput, lr) 30 | end 31 | 32 | function Decorator:__tostring__() 33 | if self.module.__tostring__ then 34 | return torch.type(self) .. ' @ ' .. self.module:__tostring__() 35 | else 36 | return torch.type(self) .. ' @ ' .. torch.type(self.module) 37 | end 38 | end 39 | 40 | -- useful for multiple-inheritance 41 | function Decorator.decorate(class) 42 | class.updateOutput = nn.Decorator.updateOutput 43 | class.updateGradInput = nn.Decorator.updateGradInput 44 | class.accGradParameters = nn.Decorator.accGradParameters 45 | class.accUpdateGradParameters = nn.Decorator.accUpdateGradParameters 46 | class.sharedAccUpdateGradParameters = nn.Decorator.sharedAccUpdateGradParameters 47 | class.__tostring__ = nn.Decorator.__tostring__ 48 | end 49 | -------------------------------------------------------------------------------- /ParallelTable.lua: -------------------------------------------------------------------------------- 1 | local ParallelTable, parent = nn.ParallelTable, nn.Container 2 | 3 | function ParallelTable:profile() 4 | function ParallelTable:updateOutput(input) 5 | for i=1,#self.modules do 6 | local start = sys.clock() 7 | self.output[i] = self.modules[i]:updateOutput(input[i]) 8 | if cutorch then cutorch.synchronize() end 9 | print(torch.type(self.modules[i])..' updateOutput: '..sys.clock() - start.." s") 10 | end 11 | return self.output 12 | end 13 | 14 | function ParallelTable:updateGradInput(input, gradOutput) 15 | for i,module in ipairs(self.modules) do 16 | local start = sys.clock() 17 | self.gradInput[i]= module:updateGradInput(input[i], gradOutput[i]) 18 | if cutorch then cutorch.synchronize() end 19 | print(torch.type(module)..' updateGradInput: '..sys.clock() - start.." s") 20 | end 21 | return self.gradInput 22 | end 23 | 24 | function ParallelTable:accGradParameters(input, gradOutput, scale) 25 | scale = scale or 1 26 | for i,module in ipairs(self.modules) do 27 | local start = sys.clock() 28 | module:accGradParameters(input[i], gradOutput[i], scale) 29 | if cutorch then cutorch.synchronize() end 30 | print(torch.type(module)..' accGradParameters: '..sys.clock() - start.." s") 31 | end 32 | end 33 | 34 | function ParallelTable:accUpdateGradParameters(input, gradOutput, lr) 35 | lr = lr or 1 36 | for i,module in ipairs(self.modules) do 37 | local start = sys.clock() 38 | module:accUpdateGradParameters(input[i], gradOutput[i], lr) 39 | if cutorch then cutorch.synchronize() end 40 | print(torch.type(module)..' accUpdateGradParameters: '..sys.clock() - start.." s") 41 | end 42 | end 43 | parent.profile(self) 44 | end 45 | -------------------------------------------------------------------------------- /SoftMaxForest.lua: -------------------------------------------------------------------------------- 1 | local SoftMaxForest, parent = torch.class("nn.SoftMaxForest", "nn.Container") 2 | 3 | function SoftMaxForest:__init(inputSize, trees, rootIds, gaterSize, gaterAct, accUpdate) 4 | local gaterAct = gaterAct or nn.Tanh() 5 | local gaterSize = gaterSize or {} 6 | 7 | -- experts 8 | self.experts = nn.ConcatTable() 9 | self.smts = {} 10 | for i,tree in ipairs(trees) do 11 | local smt = nn.SoftMaxTree(inputSize, tree, rootIds[i], accUpdate) 12 | table.insert(self._smts, smt) 13 | self.experts:add(smt) 14 | end 15 | 16 | -- gater 17 | self.gater = nn.Sequential() 18 | self.gater:add(nn.SelectTable(1)) -- ignore targets 19 | for i,hiddenSize in ipairs(gaterSize) do 20 | self.gater:add(nn.Linear(inputSize, hiddenSize)) 21 | self.gater:add(gaterAct:clone()) 22 | inputSize = hiddenSize 23 | end 24 | self.gater:add(nn.Linear(inputSize, self.experts:size())) 25 | self.gater:add(nn.SoftMax()) 26 | 27 | -- mixture 28 | self.trunk = nn.ConcatTable() 29 | self.trunk:add(self._gater) 30 | self.trunk:add(self._experts) 31 | self.mixture = nn.MixtureTable() 32 | self.module = nn.Sequential() 33 | self.module:add(self.trunk) 34 | self.module:add(self.mixture) 35 | parent.__init(self) 36 | self.modules[1] = self.module 37 | end 38 | 39 | function SoftMaxForest:updateOutput(input) 40 | self.output = self.module:updateOutput(input) 41 | return self.output 42 | end 43 | 44 | function SoftMaxForest:updateGradInput(input, gradOutput) 45 | self.gradInput = self.module:updateGradInput(input, gradOutput) 46 | return self.gradInput 47 | end 48 | 49 | function SoftMaxForest:accGradParameters(input, gradOutput, scale) 50 | self.module:accGradParameters(input, gradOutput, scale) 51 | end 52 | 53 | function SoftMaxForest:accUpdateGradParameters(input, gradOutput, lr) 54 | self.module:accUpdateGradParameters(input, gradOutput, lr) 55 | end 56 | -------------------------------------------------------------------------------- /ReinforceBernoulli.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ ReinforceBernoulli ]]-- 3 | -- Ref A. http://incompleteideas.net/sutton/williams-92.pdf 4 | -- Inputs are bernoulli probabilities (p) 5 | -- Ouputs are samples drawn from this distribution. 6 | -- Uses the REINFORCE algorithm (ref. A p.230-236) which is 7 | -- implemented through the nn.Module:reinforce(reward) interface. 8 | -- gradOutputs are ignored (REINFORCE algorithm). 9 | ------------------------------------------------------------------------ 10 | local ReinforceBernoulli, parent = torch.class("nn.ReinforceBernoulli", "nn.Reinforce") 11 | 12 | function ReinforceBernoulli:updateOutput(input) 13 | self.output:resizeAs(input) 14 | if self.stochastic or self.train ~= false then 15 | -- sample from bernoulli with P(output=1) = input 16 | self._uniform = self._uniform or input.new() 17 | self._uniform:resizeAs(input):uniform(0,1) 18 | self.output:lt(self._uniform, input) 19 | else 20 | -- use p for evaluation 21 | self.output:copy(input) 22 | end 23 | return self.output 24 | end 25 | 26 | function ReinforceBernoulli:updateGradInput(input, gradOutput) 27 | -- Note that gradOutput is ignored 28 | -- f : bernoulli probability mass function 29 | -- x : the sampled values (0 or 1) (self.output) 30 | -- p : probability of sampling a 1 31 | -- derivative of log bernoulli w.r.t. p 32 | -- d ln(f(x,p)) (x - p) 33 | -- ------------ = --------- 34 | -- d p p(1 - p) 35 | self.gradInput:resizeAs(input) 36 | -- (x - p) 37 | self.gradInput:copy(self.output):add(-1, input) 38 | -- divide by p(1 - p) 39 | self._div = self._div or input.new() 40 | self._div:resizeAs(input) 41 | self._div:fill(1):add(-1, input):cmul(input) 42 | self.gradInput:cdiv(self._div) 43 | 44 | -- multiply by reward 45 | self.gradInput:cmul(self:rewardAs(input)) 46 | -- multiply by -1 ( gradient descent on input ) 47 | self.gradInput:mul(-1) 48 | return self.gradInput 49 | end 50 | 51 | 52 | -------------------------------------------------------------------------------- /Reinforce.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Reinforce ]]-- 3 | -- Ref A. http://incompleteideas.net/sutton/williams-92.pdf 4 | -- Abstract class for modules that use the REINFORCE algorithm (ref A). 5 | -- The reinforce(reward) method is called by a special Reward Criterion. 6 | -- After which, when backward is called, the reward will be used to 7 | -- generate gradInputs. The gradOutput is usually ignored. 8 | ------------------------------------------------------------------------ 9 | local Reinforce, parent = torch.class("nn.Reinforce", "nn.Module") 10 | 11 | function Reinforce:__init(stochastic) 12 | parent.__init(self) 13 | -- true makes it stochastic during evaluation and training 14 | -- false makes it stochastic only during training 15 | self.stochastic = stochastic 16 | end 17 | 18 | -- a Reward Criterion will call this 19 | function Reinforce:reinforce(reward) 20 | parent.reinforce(self, reward) 21 | self.reward = reward 22 | end 23 | 24 | function Reinforce:updateOutput(input) 25 | self.output:set(input) 26 | end 27 | 28 | function Reinforce:updateGradInput(input, gradOutput) 29 | local reward = self:rewardAs(input) 30 | self.gradInput:resizeAs(reward):copy(reward) 31 | end 32 | 33 | -- this can be called by updateGradInput 34 | function Reinforce:rewardAs(input) 35 | assert(self.reward:dim() == 1) 36 | if input:isSameSizeAs(self.reward) then 37 | return self.reward 38 | else 39 | if self.reward:size(1) ~= input:size(1) then 40 | -- assume input is in online-mode 41 | input = self:toBatch(input, input:dim()) 42 | assert(self.reward:size(1) == input:size(1), self.reward:size(1).." ~= "..input:size(1)) 43 | end 44 | self._reward = self._reward or self.reward.new() 45 | self.__reward = self.__reward or self.reward.new() 46 | local size = input:size():fill(1):totable() 47 | size[1] = self.reward:size(1) 48 | self._reward:view(self.reward, table.unpack(size)) 49 | self.__reward:expandAs(self._reward, input) 50 | return self.__reward 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /OneHot.lua: -------------------------------------------------------------------------------- 1 | local OneHot, parent = torch.class('nn.OneHot', 'nn.Module') 2 | 3 | -- adapted from https://github.com/karpathy/char-rnn 4 | -- and https://github.com/hughperkins/char-lstm 5 | 6 | function OneHot:__init(outputSize) 7 | parent.__init(self) 8 | self.outputSize = outputSize 9 | end 10 | 11 | function OneHot:updateOutput(input) 12 | local size 13 | if type(input) == 'number' then 14 | if self:type() == 'torch.CudaTensor' then 15 | self._single = self._single or torch.CudaTensor():resize(1); 16 | else 17 | self._single = self._single or torch.LongTensor():resize(1); 18 | end 19 | self._single[1] = input 20 | input = self._single; 21 | size = {} 22 | else 23 | size = input:size():totable() 24 | end 25 | table.insert(size, self.outputSize) 26 | 27 | self.output:resize(unpack(size)):zero() 28 | 29 | size[#size] = 1 30 | local input_ = input:view(unpack(size)) 31 | 32 | if torch.type(input) == 'torch.CudaTensor' or torch.type(input) == 'torch.ClTensor' then 33 | self.output:scatter(self.output:dim(), input_, 1) 34 | else 35 | if torch.type(self.output) == 'torch.CudaTensor' then 36 | -- input is not cuda, module is, cast input to cuda 37 | self._input = self._input or torch.CudaTensor() 38 | self._input:resize(input_:size()):copy(input_) 39 | input_ = self._input 40 | elseif torch.type(input) ~= 'torch.LongTensor' then 41 | -- input is not long, module isnot cuda, cast input to long 42 | self._input = self._input or torch.LongTensor() 43 | self._input:resize(input_:size()):copy(input_) 44 | input_ = self._input 45 | end 46 | self.output:scatter(self.output:dim(), input_, 1) 47 | end 48 | 49 | return self.output 50 | end 51 | 52 | function OneHot:updateGradInput(input, gradOutput) 53 | if type(input) == 'number' then 54 | return 0 55 | else 56 | self.gradInput:resize(input:size()):zero() 57 | return self.gradInput 58 | end 59 | end 60 | 61 | function OneHot:type(type, typecache) 62 | self._single = nil 63 | self._input = nil 64 | return parent.type(self, type, typecache) 65 | end 66 | -------------------------------------------------------------------------------- /ArgMax.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ ArgMax ]]-- 3 | -- Returns the index of the maxima for dimension dim. 4 | -- Cannot backpropagate through this module. 5 | -- Created for use with ReinforceCategorical. 6 | ------------------------------------------------------------------------ 7 | local ArgMax, parent = torch.class("nn.ArgMax", "nn.Module") 8 | 9 | function ArgMax:__init(dim, nInputDim, asLong) 10 | parent.__init(self) 11 | self.dim = dim or 1 12 | self.nInputDim = nInputDim or 9999 13 | self.asLong = (asLong == nil) and true or asLong 14 | if self.asLong then 15 | self.output = torch.LongTensor() 16 | end 17 | end 18 | 19 | function ArgMax:updateOutput(input) 20 | self._value = self._value or input.new() 21 | self._indices = self._indices or 22 | (torch.type(input) == 'torch.CudaTensor' and (torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()) or torch.LongTensor()) 23 | local dim = (input:dim() > self.nInputDim) and (self.dim + 1) or self.dim 24 | 25 | torch.max(self._value, self._indices, input, dim) 26 | if input:dim() > 1 then 27 | local idx = self._indices:select(dim, 1) 28 | self.output:resize(idx:size()):copy(idx) 29 | else 30 | self.output:resize(self._indices:size()):copy(self._indices) 31 | end 32 | return self.output 33 | end 34 | 35 | function ArgMax:updateGradInput(input, gradOutput) 36 | -- cannot backprop from an index so just return a dummy zero tensor 37 | self.gradInput:resizeAs(input):zero() 38 | return self.gradInput 39 | end 40 | 41 | function ArgMax:type(type) 42 | -- torch.max expects a LongTensor as indices, whereas cutorch.max expects a CudaTensor. 43 | if type == 'torch.CudaTensor' then 44 | parent.type(self, type) 45 | else 46 | -- self._indices must be a LongTensor. Setting it to nil temporarily avoids 47 | -- unnecessary memory allocations. 48 | local indices 49 | indices, self._indices = self._indices, nil 50 | parent.type(self, type) 51 | self._indices = indices and indices:long() or nil 52 | end 53 | if self.asLong then 54 | self.output = torch.LongTensor() 55 | end 56 | return self 57 | end 58 | -------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'nn' 3 | require 'nnx' 4 | local _ = require 'moses' 5 | 6 | -- create global dpnn table 7 | dpnn = {} 8 | dpnn.version = 2 9 | 10 | unpack = unpack or table.unpack -- lua 5.2 compat 11 | 12 | -- for testing: 13 | require('dpnn.test') 14 | 15 | -- extensions to existing modules 16 | require('dpnn.Module') 17 | require('dpnn.Container') 18 | require('dpnn.Sequential') 19 | require('dpnn.ParallelTable') 20 | require('dpnn.LookupTable') 21 | require('dpnn.SpatialBinaryConvolution') 22 | require('dpnn.SimpleColorTransform') 23 | require('dpnn.PCAColorTransform') 24 | 25 | -- extensions to existing criterions 26 | require('dpnn.Criterion') 27 | 28 | -- extensions to make serialization more efficient 29 | require('dpnn.SpatialMaxPooling') 30 | require('dpnn.SpatialConvolution') 31 | require('dpnn.SpatialConvolutionMM') 32 | require('dpnn.SpatialBatchNormalization') 33 | require('dpnn.BatchNormalization') 34 | 35 | -- decorator modules 36 | require('dpnn.Decorator') 37 | require('dpnn.Serial') 38 | require('dpnn.DontCast') 39 | require('dpnn.NaN') 40 | 41 | -- modules 42 | require('dpnn.PrintSize') 43 | require('dpnn.Convert') 44 | require('dpnn.Constant') 45 | require('dpnn.Collapse') 46 | require('dpnn.ZipTable') 47 | require('dpnn.ZipTableOneToMany') 48 | require('dpnn.CAddTensorTable') 49 | require('dpnn.ReverseTable') 50 | require('dpnn.Dictionary') 51 | require('dpnn.Inception') 52 | require('dpnn.SoftMaxTree') 53 | require('dpnn.SoftMaxForest') 54 | require('dpnn.Clip') 55 | require('dpnn.SpatialUniformCrop') 56 | require('dpnn.SpatialGlimpse') 57 | require('dpnn.WhiteNoise') 58 | require('dpnn.ArgMax') 59 | require('dpnn.CategoricalEntropy') 60 | require('dpnn.TotalDropout') 61 | require('dpnn.Kmeans') 62 | require('dpnn.OneHot') 63 | require('dpnn.SpatialRegionDropout') 64 | require('dpnn.FireModule') 65 | require('dpnn.SpatialFeatNormalization') 66 | 67 | -- Noise Contrastive Estimation 68 | require('dpnn.NCEModule') 69 | require('dpnn.NCECriterion') 70 | 71 | -- REINFORCE 72 | require('dpnn.Reinforce') 73 | require('dpnn.ReinforceGamma') 74 | require('dpnn.ReinforceBernoulli') 75 | require('dpnn.ReinforceNormal') 76 | require('dpnn.ReinforceCategorical') 77 | 78 | -- REINFORCE criterions 79 | require('dpnn.VRClassReward') 80 | require('dpnn.BinaryClassReward') 81 | 82 | -- criterions 83 | require('dpnn.ModuleCriterion') 84 | require('dpnn.BinaryLogisticRegression') 85 | require('dpnn.SpatialBinaryLogisticRegression') 86 | 87 | return dpnn 88 | -------------------------------------------------------------------------------- /ReinforceCategorical.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ ReinforceCategorical ]]-- 3 | -- Ref A. http://incompleteideas.net/sutton/williams-92.pdf 4 | -- Inputs are a vector of categorical prob : (p[1], p[2], ..., p[k]) 5 | -- Ouputs are samples drawn from this distribution. 6 | -- Uses the REINFORCE algorithm (ref. A sec 6. p.230-236) which is 7 | -- implemented through the nn.Module:reinforce(r,b) interface. 8 | -- gradOutputs are ignored (REINFORCE algorithm). 9 | ------------------------------------------------------------------------ 10 | local ReinforceCategorical, parent = torch.class("nn.ReinforceCategorical", "nn.Reinforce") 11 | 12 | function ReinforceCategorical:updateOutput(input) 13 | self.output:resizeAs(input) 14 | self._index = self._index or ((torch.type(input) == 'torch.CudaTensor') and torch.CudaTensor() or torch.LongTensor()) 15 | if self.stochastic or self.train ~= false then 16 | -- sample from categorical with p = input 17 | self._input = self._input or input.new() 18 | -- prevent division by zero error (see updateGradInput) 19 | self._input:resizeAs(input):copy(input):add(0.00000001) 20 | input.multinomial(self._index, input, 1) 21 | -- one hot encoding 22 | self.output:zero() 23 | self.output:scatter(2, self._index, 1) 24 | else 25 | -- use p for evaluation 26 | self.output:copy(input) 27 | end 28 | return self.output 29 | end 30 | 31 | function ReinforceCategorical:updateGradInput(input, gradOutput) 32 | -- Note that gradOutput is ignored 33 | -- f : categorical probability mass function 34 | -- x : the sampled indices (one per sample) (self.output) 35 | -- p : probability vector (p[1], p[2], ..., p[k]) 36 | -- derivative of log categorical w.r.t. p 37 | -- d ln(f(x,p)) 1/p[i] if i = x 38 | -- ------------ = 39 | -- d p 0 otherwise 40 | self.gradInput:resizeAs(input):zero() 41 | self.gradInput:copy(self.output) 42 | self._input = self._input or input.new() 43 | -- prevent division by zero error 44 | self._input:resizeAs(input):copy(input):add(0.00000001) 45 | self.gradInput:cdiv(self._input) 46 | 47 | -- multiply by reward 48 | self.gradInput:cmul(self:rewardAs(input)) 49 | -- multiply by -1 ( gradient descent on input ) 50 | self.gradInput:mul(-1) 51 | return self.gradInput 52 | end 53 | 54 | function ReinforceCategorical:type(type, tc) 55 | self._index = nil 56 | return parent.type(self, type, tc) 57 | end 58 | -------------------------------------------------------------------------------- /CategoricalEntropy.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ CategoricalEntropy ]]-- 3 | -- Maximize the entropy of a categorical distribution (e.g. softmax ). 4 | -- H(X) = E(-log(p(X)) = -sum(p(X)log(p(X)) 5 | -- where X = 1,...,N and N is the number of categories. 6 | -- A batch with an entropy below minEntropy will be maximized. 7 | -- d H(X=x) p(x) 8 | -- -------- = - ---- - log(p(x)) = -1 - log(p(x)) 9 | -- d p p(x) 10 | ------------------------------------------------------------------------ 11 | local CE, parent = torch.class("nn.CategoricalEntropy", "nn.Module") 12 | 13 | function CE:__init(scale, minEntropy) 14 | parent.__init(self) 15 | self.scale = scale or 1 16 | self.minEntropy = minEntropy 17 | 18 | -- get the P(X) using the batch as a prior 19 | self.module = nn.Sequential() 20 | self.module:add(nn.Sum(1)) -- sum categorical probabilities over batch 21 | self._mul = nn.MulConstant(1) 22 | self.module:add(self._mul) -- make them sum to one (i.e. probabilities) 23 | 24 | -- get entropy H(X) 25 | local concat = nn.ConcatTable() 26 | concat:add(nn.Identity()) -- p(X) 27 | local seq = nn.Sequential() 28 | seq:add(nn.AddConstant(0.000001)) -- prevent log(0) = nan errors 29 | seq:add(nn.Log()) 30 | concat:add(seq) 31 | self.module:add(concat) -- log(p(x)) 32 | self.module:add(nn.CMulTable()) -- p(x)log(p(x)) 33 | self.module:add(nn.Sum()) -- sum(p(x)log(p(x))) 34 | self.module:add(nn.MulConstant(-1)) -- H(x) 35 | 36 | self.modules = {self.module} 37 | 38 | self.minusOne = torch.Tensor{-self.scale} -- gradient descent on maximization 39 | self.sizeAverage = true 40 | end 41 | 42 | function CE:updateOutput(input) 43 | assert(input:dim() == 2, "CategoricalEntropy only works with batches") 44 | self.output:set(input) 45 | return self.output 46 | end 47 | 48 | function CE:updateGradInput(input, gradOutput, scale) 49 | assert(input:dim() == 2, "CategoricalEntropy only works with batches") 50 | self.gradInput:resizeAs(input):copy(gradOutput) 51 | 52 | self._mul.constant_scalar = 1/input:sum() -- sum to one 53 | self.entropy = self.module:updateOutput(input)[1] 54 | if (not self.minEntropy) or (self.entropy < self.minEntropy) then 55 | local gradEntropy = self.module:updateGradInput(input, self.minusOne, scale) 56 | if self.sizeAverage then 57 | gradEntropy:div(input:size(1)) 58 | end 59 | self.gradInput:add(gradEntropy) 60 | end 61 | 62 | return self.gradInput 63 | end 64 | -------------------------------------------------------------------------------- /NaN.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ NaN ]]-- 3 | -- Asserts that outputs and gradInputs do not contain NaNs. 4 | -- Useful for locating the source of NaN errors. 5 | ------------------------------------------------------------------------ 6 | local NaN, parent = torch.class("nn.NaN", "nn.Decorator") 7 | 8 | local idseq = 0 9 | function NaN.newId() 10 | idseq = idseq + 1 11 | return idseq 12 | end 13 | 14 | function NaN:__init(module, id) 15 | parent.__init(self, module) 16 | self.id = id or NaN.newId() 17 | end 18 | 19 | function NaN:recursiveIsNaN(tensor) 20 | local isNaN = false 21 | if torch.type(tensor) == 'table' then 22 | for k,v in pairs(tensor) do 23 | isNaN = self:recursiveIsNaN(v) 24 | if isNaN then break end 25 | end 26 | else 27 | local _ = require 'moses' 28 | isNaN = _.isNaN(tensor:sum()) 29 | end 30 | return isNaN 31 | end 32 | 33 | function NaN:updateOutput(input) 34 | self.output = self.module:updateOutput(input) 35 | if self:recursiveIsNaN(self.output) then 36 | if self:recursiveIsNaN(input) then 37 | error(string.format("NaN found in input of module :\n%s", self:__tostring__())) 38 | elseif self:recursiveIsNaN(self:parameters()) then 39 | error(string.format("NaN found in parameters of module :\n%s", self:__tostring__())) 40 | end 41 | error(string.format("NaN found in output of module :\n%s", self:__tostring__())) 42 | end 43 | return self.output 44 | end 45 | 46 | function NaN:updateGradInput(input, gradOutput) 47 | self.gradInput = self.module:updateGradInput(input, gradOutput) 48 | if self:recursiveIsNaN(self.gradInput) then 49 | if self:recursiveIsNaN(gradOutput) then 50 | error(string.format("NaN found in gradOutput of module :\n%s", self:__tostring__())) 51 | end 52 | error(string.format("NaN found in gradInput of module :\n%s", self:__tostring__())) 53 | end 54 | return self.gradInput 55 | end 56 | 57 | function NaN:accGradParameters(input, gradOutput, scale) 58 | self.module:accGradParameters(input, gradOutput, scale) 59 | local params, gradParams = self:parameters() 60 | if self:recursiveIsNaN(gradParams) then 61 | error(string.format("NaN found in gradParameters of module :\n%s", self:__tostring__())) 62 | end 63 | end 64 | 65 | function NaN:__tostring__() 66 | local selfstring = torch.type(self) .. '(' .. self.id .. ')' 67 | if self.module.__tostring__ then 68 | return selfstring .. ' @ ' .. self.module:__tostring__() 69 | else 70 | return selfstring .. ' @ ' .. torch.type(self.module) 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /SpatialFeatNormalization.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Color normalization (mean zeroing and dividing by standard deviation). 3 | Basic preprocessing step widely used in training classifier with images. 4 | --]] 5 | 6 | local SpatialFeatNormalization, Parent = torch.class('nn.SpatialFeatNormalization', 'nn.Module') 7 | 8 | function SpatialFeatNormalization:__init(mean, std) 9 | Parent.__init(self) 10 | if mean:dim() ~= 1 then 11 | error(' Mean/Std should be 1D.') 12 | end 13 | self.mean = torch.Tensor() 14 | self.mean:resizeAs(mean):copy(mean) 15 | self.std = torch.Tensor() 16 | self.std:resizeAs(mean) 17 | if std ~= nil then self.std:copy(std) else self.std:fill(1) end 18 | self.noOfFeats = mean:size(1) 19 | end 20 | 21 | function SpatialFeatNormalization:updateOutput(input) 22 | self.output:resizeAs(input):copy(input) 23 | if input:dim() == 4 then 24 | -- Batch of image/s 25 | if input:size(2) ~= self.noOfFeats then 26 | error(' No. of Feats dont match.') 27 | else 28 | for i=1, self.noOfFeats do 29 | self.output[{{}, i, {}, {}}]:add(-self.mean[i]) 30 | self.output[{{}, i, {}, {}}]:div(self.std[i]) 31 | end 32 | end 33 | elseif input:dim() == 3 then 34 | -- single image 35 | if input:size(1) ~= self.noOfFeats then 36 | error(' No. of Feats dont match.') 37 | else 38 | for i=1, self.noOfFeats do 39 | self.output[{i, {}, {}}]:add(-self.mean[i]) 40 | self.output[{i, {}, {}}]:div(self.std[i]) 41 | end 42 | end 43 | else 44 | error(' invalid input dims.') 45 | end 46 | return self.output 47 | end 48 | 49 | function SpatialFeatNormalization:updateGradInput(input, gradOutput) 50 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 51 | if self.gradInput:dim() == 4 then 52 | -- Batch of image/s 53 | if self.gradInput:size(2) ~= self.noOfFeats then 54 | error(' No. of Feats dont match.') 55 | else 56 | for i=1, self.noOfFeats do 57 | self.gradInput[{{}, i, {}, {}}]:div(self.std[i]) 58 | end 59 | end 60 | elseif self.gradInput:dim() == 3 then 61 | -- single image 62 | if self.gradInput:size(1) ~= self.noOfFeats then 63 | error(' No. of Feats dont match.') 64 | else 65 | for i=1, self.noOfFeats do 66 | self.gradInput[{i, {}, {}}]:div(self.std[i]) 67 | end 68 | end 69 | else 70 | error(' invalid self.gradInput dims.') 71 | end 72 | return self.gradInput 73 | end 74 | -------------------------------------------------------------------------------- /SpatialBinaryLogisticRegression.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ SpatialBinaryLogisticRegression ]]-- 3 | -- Takes an image of size batchSize x nChannel x width x height as input. 4 | -- Computes Binary Logistic Regression Cost. 5 | -- Useful for 2 class pixel classification. 6 | ------------------------------------------------------------------------ 7 | 8 | local SpatialBinaryLogisticRegression, parent = torch.class('nn.SpatialBinaryLogisticRegression', 'nn.Criterion') 9 | 10 | function SpatialBinaryLogisticRegression:__init() 11 | parent.__init(self) 12 | self.sizeAverage = true 13 | end 14 | 15 | function SpatialBinaryLogisticRegression:updateOutput(input, target) 16 | local inputDim = input:nDimension() 17 | local targetDim = target:nDimension() 18 | 19 | -- Check dimensions of input and target 20 | assert(inputDim == targetDim, "nDimension of input and target don't match.") 21 | assert(inputDim == 4 or inputDim == 3, "Expecting image or batch on images") 22 | 23 | for i=1,inputDim do 24 | assert(input:size(i) == target:size(i), 25 | "Input and target dimensions don't match.") 26 | end 27 | 28 | -- Check batch or single image 29 | if inputDim == 4 then 30 | self._isBatch = true 31 | assert(input:size(2) == 1, "No. of channels should be 1.") 32 | self._k = input:size(1) 33 | self._h = input:size(3) 34 | self._w = input:size(4) 35 | else 36 | self._isBatch = false 37 | assert(input:size(1) == 1, "No. of channels should be 1.") 38 | self._k = 1 39 | self._h = input:size(2) 40 | self._w = input:size(3) 41 | end 42 | 43 | self._baseExponents = self._baseExponents or input.new() 44 | self._coeff = self._coeff or input.new() 45 | self._logCoeff = self._logCoeff or input.new() 46 | 47 | --Compute exponent = -target*input 48 | self._baseExponents:resize(input:size()):copy(input) 49 | self._baseExponents:cmul(target) 50 | self._baseExponents:mul(-1) 51 | -- Compute exp(exponent) 52 | self._baseExponents:exp() 53 | 54 | self._coeff:resize(input:size()):copy(self._baseExponents) 55 | self._coeff:add(1) 56 | 57 | self._logCoeff:resize(input:size()):copy(self._coeff) 58 | self._logCoeff:log() 59 | 60 | if self.sizeAverage then 61 | return self._logCoeff:sum()/(2 * self._k * self._h * self._w) 62 | else 63 | return self._logCoeff:sum()/(2 * self._h * self._w) 64 | end 65 | end 66 | 67 | function SpatialBinaryLogisticRegression:updateGradInput(input, target) 68 | self.gradInput = self.gradInput or input.new() 69 | local gradInput = self.gradInput 70 | gradInput:resize(target:size()):copy(target) 71 | gradInput:mul(-1) 72 | gradInput:cmul(self._baseExponents) 73 | gradInput:cdiv(self._coeff) 74 | if self.sizeAverage then 75 | gradInput:div(2 * self._k * self._h * self._w) 76 | else 77 | gradInput:div(2 * self._h * self._w) 78 | end 79 | return gradInput 80 | end 81 | -------------------------------------------------------------------------------- /SpatialRegionDropout.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Dropout edges rows or columns to simulate imperfect bounding boxes. 3 | --]] 4 | 5 | local SpatialRegionDropout, Parent = torch.class('nn.SpatialRegionDropout', 'nn.Module') 6 | 7 | function SpatialRegionDropout:__init(p) 8 | Parent.__init(self) 9 | self.p = p or 0.2 -- ratio of total number of rows or cols 10 | self.train = true 11 | self.noise = torch.Tensor() 12 | if self.p >= 1 or self.p < 0 then 13 | error(' illegal percentage, must be 0 <= p < 1') 14 | end 15 | end 16 | 17 | function SpatialRegionDropout:setp(p) 18 | self.p = p 19 | end 20 | 21 | -- Region Types 22 | -- 1: Dropout p ratio of top rows 23 | -- 2: Dropout p ratio of bottom rows 24 | -- 3: Dropout p ratio of leftmost cols 25 | -- 4: Dropout p ratio of rightmost cols 26 | function SpatialRegionDropout:updateOutput(input) 27 | self.output:resizeAs(input):copy(input) 28 | if self.train then 29 | self.noise:resizeAs(input):fill(1) 30 | self.regionType = torch.random(4) 31 | if input:dim() == 4 then 32 | local height = input:size(3) 33 | local width = input:size(4) 34 | if self.regionType == 1 then 35 | self.noise[{{}, {}, {1, math.floor(height*self.p)}}]:fill(0) 36 | elseif self.regionType == 2 then 37 | self.noise[{{}, {}, 38 | {height-math.floor(height*self.p)+1, height}}]:fill(0) 39 | elseif self.regionType == 3 then 40 | self.noise[{{}, {}, {}, {1, math.floor(width*self.p)}}]:fill(0) 41 | elseif self.regionType == 4 then 42 | self.noise[{{}, {}, {}, 43 | {width-math.floor(width*self.p)+1, width}}]:fill(0) 44 | end 45 | elseif input:dim() == 3 then 46 | local height = input:size(2) 47 | local width = input:size(3) 48 | if self.regionType == 1 then 49 | self.noise[{{}, {1, math.floor(height*self.p)}}]:fill(0) 50 | elseif self.regionType == 2 then 51 | self.noise[{{}, 52 | {height-math.floor(height*self.p)+1, height}}]:fill(0) 53 | elseif self.regionType == 3 then 54 | self.noise[{{}, {}, {1, math.floor(width*self.p)}}]:fill(0) 55 | elseif self.regionType == 4 then 56 | self.noise[{{}, {}, 57 | {width-math.floor(width*self.p)+1, width}}]:fill(0) 58 | end 59 | else 60 | error('Input must be 4D (nbatch, nfeat, h, w) or 3D (nfeat, h, w)') 61 | end 62 | self.noise:div(1-self.p) 63 | self.output:cmul(self.noise) 64 | end 65 | return self.output 66 | end 67 | 68 | function SpatialRegionDropout:updateGradInput(input, gradOutput) 69 | if self.train then 70 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 71 | self.gradInput:cmul(self.noise) 72 | else 73 | error('Backpropagation is only defined for training.') 74 | end 75 | return self.gradInput 76 | end 77 | 78 | function SpatialRegionDropout:__tostring__() 79 | return string.format('%s p: %f', torch.type(self), self.p) 80 | end 81 | -------------------------------------------------------------------------------- /BinaryClassReward.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ BinaryClassReward ]]-- 3 | -- Variance reduced binary classification reinforcement criterion. 4 | -- The binary class version of VRClassReward. 5 | -- input : {class prediction, baseline reward} 6 | -- Reward is 1 for success, Reward is 0 otherwise. 7 | -- reward = scale*(Reward - baseline) where baseline is 2nd input element 8 | -- Note : for RNNs with R = 1 for last step in sequence, encapsulate it 9 | -- in nn.ModuleCriterion(BinaryClassReward, nn.SelectTable(-1)) 10 | ------------------------------------------------------------------------ 11 | local BinaryClassReward, parent = torch.class("nn.BinaryClassReward", "nn.Criterion") 12 | 13 | function BinaryClassReward:__init(module, scale, criterion) 14 | parent.__init(self) 15 | self.module = module -- so it can call module:reinforce(reward) 16 | self.scale = scale or 1 -- scale of reward 17 | self.criterion = criterion or nn.MSECriterion() -- baseline criterion 18 | self.sizeAverage = true 19 | self.gradInput = {torch.Tensor()} 20 | end 21 | 22 | function BinaryClassReward:updateOutput(input, target) 23 | assert(torch.type(input) == 'table') 24 | local input = input[1] 25 | assert(input:dim() == 1) 26 | assert(target:dim() == 1) 27 | self._binary = self._binary or input.new() 28 | self._binary:gt(input, 0.5) 29 | 30 | -- max class value is class prediction 31 | if torch.type(self._binary) ~= torch.type(target) then 32 | self._target = self._target or self._binary.new() 33 | self._target:resize(target:size()):copy(target) 34 | target = self._target 35 | end 36 | 37 | -- reward = scale when correctly classified 38 | self._reward = self._reward or input.new() 39 | self._reward:eq(self._binary, target) 40 | self.reward = self.reward or input.new() 41 | self.reward:resize(self._reward:size(1)):copy(self._reward) 42 | self.reward:mul(self.scale) 43 | 44 | -- loss = -sum(reward) 45 | self.output = -self.reward:sum() 46 | if self.sizeAverage then 47 | self.output = self.output/input:size(1) 48 | end 49 | return self.output 50 | end 51 | 52 | function BinaryClassReward:updateGradInput(inputTable, target) 53 | local input, baseline = unpack(inputTable) 54 | 55 | -- reduce variance of reward using baseline 56 | self.vrReward = self.vrReward or self.reward.new() 57 | self.vrReward:resizeAs(self.reward):copy(self.reward) 58 | self.vrReward:add(-1, baseline) 59 | if self.sizeAverage then 60 | self.vrReward:div(input:size(1)) 61 | end 62 | -- broadcast reward to modules 63 | self.module:reinforce(self.vrReward) 64 | 65 | -- zero gradInput (this criterion has no gradInput for class pred) 66 | self.gradInput[1]:resizeAs(input):zero() 67 | 68 | -- learn the baseline reward 69 | self.gradInput[2] = self.criterion:backward(baseline, self.reward) 70 | 71 | return self.gradInput 72 | end 73 | 74 | function BinaryClassReward:type(type) 75 | self._binary = nil 76 | self._target = nil 77 | local module = self.module 78 | self.module = nil 79 | local ret = parent.type(self, type) 80 | self.module = module 81 | return ret 82 | end 83 | -------------------------------------------------------------------------------- /BinaryLogisticRegression.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ BinaryLogisticRegression ]]-- 3 | -- Takes an image of size batchSize x 1 or just batchSize as input. 4 | -- Computes Binary Logistic Regression Cost. 5 | -- Useful for 2 class classification. 6 | ------------------------------------------------------------------------ 7 | 8 | local BinaryLogisticRegression, parent = torch.class('nn.BinaryLogisticRegression', 'nn.Criterion') 9 | 10 | function BinaryLogisticRegression:__init(sizeAverage) 11 | parent.__init(self) 12 | if sizeAverage ~= nil then 13 | self.sizeAverage = sizeAverage 14 | else 15 | self.sizeAverage = true 16 | end 17 | end 18 | 19 | function BinaryLogisticRegression:updateOutput(input, target) 20 | local inputDim = input:nDimension() 21 | local targetDim = target:nDimension() 22 | 23 | -- Check dimensions of input and target 24 | assert(inputDim == 1 or inputDim == 2, 25 | "Input:Expecting batchSize or batchSize x 1") 26 | assert(targetDim == 1 or targetDim == 2, 27 | "Target:Expecting batchSize or batchSize x 1") 28 | if inputDim == 2 then 29 | assert(input:size(1)==1 or input:size(2)==1, 30 | "Input: Expecting batchSize x 1.") 31 | end 32 | if targetDim == 2 then 33 | assert(target:size(1)==1 or target:size(2)==1, 34 | "Target: Expecting batchSize x 1.") 35 | end 36 | 37 | local inputElements = input:nElement() 38 | local targetElements = target:nElement() 39 | 40 | assert(inputElements == targetElements, 41 | "No of input and target elements should be same.") 42 | 43 | self._k = inputElements 44 | local input = input:view(-1) 45 | local target = target:view(-1) 46 | 47 | self._baseExponents = self._baseExponents or input.new() 48 | self._coeff = self._coeff or input.new() 49 | self._logCoeff = self._logCoeff or input.new() 50 | 51 | --Compute exponent = -target*input 52 | self._baseExponents:resize(input:size()):copy(input) 53 | self._baseExponents:cmul(target) 54 | self._baseExponents:mul(-1) 55 | -- Compute exp(exponent) 56 | self._baseExponents:exp() 57 | 58 | self._coeff:resize(input:size()):copy(self._baseExponents) 59 | self._coeff:add(1) 60 | 61 | self._logCoeff:resize(input:size()):copy(self._coeff) 62 | self._logCoeff:log() 63 | 64 | if self.sizeAverage then 65 | return self._logCoeff:sum()/(self._k) 66 | else 67 | return self._logCoeff:sum() 68 | end 69 | end 70 | 71 | function BinaryLogisticRegression:updateGradInput(input, target) 72 | self.gradInput = self.gradInput or input.new() 73 | local gradInput = self.gradInput 74 | gradInput:resize(input:size()):copy(target) 75 | gradInput:mul(-1) 76 | gradInput:cmul(self._baseExponents) 77 | gradInput:cdiv(self._coeff) 78 | if self.sizeAverage then 79 | gradInput:div(self._k) 80 | end 81 | return gradInput 82 | end 83 | 84 | function BinaryLogisticRegression:type(type, tensorCache) 85 | if type then 86 | self._baseExponents = nil 87 | self._coeff = nil 88 | self._logCoeff = nil 89 | end 90 | return parent.type(self, type, tensorCache) 91 | end 92 | -------------------------------------------------------------------------------- /NCECriterion.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Noise Contrast Estimation Criterion ]]-- 3 | -- Ref.: A. http://mi.eng.cam.ac.uk/~xc257/papers/ICASSP2015-rnnlm-nce.pdf 4 | -- B. https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf 5 | ------------------------------------------------------------------------ 6 | local NCECriterion, parent = torch.class("nn.NCECriterion", "nn.Criterion") 7 | local eps = 0.0000001 8 | 9 | function NCECriterion:__init() 10 | parent.__init(self) 11 | self.sizeAverage = true 12 | 13 | self.gradInput = {torch.Tensor(), torch.Tensor(), torch.Tensor(), torch.Tensor()} 14 | end 15 | 16 | function NCECriterion:updateOutput(inputTable, target) 17 | -- P_model(target), P_model(sample), P_noise(target), P_noise(sample) 18 | local Pmt, Pms, Pnt, Pns = unpack(inputTable) 19 | local k = Pms:size(2) 20 | 21 | assert(Pmt:dim() == 1) 22 | assert(Pms:dim() == 2) 23 | assert(Pnt:dim() == 1) 24 | assert(Pns:dim() == 2) 25 | 26 | -- equation 5 in ref. A 27 | 28 | -- eq 5.1 : P(origin=model) = Pmt / (Pmt + k*Pnt) 29 | self._Pom = self._Pom or Pmt.new() 30 | self._Pom:resizeAs(Pmt):copy(Pmt) 31 | self._Pomdiv = self._Pomdiv or Pmt.new() 32 | self._Pomdiv:resizeAs(Pmt):copy(Pmt) 33 | self._Pomdiv:add(k, Pnt):add(eps) 34 | self._Pom:cdiv(self._Pomdiv) 35 | 36 | -- eq 5.2 : P(origin=noise) = k*Pns / (Pms + k*Pns) 37 | self._Pon = self._Pon or Pns.new() 38 | self._Pon:resizeAs(Pns):copy(Pns):mul(k) 39 | self._Pondiv = self._Pondiv or Pms.new() 40 | self._Pondiv:resizeAs(Pms):copy(Pms) 41 | self._Pondiv:add(k, Pns):add(eps) 42 | self._Pon:cdiv(self._Pondiv) 43 | 44 | -- equation 6 in ref. A 45 | 46 | self._lnPom = self._lnPom or self._Pom.new() 47 | self._lnPom:log(self._Pom) 48 | 49 | self._lnPon = self._lnPon or self._Pon.new() 50 | self._lnPon:log(self._Pon) 51 | 52 | local lnPomsum = self._lnPom:sum() 53 | local lnPonsum = self._lnPon:sum() 54 | 55 | self.output = - (lnPomsum + lnPonsum) 56 | 57 | if self.sizeAverage then 58 | self.output = self.output / Pmt:size(1) 59 | end 60 | 61 | return self.output 62 | end 63 | 64 | function NCECriterion:updateGradInput(inputTable, target) 65 | assert(#self.gradInput == 4) 66 | local Pmt, Pms, Pnt, Pns = unpack(inputTable) 67 | local k = Pms:size(2) 68 | 69 | -- equation 7 in ref. A 70 | 71 | -- d ln(Pom) / d input = -k*Pnt / ( Pmt * (Pmt + k*Pnt) ) 72 | local dlnPom = self.gradInput[1] 73 | dlnPom = dlnPom or Pnt.new() 74 | dlnPom:resizeAs(Pnt):copy(Pnt):mul(-k) 75 | dlnPom:cdiv(self._Pomdiv) 76 | Pmt:add(eps) 77 | dlnPom:cdiv(Pmt) -- d ln(Pmt) / d Pmt = 1 / d Pmt 78 | Pmt:add(-eps) 79 | 80 | -- d ln(Pon) / d input = Pms / ( Pms * (Pms + k*Pns) ) 81 | local dlnPon = self.gradInput[2] 82 | dlnPon = dlnPon or Pms.new() 83 | dlnPon:resizeAs(Pms):copy(Pms) 84 | dlnPon:cdiv(self._Pondiv) 85 | Pms:add(eps) 86 | dlnPon:cdiv(Pms) -- d ln(Pms) / d Pms = 1 / d Pms 87 | Pms:add(-eps) 88 | 89 | if self.gradInput[3]:nElement() ~= Pnt:nElement() then 90 | self.gradInput[3]:resizeAs(Pnt):zero() 91 | end 92 | if self.gradInput[4]:nElement() ~= Pns:nElement() then 93 | self.gradInput[4]:resizeAs(Pns):zero() 94 | end 95 | 96 | if self.sizeAverage then 97 | dlnPom:div(Pmt:size(1)) 98 | dlnPon:div(Pmt:size(1)) 99 | end 100 | 101 | return self.gradInput 102 | end 103 | -------------------------------------------------------------------------------- /SimpleColorTransform.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Simple Color transformation module: This module implements a simple data 3 | augmentation technique of changing the pixel values of input image by adding 4 | sample sampled small quantities. 5 | Works only 6 | --]] 7 | 8 | local SimpleColorTransform, Parent = torch.class('nn.SimpleColorTransform', 'nn.Module') 9 | 10 | function SimpleColorTransform:__init(inputChannels, range) 11 | Parent.__init(self) 12 | 13 | self.train = true 14 | self.inputChannels = inputChannels 15 | assert(inputChannels == range:nElement(), 16 | "Number of input channels and number of range values don't match.") 17 | self.range = range 18 | end 19 | 20 | function SimpleColorTransform:updateOutput(input) 21 | self.output:resizeAs(input):copy(input) 22 | if self.train then 23 | self.noise = self.noise or self.output.new() 24 | self._tempNoise = self._tempNoise or self.output.new() 25 | self._tempNoiseExpanded = self._tempNoiseExpanded or self.output.new() 26 | self._tempNoiseSamples = self._tempNoiseSamples or self.output.new() 27 | 28 | if self.output:nDimension() == 4 then 29 | local batchSize = self.output:size(1) 30 | local channels = self.output:size(2) 31 | local height = self.output:size(3) 32 | local width = self.output:size(4) 33 | assert(channels == self.inputChannels) 34 | 35 | -- Randomly sample noise for each channel 36 | self.noise:resize(batchSize, channels) 37 | for i=1, channels do 38 | self.noise[{{}, {i}}]:uniform(-self.range[i], self.range[i]) 39 | end 40 | self._tempNoise = self.noise:view(batchSize, self.inputChannels, 1, 1) 41 | self._tempNoiseExpanded:expand(self._tempNoise, batchSize, 42 | channels, height, width) 43 | self._tempNoiseSamples:resizeAs(self._tempNoiseExpanded) 44 | :copy(self._tempNoiseExpanded) 45 | self.output:add(self._tempNoiseSamples) 46 | 47 | elseif self.output:nDimension() == 3 then 48 | local channels = self.output:size(1) 49 | local height = self.output:size(2) 50 | local width = self.output:size(3) 51 | assert(channels == self.inputChannels) 52 | 53 | -- Randomly sample noise for each channel 54 | self.noise:resize(channels) 55 | for i=1, channels do 56 | self.noise[i] = torch.uniform(-self.range[i], self.range[i]) 57 | end 58 | self._tempNoise = self.noise:view(self.inputChannels, 1, 1) 59 | self._tempNoiseExpanded:expand(self._tempNoise, channels, 60 | height, width) 61 | self._tempNoiseSamples:resizeAs(self._tempNoiseExpanded) 62 | :copy(self._tempNoiseExpanded) 63 | self.output:add(self._tempNoiseSamples) 64 | else 65 | error("Invalid input dimensionality.") 66 | end 67 | end 68 | return self.output 69 | end 70 | 71 | function SimpleColorTransform:updateGradInput(input, gradOutput) 72 | if self.train then 73 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 74 | else 75 | error('backprop only defined while training') 76 | end 77 | return self.gradInput 78 | end 79 | 80 | function SimpleColorTransform:type(type, tensorCache) 81 | self.noise = nil 82 | self._tempNoise = nil 83 | self._tempNoiseExpanded = nil 84 | self._tempNoiseSamples = nil 85 | Parent.type(self, type, tensorCache) 86 | end 87 | 88 | function SimpleColorTransform:__tostring__() 89 | return string.format('SimpleColorTransform', torch.type(self)) 90 | end 91 | -------------------------------------------------------------------------------- /VRClassReward.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ VRClassReward ]]-- 3 | -- Variance reduced classification reinforcement criterion. 4 | -- input : {class prediction, baseline reward} 5 | -- Reward is 1 for success, Reward is 0 otherwise. 6 | -- reward = scale*(Reward - baseline) where baseline is 2nd input element 7 | -- Note : for RNNs with R = 1 for last step in sequence, encapsulate it 8 | -- in nn.ModuleCriterion(VRClassReward, nn.SelectTable(-1)) 9 | ------------------------------------------------------------------------ 10 | local VRClassReward, parent = torch.class("nn.VRClassReward", "nn.Criterion") 11 | 12 | function VRClassReward:__init(module, scale, criterion) 13 | parent.__init(self) 14 | self.module = module -- so it can call module:reinforce(reward) 15 | self.scale = scale or 1 -- scale of reward 16 | self.criterion = criterion or nn.MSECriterion() -- baseline criterion 17 | self.sizeAverage = true 18 | self.gradInput = {torch.Tensor()} 19 | end 20 | 21 | function VRClassReward:updateOutput(input, target) 22 | assert(torch.type(input) == 'table') 23 | local input = self:toBatch(input[1], 1) 24 | self._maxVal = self._maxVal or input.new() 25 | self._maxIdx = self._maxIdx or torch.type(input) == 'torch.CudaTensor' and torch.CudaLongTensor() or torch.LongTensor() 26 | 27 | -- max class value is class prediction 28 | self._maxVal:max(self._maxIdx, input, 2) 29 | 30 | -- reward = scale when correctly classified 31 | local maxIdx = self._maxIdx 32 | if torch.type(self._maxIdx) == 'torch.CudaLongTensor' then 33 | self.__maxIdx = self.__maxIdx or torch.CudaTensor() 34 | self.__maxIdx:resize(maxIdx:size()):copy(maxIdx) 35 | maxIdx = self.__maxIdx 36 | end 37 | 38 | if torch.type(maxIdx) ~= torch.type(target) then 39 | self._target = self._target or maxIdx.new() 40 | self._target:resize(target:size()):copy(target) 41 | target = self._target 42 | end 43 | 44 | -- reward = scale when correctly classified 45 | self._reward = self._reward or maxIdx.new() 46 | self._reward:eq(maxIdx, target) 47 | self.reward = self.reward or input.new() 48 | self.reward:resize(self._reward:size(1)):copy(self._reward) 49 | self.reward:mul(self.scale) 50 | 51 | -- loss = -sum(reward) 52 | self.output = -self.reward:sum() 53 | if self.sizeAverage then 54 | self.output = self.output/input:size(1) 55 | end 56 | return self.output 57 | end 58 | 59 | function VRClassReward:updateGradInput(inputTable, target) 60 | local input = self:toBatch(inputTable[1], 1) 61 | local baseline = self:toBatch(inputTable[2], 1) 62 | 63 | -- reduce variance of reward using baseline 64 | self.vrReward = self.vrReward or self.reward.new() 65 | self.vrReward:resizeAs(self.reward):copy(self.reward) 66 | self.vrReward:add(-1, baseline) 67 | if self.sizeAverage then 68 | self.vrReward:div(input:size(1)) 69 | end 70 | -- broadcast reward to modules 71 | self.module:reinforce(self.vrReward) 72 | 73 | -- zero gradInput (this criterion has no gradInput for class pred) 74 | self.gradInput[1]:resizeAs(input):zero() 75 | self.gradInput[1] = self:fromBatch(self.gradInput[1], 1) 76 | 77 | -- learn the baseline reward 78 | self.criterion:forward(baseline, self.reward) 79 | self.gradInput[2] = self.criterion:backward(baseline, self.reward) 80 | self.gradInput[2] = self:fromBatch(self.gradInput[2], 1) 81 | return self.gradInput 82 | end 83 | 84 | function VRClassReward:type(type) 85 | self._maxVal = nil 86 | self._maxIdx = nil 87 | self.__maxIdx = nil 88 | self._target = nil 89 | local module = self.module 90 | self.module = nil 91 | local ret = parent.type(self, type) 92 | self.module = module 93 | return ret 94 | end 95 | -------------------------------------------------------------------------------- /DontCast.lua: -------------------------------------------------------------------------------- 1 | local DontCast, parent = torch.class("nn.DontCast", "nn.Decorator") 2 | 3 | -- utility functions 4 | 5 | local function recursiveTypeCopy(dst, src, type_str) 6 | if torch.type(src) == 'table' then 7 | dst = (torch.type(dst) == 'table') and dst or {} 8 | for k, v in pairs(src) do 9 | dst[k] = recursiveTypeCopy(dst[k], v, type_str) 10 | end 11 | elseif torch.isTensor(src) then 12 | dst = (torch.type(dst) == type_str) and dst or torch.getmetatable(type_str).new() 13 | dst:resize(src:size()) 14 | if src:nElement() > 0 then 15 | dst:copy(src) 16 | end 17 | end 18 | return dst 19 | end 20 | 21 | local function tableTensorType(src) 22 | if type(src) == 'table' then 23 | local type_str, found 24 | for k,v in pairs(src) do 25 | type_str, found = tableTensorType(v) 26 | if found then 27 | return type_str, true 28 | end 29 | end 30 | return type_str, found 31 | else 32 | return torch.type(src), torch.isTensor(src) 33 | end 34 | end 35 | 36 | -- DontCast methods and constructor 37 | 38 | function DontCast:__init(module, castin, castout, moduleType) 39 | parent.__init(self, module) 40 | self.castin = castin 41 | self.castout = (castout == nil) and castin or castout 42 | self.moduleType = moduleType 43 | if (self.castin or self.castout) and not self.moduleType then 44 | local moduleType, found = tableTensorType(module.output) 45 | if found then 46 | self.moduleType = moduleType 47 | else 48 | moduleType, found = tableTensorType(module:parameters()) 49 | if found then 50 | self.moduleType = moduleType 51 | else 52 | error"Cannot extrapolate moduleType. Provide constructor argument 4" 53 | end 54 | end 55 | end 56 | end 57 | 58 | function DontCast:updateOutput(input) 59 | if self.castin and tableTensorType(input) ~= self.moduleType then 60 | self._input = recursiveTypeCopy(self._input, input, self.moduleType) 61 | input = self._input 62 | end 63 | 64 | local output = self.module:updateOutput(input) 65 | 66 | if self.castout then 67 | self.output = recursiveTypeCopy(self.output, output, tableTensorType(self.output)) 68 | else 69 | self.output = output 70 | end 71 | return self.output 72 | end 73 | 74 | function DontCast:updateGradInput(input, gradOutput) 75 | if self.castin and tableTensorType(input) ~= self.moduleType then 76 | input = self._input 77 | end 78 | if self.castout and tableTensorType(gradOutput) ~= self.moduleType then 79 | self._gradOutput = recursiveTypeCopy(self._gradOutput, gradOutput, self.moduleType) 80 | gradOutput = self._gradOutput 81 | end 82 | 83 | local gradInput = self.module:updateGradInput(input, gradOutput) 84 | 85 | if self.castin then 86 | self.gradInput = recursiveTypeCopy(self.gradInput, gradInput, tableTensorType(self.gradInput)) 87 | else 88 | self.gradInput = gradInput 89 | end 90 | return self.gradInput 91 | end 92 | 93 | function DontCast:accGradParameters(input, gradOutput, scale) 94 | if self.castin and tableTensorType(input) ~= self.moduleType then 95 | input = self._input 96 | end 97 | if self.castout and tableTensorType(gradOutput) ~= self.moduleType then 98 | gradOutput = self._gradOutput 99 | end 100 | 101 | self.module:accGradParameters(input, gradOutput, scale) 102 | end 103 | 104 | function DontCast:accUpdateGradParameters(input, gradOutput, lr) 105 | if self.castin and tableTensorType(input) ~= self.moduleType then 106 | input = self._input 107 | end 108 | if self.castout and tableTensorType(gradOutput) ~= self.moduleType then 109 | gradOutput = self._gradOutput 110 | end 111 | 112 | self.module:accUpdateGradParameters(input, gradOutput, lr) 113 | end 114 | 115 | -- dont cast (the essence thereof) 116 | function DontCast:type(type) 117 | if self.castout and tableTensorType(self.output) ~= type then 118 | self.output = recursiveTypeCopy(nil, self.output, type) 119 | end 120 | if self.castin and tableTensorType(self.gradInput) ~= type then 121 | self.gradInput = recursiveTypeCopy(nil, self.gradInput, type) 122 | end 123 | return self 124 | end 125 | -------------------------------------------------------------------------------- /Sequential.lua: -------------------------------------------------------------------------------- 1 | local Sequential, parent = nn.Sequential, nn.Container 2 | 3 | function Sequential:profile() 4 | 5 | function Sequential:updateOutput(input) 6 | local currentOutput = input 7 | for i=1,#self.modules do 8 | local start = torch.Timer() 9 | currentOutput = self.modules[i]:updateOutput(currentOutput) 10 | if cutorch then cutorch.synchronize() end 11 | print(torch.type(self.modules[i])..' updateOutput: '..start:time().real.." s") 12 | end 13 | self.output = currentOutput 14 | return currentOutput 15 | end 16 | 17 | function Sequential:updateGradInput(input, gradOutput) 18 | local currentGradOutput = gradOutput 19 | local currentModule = self.modules[#self.modules] 20 | for i=#self.modules-1,1,-1 do 21 | local previousModule = self.modules[i] 22 | local start = torch.Timer() 23 | currentGradOutput = currentModule:updateGradInput(previousModule.output, currentGradOutput) 24 | if cutorch then cutorch.synchronize() end 25 | print(torch.type(currentModule)..' updateGradInput: '..start:time().real.." s") 26 | currentModule = previousModule 27 | end 28 | local start = torch.Timer() 29 | currentGradOutput = currentModule:updateGradInput(input, currentGradOutput) 30 | if cutorch then cutorch.synchronize() end 31 | print(torch.type(currentModule)..' updateGradInput: '..start:time().real.." s") 32 | self.gradInput = currentGradOutput 33 | return currentGradOutput 34 | end 35 | 36 | function Sequential:accGradParameters(input, gradOutput, scale) 37 | scale = scale or 1 38 | 39 | local currentGradOutput = gradOutput 40 | local currentModule = self.modules[#self.modules] 41 | for i=#self.modules-1,1,-1 do 42 | local previousModule = self.modules[i] 43 | local start = torch.Timer() 44 | currentModule:accGradParameters(previousModule.output, currentGradOutput, scale) 45 | if cutorch then cutorch.synchronize() end 46 | print(torch.type(currentModule)..' accGradParameters: '..start:time().real.." s") 47 | currentGradOutput = currentModule.gradInput 48 | currentModule = previousModule 49 | end 50 | 51 | local start = torch.Timer() 52 | currentModule:accGradParameters(input, currentGradOutput, scale) 53 | if cutorch then cutorch.synchronize() end 54 | print(torch.type(currentModule)..' accGradParameters: '..start:time().real.." s") 55 | end 56 | 57 | function Sequential:backward(input, gradOutput, scale) 58 | scale = scale or 1 59 | local currentGradOutput = gradOutput 60 | local currentModule = self.modules[#self.modules] 61 | for i=#self.modules-1,1,-1 do 62 | local previousModule = self.modules[i] 63 | local start = torch.Timer() 64 | currentGradOutput = currentModule:backward(previousModule.output, currentGradOutput, scale) 65 | if cutorch then cutorch.synchronize() end 66 | print(torch.type(currentModule)..' backward: '..start:time().real.." s") 67 | currentModule.gradInput = currentGradOutput 68 | currentModule = previousModule 69 | end 70 | local start = torch.Timer() 71 | currentGradOutput = currentModule:backward(input, currentGradOutput, scale) 72 | if cutorch then cutorch.synchronize() end 73 | print(torch.type(currentModule)..' backward: '..start:time().real.." s") 74 | self.gradInput = currentGradOutput 75 | return currentGradOutput 76 | end 77 | 78 | function Sequential:accUpdateGradParameters(input, gradOutput, lr) 79 | local currentGradOutput = gradOutput 80 | local currentModule = self.modules[#self.modules] 81 | for i=#self.modules-1,1,-1 do 82 | local previousModule = self.modules[i] 83 | local start = torch.Timer() 84 | currentModule:accUpdateGradParameters(previousModule.output, currentGradOutput, lr) 85 | if cutorch then cutorch.synchronize() end 86 | print(torch.type(currentModule)..' accUpdateGradParameters: '..start:time().real.." s") 87 | currentGradOutput = currentModule.gradInput 88 | currentModule = previousModule 89 | end 90 | 91 | local start = torch.Timer() 92 | currentModule:accUpdateGradParameters(input, currentGradOutput, lr) 93 | if cutorch then cutorch.synchronize() end 94 | print(torch.type(currentModule)..' accUpdateGradParameters: '..start:time().real.." s") 95 | end 96 | 97 | parent.profile(self) 98 | end 99 | -------------------------------------------------------------------------------- /ReinforceNormal.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ ReinforceNormal ]]-- 3 | -- Ref A. http://incompleteideas.net/sutton/williams-92.pdf 4 | -- Inputs are mean (mu) of multivariate normal distribution. 5 | -- Ouputs are samples drawn from these distributions. 6 | -- Standard deviation is provided as constructor argument. 7 | -- Uses the REINFORCE algorithm (ref. A sec 6. p.237-239) which is 8 | -- implemented through the nn.Module:reinforce(r,b) interface. 9 | -- gradOutputs are ignored (REINFORCE algorithm). 10 | ------------------------------------------------------------------------ 11 | local ReinforceNormal, parent = torch.class("nn.ReinforceNormal", "nn.Reinforce") 12 | 13 | function ReinforceNormal:__init(stdev, stochastic) 14 | parent.__init(self, stochastic) 15 | self.stdev = stdev 16 | if not stdev then 17 | self.gradInput = {torch.Tensor(), torch.Tensor()} 18 | end 19 | end 20 | 21 | function ReinforceNormal:updateOutput(input) 22 | local mean, stdev = input, self.stdev 23 | if torch.type(input) == 'table' then 24 | -- input is {mean, stdev} 25 | assert(#input == 2) 26 | mean, stdev = unpack(input) 27 | end 28 | assert(stdev) 29 | 30 | self.output:resizeAs(mean) 31 | 32 | if self.stochastic or self.train ~= false then 33 | self.output:normal() 34 | -- multiply by standard deviations 35 | if torch.type(stdev) == 'number' then 36 | self.output:mul(stdev) 37 | elseif torch.isTensor(stdev) then 38 | if stdev:dim() == mean:dim() then 39 | assert(stdev:isSameSizeAs(mean)) 40 | self.output:cmul(stdev) 41 | else 42 | assert(stdev:dim()+1 == mean:dim()) 43 | self._stdev = self._stdev or stdev.new() 44 | self._stdev:view(stdev,1,table.unpack(stdev:size():totable())) 45 | self.__stdev = self.__stdev or stdev.new() 46 | self.__stdev:expandAs(self._stdev, mean) 47 | self.output:cmul(self.__stdev) 48 | end 49 | else 50 | error"unsupported mean type" 51 | end 52 | 53 | -- re-center the means to the mean 54 | self.output:add(mean) 55 | else 56 | -- use maximum a posteriori (MAP) estimate 57 | self.output:copy(mean) 58 | end 59 | return self.output 60 | end 61 | 62 | function ReinforceNormal:updateGradInput(input, gradOutput) 63 | -- Note that gradOutput is ignored 64 | -- f : normal probability density function 65 | -- x : the sampled values (self.output) 66 | -- u : mean (mu) (mean) 67 | -- s : standard deviation (sigma) (stdev) 68 | 69 | local mean, stdev = input, self.stdev 70 | local gradMean, gradStdev = self.gradInput, nil 71 | if torch.type(input) == 'table' then 72 | mean, stdev = unpack(input) 73 | gradMean, gradStdev = unpack(self.gradInput) 74 | end 75 | assert(stdev) 76 | 77 | -- Derivative of log normal w.r.t. mean : 78 | -- d ln(f(x,u,s)) (x - u) 79 | -- -------------- = ------- 80 | -- d u s^2 81 | 82 | gradMean:resizeAs(mean) 83 | -- (x - u) 84 | gradMean:copy(self.output):add(-1, mean) 85 | 86 | -- divide by squared standard deviations 87 | if torch.type(stdev) == 'number' then 88 | gradMean:div(stdev^2) 89 | else 90 | if stdev:dim() == mean:dim() then 91 | gradMean:cdiv(stdev):cdiv(stdev) 92 | else 93 | gradMean:cdiv(self.__stdev):cdiv(self.__stdev) 94 | end 95 | end 96 | -- multiply by reward 97 | gradMean:cmul(self:rewardAs(mean) ) 98 | -- multiply by -1 ( gradient descent on mean ) 99 | gradMean:mul(-1) 100 | 101 | -- Derivative of log normal w.r.t. stdev : 102 | -- d ln(f(x,u,s)) (x - u)^2 - s^2 103 | -- -------------- = --------------- 104 | -- d s s^3 105 | 106 | if gradStdev then 107 | gradStdev:resizeAs(stdev) 108 | -- (x - u)^2 109 | gradStdev:copy(self.output):add(-1, mean):pow(2) 110 | -- subtract s^2 111 | self._stdev2 = self._stdev2 or stdev.new() 112 | self._stdev2:resizeAs(stdev):copy(stdev):cmul(stdev) 113 | gradStdev:add(-1, self._stdev2) 114 | -- divide by s^3 115 | self._stdev2:cmul(stdev):add(0.00000001) 116 | gradStdev:cdiv(self._stdev2) 117 | -- multiply by reward 118 | gradStdev:cmul(self:rewardAs(stdev)) 119 | -- multiply by -1 ( gradient descent on stdev ) 120 | gradStdev:mul(-1) 121 | end 122 | 123 | return self.gradInput 124 | end 125 | -------------------------------------------------------------------------------- /SpatialUniformCrop.lua: -------------------------------------------------------------------------------- 1 | local SpatialUniformCrop, parent = torch.class("nn.SpatialUniformCrop", "nn.Module") 2 | 3 | function SpatialUniformCrop:__init(oheight, owidth, scale) 4 | parent.__init(self) 5 | self.scale = scale or nil 6 | if self.scale ~= nil then 7 | assert(torch.type(scale)=='table') 8 | self.scaler = nn.SpatialReSampling{owidth=owidth, oheight=oheight} 9 | end 10 | self.oheight = oheight 11 | self.owidth = owidth or oheight 12 | end 13 | 14 | function SpatialUniformCrop:updateOutput(input) 15 | input = self:toBatch(input, 3) 16 | 17 | self.output:resize(input:size(1), input:size(2), self.oheight, self.owidth) 18 | self.coord = self.coord or torch.IntTensor() 19 | self.coord:resize(input:size(1), 2) 20 | 21 | if self.scale ~= nil then 22 | self.scales = self.scales or torch.FloatTensor() 23 | self.scales:resize(input:size(1)) 24 | end 25 | 26 | local iH, iW = input:size(3), input:size(4) 27 | if self.train ~= false then 28 | if self.scale ~= nil then 29 | for i=1,input:size(1) do 30 | -- do random crop 31 | local s = torch.uniform(self.scale['min'] or self.scale[1], self.scale['max'] or self.scale[2]) 32 | local soheight = math.ceil(s*self.oheight) 33 | local sowidth = math.ceil(s*self.owidth) 34 | 35 | local h = math.ceil(torch.uniform(1e-2, iH-soheight)) 36 | local w = math.ceil(torch.uniform(1e-2, iW-sowidth)) 37 | 38 | local ch = math.ceil(iH/2 - (iH-soheight)/2 + h) 39 | local cw = math.ceil(iW/2 - (iH-sowidth)/2 + w) 40 | 41 | local h1 = ch - math.ceil(soheight/2) 42 | local w1 = cw - math.ceil(sowidth/2) 43 | if h1 < 1 then h1 = 1 end 44 | if w1 < 1 then w1 = 1 end 45 | 46 | local crop = input[i]:narrow(2, h1, soheight):narrow(3, w1, sowidth) 47 | 48 | self.output[i]:copy(self.scaler:forward(crop)) 49 | -- save crop coordinates and scale for backward 50 | self.scales[i] = s 51 | self.coord[{i,1}] = h 52 | self.coord[{i,2}] = w 53 | end 54 | else 55 | for i=1,input:size(1) do 56 | -- do random crop 57 | local h1 = math.ceil(torch.uniform(1e-2, iH-self.oheight)) 58 | local w1 = math.ceil(torch.uniform(1e-2, iW-self.owidth)) 59 | local crop = input[i]:narrow(2,h1,self.oheight):narrow(3,w1,self.owidth) 60 | self.output[i]:copy(crop) 61 | -- save crop coordinates for backward 62 | self.coord[{i,1}] = h1 63 | self.coord[{i,2}] = w1 64 | end 65 | end 66 | else 67 | -- use center crop 68 | local h1 = math.ceil((iH-self.oheight)/2) 69 | local w1 = math.ceil((iW-self.owidth)/2) 70 | local crop = input:narrow(3,h1,self.oheight):narrow(4,w1,self.owidth) 71 | self.output:copy(crop) 72 | end 73 | 74 | self.output = self:fromBatch(self.output, 1) 75 | return self.output 76 | end 77 | 78 | function SpatialUniformCrop:updateGradInput(input, gradOutput) 79 | input = self:toBatch(input, 3) 80 | gradOutput = self:toBatch(gradOutput, 3) 81 | 82 | self.gradInput:resizeAs(input):zero() 83 | if self.scale ~= nil then 84 | local iH, iW = input:size(3), input:size(4) 85 | for i=1,input:size(1) do 86 | local s = self.scales[i] 87 | local soheight = math.ceil(s*self.oheight) 88 | local sowidth = math.ceil(s*self.owidth) 89 | 90 | local h, w = self.coord[{i,1}], self.coord[{i,2}] 91 | 92 | local ch = math.ceil(iH/2 - (iH-soheight)/2 + h) 93 | local cw = math.ceil(iW/2 - (iH-sowidth)/2 + w) 94 | 95 | local h1 = ch - math.ceil(soheight/2) 96 | local w1 = cw - math.ceil(sowidth/2) 97 | if h1 < 1 then h1 = 1 end 98 | if w1 < 1 then w1 = 1 end 99 | 100 | local crop = input[i]:narrow(2, h1, soheight):narrow(3, w1, sowidth) 101 | local samplerGradInput = self.scaler:updateGradInput(crop, gradOutput[i]) 102 | 103 | self.gradInput[i]:narrow(2, h1, soheight):narrow(3, w1, sowidth):copy(samplerGradInput) 104 | end 105 | else 106 | for i=1,input:size(1) do 107 | local h1, w1 = self.coord[{i,1}], self.coord[{i,2}] 108 | self.gradInput[i]:narrow(2,h1,self.oheight):narrow(3,w1,self.owidth):copy(gradOutput[i]) 109 | end 110 | end 111 | 112 | self.gradInput = self:fromBatch(self.gradInput, 1) 113 | return self.gradInput 114 | end 115 | 116 | function SpatialUniformCrop:type(type, cache) 117 | self.coord = nil 118 | return parent.type(self, type, cache) 119 | end 120 | -------------------------------------------------------------------------------- /ReinforceGamma.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ ReinforceGamma ]]-- 3 | -- Ref A. http://incompleteideas.net/sutton/williams-92.pdf 4 | -- Inputs are shape (k) and scale (theta) of multivariate Gamma distribution. 5 | -- Ouputs are samples drawn from these distributions. 6 | -- Scale is provided as constructor argument. 7 | -- Uses the REINFORCE algorithm (ref. A sec 6. p.237-239) which is 8 | -- implemented through the nn.Module:reinforce(r,b) interface. 9 | -- gradOutputs are ignored (REINFORCE algorithm). 10 | ------------------------------------------------------------------------ 11 | 12 | 13 | local ReinforceGamma, parent = torch.class("nn.ReinforceGamma", "nn.Reinforce") 14 | 15 | function ReinforceGamma:__init(scale, stochastic) 16 | require('randomkit') -- needed to sample gamma dist : luarocks install randomkit 17 | require('cephes') -- needed to compute digamma for gradient : 18 | parent.__init(self, stochastic) 19 | self.scale = scale 20 | if not scale then 21 | self.gradInput = {torch.Tensor(), torch.Tensor()} 22 | end 23 | end 24 | 25 | function ReinforceGamma:updateOutput(input) 26 | local shape, scale = input, self.scale 27 | if torch.type(input) == 'table' then 28 | -- input is {shape, scale} 29 | assert(#input == 2) 30 | shape, scale = unpack(input) 31 | end 32 | assert(scale) 33 | 34 | self.output:resizeAs(shape) 35 | 36 | if torch.type(scale) == 'number' then 37 | scale = shape.new():resizeAs(shape):fill(scale) 38 | elseif torch.isTensor(scale) then 39 | if scale:dim() == shape:dim() then 40 | assert(scale:isSameSizeAs(shape)) 41 | else 42 | assert(scale:dim()+1 == shape:dim()) 43 | self._scale = self._scale or scale.new() 44 | self._scale:view(scale,1,table.unpack(scale:size():totable())) 45 | self.__scale = self.__scale or scale.new() 46 | self.__scale:expandAs(self._scale, shape) 47 | scale = self.__scale 48 | end 49 | else 50 | error"unsupported shape type" 51 | end 52 | 53 | if self.stochastic or self.train ~= false then 54 | self.output:copy(randomkit.gamma(shape:squeeze():float(),scale:squeeze():float())) 55 | else 56 | -- use maximum a posteriori (MAP) estimate 57 | self.output:copy(shape):cmul(scale) 58 | end 59 | 60 | return self.output 61 | end 62 | 63 | function ReinforceGamma:updateGradInput(input, gradOutput) 64 | -- Note that gradOutput is ignored 65 | -- f : Gamma probability density function 66 | -- g : Digamma probability density function 67 | -- x : the sampled values (self.output) 68 | -- shape : shape parameter of gamma dist 69 | -- scale: scale parameter of gamma dist 70 | 71 | local shape, scale = input, self.scale 72 | local gradShape, gradScale = self.gradInput, nil 73 | if torch.type(input) == 'table' then 74 | shape, scale = unpack(input) 75 | gradShape, gradScale = unpack(self.gradInput) 76 | end 77 | assert(scale) 78 | 79 | -- Derivative of log gamma w.r.t. shape : 80 | -- d ln(f(x,shape,scale)) 81 | -- ---------------------- = ln(x) - g(shape) - ln(scale) 82 | -- d shape 83 | gradShape:resizeAs(shape) 84 | 85 | if torch.type(scale) == 'number' then 86 | scale = shape.new():resizeAs(shape):fill(scale) 87 | else 88 | if not scale:dim() == shape:dim() then 89 | scale:copy(self.__scale) 90 | end 91 | end 92 | gradShape:copy(cephes.digamma(shape:float())) 93 | gradShape:mul(-1) 94 | 95 | self._logOutput = self._logOutput or self.output.new() 96 | self._logOutput:log( self.output ) 97 | 98 | self._logScale = self._logScale or scale.new() 99 | self._logScale:log( scale ) 100 | 101 | gradShape:add( self._logOutput ) 102 | gradShape:add(-1, self._logScale ) 103 | 104 | -- multiply by variance reduced reward 105 | gradShape:cmul(self:rewardAs(shape) ) 106 | -- multiply by -1 ( gradient descent on shape ) 107 | gradShape:mul(-1) 108 | 109 | -- Derivative of log Gamma w.r.t. scale : 110 | -- d ln(f(x,shape,scale)) x shape 111 | -- ---------------------- = ------- - ----- 112 | -- d scale scale^2 scale 113 | 114 | if gradScale then 115 | gradScale:resizeAs(scale) 116 | gradScale:copy( torch.cdiv(self.output, torch.pow(scale,2)) ) 117 | gradScale:add(-1, torch.cdiv(shape, scale) ) 118 | gradScale:cmul( self:rewardAs(scale) ) 119 | gradScale:mul(-1) 120 | end 121 | 122 | return self.gradInput 123 | end 124 | 125 | function ReinforceGamma:type(type,cache) 126 | self._logOutput = nil 127 | self._logScale = nil 128 | return parent.type(self,type,cache) 129 | end 130 | -------------------------------------------------------------------------------- /PCAColorTransform.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | Color transformation module: Commonly used data augmentation technique. 3 | Random color noise is added to input image/images based on the Principal 4 | Component Analysis (PCA) of pixel values. 5 | 6 | Arguments 7 | -> eigenVectors: Each row represent an eigen vector. 8 | -> eigenValues: Corresponding eigen values. 9 | -> std: std of gaussian distribution for augmentation (default 0.1). 10 | --]] 11 | 12 | local PCAColorTransform, Parent = torch.class('nn.PCAColorTransform', 'nn.Module') 13 | 14 | function PCAColorTransform:__init(inputChannels, eigenVectors, eigenValues, std) 15 | Parent.__init(self) 16 | 17 | self.train = true 18 | self.inputChannels = inputChannels 19 | assert(inputChannels == eigenVectors:size(1), 20 | "Number of input channels do not match number of eigen vectors.") 21 | assert(eigenVectors:size(2) == eigenVectors:size(1), 22 | "Invalid dimensionality: eigen vectors.") 23 | assert(inputChannels == eigenValues:nElement(), 24 | "Number of input channels do not match number of eigen values.") 25 | 26 | self.eigenVectors = eigenVectors 27 | self.eigenValues = eigenValues 28 | self.std = std or 0.1 29 | end 30 | 31 | function PCAColorTransform:updateOutput(input) 32 | self.output:resizeAs(input):copy(input) 33 | if self.train then 34 | self.noise = self.noise or self.output.new() 35 | self.alphas = self.alphas or self.output.new() 36 | self._tempNoise = self._tempNoise or self.output.new() 37 | self._tempNoiseExpanded = self._tempNoiseExpanded or self.output.new() 38 | self._tempNoiseSamples = self._tempNoiseSamples or self.output.new() 39 | self._tempLambda = self._tempLambda or self.output.new() 40 | self._tempLambdaExpanded = self._tempLambdaExpanded or self.output.new() 41 | 42 | if self.output:nDimension() == 4 then 43 | local batchSize = self.output:size(1) 44 | local channels = self.output:size(2) 45 | local height = self.output:size(3) 46 | local width = self.output:size(4) 47 | assert(channels == self.inputChannels) 48 | 49 | -- Randomly sample noise for each channel and scale by eigen values 50 | self.alphas:resize(channels, batchSize) 51 | self.alphas:normal(0, self.std) 52 | self._tempLambda = self.eigenValues:view(self.inputChannels, 1) 53 | self._tempLambdaExpanded = self._tempLambda:expand(channels, batchSize) 54 | self.alphas:cmul(self._tempLambdaExpanded) 55 | 56 | -- Scale by eigen vectors 57 | self.noise:resize(batchSize, self.inputChannels):zero() 58 | self.noise:t():addmm(self.eigenVectors, self.alphas) 59 | 60 | -- Add noise to the input 61 | self._tempNoise = self.noise:view(batchSize, self.inputChannels, 1, 1) 62 | self._tempNoiseExpanded:expand(self._tempNoise, batchSize, 63 | channels, height, width) 64 | self.output:add(self._tempNoiseExpanded) 65 | 66 | elseif self.output:nDimension() == 3 then 67 | local channels = self.output:size(1) 68 | local height = self.output:size(2) 69 | local width = self.output:size(3) 70 | assert(channels == self.inputChannels) 71 | 72 | -- Randomly sample noise for each channel and scale by eigen values 73 | self.alphas:resize(channels, 1) 74 | self.alphas:normal(0, self.std) 75 | self._tempLambda = self.eigenValues:view(self.inputChannels, 1) 76 | self._tempLambdaExpanded = self._tempLambda:expand(channels, 1) 77 | self.alphas:cmul(self._tempLambdaExpanded) 78 | 79 | -- Scale by eigen vectors 80 | self.noise:resize(1, self.inputChannels):zero() 81 | self.noise:t():addmm(self.eigenVectors, self.alphas) 82 | 83 | -- Add noise to the input 84 | self._tempNoise = self.noise:view(self.inputChannels, 1, 1) 85 | self._tempNoiseExpanded:expand(self._tempNoise, channels, 86 | height, width) 87 | self.output:add(self._tempNoiseExpanded) 88 | else 89 | error("Invalid input dimensionality.") 90 | end 91 | end 92 | return self.output 93 | end 94 | 95 | function PCAColorTransform:updateGradInput(input, gradOutput) 96 | if self.train then 97 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 98 | else 99 | error('backprop only defined while training') 100 | end 101 | return self.gradInput 102 | end 103 | 104 | function PCAColorTransform:type(type, tensorCache) 105 | self.noise = nil 106 | self.alphas = nil 107 | self._tempLambda = nil 108 | self._tempLambdaExpanded = nil 109 | self._tempNoise = nil 110 | self._tempNoiseExpanded = nil 111 | Parent.type(self, type, tensorCache) 112 | end 113 | 114 | function PCAColorTransform:__tostring__() 115 | return string.format('%s channels: %d, std: %f', torch.type(self), 116 | self.inputChannels, self.std) 117 | end 118 | -------------------------------------------------------------------------------- /tutorials/ladder.md: -------------------------------------------------------------------------------- 1 | # Lateral Connections in Denoising Autoencoders Support Supervised Learning 2 | 3 | In this tutorial we will understand how to implement ladder network as explained in [[1](http://arxiv.org/pdf/1504.08215.pdf)]. In this paper the authors have shown how unsupervised learning using a denoising autoencoder with lateral connections help improve the classification accuracy in supervised learning. 4 | 5 | To produce results as mentioned in the paper please run following command (best test error we got was **`0.6%`**). To run this script you will need following torch packages: [`nn`](https://github.com/torch/nn), [`nngraph`](https://github.com/torch/nngraph), [`dp`](https://github.com/nicholas-leonard/dp), [`dpnn`](https://github.com/Element-Research/dpnn), [`optim`](https://github.com/torch/optim) and [`cunn`](https://github.com/torch/cunn) & [`cutorch`](https://github.com/torch/cutorch) if using cuda (```--useCuda``` flag). 6 | ``` 7 | th tutorials/ladder.lua --verbose --eta 500 --epochs 100 --learningRate 0.002 --linearDecay --endLearningRate 0 --startEpoch 50 --useCuda --deviceId 1 --noiseSigma 0.3 --useBatchNorm --batchSize 100 --adam --noValidation --attempts 10 8 | ``` 9 | 10 | The unsupervised learning (denoising) task supplements the supervised learning task (classification in this case). As in autoencoders this network has an encoder and a decoder. The output of encoder is also used for classification. The output of encoder is **`N`** dimensional where **`N`** is number of classes. This **`N`** dimensional vector is used for computing classification cost as well as feeds into the decoder. 11 | 12 | ## Classification 13 | Encoder/classifier units are defined as 14 | ```lua 15 | Z = nn.BatchNormalization(hidden_units)(nn.Linear(inputDims, hidden_units)(previous_H)) 16 | ``` 17 | where 18 | ```lua 19 | H = nn.ReLU()(nn.CMul()(nn.Add()(Z))) 20 | ``` 21 | For first layer **`previous_H`** is the corrupted input. 22 | ```lua 23 | input = nn.WhiteNoise(mean, sigma) 24 | ``` 25 | 26 | **`H`** for last encoder unit is defined as 27 | ```lua 28 | H = nn.LogSoftMax()(nn.CMul()(nn.Add()(Z))) 29 | ``` 30 | Last **`H`** feeds into the negative log likelihood criterion. 31 | 32 | ## Denoising 33 | Typically in denoising autoencoder the input samples are corrupted using Dropout [```nn.Dropout```](https://github.com/torch/nn/blob/master/Dropout.lua) but in this paper the authors use isotropic Gaussian noise [```nn.WhiteNoise```](https://github.com/Element-Research/dpnn/blob/master/WhiteNoise.lua) with zero mean. 34 | 35 | ### Lateral Connections in Autoencoder 36 | **`Z`** units in encoder are laterally connected to corresponding unit in the decoder. The output of decoder unit for neuron `i` is defined by 37 | ``` 38 | z^_i = a_i1 * z_i + a_i2 * sigmoid(a_i3 + a_i4) + a_i5 39 | ``` 40 | where 41 | ``` 42 | a_ij = c_ij * u_i + d_ij 43 | ``` 44 | **`U`** is output of decoder unit's ```nn.Linear()```. For the top most layer **`U`** is zero. **`Z`** is output of corresponding encoder unit (this is lateral connection, decoder takes output from its previous unit through **`U`** as well as corresponding encoder unit). For the lowest layer of decoder **`Z`** is the corrupted input signal. **`c_j`** and **`d_j`** are trainable weight vectors. This forms the crux of the ladder network. This can be easily implemented using **`nngraph`** as follows 45 | 46 | For the topmost layer **`U`**`= 0` and **`Z`** is the batch normalized output from the corresponding (in this case last) encoder/classifier unit. **`Z^`** for topmost layer is defined as 47 | ```lua 48 | z_hat1 = nn.CMul(hiddens[i])(Z) 49 | z_hat2 = nn.CMul(hiddens[i])(Z) 50 | z_hat3 = nn.CMul(hiddens[i])(Z) 51 | z_hat34 = nn.Add(hiddens[i])(z_hat3) 52 | z_hatSigmoid34 = nn.Sigmoid()(z_hat34) 53 | z_hat234 = nn.CMulTable()({z_hat2, z_hatSigmoid34}) 54 | z_hat5 = nn.CMul(hiddens_units)(Z) 55 | 56 | -- Z_hat = z^ 57 | Z_hat = nn.CAddTable()({z_hat1, z_hat234, z_hat5}) 58 | ``` 59 | 60 | For lower decoder units **`Z^`** is defined as 61 | ```lua 62 | 63 | u = nn.Linear()(previous_Z_hat) 64 | 65 | cu1 = nn.CMul(hidden_units)(u) 66 | du1 = nn.Add(hidden_units])(u) 67 | a1 = nn.CAddTable()({cu1, du1}) 68 | cu2 = nn.CMul(hidden_units)(u) 69 | du2 = nn.Add(hidden_units)(u) 70 | a2 = nn.CAddTable()({cu2, du2}) 71 | cu3 = nn.CMul(hidden_units)(u) 72 | du3 = nn.Add(hidden_units)(u) 73 | a3 = nn.CAddTable()({cu3, du3}) 74 | cu4 = nn.CMul(hidden_units)(u) 75 | du4 = nn.Add(hidden_units)(u) 76 | a4 = nn.CAddTable()({cu4, du4}) 77 | cu5 = nn.CMul(hidden_units)(u) 78 | du5 = nn.Add(hidden_units)(u) 79 | a5 = nn.CAddTable()({cu5, du5}) 80 | 81 | z_hat1 = nn.CMulTable()({a1, z}) 82 | z_hat2 = nn.CMulTable()({a3, z}) 83 | z_hat3 = nn.Sigmoid()(nn.CAddTable()({z_hat2, a4})) 84 | z_hat4 = nn.CMulTable()({a2, z_hat3}) 85 | Z_hat = nn.CAddTable()({z_hat1, z_hat4, a5}) 86 | ``` 87 | `Z_hat` is `z^`. Final `Z_hat` is the output of decoder and feeds into the mean squared error criterion. 88 | 89 | ## Criterions 90 | Negative log likelihood criterion is used for classification task. 91 | ```lua 92 | nll = nn.ClassNLLCriterion() 93 | ``` 94 | Mean squared error is used for the auxillary task. 95 | ```lua 96 | mse = nn.MSECriterion() 97 | ``` 98 | These two training criterions are combined using `eta` which determines weight for auxillary task. If `eta` is zero then the model is trained for classification only. 99 | Combined criterion 100 | ```lua 101 | criterions = ParallelCriterion() 102 | criterions:add(nll) 103 | criterions:add(mse, eta) 104 | ``` 105 | 106 | ## References 107 | [1] Rasmus, Antti, Harri Valpola, and Tapani Raiko. "Lateral Connections in Denoising Autoencoders Support Supervised Learning." arXiv preprint arXiv:1504.08215 (2015). 108 | -------------------------------------------------------------------------------- /SpatialBinaryConvolution.lua: -------------------------------------------------------------------------------- 1 | -- Reference: http://arxiv.org/abs/1603.05279 2 | -- We use floating point Matrix-Matrix multiplication as in SpatialConvolution. 3 | -- Filters are made binary {-1, +1} using Sign. 4 | -- Convolution output is scaled by L1-norm of the filters. 5 | 6 | -- Inheriting nn/SpatialConvolution. 7 | 8 | local SpatialBinaryConvolution, parent = torch.class('nn.SpatialBinaryConvolution', 'nn.SpatialConvolution') 9 | 10 | function SpatialBinaryConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH) 11 | parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH) 12 | parent.noBias(self) 13 | 14 | self.iwh = self.nInputPlane * self.kW * self.kH 15 | self.owh = self.nOutputPlane * self.kW * self.kH 16 | self.train = true 17 | end 18 | 19 | function SpatialBinaryConvolution:training() 20 | self.train = true 21 | end 22 | 23 | function SpatialBinaryConvolution:evaluate() 24 | self.train = false 25 | end 26 | 27 | -- Function to binarize weights and compute L1 norms 28 | function SpatialBinaryConvolution:binarizeWeight() 29 | self.tempWeight = self.tempWeight or self.weight.new() 30 | 31 | -- Grad Input alphas 32 | self.gradInputAlphas = self.gradInputAlphas or self.weight.new() 33 | self.gradInputAlphas:resize(self.nInputPlane) 34 | 35 | local temp = self.weight:transpose(1,2) 36 | self.tempWeight:resizeAs(temp):copy(temp) 37 | self.gradInputAlphas:norm(self.tempWeight:view(self.nInputPlane, -1), 1, 2) 38 | self.gradInputAlphas:div(self.owh) -- 1/owh 39 | 40 | -- alphas 41 | self.tempWeight:resizeAs(self.weight):copy(self.weight) 42 | self.alphas = self.alphas or self.weight.new() 43 | self.alphas:resize(self.nOutputPlane) 44 | self.alphas:norm(self.weight:view(self.nOutputPlane, -1), 1, 2) 45 | self.alphas:div(self.iwh) -- 1/iwh 46 | 47 | -- Binarize weights 48 | if not self.wmask then 49 | if torch.type(self.weight) == 'torch.CudaTensor' then 50 | self.wmask = torch.CudaTensor() 51 | else 52 | self.wmask = torch.ByteTensor() 53 | end 54 | end 55 | 56 | -- Binarizing weights 57 | self.weight.ge(self.wmask, self.weight, 0) 58 | self.weight[self.wmask] = 1 59 | self.weight.lt(self.wmask, self.weight, 0) 60 | self.weight[self.wmask] = -1 61 | end 62 | 63 | function SpatialBinaryConvolution:updateOutput(input) 64 | -- Binarize Weights 65 | self.binarizeWeight(self) 66 | 67 | -- Convolution 68 | self.output = parent.updateOutput(self, input) 69 | 70 | -- Scale output by alphas 71 | self._tempAlphas = self._tempAlphas or self.output.new() 72 | self._tempAlphasExpanded = self._tempAlphasExpanded or self.output.new() 73 | self._tempAlphasSamples = self._tempAlphasSamples or self.output.new() 74 | if self.output:nDimension() == 4 then 75 | local batchSize = self.output:size(1) 76 | local height = self.output:size(3) 77 | local width = self.output:size(4) 78 | 79 | self._tempAlphas = self.alphas:view(1, self.nOutputPlane, 1, 1) 80 | self._tempAlphasExpanded:expand(self._tempAlphas, batchSize, 81 | self.nOutputPlane, height, width) 82 | self._tempAlphasSamples:resizeAs(self._tempAlphasExpanded) 83 | :copy(self._tempAlphasExpanded) 84 | self.output:cmul(self._tempAlphasSamples) 85 | else 86 | local height = self.output:size(2) 87 | local width = self.output:size(3) 88 | 89 | self._tempAlphas = self.alphas:view(self.nOutputPlane, 1, 1) 90 | self._tempAlphasExpanded:expand(self._tempAlphas, self.nOutputPlane, 91 | height, width) 92 | self._tempAlphasSamples:resizeAs(self._tempAlphasExpanded) 93 | :copy(self._tempAlphasExpanded) 94 | self.output:cmul(self._tempAlphasSamples) 95 | end 96 | 97 | -- In evaluate mode. 98 | if not self.train then self.weight:copy(self.tempWeight) end 99 | 100 | return self.output 101 | end 102 | 103 | function SpatialBinaryConvolution:updateGradInput(input, gradOutput) 104 | self.gradInput = parent.updateGradInput(self, input, gradOutput) 105 | 106 | -- Scale gradInput by gradAlphas 107 | self._tempGradAlphas = self._temp or self.gradInput.new() 108 | self._tempGradAlphasExpanded = self._temp or self.gradInput.new() 109 | self._tempGradAlphasSamples = self._temp or self.gradInput.new() 110 | if self.gradInput:nDimension() == 4 then 111 | local batchSize = self.gradInput:size(1) 112 | local height = self.gradInput:size(3) 113 | local width = self.gradInput:size(4) 114 | 115 | self._tempGradAlphas = self.gradInputAlphas:view(1, self.nInputPlane, 116 | 1, 1) 117 | self._tempGradAlphasExpanded:expand(self._tempGradAlphas, 118 | batchSize, self.nInputPlane, 119 | height, width) 120 | self._tempGradAlphasSamples:resizeAs(self._tempGradAlphasExpanded) 121 | :copy(self._tempGradAlphasExpanded) 122 | 123 | self.gradInput:cmul(self._tempGradAlphasSamples) 124 | else 125 | local height = self.gradInput:size(2) 126 | local width = self.gradInput:size(3) 127 | 128 | self._tempGradAlphas = self.gradInputAlphas:view(self.nInputPlane, 129 | 1, 1) 130 | self._tempGradAlphasExpanded:expand(self._tempGradAlphas, 131 | self.nInputPlane, 132 | height, width) 133 | self._tempGradAlphasSamples:resizeAs(self._tempGradAlphasExpanded) 134 | :copy(self._tempGradAlphasExpanded) 135 | 136 | self.gradInput:cmul(self._tempGradAlphasSamples) 137 | end 138 | return self.gradInput 139 | end 140 | 141 | function SpatialBinaryConvolution:accGradParameters(input, gradOutput, scale) 142 | 143 | parent.accGradParameters(self, input, gradOutput, scale) 144 | 145 | --[[ 146 | Copy back floating point weights for weight update. 147 | This could be done individually after forward and backward, but to avoid 148 | additional copy is done at the end of backward. 149 | --]] 150 | 151 | self.weight:copy(self.tempWeight) 152 | end 153 | 154 | function SpatialBinaryConvolution:type(type, tensorCache) 155 | self.tempWeight = nil 156 | self.alphas = nil 157 | self.gradInputAlphas = nil 158 | self.wmask = nil 159 | 160 | self._tempAlphas = nil 161 | self._tempAlphasExpanded = nil 162 | self._tempAlphasSamples = nil 163 | 164 | self._tempGradAlphas = nil 165 | self._tempGradAlphasExpanded = nil 166 | self._tempGradAlphasSamples = nil 167 | 168 | parent.type(self, type, tensorCache) 169 | end 170 | 171 | function SpatialBinaryConvolution:__tostring__() 172 | return "Binary Convolution: "..parent.__tostring__(self) 173 | end 174 | -------------------------------------------------------------------------------- /tutorials/ladder_network/ladder_help_funcs.lua: -------------------------------------------------------------------------------- 1 | require 'csvigo' 2 | require 'string' 3 | require 'xlua' 4 | require 'lfs' 5 | 6 | -- Training function test 7 | -- Processing a batch in one Go. 8 | -- Has useCuda option to run on GPU [model and criterion expected in CUDA] 9 | local conTargets, conOutputs 10 | function model_train_multi_criterion(model, criterions, parameters, 11 | gradParameters, trainData, 12 | optimMethod, optimState, batchSize, 13 | epoch, confusion, trainLogger, 14 | useCuda, displayProgress, classifierIndx) 15 | 16 | model:training() 17 | confusion:zero() 18 | local displayProgress = displayProgress or false 19 | local classifierIndx = classifierIndx or 1 20 | 21 | -- epoch tracker 22 | local epoch = epoch or 1 23 | 24 | local totalLoss = 0 25 | 26 | -- shuffle at each epoch 27 | local shuffle = torch.randperm(trainData.size()) 28 | 29 | local sampleSize = trainData.data[1]:size() 30 | local isScalar = false 31 | local labelSize 32 | if trainData.labels:size():size() == 1 then 33 | isScalar = true 34 | else 35 | labelSize = trainData.labels[1]:size() 36 | end 37 | 38 | print("Doing epoch on training data:") 39 | print("Online epoch # " .. epoch .. " [batchSize = " .. batchSize .. "]") 40 | 41 | -- local variables 42 | local time = sys.clock() 43 | local inputs 44 | local targets 45 | if isScalar then 46 | targets = torch.Tensor(batchSize) 47 | else 48 | targets = torch.Tensor(batchSize, labelSize[1]) 49 | end 50 | 51 | -- Samples 52 | sizeLen = sampleSize:size() 53 | if sizeLen == 1 then 54 | inputs = torch.Tensor(batchSize, sampleSize[1]) 55 | elseif sizeLen == 2 then 56 | inputs = torch.Tensor(batchSize, sampleSize[1], sampleSize[2]) 57 | elseif sizeLen == 3 then 58 | inputs = torch.Tensor(batchSize, sampleSize[1], sampleSize[2], 59 | sampleSize[3]) 60 | else 61 | print("Invalid Sample Size") 62 | end 63 | 64 | local trainInputs = useCuda and torch.CudaTensor() or torch.FloatTensor() 65 | local trainTargets = useCuda and torch.CudaTensor() or torch.FloatTensor() 66 | local criterionTargets 67 | 68 | t = 1 69 | while t <= trainData.size() do 70 | if displayProgress then xlua.progress(t, trainData.size()) end 71 | noOfSamples = math.min(t + batchSize -1, trainData.size()) 72 | --create mini batch 73 | indx = 1 74 | for i=t,math.min(t+batchSize-1, trainData.size()) do 75 | -- Load new sample 76 | inputs[indx] = trainData.data[shuffle[i]] 77 | targets[indx] = trainData.labels[shuffle[i]] 78 | indx = indx + 1 79 | end 80 | indx = indx - 1 81 | 82 | local inputs_ = inputs[{{1,indx}}] 83 | trainInputs:resize(inputs_:size()):copy(inputs_) 84 | 85 | local targets_ = targets[{{1,indx}}] 86 | trainTargets:resize(targets_:size()):copy(targets_) 87 | 88 | criterionTargets = {trainTargets, trainInputs} 89 | 90 | t = t + batchSize 91 | 92 | -- create closure to evaluate F(X) and df/dX 93 | local feval = function(x) 94 | -- Get new parameters 95 | if x ~= parameters then 96 | parameters:copy(x) 97 | end 98 | 99 | -- reset gradients 100 | gradParameters:zero() 101 | 102 | -- evaluate function for complete mini batch 103 | local outputs = model:forward(trainInputs) 104 | local f = criterions:forward(outputs, criterionTargets) 105 | -- Total Loss 106 | totalLoss = totalLoss + f 107 | 108 | local df_do = criterions:backward(outputs, criterionTargets) 109 | model:backward(trainInputs, df_do) 110 | 111 | if useCuda then 112 | conOutputs = outputs[classifierIndx]:float() 113 | conTargets = trainTargets:float() 114 | else 115 | conOutputs = outputs[classifierIndx] 116 | conTargets = trainTargets 117 | end 118 | 119 | confusion:batchAdd(conOutputs, conTargets) 120 | 121 | -- Normalize gradients 122 | gradParameters:div(trainInputs:size()[1]) 123 | f = f/trainInputs:size()[1] 124 | 125 | -- L1/L2 Regularization 126 | if optimState.coefL1 ~= 0 or optimState.coefL2 ~= 0 then 127 | -- locals" 128 | local norm, sign = torch.norm, torch.sign 129 | 130 | -- Update loss with regularizer 131 | f = f + optimState.coefL1 * norm(parameters, 1) 132 | f = f + optimState.coefL2 * norm(parameters, 2)^2/2 133 | 134 | -- Gradients 135 | gradParameters:add(sign(parameters):mul(optimState.coefL1) 136 | + parameters:clone():mul(opt.coefL2)) 137 | end 138 | 139 | -- return f and df/dX 140 | return f, gradParameters 141 | end 142 | 143 | -- optimize on current mini batch # Using SGD/adam 144 | optimMethod(feval, parameters, optimState) 145 | end 146 | 147 | -- time taken 148 | time = sys.clock() - time 149 | time = time/trainData.size() 150 | print("\n==> time to learn 1 sample = " .. (time*1000) .. "ms") 151 | 152 | -- Total loss 153 | totalLoss = totalLoss/trainData.size() 154 | 155 | -- update logger 156 | if trainLogger ~= nil then 157 | trainLogger:add{["% mean class accuracy (train set)"] = 158 | confusion.totalValid * 100} 159 | end 160 | return totalLoss 161 | end 162 | 163 | function model_test_multi_criterion(model, criterions, testData, confusion, 164 | useCuda, classifierIndx) 165 | local time = sys.clock() 166 | model:evaluate() 167 | confusion:zero() 168 | local classifierIndx = classifierIndx or 1 169 | local totalLoss = 0 170 | local criterionTargets 171 | 172 | if useCuda then 173 | local batchSize = 64 174 | local inputs = torch.CudaTensor() 175 | local testInputs 176 | local cpu_targets 177 | local gpu_targets = torch.CudaTensor() 178 | local gpu_preds 179 | local cpu_preds 180 | local i = 1 181 | local j = 0 182 | while i <= testData.size() do 183 | j = math.min(i + batchSize -1, testData.size()) 184 | -- Copy input and targets to cuda 185 | testInputs = testData.data[{{i, j}}] 186 | inputs:resize(testInputs:size()):copy(testInputs) 187 | cpu_targets = testData.labels[{{i, j}}] 188 | gpu_targets:resize(cpu_targets:size()):copy(cpu_targets) 189 | criterionTargets = {gpu_targets, inputs} 190 | 191 | gpu_preds = model:forward(inputs) 192 | totalLoss = totalLoss + criterions:forward(gpu_preds, 193 | criterionTargets) 194 | cpu_preds = gpu_preds[classifierIndx]:float() 195 | confusion:batchAdd(cpu_preds, cpu_targets) 196 | i = i + batchSize 197 | end 198 | else 199 | local trainInputs = testData.data 200 | local trainTargets = testData.labels 201 | criterionTargets = {trainTargets, trainInputs} 202 | 203 | local outputs = model:forward(trainInputs) 204 | totalLoss = criterions:forward(outputs, criterionTargets) 205 | 206 | local conOutputs = outputs[classifierIndx] 207 | local conTargets = trainTargets 208 | confusion:batchAdd(conOutputs, conTargets) 209 | end 210 | 211 | -- time taken 212 | time = sys.clock() - time 213 | time = time/testData.size() 214 | print("\n==> time to test 1 sample = " .. (time*1000) .. "ms") 215 | 216 | -- Total loss 217 | totalLoss = totalLoss/testData.size() 218 | 219 | return totalLoss 220 | end 221 | -------------------------------------------------------------------------------- /Kmeans.lua: -------------------------------------------------------------------------------- 1 | -- Online (Hard) Kmeans layer. 2 | local Kmeans, parent = torch.class('nn.Kmeans', 'nn.Module') 3 | 4 | function Kmeans:__init(k, dim, scale) 5 | parent.__init(self) 6 | self.k = k 7 | self.dim = dim 8 | 9 | -- scale for online kmean update 10 | self.scale = scale 11 | 12 | assert(k > 0, "Clusters cannot be 0 or negative.") 13 | assert(dim > 0, "Dimensionality cannot be 0 or negative.") 14 | 15 | -- Kmeans centers -> self.weight 16 | self.weight = torch.Tensor(self.k, self.dim) 17 | 18 | self.gradWeight = torch.Tensor(self.weight:size()) 19 | self.loss = 0 -- within cluster error of the last forward 20 | 21 | self.clusterSampleCount = torch.Tensor(self.k) 22 | 23 | self:reset() 24 | end 25 | 26 | -- Reset 27 | function Kmeans:reset(stdev) 28 | local stdev = stdev or 1 29 | self.weight:uniform(-stdev, stdev) 30 | end 31 | 32 | -- Initialize Kmeans weight with random samples from input. 33 | function Kmeans:initRandom(input) 34 | local inputDim = input:nDimension() 35 | assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.") 36 | 37 | local noOfSamples = input:size(1) 38 | local dim = input:size(2) 39 | assert(dim == self.dim, "Dimensionality of input and weight don't match.") 40 | assert(noOfSamples >= self.k, "Need atleast k samples for initialization.") 41 | 42 | local indices = torch.zeros(self.k) 43 | indices:random(1, noOfSamples) 44 | 45 | for i=1, self.k do 46 | self.weight[i]:copy(input[indices[i]]) 47 | end 48 | end 49 | 50 | -- Initialize using Kmeans++ 51 | function Kmeans:initKmeansPlus(input, p) 52 | self.p = p or self.p or 0.95 53 | assert(self.p>=0 and self.p<=1, "P value should be between 0-1.") 54 | 55 | local inputDim = input:nDimension() 56 | assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.") 57 | local noOfSamples = input:size(1) 58 | 59 | local pcount = math.ceil((1-self.p)*noOfSamples) 60 | if pcount <= 0 then pcount = 1 end 61 | 62 | local initializedK = 1 63 | self.weight[initializedK]:copy(input[torch.random(noOfSamples)]) 64 | initializedK = initializedK + 1 65 | 66 | local clusters = self.weight.new() 67 | local clusterDistances = self.weight.new() 68 | local temp = self.weight.new() 69 | local expandedSample = self.weight.new() 70 | local distances = self.weight.new() 71 | distances:resize(noOfSamples):fill(math.huge) 72 | local maxScores = self.weight.new() 73 | local maxIndx = self.weight.new() 74 | 75 | for k=initializedK, self.k do 76 | clusters = self.weight[{{initializedK-1, initializedK-1}}] 77 | for i=1, noOfSamples do 78 | temp:expand(input[{{i}}], 1, self.dim) 79 | expandedSample:resize(temp:size()):copy(temp) 80 | 81 | -- Squared Euclidean distance 82 | expandedSample:add(-1, clusters) 83 | clusterDistances:norm(expandedSample, 2, 2) 84 | clusterDistances:pow(2) 85 | distances[i] = math.min(clusterDistances:min(), distances[i]) 86 | end 87 | maxScores, maxIndx = distances:sort(true) 88 | local tempIndx = torch.random(pcount) 89 | local indx = maxIndx[tempIndx] 90 | self.weight[initializedK]:copy(input[indx]) 91 | initializedK = initializedK + 1 92 | end 93 | end 94 | 95 | -- Kmeans updateOutput (forward) 96 | function Kmeans:updateOutput(input) 97 | local inputDim = input:nDimension() 98 | assert(inputDim == 2, "Incorrect input dimensionality. Expecting 2D.") 99 | 100 | local batchSize = input:size(1) 101 | local dim = input:size(2) 102 | assert(dim == self.dim, "Dimensionality of input and weight don't match.") 103 | 104 | assert(input:isContiguous(), "Input is not contiguous.") 105 | 106 | -- a sample copied k times to compute distance between sample and weight 107 | self._expandedSamples = self._expandedSamples or self.weight.new() 108 | 109 | -- distance between a sample and weight 110 | self._clusterDistances = self._clusterDistances or self.weight.new() 111 | 112 | self._temp = self._temp or input.new() 113 | self._tempExpanded = self._tempExpanded or input.new() 114 | 115 | -- Expanding inputs 116 | self._temp:view(input, 1, batchSize, self.dim) 117 | self._tempExpanded:expand(self._temp, self.k, batchSize, self.dim) 118 | self._expandedSamples:resize(self.k, batchSize, self.dim) 119 | :copy(self._tempExpanded) 120 | 121 | -- Expanding weights 122 | self._tempWeight = self._tempWeight or self.weight.new() 123 | self._tempWeightExp = self._tempWeightExp or self.weight.new() 124 | self._expandedWeight = self._expanedWeight or self.weight.new() 125 | self._tempWeight:view(self.weight, self.k, 1, self.dim) 126 | self._tempWeightExp:expand(self._tempWeight, self._expandedSamples:size()) 127 | self._expandedWeight:resize(self.k, batchSize, self.dim) 128 | :copy(self._tempWeightExp) 129 | 130 | -- x-c 131 | self._expandedSamples:add(-1, self._expandedWeight) 132 | -- Squared Euclidean distance 133 | self._clusterDistances:norm(self._expandedSamples, 2, 3) 134 | self._clusterDistances:pow(2) 135 | self._clusterDistances:resize(self.k, batchSize) 136 | 137 | self._minScore = self._minScore or self.weight.new() 138 | self._minIndx = self._minIndx or torch.LongTensor() 139 | self._minScore:min(self._minIndx, self._clusterDistances, 1) 140 | self._minIndx:resize(batchSize) 141 | 142 | self.output:resize(batchSize):copy(self._minIndx) 143 | self.loss = self._minScore:sum() 144 | 145 | return self.output 146 | end 147 | 148 | -- Kmeans has its own criterion hence gradInput are zeros 149 | function Kmeans:updateGradInput(input, gradOuput) 150 | self.gradInput:resize(input:size()):zero() 151 | 152 | return self.gradInput 153 | end 154 | 155 | -- We define kmeans update rule as c -> c + scale * 1/n * sum_i (x-c). 156 | -- n is no. of x's belonging to c. 157 | -- With this update rule and gradient descent will be negative the gradWeights. 158 | function Kmeans:accGradParameters(input, gradOutput, scale) 159 | local scale = self.scale or scale or 1 160 | assert(scale > 0 , " Scale has to be positive.") 161 | 162 | -- Update cluster sample count 163 | local batchSize = input:size(1) 164 | self._cscAdder = self._cscAdder or self.weight.new() 165 | self._cscAdder:resize(batchSize):fill(1) 166 | self.clusterSampleCount:zero() 167 | self.clusterSampleCount:indexAdd(1, self._minIndx, self._cscAdder) 168 | 169 | -- scale * (x[k]-c[k]) where k is nearest cluster to x 170 | self._gradWeight = self._gradWeight or self.gradWeight.new() 171 | self._gradWeight:index(self.weight, 1, self._minIndx) 172 | self._gradWeight:mul(-1) 173 | self._gradWeight:add(input) 174 | self._gradWeight:mul(-scale) 175 | 176 | self._gradWeight2 = self._gradWeight2 or self.gradWeight.new() 177 | self._gradWeight2:resizeAs(self.gradWeight):zero() 178 | self._gradWeight2:indexAdd(1, self._minIndx, self._gradWeight) 179 | 180 | -- scale/n * sum_i (x-c) 181 | self._ccounts = self._ccounts or self.clusterSampleCount.new() 182 | self._ccounts:resize(self.k):copy(self.clusterSampleCount) 183 | self._ccounts:add(0.0000001) -- prevent division by zero errors 184 | 185 | self._gradWeight2:cdiv(self._ccounts:view(self.k,1):expandAs(self.gradWeight)) 186 | 187 | self.gradWeight:add(self._gradWeight2) 188 | end 189 | 190 | function Kmeans:type(type, tensorCache) 191 | if type then 192 | -- prevent premature memory allocations 193 | self._expandedSamples = nil 194 | self._clusterDistances = nil 195 | self._temp = nil 196 | self._tempExpanded = nil 197 | self._tempWeight = nil 198 | self._tempWeightExp = nil 199 | self._expandedWeight = nil 200 | self._minScore = nil 201 | self._minIndx = nil 202 | self._cscAdder = nil 203 | end 204 | return parent.type(self, type, tensorCache) 205 | end 206 | -------------------------------------------------------------------------------- /SpatialGlimpse.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ SpatialGlimpse ]]-- 3 | -- Ref A.: http://papers.nips.cc/paper/5542-recurrent-models-of-visual-attention.pdf 4 | -- a glimpse is the concatenation of down-scaled cropped images of 5 | -- increasing scale around a given location in a given image. 6 | -- input is a pair of Tensors: {image, location} 7 | -- locations are x,y coordinates of the center of cropped patches. 8 | -- Coordinates are between -1,-1 (top-left) and 1,1 (bottom right) 9 | -- output is a batch of glimpses taken in image at location (x,y) 10 | -- glimpse size is {height, width}, or width only if square-shaped 11 | -- depth is number of patches to crop per glimpse (one patch per scale) 12 | -- Each successive patch is scale x size of the previous patch 13 | ------------------------------------------------------------------------ 14 | local SpatialGlimpse, parent = torch.class("nn.SpatialGlimpse", "nn.Module") 15 | 16 | function SpatialGlimpse:__init(size, depth, scale) 17 | require 'nnx' 18 | if torch.type(size)=='table' then 19 | self.height = size[1] 20 | self.width = size[2] 21 | else 22 | self.width = size 23 | self.height = size 24 | end 25 | self.depth = depth or 3 26 | self.scale = scale or 2 27 | 28 | assert(torch.type(self.width) == 'number') 29 | assert(torch.type(self.height) == 'number') 30 | assert(torch.type(self.depth) == 'number') 31 | assert(torch.type(self.scale) == 'number') 32 | parent.__init(self) 33 | self.gradInput = {torch.Tensor(), torch.Tensor()} 34 | if self.scale == 2 then 35 | self.module = nn.SpatialAveragePooling(2,2,2,2) 36 | else 37 | self.module = nn.SpatialReSampling{oheight=self.height,owidth=self.width} 38 | end 39 | self.modules = {self.module} 40 | end 41 | 42 | -- a bandwidth limited sensor which focuses on a location. 43 | -- locations index the x,y coord of the center of the output glimpse 44 | function SpatialGlimpse:updateOutput(inputTable) 45 | assert(torch.type(inputTable) == 'table') 46 | assert(#inputTable >= 2) 47 | local input, location = unpack(inputTable) 48 | input, location = self:toBatch(input, 3), self:toBatch(location, 1) 49 | assert(input:dim() == 4 and location:dim() == 2) 50 | 51 | self.output:resize(input:size(1), self.depth, input:size(2), self.height, self.width) 52 | 53 | self._crop = self._crop or self.output.new() 54 | self._pad = self._pad or input.new() 55 | 56 | for sampleIdx=1,self.output:size(1) do 57 | local outputSample = self.output[sampleIdx] 58 | local inputSample = input[sampleIdx] 59 | local yx = location[sampleIdx] 60 | -- (-1,-1) top left corner, (1,1) bottom right corner of image 61 | local y, x = yx:select(1,1), yx:select(1,2) 62 | -- (0,0), (1,1) 63 | y, x = (y+1)/2, (x+1)/2 64 | 65 | -- for each depth of glimpse : pad, crop, downscale 66 | local glimpseWidth = math.floor(self.width) 67 | local glimpseHeight = math.floor(self.height) 68 | for depth=1,self.depth do 69 | local dst = outputSample[depth] 70 | if depth > 1 then 71 | glimpseWidth = math.floor(glimpseWidth*self.scale) 72 | glimpseHeight = math.floor(glimpseHeight*self.scale) 73 | end 74 | 75 | -- add zero padding (glimpse could be partially out of bounds) 76 | local padWidth = math.floor((glimpseWidth-1)/2) 77 | local padHeight = math.floor((glimpseHeight-1)/2) 78 | self._pad:resize(input:size(2), input:size(3)+padHeight*2, input:size(4)+padWidth*2):zero() 79 | local center = self._pad:narrow(2,padHeight+1,input:size(3)):narrow(3,padWidth+1,input:size(4)) 80 | center:copy(inputSample) 81 | 82 | -- crop it 83 | local h, w = self._pad:size(2)-glimpseHeight, self._pad:size(3)-glimpseWidth 84 | local y, x = math.floor(math.min(h,math.max(0,y*h))), math.floor(math.min(w,math.max(0,x*w))) 85 | 86 | if depth == 1 then 87 | dst:copy(self._pad:narrow(2,y+1,glimpseHeight):narrow(3,x+1,glimpseWidth)) 88 | else 89 | self._crop:resize(input:size(2), glimpseHeight, glimpseWidth) 90 | self._crop:copy(self._pad:narrow(2,y+1,glimpseHeight):narrow(3,x+1,glimpseWidth)) 91 | 92 | if torch.type(self.module) == 'nn.SpatialAveragePooling' then 93 | local poolWidth = glimpseWidth/self.width 94 | assert(poolWidth % 2 == 0) 95 | local poolHeight = glimpseHeight/self.height 96 | assert(poolHeight % 2 == 0) 97 | self.module.kW = poolWidth 98 | self.module.kH = poolHeight 99 | self.module.dW = poolWidth 100 | self.module.dH = poolHeight 101 | end 102 | dst:copy(self.module:updateOutput(self._crop)) 103 | end 104 | end 105 | end 106 | 107 | self.output:resize(input:size(1), self.depth*input:size(2), self.height, self.width) 108 | self.output = self:fromBatch(self.output, 1) 109 | return self.output 110 | end 111 | 112 | function SpatialGlimpse:updateGradInput(inputTable, gradOutput) 113 | local input, location = unpack(inputTable) 114 | if #self.gradInput ~= 2 then 115 | self.gradInput = {input.new(), input.new()} 116 | end 117 | local gradInput, gradLocation = unpack(self.gradInput) 118 | input, location = self:toBatch(input, 3), self:toBatch(location, 1) 119 | gradOutput = self:toBatch(gradOutput, 3) 120 | 121 | gradInput:resizeAs(input):zero() 122 | gradLocation:resizeAs(location):zero() -- no backprop through location 123 | 124 | gradOutput = gradOutput:view(input:size(1), self.depth, input:size(2), self.height, self.width) 125 | 126 | for sampleIdx=1,gradOutput:size(1) do 127 | local gradOutputSample = gradOutput[sampleIdx] 128 | local gradInputSample = gradInput[sampleIdx] 129 | local yx = location[sampleIdx] -- height, width 130 | -- (-1,-1) top left corner, (1,1) bottom right corner of image 131 | local y, x = yx:select(1,1), yx:select(1,2) 132 | -- (0,0), (1,1) 133 | y, x = (y+1)/2, (x+1)/2 134 | 135 | -- for each depth of glimpse : pad, crop, downscale 136 | local glimpseWidth = math.floor(self.width) 137 | local glimpseHeight = math.floor(self.height) 138 | for depth=1,self.depth do 139 | local src = gradOutputSample[depth] 140 | if depth > 1 then 141 | glimpseWidth = math.floor(glimpseWidth*self.scale) 142 | glimpseHeight = math.floor(glimpseHeight*self.scale) 143 | end 144 | 145 | -- add zero padding (glimpse could be partially out of bounds) 146 | local padWidth = math.floor((glimpseWidth-1)/2) 147 | local padHeight = math.floor((glimpseHeight-1)/2) 148 | self._pad:resize(input:size(2), input:size(3)+padHeight*2, input:size(4)+padWidth*2):zero() 149 | 150 | local h, w = self._pad:size(2)-glimpseHeight, self._pad:size(3)-glimpseWidth 151 | local y, x = math.floor(math.min(h,math.max(0,y*h))), math.floor(math.min(w,math.max(0,x*w))) 152 | local pad = self._pad:narrow(2, y+1, glimpseHeight):narrow(3, x+1, glimpseWidth) 153 | 154 | -- upscale glimpse for different depths 155 | if depth == 1 then 156 | pad:copy(src) 157 | else 158 | self._crop:resize(input:size(2), glimpseHeight, glimpseWidth) 159 | 160 | if torch.type(self.module) == 'nn.SpatialAveragePooling' then 161 | local poolWidth = glimpseWidth/self.width 162 | assert(poolWidth % 2 == 0) 163 | local poolHeight = glimpseHeight/self.height 164 | assert(poolHeight % 2 == 0) 165 | self.module.kW = poolWidth 166 | self.module.kH = poolHeight 167 | self.module.dW = poolWidth 168 | self.module.dH = poolHeight 169 | end 170 | 171 | pad:copy(self.module:updateGradInput(self._crop, src)) 172 | end 173 | 174 | -- copy into gradInput tensor (excluding padding) 175 | gradInputSample:add(self._pad:narrow(2, padHeight+1, input:size(3)):narrow(3, padWidth+1, input:size(4))) 176 | end 177 | end 178 | 179 | self.gradInput[1] = self:fromBatch(gradInput, 1) 180 | self.gradInput[2] = self:fromBatch(gradLocation, 1) 181 | 182 | return self.gradInput 183 | end 184 | -------------------------------------------------------------------------------- /Inception.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | -- [[ Inception ]]-- 3 | -- Uses n+2 parallel "columns". The original paper uses 2+2 where 4 | -- the first two are (but there could be more than two): 5 | -- 1x1 conv (reduce) -> relu -> 5x5 conv -> relu 6 | -- 1x1 conv (reduce) -> relu -> 3x3 conv -> relu 7 | -- and where the other two are : 8 | -- 3x3 maxpool -> 1x1 conv (reduce/project) -> relu 9 | -- 1x1 conv (reduce) -> relu. 10 | -- This Model allows the first group of columns to be of any 11 | -- number while the last group consist of exactly two columns. 12 | -- The 1x1 conv are used to reduce the number of input channels 13 | -- (or filters) such that the capacity of the network doesnt 14 | -- explode. We refer to these here has "reduce". Since each 15 | -- column seems to have one and only one reduce, their initial 16 | -- configuration options are specified in lists of n+2 elements. 17 | ------------------------------------------------------------------------ 18 | local Inception, parent = torch.class("nn.Inception", "nn.Decorator") 19 | 20 | function Inception:__init(config) 21 | --[[ Required Arguments ]]-- 22 | -- Number of input channels or colors 23 | self.inputSize = config.inputSize 24 | -- Number of filters in the non-1x1 convolution kernel sizes, e.g. {32,48} 25 | self.outputSize = config.outputSize 26 | -- Number of filters in the 1x1 convolutions (reduction) 27 | -- used in each column, e.g. {48,64,32,32}. The last 2 are 28 | -- used respectively for the max pooling (projection) column 29 | -- (the last column in the paper) and the column that has 30 | -- nothing but a 1x1 conv (the first column in the paper). 31 | -- This table should have two elements more than the outputSize 32 | self.reduceSize = config.reduceSize 33 | 34 | --[[ Optional Arguments ]]-- 35 | -- The strides of the 1x1 (reduction) convolutions. Defaults to {1,1,...} 36 | self.reduceStride = config.reduceStride or {} 37 | -- A transfer function like nn.Tanh, nn.Sigmoid, nn.ReLU, nn.Identity, etc. 38 | -- It is used after each reduction (1x1 convolution) and convolution 39 | self.transfer = config.transfer or nn.ReLU() 40 | -- batch normalization can be awesome 41 | self.batchNorm = config.batchNorm 42 | -- Adding padding to the input of the convolutions such that 43 | -- input width and height are same as that of output. 44 | self.padding = true 45 | if config.padding ~= nil then 46 | self.padding = config.padding 47 | end 48 | -- The size (height=width) of the non-1x1 convolution kernels. 49 | self.kernelSize = config.kernelSize or {5,3} 50 | -- The stride (height=width) of the convolution. 51 | self.kernelStride = config.kernelStride or {1,1} 52 | -- The size (height=width) of the spatial max pooling used 53 | -- in the next-to-last column. 54 | self.poolSize = config.poolSize or 3 55 | -- The stride (height=width) of the spatial max pooling. 56 | self.poolStride = config.poolStride or 1 57 | -- The pooling layer. 58 | self.pool = config.pool or nn.SpatialMaxPooling(self.poolSize, self.poolSize, self.poolStride, self.poolStride) 59 | 60 | 61 | -- Variables checking that all of the output sizes are the same for a sample input. 62 | local iWidth, iHeight = 100, 200 63 | local oWidth, oHeight 64 | 65 | -- [[ Module Construction ]]-- 66 | local depthConcat = nn.DepthConcat(2) -- concat on 'c' dimension 67 | -- 1x1 conv (reduce) -> 3x3 conv 68 | -- 1x1 conv (reduce) -> 5x5 conv 69 | -- ... 70 | for i=1,#self.kernelSize do 71 | local mlp = nn.Sequential() 72 | -- 1x1 conv 73 | local reduce = nn.SpatialConvolution( 74 | self.inputSize, self.reduceSize[i], 1, 1, 75 | self.reduceStride[i] or 1, self.reduceStride[i] or 1 76 | ) 77 | mlp:add(reduce) 78 | if self.batchNorm then 79 | mlp:add(nn.SpatialBatchNormalization(self.reduceSize[i])) 80 | end 81 | mlp:add(self.transfer:clone()) 82 | 83 | -- nxn conv 84 | local pad = self.padding and math.floor(self.kernelSize[i]/2) or 0 85 | local conv = nn.SpatialConvolution( 86 | self.reduceSize[i], self.outputSize[i], 87 | self.kernelSize[i], self.kernelSize[i], 88 | self.kernelStride[i], self.kernelStride[i], 89 | pad 90 | ) 91 | mlp:add(conv) 92 | if self.batchNorm then 93 | mlp:add(nn.SpatialBatchNormalization(self.outputSize[i])) 94 | end 95 | mlp:add(self.transfer:clone()) 96 | depthConcat:add(mlp) 97 | 98 | -- Check the output sizes. 99 | local oWidth_i = torch.floor( 100 | (iWidth + 2*pad - self.kernelSize[i])/self.kernelStride[i] + 1) 101 | local oHeight_i = torch.floor( 102 | (iHeight + 2*pad - self.kernelSize[i])/self.kernelStride[i] + 1) 103 | if oWidth == nil then 104 | oWidth = oWidth_i 105 | oHeight = oHeight_i 106 | else 107 | if oWidth ~= oWidth_i or oHeight ~= oHeight_i then 108 | print("dpnn.Inception: Warning: Inconsistent output sizes.") 109 | end 110 | end 111 | end 112 | 113 | -- pool -> 1x1 conv 114 | local mlp = nn.Sequential() 115 | mlp:add(self.pool) 116 | -- not sure if transfer should go here? mlp:add(transfer:clone()) 117 | local i = #(self.kernelSize) + 1 118 | if self.reduceSize[i] then 119 | local reduce = nn.SpatialConvolution( 120 | self.inputSize, self.reduceSize[i], 1, 1, 121 | self.reduceStride[i] or 1, self.reduceStride[i] or 1 122 | ) 123 | mlp:add(reduce) 124 | if self.batchNorm then 125 | mlp:add(nn.SpatialBatchNormalization(self.reduceSize[i])) 126 | end 127 | mlp:add(self.transfer:clone()) 128 | end 129 | depthConcat:add(mlp) 130 | 131 | -- Check the output sizes. Infer the operation of the pooling layer. 132 | if self.pool.kW ~= nil and self.pool.dW ~= nil and self.pool.padW ~= nil then 133 | assert(oWidth ~= nil) 134 | assert(oHeight ~= nil) 135 | local oWidth_pool = torch.floor( 136 | (iWidth + 2*self.pool.padW - self.pool.kW)/self.pool.dW + 1) 137 | local oHeight_pool = torch.floor( 138 | (iHeight + 2*self.pool.padH - self.pool.kH)/self.pool.dH + 1) 139 | if oWidth ~= oWidth_pool or oHeight ~= oHeight_pool then 140 | print("dpnn.Inception: Warning: Inconsistent output sizes in pooling.") 141 | end 142 | end 143 | 144 | -- reduce: 1x1 conv (channel-wise pooling) 145 | i = i + 1 146 | if self.reduceSize[i] then 147 | local mlp = nn.Sequential() 148 | local reduce = nn.SpatialConvolution( 149 | self.inputSize, self.reduceSize[i], 1, 1, 150 | self.reduceStride[i] or 1, self.reduceStride[i] or 1 151 | ) 152 | mlp:add(reduce) 153 | if self.batchNorm then 154 | mlp:add(nn.SpatialBatchNormalization(self.reduceSize[i])) 155 | end 156 | mlp:add(self.transfer:clone()) 157 | depthConcat:add(mlp) 158 | 159 | -- Check the output sizes. 160 | local oWidth_conv = torch.floor((iWidth - 1)/(self.reduceStride[i] or 1) + 1) 161 | local oHeight_conv = torch.floor((iHeight - 1)/(self.reduceStride[i] or 1) + 1) 162 | if oWidth ~= oWidth_conv or oHeight ~= oHeight_conv then 163 | print("dpnn.Inception: Warning: Inconsistent output sizes in 1x1 conv.") 164 | end 165 | end 166 | 167 | parent.__init(self, depthConcat) 168 | end 169 | 170 | function Inception:updateOutput(input) 171 | local input = self:toBatch(input, 3) 172 | local output = self.module:updateOutput(input) 173 | self.output = self:fromBatch(output, 3) 174 | return self.output 175 | end 176 | 177 | function Inception:updateGradInput(input, gradOutput) 178 | local input, gradOutput = self:toBatch(input, 3), self:toBatch(gradOutput, 3) 179 | local gradInput = self.module:updateGradInput(input, gradOutput) 180 | self.gradInput = self:fromBatch(gradInput, 3) 181 | return self.gradInput 182 | end 183 | 184 | function Inception:accGradParameters(input, gradOutput, scale) 185 | local input, gradOutput = self:toBatch(input, 3), self:toBatch(gradOutput, 3) 186 | self.module:accGradParameters(input, gradOutput, scale) 187 | end 188 | 189 | function Inception:accUpdateGradParameters(input, gradOutput, lr) 190 | local input, gradOutput = self:toBatch(input, 3), self:toBatch(gradOutput, 3) 191 | self.module:accUpdateGradParameters(input, gradOutput, lr) 192 | end 193 | -------------------------------------------------------------------------------- /Convert.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[ nn.Convert ]-- 3 | -- Module to convert between different data formats 4 | -- nn.Convert('bchw', 'bf') or nn.Convert('chw', 'f') 5 | -- Automatically converts input to same type as self.output 6 | -- Simplest use is for automatic input type converions : nn.Convert() 7 | ------------------------------------------------------------------------ 8 | local _ = require 'moses' 9 | local Convert, parent = torch.class("nn.Convert", "nn.Container") 10 | 11 | function Convert:__init(inputShape, outputShape) 12 | if outputShape and not inputShape then 13 | error"Expecting non-nil arg 1 when arg 2 is provided" 14 | end 15 | inputShape = inputShape or 'b*' 16 | outputShape = outputShape or inputShape 17 | self.inputShape = inputShape:find('b') and inputShape or ('b'..inputShape) 18 | self.outputShape = outputShape:find('b') and outputShape or ('b'..outputShape) 19 | self.inputBatchDim = self.inputShape:find('b') 20 | self.outputBatchDim = self.outputShape:find('b') 21 | if self.inputShape == 'b*' or self.outputShape == 'b*' then 22 | assert(self.inputShape == 'b*' and self.outputShape == 'b*', 'Both or neither shapes must be b*') 23 | self.nInputDim = -1 24 | self.nOutputDim = -1 25 | self.transposition = true 26 | else 27 | -- number of dims in batch mode 28 | self.nInputDim = #self.inputShape 29 | self.nOutputDim = #self.outputShape 30 | -- is the outputShape just a transposition of the inputShape? 31 | if self.nInputDim == self.nOutputDim then 32 | self.transposition = true 33 | for i=1,self.nInputDim do 34 | if not self.outputShape:find(self.inputShape:sub(i,i)) then 35 | self.transposition = false 36 | break 37 | end 38 | end 39 | end 40 | end 41 | parent.__init(self) 42 | end 43 | 44 | -- post-initialization 45 | function Convert:buildConverter(input) 46 | if self.transposition then 47 | self.converter = self:transpose(self.outputShape) 48 | else 49 | if (torch.type(self[self.outputShape]) ~= 'function') then 50 | error(string.format("Unrecognized conversion of shape %s to %s", self.inputShape, self.outputShape)) 51 | end 52 | self.converter = self[self.outputShape](self, input) 53 | end 54 | assert(torch.isTensor(self.output), "Expecting Tensor output") 55 | 56 | self.converter:type(torch.type(self.output)) 57 | self.converter:serialMode(self.dpnn_serialEmpty, self.dpnn_serialType) 58 | 59 | self.modules[1] = self.converter 60 | end 61 | 62 | function Convert:updateOutput(input) 63 | assert(torch.isTensor(input), "expecting Tensor") 64 | if not torch.isTypeOf(input, torch.type(self.output)) then 65 | -- handle different input type 66 | self._input = self._input or self.output.new() 67 | self._input:resize(input:size()):copy(input) 68 | input = self._input 69 | end 70 | self.batchMode = true 71 | if input:dim() < self.nInputDim then 72 | -- handle non-batch mode 73 | local inputSize = input:size():totable() 74 | table.insert(inputSize, self.inputBatchDim, 1) 75 | self.__input = self.__input or input.new() 76 | self.__input:set(input):resize(unpack(inputSize)) 77 | input = self.__input 78 | self.batchMode = false 79 | end 80 | if not self.converter then 81 | self:buildConverter(input) 82 | end 83 | 84 | self.output = self.converter:updateOutput(input) 85 | 86 | if not self.batchMode then 87 | local outputSize = self.output:size():totable() 88 | table.remove(outputSize, self.outputBatchDim) 89 | self.__output = self.__output or self.output.new() 90 | self.__output:set(self.output):resize(unpack(outputSize)) 91 | self.output = self.__output 92 | end 93 | return self.output 94 | end 95 | 96 | function Convert:updateGradInput(input, gradOutput) 97 | local input_ = input 98 | input = self._input or input 99 | if not self.batchMode then 100 | input = self.__input 101 | self.__gradOutput = self.__gradOutput or gradOutput.new() 102 | self.__gradOutput:set(gradOutput):resize(self.converter.output:size()) 103 | gradOutput = self.__gradOutput 104 | end 105 | 106 | local gradInput = self.converter:updateGradInput(input, gradOutput) 107 | 108 | if not self.batchMode then 109 | self.__gradInput = self.__gradInput or gradInput.new() 110 | self.__gradInput:set(gradInput):resize(input_:size()) 111 | gradInput = self.__gradInput 112 | end 113 | if self._input then 114 | self._gradInput = self._gradInput or input.new() 115 | self._gradInput:resize(input:size()):copy(gradInput) 116 | self.gradInput = self._gradInput 117 | else 118 | self.gradInput = gradInput 119 | end 120 | 121 | return self.gradInput 122 | end 123 | 124 | function Convert:accGradParameters(input, gradOutput, scale) 125 | input = self.batchMode and self.__input or self._input or input 126 | gradOutput = self.batchMode and self.__gradOutput or gradOutput 127 | self.converter:accGradParameters(input, gradOutput, scale) 128 | end 129 | 130 | function Convert:accUpdateGradParameters(input, gradOutput, lr) 131 | input = self.batchMode and self.__input or self._input or input 132 | gradOutput = self.batchMode and self.__gradOutput or gradOutput 133 | self.converter:accUpdateGradParameters(input, gradOutput, lr) 134 | end 135 | 136 | -- batch feature 137 | function Convert:bf(input) 138 | local b_pos = self:findAxis('b', self.inputShape) 139 | local dim = #self.inputShape 140 | if self.inputShape == 'bt' then 141 | error"Conversion of shape bt to bf not supported: open an issue on github" 142 | end 143 | -- was b 144 | if dim == 1 then 145 | return nn.Reshape(1) 146 | end 147 | -- was b... 148 | local modula 149 | if b_pos ~= 1 then 150 | modula = nn.Transpose({1, b_pos}) 151 | end 152 | if dim > 2 then 153 | local transpose = modula 154 | local sampleSize = input:select(self:findAxis('b'),1):nElement() 155 | local reshape = nn.Reshape(sampleSize) 156 | if transpose then 157 | modula = nn.Sequential() 158 | modula:add(transpose) 159 | modula:add(reshape) 160 | else 161 | modula = reshape 162 | end 163 | end 164 | return modula or nn.Identity() 165 | end 166 | 167 | -- each example is a scalar; batch is a vector 168 | function Convert:b(input) 169 | local b_pos = self:findAxis('b') 170 | if self.inputShape == 'bt' or self.inputShape == 'tb' then 171 | local t_pos = self:findAxis('t') 172 | -- select first set of classes 173 | return nn.Select(t_pos, 1) 174 | elseif self.inputShape == 'bf' or self.inputShape == 'fb' then 175 | -- this wont work as expected with size(f) > 1 176 | local f_pos = self:findAxis('f') 177 | if input:size(f_pos) > 1 then 178 | error("Cannot convert shape "..self.inputShape.." to b when feature > 1") 179 | end 180 | return nn.Select(f_pos, 1) 181 | else 182 | error("Cannot convert shape "..self.inputShape.." to shape b") 183 | end 184 | end 185 | 186 | -- returns the current shape of the data 187 | function Convert:default() 188 | return nn.Identity() 189 | end 190 | 191 | -- multi-class (batch target) 192 | function Convert:bt() 193 | local b_pos = self:findAxis('b') 194 | local modula 195 | if self.inputShape == 'b' then 196 | modula = nn.Reshape(1) 197 | else 198 | error("cannot convert shape '"..self.inputShape.."' to bt") 199 | end 200 | return modula 201 | end 202 | 203 | -- a generic function for transposing shape axes 204 | function Convert:transpose(newShape) 205 | if newShape == self.inputShape then 206 | return nn.Identity() 207 | end 208 | local inputShape = {} 209 | for i=1,#self.inputShape do 210 | table.insert(inputShape, self.inputShape:sub(i,i)) 211 | end 212 | local transpositions = {} 213 | for i=1,#newShape do 214 | local j = _.indexOf(inputShape, newShape:sub(i,i)) 215 | if i ~= j then 216 | local char = inputShape[i] 217 | inputShape[i] = inputShape[j] 218 | inputShape[j] = char 219 | table.insert(transpositions, {j, i}) 220 | end 221 | end 222 | return nn.Transpose(unpack(transpositions)) 223 | end 224 | 225 | function Convert:findAxis(axis_char, shape, silent) 226 | shape = shape or self.inputShape 227 | local axis_pos = shape:find(axis_char) 228 | if (not silent) and (not axis_pos) then 229 | error("Provided shape '"..shape.."' has no axis '"..axis_char.."'", 2) 230 | end 231 | return axis_pos 232 | end 233 | 234 | function Convert:type(type) 235 | if not torch.isTypeOf(self.output, type) then 236 | self._input = nil 237 | self._gradInput = nil 238 | self.__input = nil 239 | self.__output = nil 240 | self.__gradInput = nil 241 | self.__gradOutput = nil 242 | end 243 | return parent.type(self, type) 244 | end 245 | -------------------------------------------------------------------------------- /tutorials/ladder_network/ladder.lua: -------------------------------------------------------------------------------- 1 | --[[! 2 | Implementation of ladder as mentioned in http://arxiv.org/pdf/1504.08215.pdf 3 | --]] 4 | 5 | require 'nn' 6 | require 'dp' 7 | require 'dpnn' 8 | require 'math' 9 | require 'xlua' 10 | require 'optim' 11 | require 'nngraph' 12 | 13 | -- Cuda 14 | require 'cutorch' 15 | require 'cunn' 16 | 17 | -- Help functions 18 | require 'ladder_help_funcs' 19 | 20 | torch.setdefaulttensortype("torch.FloatTensor") 21 | op = xlua.OptionParser('%prog [options]') 22 | 23 | -- Data 24 | op:option{'--noValidation', action='store_true', dest='noValidation', 25 | help='Use validation data for training as well.', default=false} 26 | op:option{'--best', action='store_true', dest='best', 27 | help='Use best training or validation model.', default=false} 28 | 29 | -- Model parameters 30 | op:option{'--noOfClasses', action='store', dest='noOfClasses', 31 | help='Number of classes.', default=10} -- MNIST data 32 | op:option{'--noiseSigma', action='store', dest='noiseSigma', 33 | help='Stdev for noise for denoising autoencoder (Mean is zero).', 34 | default=0} 35 | op:option{'--hiddens', action='store', dest='hiddens', 36 | help='Hiddens units', default='{1000, 500, 250, 250, 250}'} 37 | op:option{'--useBatchNorm', action='store_true', dest='useBatchNorm', 38 | help='Use batch normalization.', default=false} 39 | op:option{'--weightTied', action='store_true', dest='weightTied', 40 | help='Tie weights of decoder with encoder.', default=false} 41 | 42 | -- Criterion and learning 43 | op:option{'--attempts', action='store', dest='attempts', 44 | help='Run attempts independent experiments.', default=1} 45 | op:option{'--eta', action='store', dest='eta', 46 | help='If zero then only classifier cost is considered.', default=0} 47 | op:option{'--batchSize', action='store', dest='batchSize', 48 | help='Batch Size.',default=32} 49 | op:option{'--epochs', action='store', dest='epochs', 50 | help='Number of epochs.',default=100} 51 | op:option{'--maxTries', action='store', dest='maxTries', 52 | help='Number of tries for stopping.',default=0} 53 | op:option{'--learningRate', action='store', dest='learningRate', 54 | help='Learning rate',default=0.002} 55 | op:option{'--learningRateDecay', action='store', dest='learningRateDecay', 56 | help='Learning rate decay',default=1e-7} 57 | op:option{'--linearDecay', action='store_true', dest='linearDecay', 58 | help='Linearly reduce learning rate', default=false} 59 | op:option{'--startEpoch', action='store', dest='startEpoch', 60 | help='Epoch number when to start linear decay.',default=1} 61 | op:option{'--endLearningRate', action='store', dest='endLearningRate', 62 | help='Learning rate at last epoch',default=0.0} 63 | op:option{'--momentum', action='store', dest='momentum', 64 | help='Learning Momemtum',default=0} 65 | op:option{'--loss', action='store_true', dest='loss', 66 | help='If true use loss for early stopping else confusion matrix.', 67 | default=false} 68 | op:option{'--adam', action='store_true', dest='adam', 69 | help='Use adaptive moment estimation optimizer.', default=false} 70 | 71 | -- Use Cuda 72 | op:option{'--useCuda', action='store_true', dest='useCuda', help='Use GPU', 73 | default=false} 74 | op:option{'--deviceId', action='store', dest='deviceId', help='GPU device Id', 75 | default=2} 76 | 77 | -- Print debug messages 78 | op:option{'--verbose', action='store_true', dest='verbose', 79 | help='Print apppropriate debug messages.', default=false} 80 | 81 | -- Command line arguments 82 | opt = op:parse() 83 | op:summarize() 84 | 85 | -- Data 86 | noValidation = opt.noValidation 87 | best = opt.best 88 | verbose = opt.verbose 89 | 90 | -- Cuda 91 | useCuda = opt.useCuda 92 | deviceId = tonumber(opt.deviceId) 93 | 94 | -- MNIST Data source 95 | ds = dp.Mnist{} 96 | 97 | attempts = tonumber(opt.attempts) 98 | testAccus = torch.zeros(attempts) 99 | trData = {} 100 | tvData = {} 101 | tsData = {} 102 | for attempt=1,attempts do 103 | 104 | local t1, t2 105 | 106 | trData.data, t1, t2 = ds:get('train', 'input', 'bchw', 'float') 107 | trData.labels, t1, t2 = ds:get('train', 'target') 108 | trData.size = function() return trData.data:size()[1] end 109 | 110 | tvData.data, t1, t2 = ds:get('valid', 'input', 'bchw', 'float') 111 | tvData.labels, t1, t2 = ds:get('valid', 'target') 112 | tvData.size = function() return tvData.data:size()[1] end 113 | 114 | tsData.data, t1, t2 = ds:get('test', 'input', 'bchw', 'float') 115 | tsData.labels, t1, t2 = ds:get('test', 'target') 116 | tsData.size = function() return tsData.data:size()[1] end 117 | collectgarbage() 118 | 119 | local tempSample = trData.data[1] 120 | local channels = tempSample:size(1) 121 | local width = tempSample:size(2) 122 | local height = tempSample:size(3) 123 | local linFeats = channels * height * width 124 | 125 | -- MNIST 126 | local classes = {'1', '2', '3', '4', '5', '6', '7', '8', '9', '10'} 127 | local confusion = optim.ConfusionMatrix(classes) 128 | 129 | -- Model 130 | local noOfClasses = tonumber(opt.noOfClasses) 131 | local noiseSigma = tonumber(opt.noiseSigma) 132 | local inputHiddens = dp.returnString(opt.hiddens) 133 | local useBatchNorm = opt.useBatchNorm 134 | local weightTied = opt.weightTied 135 | 136 | 137 | hiddens = {linFeats} 138 | for i=1,#inputHiddens do 139 | hiddens[#hiddens+1] = inputHiddens[i] 140 | end 141 | hiddens[#hiddens+1] = noOfClasses 142 | 143 | -- encoder input 144 | local input = nil 145 | if noiseSigma ~= 0 then 146 | if verbose then print("Add noise to the samples.") end 147 | input = nn.WhiteNoise(0, noiseSigma)() 148 | else 149 | input = nn.Identity()() 150 | end 151 | 152 | -- encoder model 153 | local encoderLayers = {} 154 | local Zs = {} 155 | Zs[1] = input 156 | local Hs = {} 157 | Hs[1] = input 158 | for i=2,#hiddens do 159 | -- Zs 160 | encoderLayers[i] = nn.Linear(hiddens[i-1], hiddens[i]) 161 | if useBatchNorm then 162 | Zs[i] = nn.BatchNormalization(hiddens[i]) 163 | (encoderLayers[i](Hs[i-1])) 164 | else 165 | Zs[i] = encoderLayers[i](Hs[i-1]) 166 | end 167 | 168 | -- Hs 169 | if i==#hiddens then 170 | Hs[i] = nn.CMul(hiddens[i])(nn.Add(hiddens[i])(Zs[i])) 171 | else 172 | Hs[i] = nn.ReLU()(nn.CMul(hiddens[i])(nn.Add(hiddens[i])(Zs[i]))) 173 | end 174 | end 175 | 176 | -- classifier 177 | local classifier = nn.LogSoftMax()(Hs[#Hs]) 178 | 179 | -- Decoder 180 | local decoderLayers = {} 181 | local Z_hats = {} 182 | for i=#hiddens,1,-1 do 183 | 184 | -- u = 0 hence no cij 185 | if i==#hiddens then 186 | z_hat1 = nn.CMul(hiddens[i])(Zs[i]) 187 | z_hat2 = nn.CMul(hiddens[i])(Zs[i]) 188 | z_hat3 = nn.CMul(hiddens[i])(Zs[i]) 189 | z_hat34 = nn.Add(hiddens[i])(z_hat3) 190 | z_hatSigmoid34 = nn.Sigmoid()(z_hat34) 191 | z_hat234 = nn.CMulTable()({z_hat2, z_hatSigmoid34}) 192 | z_hat5 = nn.CMul(hiddens[i])(Zs[i]) 193 | Z_hats[i] = nn.CAddTable()({z_hat1, z_hat234, z_hat5}) 194 | else 195 | decoderLayers[i] = nn.Linear(hiddens[i+1], hiddens[i]) 196 | if weightTied then 197 | if verbose then print("Tying encoder-decoder weights.") end 198 | decoderLayers[i].weight:set(encoderLayers[i+1].weight:t()) 199 | decoderLayers[i].gradWeight:set(encoderLayers[i+1].gradWeight:t()) 200 | end 201 | 202 | u = decoderLayers[i](Z_hats[i+1]) 203 | 204 | cu1 = nn.CMul(hiddens[i])(u) 205 | du1 = nn.Add(hiddens[i])(u) 206 | a1 = nn.CAddTable()({cu1, du1}) 207 | cu2 = nn.CMul(hiddens[i])(u) 208 | du2 = nn.Add(hiddens[i])(u) 209 | a2 = nn.CAddTable()({cu2, du2}) 210 | cu3 = nn.CMul(hiddens[i])(u) 211 | du3 = nn.Add(hiddens[i])(u) 212 | a3 = nn.CAddTable()({cu3, du3}) 213 | cu4 = nn.CMul(hiddens[i])(u) 214 | du4 = nn.Add(hiddens[i])(u) 215 | a4 = nn.CAddTable()({cu4, du4}) 216 | cu5 = nn.CMul(hiddens[i])(u) 217 | du5 = nn.Add(hiddens[i])(u) 218 | a5 = nn.CAddTable()({cu5, du5}) 219 | 220 | z_hat1 = nn.CMulTable()({a1, Zs[i]}) 221 | z_hat2 = nn.CMulTable()({a3, Zs[i]}) 222 | z_hat3 = nn.Sigmoid()(nn.CAddTable()({z_hat2, a4})) 223 | z_hat4 = nn.CMulTable()({a2, z_hat3}) 224 | Z_hats[i] = nn.CAddTable()({z_hat1, z_hat4, a5}) 225 | end 226 | end 227 | local model = nn.gModule({input}, {classifier, Z_hats[1]--[[Decoder--]]}) 228 | if verbose then print(model) end 229 | 230 | -- Criterion and learning 231 | -- Criterion 232 | local eta = tonumber(opt.eta) 233 | local criterions = nn.ParallelCriterion() 234 | local nll = nn.ClassNLLCriterion() 235 | local mse = nn.MSECriterion() 236 | criterions:add(nll) 237 | criterions:add(mse, eta) 238 | 239 | -- Learning 240 | local batchSize = tonumber(opt.batchSize) 241 | local epochs = tonumber(opt.epochs) 242 | local maxTries = tonumber(opt.maxTries) 243 | local learningRate = tonumber(opt.learningRate) 244 | local learningRateDecay = tonumber(opt.learningRateDecay) 245 | local linearDecay = opt.linearDecay 246 | local startEpoch = tonumber(opt.startEpoch) 247 | local endLearningRate = tonumber(opt.endLearningRate) 248 | assert(epochs > startEpoch, "startEpoch should be smaller than epochs.") 249 | 250 | if linearDecay then 251 | if verbose then print("Using linear decay.") end 252 | learningRates = torch.zeros(startEpoch):fill(learningRate) 253 | local temp = torch.range(learningRate, endLearningRate, 254 | -learningRate/(epochs-startEpoch)) 255 | learningRates = torch.cat(learningRates, temp) 256 | end 257 | 258 | local momentum = tonumber(opt.momentum) 259 | local loss = opt.loss 260 | local adam = opt.adam 261 | 262 | -- Optimizer 263 | local optimState = { 264 | coefL1 = 0, 265 | coefL2 = 0, 266 | learningRate = learningRate, 267 | weightDecay = 0.0, 268 | momentum = momentum, 269 | learningRateDecay = learningRateDecay 270 | } 271 | 272 | -- If true use Adaptive moment estimation else SGD. 273 | if adam then 274 | if verbose then print("Using Adaptive moment estimation optimizer.") end 275 | optimMethod = optim.adam 276 | else 277 | if verbose then print("Using Stocastic gradient descent optimizer.") end 278 | optimMethod = optim.sgd 279 | end 280 | if verbose then 281 | print(optimMethod) 282 | print(optimState) 283 | end 284 | 285 | 286 | if useCuda then 287 | if verbose then print("Using GPU: "..deviceId) end 288 | cutorch.setDevice(deviceId) 289 | if verbose then print("GPU set") end 290 | model:cuda() 291 | if verbose then print("Model copied to GPU.") end 292 | criterions:cuda() 293 | if verbose then print("Criterion copied to GPU.") end 294 | else 295 | if verbose then print("Not using GPU.") end 296 | end 297 | 298 | -- Retrieve parameters and gradients 299 | parameters, gradParameters = model:getParameters() 300 | 301 | -- Reshape samples from images to vectors 302 | trData.data = trData.data:reshape(trData.size(1), linFeats) 303 | tvData.data = tvData.data:reshape(tvData.size(1), linFeats) 304 | tsData.data = tsData.data:reshape(tsData.size(1), linFeats) 305 | collectgarbage() 306 | 307 | if noValidation then 308 | trData.data = torch.cat(trData.data, tvData.data, 1) 309 | trData.labels = torch.cat(trData.labels, tvData.labels, 1) 310 | tvData.data = nil 311 | tvData.labels = nil 312 | collectgarbage() 313 | end 314 | 315 | if verbose then 316 | print(trData) 317 | print(tvData) 318 | print(tsData) 319 | end 320 | 321 | -- Training 322 | local displayProgress = verbose 323 | local classifierIndx = 1 324 | local trainAccu = 0 325 | local validAccu = 0 326 | local bestTrainAccu = 0 327 | local bestValidAccu = 0 328 | local trainLoss = 0 329 | local validLoss = 0 330 | local bestTrainLoss = math.huge 331 | local bestValidLoss = math.huge 332 | local bestTrainModel = nn.Sequential() 333 | local bestValidModel = nn.Sequential() 334 | local earlyStopCount = 0 335 | for i=1, epochs do 336 | if linearDecay then 337 | optimState.learningRate = learningRates[i] 338 | end 339 | -- Training 340 | trainLoss = model_train_multi_criterion(model, criterions, 341 | parameters, gradParameters, trData, 342 | optimMethod, optimState, batchSize, 343 | i, confusion, trainLogger, 344 | useCuda, displayProgress, 345 | classiferIndx) 346 | confusion:updateValids() 347 | if loss then 348 | if verbose then 349 | print("Current train loss: ".. trainLoss 350 | ..", best train loss: " .. bestTrainLoss) 351 | end 352 | if trainLoss < bestTrainLoss then 353 | bestTrainLoss = trainLoss 354 | bestTrainModel = model:clone() 355 | print(confusion) 356 | end 357 | else -- Using classification accuracy for saving best train model 358 | trainAccu = confusion.totalValid * 100 359 | if bestTrainAccu < trainAccu then 360 | bestTrainAccu = trainAccu 361 | bestTrainModel = model:clone() 362 | bestTrainLoss = trainLoss 363 | end 364 | if verbose then 365 | print("Current train accu: ".. trainAccu 366 | ..", best train accu: " .. bestTrainAccu 367 | ..", best train loss: " .. bestTrainLoss) 368 | end 369 | end 370 | 371 | -- Validating 372 | if not noValidation then 373 | validLoss = model_test_multi_criterion(model, criterions, 374 | tvData, confusion, 375 | useCuda, classifierIndx) 376 | confusion:updateValids() 377 | if loss then 378 | if verbose then 379 | print("Current valid loss: ".. validLoss 380 | ..", best valid loss: " .. bestValidLoss) 381 | end 382 | if validLoss < bestValidLoss then 383 | earlyStopCount = 0 384 | bestValidLoss = validLoss 385 | bestValidModel = model:clone() 386 | print(confusion) 387 | else 388 | earlyStopCount = earlyStopCount + 1 389 | end 390 | else 391 | validAccu = confusion.totalValid * 100 392 | if bestValidAccu < validAccu then 393 | earlyStopCount = 0 394 | bestValidAccu = validAccu 395 | bestValidModel = model:clone() 396 | bestValidLoss = validLoss 397 | else 398 | earlyStopCount = earlyStopCount + 1 399 | end 400 | if verbose then 401 | print("Current valid accu: ".. validAccu 402 | ..", best valid accu: " .. bestValidAccu 403 | ..", best valid loss: " .. bestValidLoss) 404 | end 405 | end 406 | if verbose then 407 | print(noiseSigma, weightTied, useBatchNorm, eta, earlyStopCount) 408 | end 409 | end 410 | 411 | if maxTries ~= 0 then 412 | if earlyStopCount >= maxTries then 413 | if verbose then print("Early stopping at epoch: " .. i) end 414 | break 415 | end 416 | end 417 | end 418 | 419 | -- Testing 420 | if best then 421 | if noValidation then 422 | testLoss = model_test_multi_criterion(bestTrainModel, criterions, 423 | tsData, confusion, 424 | useCuda, classifierIndx) 425 | else 426 | testLoss = model_test_multi_criterion(bestValidModel, criterions, 427 | tsData, confusion, 428 | useCuda, classifierIndx) 429 | end 430 | else 431 | testLoss = model_test_multi_criterion(model, criterions, 432 | tsData, confusion, 433 | useCuda, classifierIndx) 434 | end 435 | confusion:updateValids() 436 | testAccu = confusion.totalValid * 100 437 | testAccus[attempt] = testAccu 438 | if verbose then 439 | print("Attempt: " .. tostring(attempt) .. " Test Accu: " .. testAccu) 440 | end 441 | end 442 | print("Test accuracies.") 443 | print(testAccus) 444 | print("Max Test Error is: " .. tostring(100 - testAccus:max()) .. "%") 445 | -------------------------------------------------------------------------------- /NCEModule.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Noise Contrast Estimation Module]]-- 3 | -- Ref.: A. https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf 4 | ------------------------------------------------------------------------ 5 | local _ = require 'moses' 6 | local NCEModule, parent = torch.class("nn.NCEModule", "nn.Linear") 7 | NCEModule.version = 6 -- better bias init 8 | 9 | -- for efficient serialization using nn.Serial 10 | local empty = _.clone(parent.dpnn_mediumEmpty) 11 | table.insert(empty, 'sampleidx') 12 | table.insert(empty, 'sampleprob') 13 | table.insert(empty, '_noiseidx') 14 | table.insert(empty, '_noiseprob') 15 | table.insert(empty, '_weight') 16 | table.insert(empty, '_gradWeight') 17 | table.insert(empty, '_gradOutput') 18 | table.insert(empty, '_tgradOutput') 19 | NCEModule.dpnn_mediumEmpty = empty 20 | 21 | -- for sharedClone 22 | local params = _.clone(parent.dpnn_parameters) 23 | table.insert(params, 'unigrams') 24 | table.insert(params, 'Z') 25 | NCEModule.dpnn_parameters = params 26 | 27 | function NCEModule:__init(inputSize, outputSize, k, unigrams, Z) 28 | parent.__init(self, inputSize, outputSize) 29 | assert(torch.type(k) == 'number') 30 | assert(torch.isTensor(unigrams)) 31 | self.k = k 32 | self.unigrams = unigrams 33 | self.Z = torch.Tensor{Z or -1} 34 | 35 | self.batchnoise = true 36 | 37 | self:fastNoise() 38 | 39 | -- output is {P_linear(target|input), P_linear(samples|input), P_noise(target), P_noise(samples)} 40 | self.output = {torch.Tensor(), torch.Tensor(), torch.Tensor(), torch.Tensor()} 41 | self.gradInput = {torch.Tensor(), torch.Tensor()} 42 | end 43 | 44 | function NCEModule:reset(stdv) 45 | if stdv then 46 | self.weight:uniform(-stdv, stdv) 47 | self.bias:uniform(-stdv, stdv) 48 | else 49 | stdv = stdv or 1./math.sqrt(self.weight:size(2)) 50 | self.weight:uniform(-stdv, stdv) 51 | -- this is useful for Z = 1 52 | self.bias:fill(-math.log(self.bias:size(1))) 53 | end 54 | return self 55 | end 56 | 57 | function NCEModule:fastNoise() 58 | -- we use alias to speedup multinomial sampling (see noiseSample method) 59 | require 'torchx' 60 | assert(torch.AliasMultinomial, "update torchx : luarocks install torchx") 61 | self.unigrams:div(self.unigrams:sum()) 62 | self.aliasmultinomial = torch.AliasMultinomial(self.unigrams) 63 | self.aliasmultinomial.dpnn_parameters = {'J', 'q'} 64 | end 65 | 66 | function NCEModule:updateOutput(inputTable) 67 | local input, target = unpack(inputTable) 68 | assert(input:dim() == 2) 69 | assert(target:dim() == 1) 70 | local batchsize = input:size(1) 71 | local inputsize = self.weight:size(2) 72 | 73 | if self.train == false and self.normalized then 74 | self.linout = self.linout or input.new() 75 | -- full linear + softmax 76 | local nElement = self.linout:nElement() 77 | self.linout:resize(batchsize, self.weight:size(1)) 78 | if self.linout:nElement() ~= nElement then 79 | self.linout:zero() 80 | end 81 | self.addBuffer = self.addBuffer or input.new() 82 | if self.addBuffer:nElement() ~= batchsize then 83 | self.addBuffer:resize(batchsize):fill(1) 84 | end 85 | self.weight.addmm(self.linout, 0, self.linout, 1, input, self.weight:t()) 86 | if self.bias then self.linout:addr(1, self.addBuffer, self.bias) end 87 | self.output = torch.type(self.output) == 'table' and input.new() or self.output 88 | if self.logsoftmax then 89 | input.THNN.LogSoftMax_updateOutput( 90 | self.linout:cdata(), 91 | self.output:cdata() 92 | ) 93 | else 94 | input.THNN.SoftMax_updateOutput( 95 | self.linout:cdata(), 96 | self.output:cdata() 97 | ) 98 | end 99 | elseif self.batchnoise then 100 | self.output = (torch.type(self.output) == 'table' and #self.output == 4) and self.output 101 | or {input.new(), input.new(), input.new(), input.new()} 102 | assert(torch.type(target) == 'torch.CudaTensor' or torch.type(target) == 'torch.LongTensor') 103 | self.sampleidx = self.sampleidx or target.new() 104 | 105 | -- the last elements contain the target indices 106 | self.sampleidx:resize(self.k + batchsize) 107 | self.sampleidx:narrow(1,self.k+1,batchsize):copy(target) 108 | 109 | -- sample k noise samples 110 | self:noiseSample(self.sampleidx, 1, self.k) 111 | self.sampleidx:resize(self.k + batchsize) 112 | 113 | -- build (batchsize+k, inputsize) weight tensor 114 | self._weight = self._weight or self.bias.new() 115 | self.weight.index(self._weight, self.weight, 1, self.sampleidx) 116 | assert(self._weight:nElement() == (self.k+batchsize)*inputsize) 117 | self._weight:resize(self.k+batchsize, inputsize) 118 | 119 | -- build (batchsize+k,) bias tensor 120 | self._bias = self._bias or self.bias.new() 121 | self._bias:index(self.bias, 1, self.sampleidx) 122 | assert(self._bias:nElement() == (self.k+batchsize)) 123 | self._bias:resize(self.k+batchsize) 124 | 125 | -- separate sample and target weight matrices and bias vectors 126 | local sweight = self._weight:narrow(1, 1, self.k) 127 | local tweight = self._weight:narrow(1, self.k+1, batchsize) 128 | local sbias = self._bias:narrow(1, 1, self.k) 129 | local tbias = self._bias:narrow(1, self.k+1, batchsize) 130 | 131 | -- get model probability of targets (batchsize,) 132 | local Pmt = self.output[1] 133 | self._pm = self._pm or input.new() 134 | self._pm:cmul(input, tweight) 135 | Pmt:sum(self._pm, 2):resize(batchsize) 136 | Pmt:add(tbias) 137 | Pmt:exp() 138 | 139 | -- get model probability of samples (batchsize x k) samples 140 | local Pms = self.output[2] 141 | Pms:resize(batchsize, self.k) 142 | Pms:copy(sbias:view(1,self.k):expand(batchsize, self.k)) 143 | Pms:addmm(1, Pms, 1, input, sweight:t()) 144 | Pms:exp() 145 | 146 | if self.Z[1] <= 0 then 147 | -- approximate Z using current batch 148 | self.Z[1] = Pms:mean()*self.weight:size(1) 149 | print("normalization constant Z approximated to "..self.Z[1]) 150 | end 151 | 152 | -- divide by normalization constant 153 | Pms:div(self.Z[1]) 154 | Pmt:div(self.Z[1]) 155 | 156 | -- get noise probability (pn) for all samples 157 | 158 | self.sampleprob = self.sampleprob or Pms.new() 159 | self.sampleprob = self:noiseProb(self.sampleprob, self.sampleidx) 160 | 161 | local Pnt = self.sampleprob:narrow(1,self.k+1,target:size(1)) 162 | local Pns = self.sampleprob:narrow(1,1,self.k) 163 | Pns = Pns:resize(1, self.k):expand(batchsize, self.k) 164 | 165 | self.output[3]:set(Pnt) 166 | self.output[4]:set(Pns) 167 | else 168 | self.output = (torch.type(self.output) == 'table' and #self.output == 4) and self.output 169 | or {input.new(), input.new(), input.new(), input.new()} 170 | self.sampleidx = self.sampleidx or target.new() 171 | 172 | -- the last first column will contain the target indices 173 | self.sampleidx:resize(batchsize, self.k+1) 174 | self.sampleidx:select(2,1):copy(target) 175 | 176 | self._sampleidx = self._sampleidx or self.sampleidx.new() 177 | self._sampleidx:resize(batchsize, self.k) 178 | 179 | -- sample (batchsize x k+1) noise samples 180 | self:noiseSample(self._sampleidx, batchsize, self.k) 181 | 182 | self.sampleidx:narrow(2,2,self.k):copy(self._sampleidx) 183 | 184 | -- make sure that targets are still first column of sampleidx 185 | if not self.testedtargets then 186 | for i=1,math.min(target:size(1),3) do 187 | assert(self.sampleidx[{i,1}] == target[i]) 188 | end 189 | self.testedtargets = true 190 | end 191 | 192 | -- build (batchsize x k+1 x inputsize) weight tensor 193 | self._weight = self._weight or self.bias.new() 194 | self.weight.index(self._weight, self.weight, 1, self.sampleidx:view(-1)) 195 | assert(self._weight:nElement() == batchsize*(self.k+1)*inputsize) 196 | self._weight:resize(batchsize, self.k+1, inputsize) 197 | 198 | -- build (batchsize x k+1) bias tensor 199 | self._bias = self._bias or self.bias.new() 200 | self._bias:index(self.bias, 1, self.sampleidx:view(-1)) 201 | assert(self._bias:nElement() == batchsize*(self.k+1)) 202 | self._bias:resize(batchsize, self.k+1) 203 | 204 | -- get model probability (pm) of sample and target (batchsize x k+1) samples 205 | self._pm = self._pm or input.new() 206 | self._pm:resizeAs(self._bias):copy(self._bias) 207 | self._pm:resize(batchsize, 1, self.k+1) 208 | local _input = input:view(batchsize, 1, inputsize) 209 | self._pm:baddbmm(1, self._pm, 1, _input, self._weight:transpose(2,3)) 210 | self._pm:resize(batchsize, self.k+1) 211 | self._pm:exp() 212 | 213 | if self.Z[1] <= 0 then 214 | -- approximate Z using current batch 215 | self.Z[1] = self._pm:mean()*self.weight:size(1) 216 | print("normalization constant Z approximated to "..self.Z[1]) 217 | end 218 | 219 | self._pm:div(self.Z[1]) -- divide by normalization constant 220 | 221 | -- separate target from sample model probabilities 222 | local Pmt = self._pm:select(2,1) 223 | local Pms = self._pm:narrow(2,2,self.k) 224 | 225 | self.output[1]:set(Pmt) 226 | self.output[2]:set(Pms) 227 | 228 | -- get noise probability (pn) for all samples 229 | 230 | self.sampleprob = self.sampleprob or self._pm.new() 231 | self.sampleprob = self:noiseProb(self.sampleprob, self.sampleidx) 232 | 233 | local Pnt = self.sampleprob:select(2,1) 234 | local Pns = self.sampleprob:narrow(2,2,self.k) 235 | 236 | self.output[3]:set(Pnt) 237 | self.output[4]:set(Pns) 238 | end 239 | 240 | return self.output 241 | end 242 | 243 | function NCEModule:updateGradInput(inputTable, gradOutput) 244 | local input, target = unpack(inputTable) 245 | assert(input:dim() == 2) 246 | assert(target:dim() == 1) 247 | local dPmt, dPms = gradOutput[1], gradOutput[2] 248 | local batchsize = input:size(1) 249 | local inputsize = self.weight:size(2) 250 | 251 | if self.batchnoise then 252 | local Pmt, Pms = self.output[1], self.output[2] 253 | 254 | -- separate sample and target weight matrices 255 | local sweight = self._weight:narrow(1, 1, self.k) 256 | local tweight = self._weight:narrow(1, self.k+1, batchsize) 257 | 258 | -- the rest of equation 7 259 | -- d Pm / d linear = exp(linear)/z 260 | self._gradOutput = self._gradOutput or dPms.new() 261 | self._tgradOutput = self._tgradOutput or dPmt.new() 262 | self._gradOutput:cmul(dPms, Pms) 263 | self._tgradOutput:cmul(dPmt, Pmt) 264 | 265 | -- gradient of linear 266 | self.gradInput[1] = self.gradInput[1] or input.new() 267 | self.gradInput[1]:cmul(self._tgradOutput:view(batchsize, 1):expandAs(tweight), tweight) 268 | self.gradInput[1]:addmm(1, 1, self._gradOutput, sweight) 269 | else 270 | -- the rest of equation 7 (combine both sides of + sign into one tensor) 271 | self._gradOutput = self._gradOutput or dPmt.new() 272 | self._gradOutput:resize(batchsize, self.k+1) 273 | self._gradOutput:select(2,1):copy(dPmt) 274 | self._gradOutput:narrow(2,2,self.k):copy(dPms) 275 | self._gradOutput:resize(batchsize, 1, self.k+1) 276 | -- d Pm / d linear = exp(linear)/z 277 | self._gradOutput:cmul(self._pm) 278 | 279 | -- gradient of linear 280 | self.gradInput[1] = self.gradInput[1] or input.new() 281 | self.gradInput[1]:resize(batchsize, 1, inputsize):zero() 282 | self.gradInput[1]:baddbmm(0, 1, self._gradOutput, self._weight) 283 | self.gradInput[1]:resizeAs(input) 284 | end 285 | 286 | self.gradInput[2] = self.gradInput[2] or input.new() 287 | if self.gradInput[2]:nElement() ~= target:nElement() then 288 | self.gradInput[2]:resize(target:size()):zero() 289 | end 290 | 291 | return self.gradInput 292 | end 293 | 294 | function NCEModule:accGradParameters(inputTable, gradOutput, scale) 295 | local input, target = unpack(inputTable) 296 | assert(input:dim() == 2) 297 | assert(target:dim() == 1) 298 | local batchsize = input:size(1) 299 | local inputsize = self.weight:size(2) 300 | 301 | if self.batchnoise then 302 | self._gradWeight = self._gradWeight or self.bias.new() 303 | self._gradWeight:resizeAs(self._weight):zero() -- (batchsize + k) x inputsize 304 | 305 | local sgradWeight = self._gradWeight:narrow(1, 1, self.k) 306 | local tgradWeight = self._gradWeight:narrow(1, self.k+1, batchsize) 307 | 308 | self._gradOutput:mul(scale) 309 | self._tgradOutput:mul(scale) 310 | 311 | sgradWeight:addmm(0, sgradWeight, 1, self._gradOutput:t(), input) 312 | tgradWeight:cmul(self._tgradOutput:view(batchsize, 1):expandAs(self.gradInput[1]), input) 313 | 314 | self.gradWeight:indexAdd(1, self.sampleidx, self._gradWeight) 315 | self.gradBias:indexAdd(1, self.sampleidx:narrow(1,self.k+1,batchsize), self._tgradOutput) 316 | self._tgradOutput:sum(self._gradOutput, 1) -- reuse buffer 317 | self.gradBias:indexAdd(1, self.sampleidx:sub(1,self.k), self._tgradOutput:view(-1)) 318 | 319 | else 320 | self._gradWeight = self._gradWeight or self.bias.new() 321 | self._gradWeight:resizeAs(self._weight):zero() -- batchsize x k+1 x inputsize 322 | self._gradOutput:resize(batchsize, self.k+1, 1) 323 | self._gradOutput:mul(scale) 324 | local _input = input:view(batchsize, 1, inputsize) 325 | self._gradWeight:baddbmm(0, self._gradWeight, 1, self._gradOutput, _input) 326 | 327 | local sampleidx = self.sampleidx:view(batchsize * (self.k+1)) 328 | local _gradWeight = self._gradWeight:view(batchsize * (self.k+1), inputsize) 329 | self.gradWeight:indexAdd(1, sampleidx, _gradWeight) 330 | 331 | local _gradOutput = self._gradOutput:view(batchsize * (self.k+1)) 332 | self.gradBias:indexAdd(1, sampleidx, _gradOutput) 333 | end 334 | end 335 | 336 | function NCEModule:type(type, cache) 337 | if type then 338 | self.sampleidx = nil 339 | self.sampleprob = nil 340 | self._noiseidx = nil 341 | self._noiseprob = nil 342 | self._metaidx = nil 343 | self._gradOutput = nil 344 | self._tgradOutput = nil 345 | self._gradWeight = nil 346 | self._weight = nil 347 | end 348 | local unigrams = self.unigrams 349 | self.unigrams = nil 350 | local am = self.aliasmultinomial 351 | 352 | local rtn 353 | if type and torch.type(self.weight) == 'torch.MultiCudaTensor' then 354 | assert(type == 'torch.CudaTensor', "Cannot convert a multicuda NCEModule to anything other than cuda") 355 | local weight = self.weight 356 | local gradWeight = self.gradWeight 357 | self.weight = nil 358 | self.gradWeight = nil 359 | 360 | rtn = parent.type(self, type, cache) 361 | 362 | assert(torch.type(self.aliasmultinomial.J) ~= 'torch.CudaTensor') 363 | self.weight = weight 364 | self.gradWeight = gradWeight 365 | else 366 | rtn = parent.type(self, type, cache) 367 | end 368 | 369 | self.unigrams = unigrams 370 | self.aliasmultinomial = am 371 | return rtn 372 | end 373 | 374 | function NCEModule:noiseProb(sampleprob, sampleidx) 375 | assert(sampleprob) 376 | assert(sampleidx) 377 | self._noiseprob = self._noiseprob or self.unigrams.new() 378 | self._noiseidx = self._noiseidx or torch.LongTensor() 379 | self._noiseidx:resize(sampleidx:size()):copy(sampleidx) 380 | 381 | self._noiseprob:index(self.unigrams, 1, self._noiseidx:view(-1)) 382 | 383 | sampleprob:resize(sampleidx:size()):copy(self._noiseprob) 384 | return sampleprob 385 | end 386 | 387 | function NCEModule:noiseSample(sampleidx, batchsize, k) 388 | if torch.type(sampleidx) ~= 'torch.LongTensor' then 389 | self._noiseidx = self._noiseidx or torch.LongTensor() 390 | self._noiseidx:resize(batchsize, k) 391 | self.aliasmultinomial:batchdraw(self._noiseidx) 392 | sampleidx:resize(batchsize, k):copy(self._noiseidx) 393 | else 394 | sampleidx:resize(batchsize, k) 395 | self.aliasmultinomial:batchdraw(sampleidx) 396 | end 397 | return sampleidx 398 | end 399 | 400 | function NCEModule:clearState() 401 | self.sampleidx = nil 402 | self.sampleprob = nil 403 | self._noiseidx = nil 404 | self._noiseprob = nil 405 | self._tgradOutput = nil 406 | self._gradOutput = nil 407 | if torch.isTensor(self.output) then 408 | self.output:set() 409 | else 410 | for i,output in ipairs(self.output) do 411 | output:set() 412 | end 413 | end 414 | for i,gradInput in ipairs(self.gradInput) do 415 | gradInput:set() 416 | end 417 | end 418 | 419 | function NCEModule:multicuda(device1, device2) 420 | assert(device1 and device2, "specify two devices as arguments") 421 | require 'torchx' 422 | assert(torchx.version and torchx.version >= 1, "update torchx: luarocks install torchx") 423 | 424 | self:float() 425 | 426 | local isize = self.weight:size(2) 427 | local weights = { 428 | cutorch.withDevice(device1, function() return self.weight[{{}, {1, torch.round(isize/2)}}]:cuda() end), 429 | cutorch.withDevice(device2, function() return self.weight[{{}, {torch.round(isize/2)+1, isize}}]:cuda() end) 430 | } 431 | self.weight = torch.MultiCudaTensor(2, weights) 432 | local gradWeights = { 433 | cutorch.withDevice(device1, function() return self.gradWeight[{{}, {1, torch.round(isize/2)}}]:cuda() end), 434 | cutorch.withDevice(device2, function() return self.gradWeight[{{}, {torch.round(isize/2)+1, isize}}]:cuda() end) 435 | } 436 | self.gradWeight = torch.MultiCudaTensor(2, gradWeights) 437 | 438 | self:cuda() 439 | end 440 | -------------------------------------------------------------------------------- /Module.lua: -------------------------------------------------------------------------------- 1 | local _ = require 'moses' 2 | 3 | local Module = nn.Module 4 | 5 | function Module:sparseParameters() 6 | return self:parameters() 7 | end 8 | 9 | function Module:updateParameters(learningRate) 10 | -- sparse params can have different learningRate scales per param 11 | local params, gradParams, scales = self:sparseParameters() 12 | if params then 13 | for i,param in pairs(params) do -- pairs for sparse params 14 | local scale = scales and scales[i] or 1 15 | param:add(-learningRate*scale, gradParams[i]) 16 | end 17 | end 18 | end 19 | 20 | function Module:zeroGradParameters() 21 | local _,gradParams = self:sparseParameters() 22 | if gradParams then 23 | for i,gradParam in pairs(gradParams) do -- pairs for sparse params 24 | gradParam:zero() 25 | end 26 | end 27 | end 28 | 29 | ------------------------ clone and type -------------------------------- 30 | 31 | Module.dpnn_parameters = {'weight', 'bias'} 32 | Module.dpnn_gradParameters = {'gradWeight', 'gradBias'} 33 | 34 | -- efficient version of : 35 | -- clone = self:clone() 36 | -- clone:share(self, paramNames, gradParamNames) 37 | -- Note that this method is the very bane of my existence. 38 | -- I have worked on it too many times... 39 | function Module:sharedClone(shareParams, shareGradParams, stepClone) 40 | shareParams = (shareParams == nil) and true or shareParams 41 | shareGradParams = (shareGradParams == nil) and true or shareGradParams 42 | 43 | if stepClone and self.dpnn_stepclone then 44 | -- this is for AbstractRecurrent modules (in rnn) 45 | return self 46 | end 47 | 48 | local pointers = {} -- to params/gradParams (dont clone params/gradParams) 49 | local scdone = {} 50 | 51 | -- 1. remove all params/gradParams 52 | local function recursiveRemove(obj) -- remove modules 53 | local moduleTree 54 | local isTable = type(obj) == 'table' 55 | if torch.isTypeOf(obj, 'nn.Module') then 56 | assert(isTable) 57 | if stepClone and obj.dpnn_stepclone then 58 | -- this is for AbstractRecurrent modules (in rnn) 59 | moduleTree = obj 60 | obj = nil 61 | isTable = false 62 | elseif obj.dpnn_sharedClone then 63 | -- allow to use a custom sharedClone method on one module 64 | moduleTree = obj 65 | obj = nil 66 | isTable = false 67 | elseif scdone[torch.pointer(obj)] then 68 | moduleTree = scdone[torch.pointer(obj)] 69 | else 70 | -- remove the params, gradParams. Save for later. 71 | local params = {} 72 | 73 | if shareParams then 74 | for i,paramName in ipairs(obj.dpnn_parameters) do 75 | local param = obj[paramName] 76 | if param then 77 | params[paramName] = param 78 | obj[paramName] = nil 79 | if torch.isTensor(param) and param.storage and param:storage() then 80 | pointers[torch.pointer(param:storage():data())] = true 81 | end 82 | end 83 | end 84 | end 85 | 86 | if shareGradParams then 87 | for i,paramName in ipairs(obj.dpnn_gradParameters) do 88 | local gradParam = obj[paramName] 89 | if gradParam then 90 | params[paramName] = gradParam 91 | obj[paramName] = nil 92 | if torch.isTensor(gradParam) and gradParam.storage and gradParam:storage() then 93 | pointers[torch.pointer(gradParam:storage():data())] = true 94 | end 95 | end 96 | end 97 | end 98 | 99 | -- find all obj.attribute tensors that share storage with the shared params 100 | for paramName, param in pairs(obj) do 101 | if torch.isTensor(param) and param:storage() then 102 | if pointers[torch.pointer(param:storage():data())] then 103 | params[paramName] = param 104 | obj[paramName] = nil 105 | end 106 | end 107 | end 108 | 109 | moduleTree = params 110 | 111 | scdone[torch.pointer(obj)] = moduleTree 112 | 113 | for k,v in pairs(obj) do 114 | moduleTree[k], obj[k] = recursiveRemove(v) 115 | end 116 | 117 | end 118 | elseif isTable then 119 | if scdone[torch.pointer(obj)] then 120 | moduleTree = scdone[torch.pointer(obj)] 121 | else 122 | assert(not moduleTree) 123 | moduleTree = {} 124 | for k,v in pairs(obj) do 125 | moduleTree[k], obj[k] = recursiveRemove(v) 126 | end 127 | scdone[torch.pointer(obj)] = moduleTree 128 | end 129 | 130 | end 131 | 132 | return moduleTree, obj 133 | end 134 | 135 | local moduleTree, original = recursiveRemove(self) 136 | assert(original) 137 | 138 | -- 2. clone everything but parameters, gradients and modules (removed above) 139 | 140 | local clone = self:clone() 141 | 142 | -- 3. add back to self/clone everything that was removed in step 1 143 | 144 | local function recursiveSet(clone, original, moduleTree) 145 | assert(clone) 146 | assert(original) 147 | if scdone[torch.pointer(original)] then 148 | for k,param in pairs(moduleTree) do 149 | if torch.isTypeOf(param,'nn.Module') then 150 | if param.dpnn_sharedClone then 151 | -- Call the custom sharedClone 152 | clone[k] = param:dpnn_sharedClone() 153 | else 154 | -- AbstractRecurrent instances branch here with stepClone = true 155 | clone[k] = param 156 | end 157 | original[k] = param 158 | elseif torch.isTensor(param) then 159 | if param.storage then 160 | clone[k] = param.new():set(param) 161 | original[k] = param 162 | else -- for torch.MultiCudaTensor 163 | clone[k] = param 164 | original[k] = param 165 | end 166 | elseif type(param) == 'table' then 167 | recursiveSet(clone[k], original[k], param) 168 | end 169 | end 170 | scdone[torch.pointer(original)] = nil 171 | end 172 | 173 | end 174 | 175 | recursiveSet(clone, self, moduleTree) 176 | 177 | return clone 178 | end 179 | 180 | -- we override this method such that hidden modules 181 | -- will be included in the getParameters call. 182 | -- Hidden modules are common for recurrent modules that 183 | -- have internal references to modules that share parameters 184 | -- with the main modules. 185 | -- These must also be included in the getParameters() call in order 186 | -- to maintain shared storage for tensors. 187 | function Module:getParameters() 188 | 189 | local con = nn.Container() 190 | con:add(self) 191 | 192 | -- recursive get all modules (modules, sharedclones, etc.) 193 | local function recursiveGetModules(tbl) 194 | for k,m in pairs(tbl) do 195 | if torch.isTypeOf(m, 'nn.Module') then 196 | if not m.dpnn_getParameters_found then 197 | con:add(m) 198 | m.dpnn_getParameters_found = true 199 | recursiveGetModules(m) 200 | end 201 | elseif torch.type(m) == 'table' then 202 | recursiveGetModules(m) 203 | end 204 | end 205 | end 206 | 207 | recursiveGetModules(self) 208 | 209 | for i,m in ipairs(con.modules) do 210 | m.dpnn_getParameters_found = nil 211 | end 212 | 213 | -- get ALL parameters 214 | local parameters,gradParameters = con:parameters() 215 | return Module.flatten(parameters), Module.flatten(gradParameters) 216 | end 217 | 218 | ----------------- serialization (see nn.Serial) ------------------- 219 | 220 | Module.dpnn_mediumEmpty = {'output', 'gradInput', 'momGradParams', 'dpnn_input'} 221 | Module.dpnn_lightEmpty = Module.dpnn_gradParameters 222 | -- defaults to heavy serialization 223 | Module.dpnn_serialEmpty = {} 224 | 225 | -- sets the serialization behavior of the entire module structure 226 | function Module:serialMode(empty) 227 | assert(torch.type(empty) == 'table', "Expecting table at arg 1") 228 | self.dpnn_serialEmpty = empty 229 | -- set the serial of all encapsulated modules 230 | local function recursiveSerial(tbl) 231 | for k,v in pairs(tbl) do 232 | if torch.isTypeOf(v, 'nn.Module') then 233 | v:serialMode(empty) 234 | elseif torch.type(v) == 'table' then 235 | recursiveSerial(v) 236 | end 237 | end 238 | end 239 | recursiveSerial(self) 240 | return self 241 | end 242 | 243 | -- serialMode : serialize everything 244 | function Module:heavySerial() 245 | return self:serialMode({}) 246 | end 247 | 248 | -- serialMode : serialize everything except dpnn_mediumEmpty attributes 249 | function Module:mediumSerial() 250 | 251 | self.dpnn_serialEmpty = self.dpnn_mediumEmpty 252 | 253 | -- set the serial of all encapsulated modules 254 | local function recursiveSerial(tbl) 255 | for k,v in pairs(tbl) do 256 | if torch.isTypeOf(v, 'nn.Module') then 257 | v:mediumSerial() 258 | elseif torch.type(v) == 'table' then 259 | recursiveSerial(v) 260 | end 261 | end 262 | end 263 | recursiveSerial(self) 264 | return self 265 | end 266 | 267 | -- serialMode : serialize everything except dpnn_mediumEmpty and dpnn_lightEmpty attributes 268 | function Module:lightSerial() 269 | 270 | self.dpnn_serialEmpty = _.clone(self.dpnn_mediumEmpty) 271 | for k,v in ipairs(self.dpnn_lightEmpty) do 272 | table.insert(self.dpnn_serialEmpty, v) 273 | end 274 | 275 | -- set the serial of all encapsulated modules 276 | local function recursiveSerial(tbl) 277 | for k,v in pairs(tbl) do 278 | if torch.isTypeOf(v, 'nn.Module') then 279 | v:lightSerial() 280 | elseif torch.type(v) == 'table' then 281 | recursiveSerial(v) 282 | end 283 | end 284 | end 285 | recursiveSerial(self) 286 | 287 | return self 288 | end 289 | 290 | function Module:getSerialState(states) 291 | states = states or {} 292 | 293 | -- dont get the serial state of the same module twice (reuse existing) 294 | if states[self] then 295 | return states[self] 296 | end 297 | 298 | -- returns the object structure as tables (i.e. without metatables) 299 | local function recursiveState(tbl) 300 | local state = _.map(tbl, 301 | function(k,v) 302 | if torch.isTypeOf(tbl, 'nn.Module') and _.contains(tbl.dpnn_serialEmpty, k) then 303 | -- "empties" module attributes found in empty 304 | if torch.type(v) == 'table' then 305 | -- empty table 306 | return {} 307 | elseif torch.isTensor(v) then 308 | -- empty tensor 309 | return v.new() 310 | else 311 | -- not table nor tensor? then serialize as is 312 | return v 313 | end 314 | elseif torch.isTypeOf(v, 'nn.Module') then 315 | -- recursive, yet can be overwritten 316 | return v:getSerialState(states) 317 | elseif torch.type(v) == 'table' then 318 | -- in case it is a table of modules 319 | if not states[v] then 320 | states[v] = recursiveState(v) 321 | end 322 | return states[v] 323 | else 324 | return v 325 | end 326 | end 327 | ) 328 | return state 329 | end 330 | 331 | local state = recursiveState(self) 332 | 333 | -- include typename so that module can be reconstructed from the state 334 | state.dpnn_typename = torch.type(self) 335 | states[self] = state 336 | 337 | return state 338 | end 339 | 340 | -- decorates self with nn.Serial 341 | function Module:Serial(tensortype) 342 | return nn.Serial(self, tensortype) 343 | end 344 | 345 | ----------------------- for training ----------------------------- 346 | 347 | -- useful to get the output size 348 | -- I chose this method name because it is less likely to be overriden. 349 | function Module:outside(insize) 350 | local input 351 | if torch.type(insize) == 'table' then 352 | input = torch.randn(table.unpack(insize)) 353 | else 354 | input = torch.randn(insize) 355 | end 356 | local output = self:updateOutput(input) 357 | return output:size() 358 | end 359 | 360 | -- for those interested in implementing the visitor design pattern 361 | function Module:accept(visitor) 362 | visitor:visit(self) 363 | end 364 | 365 | -- Can be used as a regularizer instead of weight decay 366 | -- Assumes that parameters are arranged (output dim x ... x input dim) 367 | function Module:maxParamNorm(maxOutNorm, maxInNorm) 368 | -- this allows each module to set its own max[Out,In]Norm 369 | maxOutNorm = self.maxOutNorm or maxOutNorm 370 | maxInNorm = self.maxInNorm or maxInNorm 371 | if not (maxOutNorm or maxInNorm) then 372 | return 373 | end 374 | 375 | if self.modules then 376 | for i,module in ipairs(self.modules) do 377 | module:maxParamNorm(maxOutNorm, maxInNorm) 378 | end 379 | else 380 | local params = self:parameters() 381 | if not params or gradParams then 382 | return 383 | end 384 | for k,param in pairs(params) do -- pairs for sparse params 385 | -- By default, only affects non-1D params. 386 | if param:dim() > 1 then 387 | if maxOutNorm and maxOutNorm > 0 then 388 | -- rows feed into output neurons 389 | param:renorm(2, 1, maxOutNorm) 390 | end 391 | if maxInNorm and maxInNorm > 0 then 392 | -- cols feed out from input neurons 393 | param:renorm(2, param:dim(), maxInNorm) 394 | end 395 | end 396 | end 397 | end 398 | end 399 | 400 | -- Similar to maxParamNorm, but norm is global to Module for which 401 | -- this is called. Unless moduleLocal is true, in which case, the 402 | -- norm constraint is applied to the norm of all parameters in each 403 | -- component (non-container) module. 404 | function Module:gradParamClip(cutoffNorm, moduleLocal) 405 | -- this allows each module to set its own cutoffNorm 406 | cutoffNorm = self.cutoffNorm or cutoffNorm 407 | if cutoffNorm <= 0 then 408 | return 409 | end 410 | if self.moduleLocal ~= nil then 411 | moduleLocal = self.moduleLocal 412 | end 413 | 414 | local norm = 0 415 | if moduleLocal and self.modules then 416 | for i,module in ipairs(self.modules) do 417 | norm = norm + math.pow(module:gradParamClip(cutoffNorm, moduleLocal), 2) 418 | end 419 | norm = math.sqrt(norm) 420 | else 421 | local params, gradParams = self:parameters() 422 | if not (params and gradParams) then 423 | return norm 424 | end 425 | for k,gradParam in pairs(gradParams) do -- pairs for sparse params 426 | if torch.type(gradParam) == 'torch.CudaTensor' then 427 | cutorch.withDevice(gradParam:getDevice(), function() -- support multi-device models 428 | norm = norm + math.pow(gradParam:norm(),2) 429 | end) 430 | else 431 | norm = norm + math.pow(gradParam:norm(),2) 432 | end 433 | end 434 | norm = math.sqrt(norm) 435 | if norm > cutoffNorm then 436 | -- rescale gradParams to obtain desired cutoffNorm 437 | for k,gradParam in pairs(gradParams) do 438 | if torch.type(gradParam) == 'torch.CudaTensor' then 439 | cutorch.withDevice(gradParam:getDevice(), function() -- support multi-device models 440 | gradParam:mul(cutoffNorm/norm) 441 | end) 442 | else 443 | gradParam:mul(cutoffNorm/norm) 444 | end 445 | end 446 | end 447 | end 448 | return norm 449 | end 450 | 451 | -- Adds weight decay constraint on params with dims > 2 (default). 452 | -- TODO : allow inplace weightDecay (before calling accUpdateGradParameters) 453 | function Module:weightDecay(wdFactor, wdMinDim) 454 | -- this allows each module to set its own hyper-parameters 455 | wdFactor = self.wdFactor or wdFactor 456 | if wdFactor <= 0 then 457 | return 458 | end 459 | wdMinDim = self.wdMinDim or wdMinDim or 2 460 | 461 | if self.modules then 462 | for i,module in ipairs(self.modules) do 463 | module:weightDecay(wdFactor, wdMinDim) 464 | end 465 | else 466 | local params, gradParams = self:parameters() 467 | if not (params and gradParams) then 468 | return 469 | end 470 | 471 | for i,param in pairs(params) do -- pairs for sparse params 472 | if param:dim() >= wdMinDim then 473 | gradParams[i]:add(wdFactor, param) 474 | end 475 | end 476 | end 477 | end 478 | 479 | function Module:momentumGradParameters() 480 | if (not self.momGradParams) or _.isEmpty(self.momGradParams) then 481 | local params, gradParams = self:parameters() 482 | if not gradParams or _.isEmpty(gradParams) then 483 | return 484 | end 485 | self.momGradParams = {} 486 | for i,gradParam in pairs(gradParams) do 487 | if torch.type(gradParam) == 'torch.CudaTensor' then 488 | cutorch.withDevice(gradParam:getDevice(), function() -- support multi-device models 489 | self.momGradParams[i] = gradParam.new():resizeAs(gradParam):copy(gradParam) 490 | end) 491 | else 492 | self.momGradParams[i] = gradParam.new():resizeAs(gradParam):copy(gradParam) 493 | end 494 | end 495 | end 496 | return self.momGradParams 497 | end 498 | 499 | -- uses momentum learning to update gradParams 500 | function Module:updateGradParameters(momFactor, momDamp, momNesterov) 501 | -- this allows each module to set its own hyper-parameters 502 | momFactor = self.momFactor or momFactor 503 | if momFactor <= 0 then 504 | return 505 | end 506 | momDamp = self.momDamp or momDamp or momFactor 507 | if self.momNesterov ~= nil then 508 | momNesterov = self.momNesterov 509 | end 510 | 511 | if self.modules then 512 | for i,module in ipairs(self.modules) do 513 | module:updateGradParameters(momFactor, momDamp, momNesterov) 514 | end 515 | else 516 | local params, gradParams = self:parameters() 517 | if (not params) or _.isEmpty(params) then 518 | return 519 | end 520 | local momGradParams = self:momentumGradParameters() 521 | for i,gradParam in pairs(gradParams) do 522 | momGradParams[i]:mul(momFactor) 523 | momGradParams[i]:add(1-momDamp, gradParam) 524 | end 525 | 526 | if momNesterov then 527 | for i,gradParam in pairs(gradParams) do 528 | gradParam:add(momFactor, momGradParams[i]) 529 | end 530 | else 531 | for i,gradParam in pairs(gradParams) do 532 | gradParam:copy(momGradParams[i]) 533 | end 534 | end 535 | end 536 | end 537 | 538 | function Module:checkParameters() 539 | local params = self:parameters() or {} 540 | for k,param in pairs(params) do 541 | if _.isNaN(param:sum()) then 542 | error("NaN Error for param at index" ..k) 543 | end 544 | end 545 | end 546 | 547 | function Module:dontBackward() 548 | self.backward = function() end 549 | self.updateGradInput = function() end 550 | self.accGradParameters = function() end 551 | self.accUpdateGradParameters = function() end 552 | return self 553 | end 554 | 555 | function Module:contiguousInput(input, backward) 556 | if backward then 557 | return self.dpnn_cinput or input 558 | end 559 | if not input:isContiguous() then 560 | self.dpnn_cinput = self.dpnn_cinput or input.new() 561 | self.dpnn_cinput:resizeAs(input):copy(input) 562 | input = self.dpnn_cinput 563 | end 564 | return input 565 | end 566 | 567 | function Module:toBatch(tensor, nDim, batchDim) 568 | local batchDim = batchDim or 1 569 | if tensor:dim() == nDim then 570 | self.dpnn_online = true 571 | local size = tensor:size():totable() 572 | table.insert(size, batchDim, 1) 573 | tensor = tensor:view(table.unpack(size)) 574 | else 575 | self.dpnn_online = false 576 | end 577 | return tensor 578 | end 579 | 580 | function Module:fromBatch(tensor, batchDim) 581 | if self.dpnn_online then 582 | local size = tensor:size():totable() 583 | assert(table.remove(size, batchDim) == 1) 584 | tensor = tensor:view(table.unpack(size)) 585 | end 586 | return tensor 587 | end 588 | 589 | function Module:extrapolateType() 590 | local params = module:parameters() 591 | if params then 592 | -- extrapolate the tensor type of the module 593 | local types = {} 594 | for i, param in ipairs(params) do 595 | local tensorType = torch.type(param) 596 | types[tensorType] = (types[tensorType] or 0) + 1 597 | end 598 | local maxCount = 0 599 | local maxType 600 | for tensorType, count in pairs(types) do 601 | if count > maxCount then 602 | maxtype = tensorType 603 | maxCount = count 604 | end 605 | end 606 | return maxType 607 | end 608 | return nil --unknown otherwise 609 | end 610 | 611 | function Module:profile() 612 | if self.modules then 613 | for i, module in ipairs(self.modules) do 614 | module:profile() 615 | end 616 | end 617 | self.dpnn_profile = true 618 | end 619 | 620 | function Module:reinforce(reward) 621 | if self.modules then 622 | for i, module in ipairs(self.modules) do 623 | module:reinforce(reward) 624 | end 625 | end 626 | end 627 | --------------------------------------------------------------------------------