├── .gitignore ├── AbstractRecurrent.lua ├── AbstractSequencer.lua ├── BiSequencer.lua ├── BiSequencerLM.lua ├── CMakeLists.txt ├── CopyGrad.lua ├── Dropout.lua ├── ExpandAs.lua ├── FastLSTM.lua ├── GRU.lua ├── LICENSE.2nd.txt ├── LICENSE.txt ├── LSTM.lua ├── LinearNoBias.lua ├── LookupTableMaskZero.lua ├── MaskZero.lua ├── MaskZeroCriterion.lua ├── Module.lua ├── Mufuru.lua ├── NormStabilizer.lua ├── Padding.lua ├── README.md ├── Recurrence.lua ├── Recurrent.lua ├── RecurrentAttention.lua ├── Recursor.lua ├── Repeater.lua ├── RepeaterCriterion.lua ├── SAdd.lua ├── SeqBRNN.lua ├── SeqGRU.lua ├── SeqLSTM.lua ├── SeqLSTMP.lua ├── SeqReverseSequence.lua ├── Sequencer.lua ├── SequencerCriterion.lua ├── TrimZero.lua ├── ZeroGrad.lua ├── doc ├── article │ ├── ff-lua.tex │ ├── ff.lua │ ├── ff2-lua.tex │ ├── ff2.lua │ ├── lm-lua.tex │ ├── lm.lua │ ├── lstm-lua.tex │ ├── lstm.lua │ ├── mlp-lua.tex │ ├── mlp.lua │ ├── nips15submit_e.sty │ ├── nll-lua.tex │ ├── nll.lua │ ├── ram-lua.tex │ ├── ram.lua │ ├── rec-lua.tex │ ├── rec.lua │ ├── rec2-lua.tex │ ├── rec2.lua │ ├── rec3-lua.tex │ ├── rec3.lua │ ├── rec4-lua.tex │ ├── rec4.lua │ ├── rec5-lua.tex │ ├── rec5.lua │ ├── recurrence-lua.tex │ ├── recurrence.lua │ ├── repeater-lua.tex │ ├── repeater.lua │ ├── rnn-example-lua.tex │ ├── rnn-example.lua │ ├── rnn2-lua.tex │ ├── rnn2.lua │ ├── rnn_library.bbl │ ├── rnn_library.bib │ ├── rnn_library.blg │ ├── rnn_library.log │ ├── rnn_library.out │ ├── rnn_library.pdf │ ├── rnn_library.synctex.gz │ ├── rnn_library.tex │ ├── rnnlm-lua.tex │ ├── rnnlm.lua │ ├── sequencer-lua.tex │ ├── sequencer.lua │ ├── srnn-lua.tex │ ├── srnn.lua │ ├── trainEpoch-lua.tex │ └── trainEpoch.lua └── image │ ├── LSTM.png │ ├── bgru-benchmark.png │ ├── bidirectionallm.png │ ├── gru-benchmark.png │ ├── hellofuzzy.png │ └── sequence.png ├── examples ├── README.md ├── encoder-decoder-coupling.lua ├── multigpu-nce-rnnlm.lua ├── nested-recurrence-lstm.lua ├── noise-contrastive-estimate.lua ├── recurrent-language-model.lua ├── recurrent-time-series.lua ├── recurrent-visual-attention.lua ├── sequence-to-one.lua ├── simple-bisequencer-network-variable.lua ├── simple-bisequencer-network.lua ├── simple-recurrence-network.lua ├── simple-recurrent-network.lua ├── simple-sequencer-network.lua └── twitter_sentiment_rnn.lua ├── init.lua ├── recursiveUtils.lua ├── rocks └── rnn-scm-1.rockspec ├── scripts ├── evaluate-rnnlm.lua └── evaluate-rva.lua └── test ├── CMakeLists.txt ├── GRU_test.lua ├── bigtest.lua ├── mnistsample.t7 ├── test.lua └── test_trimzero.lua /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | -------------------------------------------------------------------------------- /AbstractSequencer.lua: -------------------------------------------------------------------------------- 1 | local AbstractSequencer, parent = torch.class("nn.AbstractSequencer", "nn.Container") 2 | 3 | function AbstractSequencer:getStepModule(step) 4 | error"DEPRECATED 27 Oct 2015. Wrap your internal modules into a Recursor instead" 5 | end 6 | 7 | function AbstractSequencer:sharedClone(shareParams, shareGradParams, clones, pointers, stepClone) 8 | -- stepClone is ignored (always false, i.e. uses sharedClone) 9 | return parent.sharedClone(self, shareParams, shareGradParams, clones, pointers) 10 | end 11 | 12 | -- AbstractSequence handles its own rho internally (dynamically) 13 | function AbstractSequencer:maxBPTTstep(rho) 14 | end 15 | 16 | -- Toggle to feed long sequences using multiple forwards. 
17 | -- 'eval' only affects evaluation (recommended for RNNs) 18 | -- 'train' only affects training 19 | -- 'neither' affects neither training nor evaluation 20 | -- 'both' affects both training and evaluation (recommended for LSTMs) 21 | -- Essentially, forget() isn't called on rnn module when remember is on 22 | function AbstractSequencer:remember(remember) 23 | self._remember = (remember == nil) and 'both' or remember 24 | local _ = require 'moses' 25 | assert(_.contains({'both','eval','train','neither'}, self._remember), 26 | "AbstractSequencer : unrecognized value for remember : "..self._remember) 27 | return self 28 | end 29 | 30 | function AbstractSequencer:hasMemory() 31 | local _ = require 'moses' 32 | if (self.train ~= false) and _.contains({'both','train'}, self._remember) then -- train (defaults to nil...) 33 | return true 34 | elseif (self.train == false) and _.contains({'both','eval'}, self._remember) then -- evaluate 35 | return true 36 | else 37 | return false 38 | end 39 | end 40 | 41 | -------------------------------------------------------------------------------- /BiSequencer.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ BiSequencer ]]-- 3 | -- Encapsulates forward, backward and merge modules. 4 | -- Input is a sequence (a table) of tensors. 5 | -- Output is a sequence (a table) of tensors of the same length. 6 | -- Applies a forward rnn to each element in the sequence in 7 | -- forward order and applies a backward rnn in reverse order. 8 | -- For each step, the outputs of both rnn are merged together using 9 | -- the merge module (defaults to nn.JoinTable(1,1)). 10 | -- The sequences in a batch must have the same size. 11 | -- But the sequence length of each batch can vary. 12 | -- It is implemented by decorating a structure of modules that makes 13 | -- use of 3 Sequencers for the forward, backward and merge modules. 
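-- A minimal usage sketch (assumes `require 'rnn'`; module and batch sizes are illustrative):
--   local fwd = nn.LSTM(10, 10)
--   local brnn = nn.BiSequencer(fwd) -- bwd defaults to a reset() clone of fwd
--   local inputs = {torch.randn(8, 10), torch.randn(8, 10), torch.randn(8, 10)} -- 3 steps, batch of 8
--   local outputs = brnn:forward(inputs) -- table of 3 tensors, each 8 x 20 (fwd and bwd outputs joined)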
14 | ------------------------------------------------------------------------ 15 | local BiSequencer, parent = torch.class('nn.BiSequencer', 'nn.AbstractSequencer') 16 | 17 | function BiSequencer:__init(forward, backward, merge) 18 | 19 | if not torch.isTypeOf(forward, 'nn.Module') then 20 | error"BiSequencer: expecting nn.Module instance at arg 1" 21 | end 22 | self.forwardModule = forward 23 | 24 | self.backwardModule = backward 25 | if not self.backwardModule then 26 | self.backwardModule = forward:clone() 27 | self.backwardModule:reset() 28 | end 29 | if not torch.isTypeOf(self.backwardModule, 'nn.Module') then 30 | error"BiSequencer: expecting nn.Module instance at arg 2" 31 | end 32 | 33 | if torch.type(merge) == 'number' then 34 | self.mergeModule = nn.JoinTable(1, merge) 35 | elseif merge == nil then 36 | self.mergeModule = nn.JoinTable(1, 1) 37 | elseif torch.isTypeOf(merge, 'nn.Module') then 38 | self.mergeModule = merge 39 | else 40 | error"BiSequencer: expecting nn.Module or number instance at arg 3" 41 | end 42 | 43 | self.fwdSeq = nn.Sequencer(self.forwardModule) 44 | self.bwdSeq = nn.Sequencer(self.backwardModule) 45 | self.mergeSeq = nn.Sequencer(self.mergeModule) 46 | 47 | local backward = nn.Sequential() 48 | backward:add(nn.ReverseTable()) -- reverse 49 | backward:add(self.bwdSeq) 50 | backward:add(nn.ReverseTable()) -- unreverse 51 | 52 | local concat = nn.ConcatTable() 53 | concat:add(self.fwdSeq):add(backward) 54 | 55 | local brnn = nn.Sequential() 56 | brnn:add(concat) 57 | brnn:add(nn.ZipTable()) 58 | brnn:add(self.mergeSeq) 59 | 60 | parent.__init(self) 61 | 62 | self.output = {} 63 | self.gradInput = {} 64 | 65 | self.module = brnn 66 | -- so that it can be handled like a Container 67 | self.modules[1] = brnn 68 | end 69 | 70 | -- multiple-inheritance 71 | nn.Decorator.decorate(BiSequencer) 72 | -------------------------------------------------------------------------------- /BiSequencerLM.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ BiSequencerLM ]]-- 3 | -- Encapsulates forward, backward and merge modules. 4 | -- Input is a sequence (a table) of tensors. 5 | -- Output is a sequence (a table) of tensors of the same length. 6 | -- Applies a `fwd` rnn instance to the first `N-1` elements in the 7 | -- sequence in forward order. 8 | -- Applies the `bwd` rnn in reverse order to the last `N-1` elements 9 | -- (from second-to-last element to first element). 10 | -- Note : you shouldn't stack these for language modeling. 11 | -- Instead, stack each fwd/bwd seqs and encapsulate these. 
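-- A minimal usage sketch (assumes `require 'rnn'`; sizes are illustrative):
--   local fwd = nn.LSTM(10, 10)
--   local bwd = nn.LSTM(10, 10)
--   local blm = nn.BiSequencerLM(fwd, bwd) -- default merge is nn.JoinTable(1,1)
--   local inputs = {torch.randn(8, 10), torch.randn(8, 10), torch.randn(8, 10)} -- N = 3 steps, batch of 8
--   local outputs = blm:forward(inputs) -- 3 tensors of size 8 x 20; the fwd half of step 1 and the
--                                       -- bwd half of step N are zero-padded (see updateOutput below)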
12 | ------------------------------------------------------------------------ 13 | local _ = require 'moses' 14 | local BiSequencerLM, parent = torch.class('nn.BiSequencerLM', 'nn.AbstractSequencer') 15 | 16 | function BiSequencerLM:__init(forward, backward, merge) 17 | 18 | if not torch.isTypeOf(forward, 'nn.Module') then 19 | error"BiSequencerLM: expecting nn.Module instance at arg 1" 20 | end 21 | self.forwardModule = forward 22 | 23 | self.backwardModule = backward 24 | if not self.backwardModule then 25 | self.backwardModule = forward:clone() 26 | self.backwardModule:reset() 27 | end 28 | if not torch.isTypeOf(self.backwardModule, 'nn.Module') then 29 | error"BiSequencerLM: expecting nn.Module instance at arg 2" 30 | end 31 | 32 | if torch.type(merge) == 'number' then 33 | self.mergeModule = nn.JoinTable(1, merge) 34 | elseif merge == nil then 35 | self.mergeModule = nn.JoinTable(1, 1) 36 | elseif torch.isTypeOf(merge, 'nn.Module') then 37 | self.mergeModule = merge 38 | else 39 | error"BiSequencerLM: expecting nn.Module or number instance at arg 3" 40 | end 41 | 42 | if torch.isTypeOf(self.forwardModule, 'nn.AbstractRecurrent') then 43 | self.fwdSeq = nn.Sequencer(self.forwardModule) 44 | else -- assumes a nn.Sequencer or stack thereof 45 | self.fwdSeq = self.forwardModule 46 | end 47 | 48 | if torch.isTypeOf(self.backwardModule, 'nn.AbstractRecurrent') then 49 | self.bwdSeq = nn.Sequencer(self.backwardModule) 50 | else 51 | self.bwdSeq = self.backwardModule 52 | end 53 | self.mergeSeq = nn.Sequencer(self.mergeModule) 54 | 55 | self._fwd = self.fwdSeq 56 | 57 | self._bwd = nn.Sequential() 58 | self._bwd:add(nn.ReverseTable()) 59 | self._bwd:add(self.bwdSeq) 60 | self._bwd:add(nn.ReverseTable()) 61 | 62 | self._merge = nn.Sequential() 63 | self._merge:add(nn.ZipTable()) 64 | self._merge:add(self.mergeSeq) 65 | 66 | 67 | parent.__init(self) 68 | 69 | self.modules = {self._fwd, self._bwd, self._merge} 70 | 71 | self.output = {} 72 | self.gradInput = {} 73 | end 74 | 75 | function BiSequencerLM:updateOutput(input) 76 | assert(torch.type(input) == 'table', 'Expecting table at arg 1') 77 | local nStep = #input 78 | assert(nStep > 1, "Expecting at least 2 elements in table") 79 | 80 | -- forward through fwd and bwd rnn in fwd and reverse order 81 | self._fwdOutput = self._fwd:updateOutput(_.first(input, nStep - 1)) 82 | self._bwdOutput = self._bwd:updateOutput(_.last(input, nStep - 1)) 83 | 84 | -- empty outputs 85 | for k,v in ipairs(self.output) do self.output[k] = nil end 86 | 87 | -- padding for first and last elements of fwd and bwd outputs, respectively 88 | self._firstStep = nn.rnn.recursiveResizeAs(self._firstStep, self._fwdOutput[1]) 89 | nn.rnn.recursiveFill(self._firstStep, 0) 90 | self._lastStep = nn.rnn.recursiveResizeAs(self._lastStep, self._bwdOutput[1]) 91 | nn.rnn.recursiveFill(self._lastStep, 0) 92 | 93 | -- { { zeros, fwd1, fwd2, ..., fwdN}, {bwd1, bwd2, ..., bwdN, zeros} } 94 | self._mergeInput = {_.clone(self._fwdOutput), _.clone(self._bwdOutput)} 95 | table.insert(self._mergeInput[1], 1, self._firstStep) 96 | table.insert(self._mergeInput[2], self._lastStep) 97 | assert(#self._mergeInput[1] == #self._mergeInput[2]) 98 | 99 | self.output = self._merge:updateOutput(self._mergeInput) 100 | 101 | return self.output 102 | end 103 | 104 | function BiSequencerLM:updateGradInput(input, gradOutput) 105 | local nStep = #input 106 | 107 | self._mergeGradInput = self._merge:updateGradInput(self._mergeInput, gradOutput) 108 | self._fwdGradInput = 
self._fwd:updateGradInput(_.first(input, nStep - 1), _.last(self._mergeGradInput[1], nStep - 1)) 109 | self._bwdGradInput = self._bwd:updateGradInput(_.last(input, nStep - 1), _.first(self._mergeGradInput[2], nStep - 1)) 110 | 111 | -- add fwd rnn gradInputs to bwd rnn gradInputs 112 | for i=1,nStep do 113 | if i == 1 then 114 | self.gradInput[1] = self._fwdGradInput[1] 115 | elseif i == nStep then 116 | self.gradInput[nStep] = self._bwdGradInput[nStep-1] 117 | else 118 | self.gradInput[i] = nn.rnn.recursiveCopy(self.gradInput[i], self._fwdGradInput[i]) 119 | nn.rnn.recursiveAdd(self.gradInput[i], self._bwdGradInput[i-1]) 120 | end 121 | end 122 | 123 | return self.gradInput 124 | end 125 | 126 | function BiSequencerLM:accGradParameters(input, gradOutput, scale) 127 | local nStep = #input 128 | 129 | self._merge:accGradParameters(self._mergeInput, gradOutput, scale) 130 | self._fwd:accGradParameters(_.first(input, nStep - 1), _.last(self._mergeGradInput[1], nStep - 1), scale) 131 | self._bwd:accGradParameters(_.last(input, nStep - 1), _.first(self._mergeGradInput[2], nStep - 1), scale) 132 | end 133 | 134 | function BiSequencerLM:accUpdateGradParameters(input, gradOutput, lr) 135 | local nStep = #input 136 | 137 | self._merge:accUpdateGradParameters(self._mergeInput, gradOutput, lr) 138 | self._fwd:accUpdateGradParameters(_.first(input, nStep - 1), _.last(self._mergeGradInput[1], nStep - 1), lr) 139 | self._bwd:accUpdateGradParameters(_.last(input, nStep - 1), _.first(self._mergeGradInput[2], nStep - 1), lr) 140 | end 141 | 142 | function BiSequencerLM:__tostring__() 143 | local tab = ' ' 144 | local line = '\n' 145 | local ext = ' | ' 146 | local extlast = ' ' 147 | local last = ' ... -> ' 148 | local str = torch.type(self) 149 | str = str .. ' {' 150 | str = str .. line .. tab .. '( fwd ): ' .. tostring(self._fwd):gsub(line, line .. tab .. ext) 151 | str = str .. line .. tab .. '( bwd ): ' .. tostring(self._bwd):gsub(line, line .. tab .. ext) 152 | str = str .. line .. tab .. '( merge ): ' .. tostring(self._merge):gsub(line, line .. tab .. ext) 153 | str = str .. line .. 
'}' 154 | return str 155 | end 156 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) 3 | CMAKE_POLICY(VERSION 2.6) 4 | IF(LUAROCKS_PREFIX) 5 | MESSAGE(STATUS "Installing Torch through Luarocks") 6 | STRING(REGEX REPLACE "(.*)lib/luarocks/rocks.*" "\\1" CMAKE_INSTALL_PREFIX "${LUAROCKS_PREFIX}") 7 | MESSAGE(STATUS "Prefix inferred from Luarocks: ${CMAKE_INSTALL_PREFIX}") 8 | ENDIF() 9 | FIND_PACKAGE(Torch REQUIRED) 10 | 11 | SET(src) 12 | FILE(GLOB luasrc *.lua) 13 | SET(luasrc ${luasrc}) 14 | ADD_SUBDIRECTORY(test) 15 | ADD_TORCH_PACKAGE(rnn "${src}" "${luasrc}" "Recurrent Neural Networks") 16 | -------------------------------------------------------------------------------- /CopyGrad.lua: -------------------------------------------------------------------------------- 1 | local CopyGrad, _ = torch.class('nn.CopyGrad', 'nn.Identity') 2 | 3 | function CopyGrad:updateGradInput(input, gradOutput) 4 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 5 | return self.gradInput 6 | end 7 | -------------------------------------------------------------------------------- /Dropout.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Dropout ]]-- 3 | 4 | -- Implementation of Lazy Dropout. 5 | -- `lazy` option is used to to only resample after backward is called. 6 | -- This mechanism is used by Bayesian GRUs to use the same dropout mask 7 | -- for each sequence, not for each word. 8 | -- See GRU part in README.md (Ref. E & F) 9 | ------------------------------------------------------------------------ 10 | local Dropout, Parent = nn.Dropout, nn.Module 11 | 12 | function Dropout:__init(p,v1,inplace,lazy,mono) 13 | Parent.__init(self) 14 | self.p = p or 0.5 15 | self.train = true 16 | self.inplace = inplace 17 | self.lazy = lazy or false 18 | self.mono = mono or false -- used by trimZero, single sample for a batch 19 | self.flag = true -- used by lazy noise 20 | -- version 2 scales output during training instead of evaluation 21 | self.v2 = not v1 22 | if self.p >= 1 or self.p < 0 then 23 | error(' illegal percentage, must be 0 <= p < 1') 24 | end 25 | self.noise = torch.Tensor() 26 | end 27 | 28 | function Dropout:updateOutput(input) 29 | if self.inplace then 30 | self.output = input 31 | else 32 | self.output:resizeAs(input):copy(input) 33 | end 34 | if self.p > 0 then 35 | if self.train then 36 | if not self.lazy or self.flag then 37 | local noiseSize = input:size() 38 | if self.mono then noiseSize[1] = 1 end 39 | self.noise:resize(noiseSize) 40 | self.noise:bernoulli(1-self.p) 41 | if self.v2 then 42 | self.noise:div(1-self.p) 43 | end 44 | self.flag = false 45 | end 46 | if self.mono and self.noise:size(1) ~= input:size(1) then 47 | self.noise = self.noise:narrow(1,1,1):expandAs(input) 48 | end 49 | self.output:cmul(self.noise) 50 | elseif not self.v2 then 51 | self.output:mul(1-self.p) 52 | end 53 | end 54 | return self.output 55 | end 56 | 57 | function Dropout:updateGradInput(input, gradOutput) 58 | if self.lazy then 59 | self.flag = true 60 | end 61 | if self.train then 62 | if self.inplace then 63 | self.gradInput = gradOutput 64 | else 65 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 66 | end 67 | if self.p > 0 then 68 | self.gradInput:cmul(self.noise) -- simply mask the 
gradients with the noise vector 69 | end 70 | else 71 | if self.inplace then 72 | self.gradInput = gradOutput 73 | else 74 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 75 | end 76 | if not self.v2 and self.p > 0 then 77 | self.gradInput:cdiv(1-self.p) 78 | end 79 | end 80 | return self.gradInput 81 | end 82 | 83 | function Dropout:__tostring__() 84 | return string.format('%s(%.1f, %s)', torch.type(self), self.p, self.lazy and 'lazy' or 'busy') 85 | end 86 | 87 | function Dropout:clearState() 88 | if self.noise then 89 | self.noise:set() 90 | end 91 | self.flag = true 92 | return Parent.clearState(self) 93 | end 94 | -------------------------------------------------------------------------------- /ExpandAs.lua: -------------------------------------------------------------------------------- 1 | local ExpandAs, parent = torch.class('nn.ExpandAs', 'nn.Module') 2 | -- expands the second input to match the first 3 | 4 | function ExpandAs:__init() 5 | parent.__init(self) 6 | self.output = {} 7 | self.gradInput = {} 8 | 9 | self.sum1 = torch.Tensor() 10 | self.sum2 = torch.Tensor() 11 | end 12 | 13 | function ExpandAs:updateOutput(input) 14 | self.output[1] = input[1] 15 | self.output[2] = input[2]:expandAs(input[1]) 16 | return self.output 17 | end 18 | 19 | function ExpandAs:updateGradInput(input, gradOutput) 20 | local b, db = input[2], gradOutput[2] 21 | local s1, s2 = self.sum1, self.sum2 22 | local sumSrc, sumDst = db, s1 23 | 24 | for i=1,b:dim() do 25 | if b:size(i) ~= db:size(i) then 26 | sumDst:sum(sumSrc, i) 27 | sumSrc = sumSrc == s1 and s2 or s1 28 | sumDst = sumDst == s1 and s2 or s1 29 | end 30 | end 31 | 32 | self.gradInput[1] = gradOutput[1] 33 | self.gradInput[2] = sumSrc 34 | 35 | return self.gradInput 36 | end 37 | -------------------------------------------------------------------------------- /FastLSTM.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ LSTM ]]-- 3 | -- Long Short Term Memory architecture. 4 | -- Ref. A.: http://arxiv.org/pdf/1303.5778v1 (blueprint for this module) 5 | -- B. http://web.eecs.utk.edu/~itamar/courses/ECE-692/Bobby_paper1.pdf 6 | -- C. http://arxiv.org/pdf/1503.04069v1.pdf 7 | -- D. https://github.com/wojzaremba/lstm 8 | -- Expects 1D or 2D input. 9 | -- The first input in sequence uses zero value for cell and hidden state 10 | 11 | -- For p > 0, it becomes Bayesian GRUs [Gal, 2015]. 12 | -- In this case, please do not dropout on input as BGRUs handle the input with 13 | -- its own dropouts. First, try 0.25 for p as Gal (2016) suggested, 14 | -- presumably, because of summations of two parts in GRUs connections. 
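-- A minimal usage sketch (assumes `require 'rnn'`; sizes are illustrative):
--   local lstm = nn.FastLSTM(100, 50) -- inputSize -> outputSize; all 4 gates computed in one go
--   local rnn = nn.Sequencer(lstm) -- feed it a table of time-steps
--   local outputs = rnn:forward({torch.randn(8, 100), torch.randn(8, 100)}) -- 2 steps, batch of 8
-- Setting nn.FastLSTM.usenngraph = true before construction builds the nngraph version
-- (the flag is defined just below).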
15 | ------------------------------------------------------------------------ 16 | local FastLSTM, parent = torch.class("nn.FastLSTM", "nn.LSTM") 17 | 18 | -- set this to true to have it use nngraph instead of nn 19 | -- setting this to true can make your next FastLSTM significantly faster 20 | FastLSTM.usenngraph = false 21 | FastLSTM.bn = false 22 | 23 | function FastLSTM:__init(inputSize, outputSize, rho, eps, momentum, affine, p, mono) 24 | -- when FastLSTM.bn=true, the default values of eps and momentum are set by nn.BatchNormalization 25 | self.eps = eps 26 | self.momentum = momentum 27 | self.affine = affine == nil and true or affine 28 | self.p = p or 0 29 | if p and p ~= 0 then 30 | assert(nn.Dropout(p,false,false,true).lazy, 'only work with Lazy Dropout!') 31 | end 32 | self.mono = mono or false 33 | 34 | parent.__init(self, inputSize, outputSize, rho, nil, p, mono) 35 | end 36 | 37 | function FastLSTM:buildModel() 38 | -- input : {input, prevOutput, prevCell} 39 | -- output : {output, cell} 40 | 41 | -- Calculate all four gates in one go : input, hidden, forget, output 42 | if self.p ~= 0 then 43 | self.i2g = nn.Sequential() 44 | :add(nn.ConcatTable() 45 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 46 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 47 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 48 | :add(nn.Dropout(self.p,false,false,true,self.mono))) 49 | :add(nn.ParallelTable() 50 | :add(nn.Linear(self.inputSize, self.outputSize)) 51 | :add(nn.Linear(self.inputSize, self.outputSize)) 52 | :add(nn.Linear(self.inputSize, self.outputSize)) 53 | :add(nn.Linear(self.inputSize, self.outputSize))) 54 | :add(nn.JoinTable(2)) 55 | self.o2g = nn.Sequential() 56 | :add(nn.ConcatTable() 57 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 58 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 59 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 60 | :add(nn.Dropout(self.p,false,false,true,self.mono))) 61 | :add(nn.ParallelTable() 62 | :add(nn.LinearNoBias(self.outputSize, self.outputSize)) 63 | :add(nn.LinearNoBias(self.outputSize, self.outputSize)) 64 | :add(nn.LinearNoBias(self.outputSize, self.outputSize)) 65 | :add(nn.LinearNoBias(self.outputSize, self.outputSize))) 66 | :add(nn.JoinTable(2)) 67 | else 68 | self.i2g = nn.Linear(self.inputSize, 4*self.outputSize) 69 | self.o2g = nn.LinearNoBias(self.outputSize, 4*self.outputSize) 70 | end 71 | 72 | if self.usenngraph or self.bn then 73 | require 'nngraph' 74 | return self:nngraphModel() 75 | end 76 | 77 | local para = nn.ParallelTable():add(self.i2g):add(self.o2g) 78 | local gates = nn.Sequential() 79 | gates:add(nn.NarrowTable(1,2)) 80 | gates:add(para) 81 | gates:add(nn.CAddTable()) 82 | 83 | -- Reshape to (batch_size, n_gates, hid_size) 84 | -- Then slize the n_gates dimension, i.e dimension 2 85 | gates:add(nn.Reshape(4,self.outputSize)) 86 | gates:add(nn.SplitTable(1,2)) 87 | local transfer = nn.ParallelTable() 88 | transfer:add(nn.Sigmoid()):add(nn.Tanh()):add(nn.Sigmoid()):add(nn.Sigmoid()) 89 | gates:add(transfer) 90 | 91 | local concat = nn.ConcatTable() 92 | concat:add(gates):add(nn.SelectTable(3)) 93 | local seq = nn.Sequential() 94 | seq:add(concat) 95 | seq:add(nn.FlattenTable()) -- input, hidden, forget, output, cell 96 | 97 | -- input gate * hidden state 98 | local hidden = nn.Sequential() 99 | hidden:add(nn.NarrowTable(1,2)) 100 | hidden:add(nn.CMulTable()) 101 | 102 | -- forget gate * cell 103 | local cell = nn.Sequential() 104 | local concat = nn.ConcatTable() 105 | 
concat:add(nn.SelectTable(3)):add(nn.SelectTable(5)) 106 | cell:add(concat) 107 | cell:add(nn.CMulTable()) 108 | 109 | local nextCell = nn.Sequential() 110 | local concat = nn.ConcatTable() 111 | concat:add(hidden):add(cell) 112 | nextCell:add(concat) 113 | nextCell:add(nn.CAddTable()) 114 | 115 | local concat = nn.ConcatTable() 116 | concat:add(nextCell):add(nn.SelectTable(4)) 117 | seq:add(concat) 118 | seq:add(nn.FlattenTable()) -- nextCell, outputGate 119 | 120 | local cellAct = nn.Sequential() 121 | cellAct:add(nn.SelectTable(1)) 122 | cellAct:add(nn.Tanh()) 123 | local concat = nn.ConcatTable() 124 | concat:add(cellAct):add(nn.SelectTable(2)) 125 | local output = nn.Sequential() 126 | output:add(concat) 127 | output:add(nn.CMulTable()) 128 | 129 | local concat = nn.ConcatTable() 130 | concat:add(output):add(nn.SelectTable(1)) 131 | seq:add(concat) 132 | 133 | return seq 134 | end 135 | 136 | function FastLSTM:nngraphModel() 137 | assert(nngraph, "Missing nngraph package") 138 | 139 | local inputs = {} 140 | table.insert(inputs, nn.Identity()()) -- x 141 | table.insert(inputs, nn.Identity()()) -- prev_h[L] 142 | table.insert(inputs, nn.Identity()()) -- prev_c[L] 143 | 144 | local x, prev_h, prev_c = unpack(inputs) 145 | 146 | local bn_wx, bn_wh, bn_c 147 | local i2h, h2h 148 | if self.bn then 149 | -- apply recurrent batch normalization 150 | -- http://arxiv.org/pdf/1502.03167v3.pdf 151 | -- normalize recurrent terms W_h*h_{t-1} and W_x*x_t separately 152 | -- Olalekan Ogunmolu 153 | 154 | bn_wx = nn.BatchNormalization(4*self.outputSize, self.eps, self.momentum, self.affine) 155 | bn_wh = nn.BatchNormalization(4*self.outputSize, self.eps, self.momentum, self.affine) 156 | bn_c = nn.BatchNormalization(self.outputSize, self.eps, self.momentum, self.affine) 157 | 158 | -- initialize gamma (the weight) to the recommended value 159 | -- (https://github.com/torch/nn/blob/master/lib/THNN/generic/BatchNormalization.c#L61) 160 | bn_wx.weight:fill(0.1) 161 | bn_wh.weight:fill(0.1) 162 | bn_c.weight:fill(0.1) 163 | 164 | -- evaluate the input sums at once for efficiency 165 | i2h = bn_wx(self.i2g(x):annotate{name='i2h'}):annotate {name='bn_wx'} 166 | h2h = bn_wh(self.o2g(prev_h):annotate{name='h2h'}):annotate {name = 'bn_wh'} 167 | 168 | -- add bias after BN as per paper 169 | h2h = nn.Add(4*self.outputSize)(h2h) 170 | else 171 | -- evaluate the input sums at once for efficiency 172 | i2h = self.i2g(x):annotate{name='i2h'} 173 | h2h = self.o2g(prev_h):annotate{name='h2h'} 174 | end 175 | local all_input_sums = nn.CAddTable()({i2h, h2h}) 176 | 177 | local reshaped = nn.Reshape(4, self.outputSize)(all_input_sums) 178 | -- input, hidden, forget, output 179 | local n1, n2, n3, n4 = nn.SplitTable(2)(reshaped):split(4) 180 | local in_gate = nn.Sigmoid()(n1) 181 | local in_transform = nn.Tanh()(n2) 182 | local forget_gate = nn.Sigmoid()(n3) 183 | local out_gate = nn.Sigmoid()(n4) 184 | 185 | -- perform the LSTM update 186 | local next_c = nn.CAddTable()({ 187 | nn.CMulTable()({forget_gate, prev_c}), 188 | nn.CMulTable()({in_gate, in_transform}) 189 | }) 190 | local next_h 191 | if self.bn then 192 | -- gated cells form the output 193 | next_h = nn.CMulTable()({out_gate, nn.Tanh()(bn_c(next_c):annotate {name = 'bn_c'}) }) 194 | else 195 | -- gated cells form the output 196 | next_h = nn.CMulTable()({out_gate, nn.Tanh()(next_c)}) 197 | end 198 | 199 | local outputs = {next_h, next_c} 200 | 201 | nngraph.annotateNodes() 202 | 203 | return nn.gModule(inputs, outputs) 204 | end 205 | 206 | function 
FastLSTM:buildGate() 207 | error"Not Implemented" 208 | end 209 | 210 | function FastLSTM:buildInputGate() 211 | error"Not Implemented" 212 | end 213 | 214 | function FastLSTM:buildForgetGate() 215 | error"Not Implemented" 216 | end 217 | 218 | function FastLSTM:buildHidden() 219 | error"Not Implemented" 220 | end 221 | 222 | function FastLSTM:buildCell() 223 | error"Not Implemented" 224 | end 225 | 226 | function FastLSTM:buildOutputGate() 227 | error"Not Implemented" 228 | end 229 | -------------------------------------------------------------------------------- /GRU.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ GRU ]]-- 3 | -- Author: Jin-Hwa Kim 4 | -- License: LICENSE.2nd.txt 5 | 6 | -- Gated Recurrent Units architecture. 7 | -- http://www.wildml.com/2015/10/recurrent-neural-network-tutorial-part-4-implementing-a-grulstm-rnn-with-python-and-theano/ 8 | -- Expects 1D or 2D input. 9 | -- The first input in sequence uses zero value for cell and hidden state 10 | -- 11 | -- For p > 0, it becomes Bayesian GRUs [Moon et al., 2015; Gal, 2015]. 12 | -- In this case, please do not dropout on input as BGRUs handle the input with 13 | -- its own dropouts. First, try 0.25 for p as Gal (2016) suggested, presumably, 14 | -- because of summations of two parts in GRUs connections. 15 | ------------------------------------------------------------------------ 16 | local GRU, parent = torch.class('nn.GRU', 'nn.AbstractRecurrent') 17 | 18 | function GRU:__init(inputSize, outputSize, rho, p, mono) 19 | parent.__init(self, rho or 9999) 20 | self.p = p or 0 21 | if p and p ~= 0 then 22 | assert(nn.Dropout(p,false,false,true).lazy, 'only work with Lazy Dropout!') 23 | end 24 | self.mono = mono or false 25 | self.inputSize = inputSize 26 | self.outputSize = outputSize 27 | -- build the model 28 | self.recurrentModule = self:buildModel() 29 | -- make it work with nn.Container 30 | self.modules[1] = self.recurrentModule 31 | self.sharedClones[1] = self.recurrentModule 32 | 33 | -- for output(0), cell(0) and gradCell(T) 34 | self.zeroTensor = torch.Tensor() 35 | 36 | self.cells = {} 37 | self.gradCells = {} 38 | end 39 | 40 | -------------------------- factory methods ----------------------------- 41 | function GRU:buildModel() 42 | -- input : {input, prevOutput} 43 | -- output : {output} 44 | 45 | -- Calculate all four gates in one go : input, hidden, forget, output 46 | if self.p ~= 0 then 47 | self.i2g = nn.Sequential() 48 | :add(nn.ConcatTable() 49 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 50 | :add(nn.Dropout(self.p,false,false,true,self.mono))) 51 | :add(nn.ParallelTable() 52 | :add(nn.Linear(self.inputSize, self.outputSize)) 53 | :add(nn.Linear(self.inputSize, self.outputSize))) 54 | :add(nn.JoinTable(2)) 55 | self.o2g = nn.Sequential() 56 | :add(nn.ConcatTable() 57 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 58 | :add(nn.Dropout(self.p,false,false,true,self.mono))) 59 | :add(nn.ParallelTable() 60 | :add(nn.LinearNoBias(self.outputSize, self.outputSize)) 61 | :add(nn.LinearNoBias(self.outputSize, self.outputSize))) 62 | :add(nn.JoinTable(2)) 63 | else 64 | self.i2g = nn.Linear(self.inputSize, 2*self.outputSize) 65 | self.o2g = nn.LinearNoBias(self.outputSize, 2*self.outputSize) 66 | end 67 | 68 | local para = nn.ParallelTable():add(self.i2g):add(self.o2g) 69 | local gates = nn.Sequential() 70 | gates:add(para) 71 | gates:add(nn.CAddTable()) 72 | 73 | -- 
Reshape to (batch_size, n_gates, hid_size) 74 | -- Then slize the n_gates dimension, i.e dimension 2 75 | gates:add(nn.Reshape(2,self.outputSize)) 76 | gates:add(nn.SplitTable(1,2)) 77 | local transfer = nn.ParallelTable() 78 | transfer:add(nn.Sigmoid()):add(nn.Sigmoid()) 79 | gates:add(transfer) 80 | 81 | local concat = nn.ConcatTable():add(nn.Identity()):add(gates) 82 | local seq = nn.Sequential() 83 | seq:add(concat) 84 | seq:add(nn.FlattenTable()) -- x(t), s(t-1), r, z 85 | 86 | -- Rearrange to x(t), s(t-1), r, z, s(t-1) 87 | local concat = nn.ConcatTable() -- 88 | concat:add(nn.NarrowTable(1,4)):add(nn.SelectTable(2)) 89 | seq:add(concat):add(nn.FlattenTable()) 90 | 91 | -- h 92 | local hidden = nn.Sequential() 93 | local concat = nn.ConcatTable() 94 | local t1 = nn.Sequential() 95 | t1:add(nn.SelectTable(1)) 96 | local t2 = nn.Sequential() 97 | t2:add(nn.NarrowTable(2,2)):add(nn.CMulTable()) 98 | if self.p ~= 0 then 99 | t1:add(nn.Dropout(self.p,false,false,true,self.mono)) 100 | t2:add(nn.Dropout(self.p,false,false,true,self.mono)) 101 | end 102 | t1:add(nn.Linear(self.inputSize, self.outputSize)) 103 | t2:add(nn.LinearNoBias(self.outputSize, self.outputSize)) 104 | 105 | concat:add(t1):add(t2) 106 | hidden:add(concat):add(nn.CAddTable()):add(nn.Tanh()) 107 | 108 | local z1 = nn.Sequential() 109 | z1:add(nn.SelectTable(4)) 110 | z1:add(nn.SAdd(-1, true)) -- Scalar add & negation 111 | 112 | local z2 = nn.Sequential() 113 | z2:add(nn.NarrowTable(4,2)) 114 | z2:add(nn.CMulTable()) 115 | 116 | local o1 = nn.Sequential() 117 | local concat = nn.ConcatTable() 118 | concat:add(hidden):add(z1) 119 | o1:add(concat):add(nn.CMulTable()) 120 | 121 | local o2 = nn.Sequential() 122 | local concat = nn.ConcatTable() 123 | concat:add(o1):add(z2) 124 | o2:add(concat):add(nn.CAddTable()) 125 | 126 | seq:add(o2) 127 | 128 | return seq 129 | end 130 | 131 | function GRU:getHiddenState(step, input) 132 | local prevOutput 133 | if step == 0 then 134 | prevOutput = self.userPrevOutput or self.outputs[step] or self.zeroTensor 135 | if input then 136 | if input:dim() == 2 then 137 | self.zeroTensor:resize(input:size(1), self.outputSize):zero() 138 | else 139 | self.zeroTensor:resize(self.outputSize):zero() 140 | end 141 | end 142 | else 143 | -- previous output and cell of this module 144 | prevOutput = self.outputs[step] 145 | end 146 | return prevOutput 147 | end 148 | 149 | 150 | function GRU:setHiddenState(step, hiddenState) 151 | assert(torch.isTensor(hiddenState)) 152 | self.outputs[step] = hiddenState 153 | end 154 | 155 | ------------------------- forward backward ----------------------------- 156 | function GRU:updateOutput(input) 157 | local prevOutput = self:getHiddenState(self.step-1, input) 158 | 159 | -- output(t) = gru{input(t), output(t-1)} 160 | local output 161 | if self.train ~= false then 162 | self:recycle() 163 | local recurrentModule = self:getStepModule(self.step) 164 | -- the actual forward propagation 165 | output = recurrentModule:updateOutput{input, prevOutput} 166 | else 167 | output = self.recurrentModule:updateOutput{input, prevOutput} 168 | end 169 | 170 | self.outputs[self.step] = output 171 | 172 | self.output = output 173 | 174 | self.step = self.step + 1 175 | self.gradPrevOutput = nil 176 | self.updateGradInputStep = nil 177 | self.accGradParametersStep = nil 178 | -- note that we don't return the cell, just the output 179 | return self.output 180 | end 181 | 182 | 183 | function GRU:getGradHiddenState(step) 184 | local gradOutput 185 | if step == self.step-1 then 186 | 
gradOutput = self.userNextGradOutput or self.gradOutputs[step] or self.zeroTensor 187 | else 188 | gradOutput = self.gradOutputs[step] 189 | end 190 | return gradOutput 191 | end 192 | 193 | function GRU:setGradHiddenState(step, gradHiddenState) 194 | assert(torch.isTensor(gradHiddenState)) 195 | self.gradOutputs[step] = gradHiddenState 196 | end 197 | 198 | function GRU:_updateGradInput(input, gradOutput) 199 | assert(self.step > 1, "expecting at least one updateOutput") 200 | local step = self.updateGradInputStep - 1 201 | assert(step >= 1) 202 | 203 | -- set the output/gradOutput states of current Module 204 | local recurrentModule = self:getStepModule(step) 205 | 206 | -- backward propagate through this step 207 | local _gradOutput = self:getGradHiddenState(step) 208 | assert(_gradOutput) 209 | self._gradOutputs[step] = nn.rnn.recursiveCopy(self._gradOutputs[step], _gradOutput) 210 | nn.rnn.recursiveAdd(self._gradOutputs[step], gradOutput) 211 | gradOutput = self._gradOutputs[step] 212 | 213 | local gradInputTable = recurrentModule:updateGradInput({input, self:getHiddenState(step-1)}, gradOutput) 214 | 215 | self:setGradHiddenState(step-1, gradInputTable[2]) 216 | 217 | return gradInputTable[1] 218 | end 219 | 220 | function GRU:_accGradParameters(input, gradOutput, scale) 221 | local step = self.accGradParametersStep - 1 222 | assert(step >= 1) 223 | 224 | -- set the output/gradOutput states of current Module 225 | local recurrentModule = self:getStepModule(step) 226 | 227 | -- backward propagate through this step 228 | local gradOutput = self._gradOutputs[step] or self:getGradHiddenState(step) 229 | recurrentModule:accGradParameters({input, self:getHiddenState(step-1)}, gradOutput, scale) 230 | end 231 | 232 | function GRU:__tostring__() 233 | return string.format('%s(%d -> %d, %.2f)', torch.type(self), self.inputSize, self.outputSize, self.p) 234 | end 235 | 236 | -- migrate GRUs params to BGRUs params 237 | function GRU:migrate(params) 238 | local _params = self:parameters() 239 | assert(self.p ~= 0, 'only support for BGRUs.') 240 | assert(#params == 6, '# of source params should be 6.') 241 | assert(#_params == 9, '# of destination params should be 9.') 242 | _params[1]:copy(params[1]:narrow(1,1,self.outputSize)) 243 | _params[2]:copy(params[2]:narrow(1,1,self.outputSize)) 244 | _params[3]:copy(params[1]:narrow(1,self.outputSize+1,self.outputSize)) 245 | _params[4]:copy(params[2]:narrow(1,self.outputSize+1,self.outputSize)) 246 | _params[5]:copy(params[3]:narrow(1,1,self.outputSize)) 247 | _params[6]:copy(params[3]:narrow(1,self.outputSize+1,self.outputSize)) 248 | _params[7]:copy(params[4]) 249 | _params[8]:copy(params[5]) 250 | _params[9]:copy(params[6]) 251 | end 252 | -------------------------------------------------------------------------------- /LICENSE.2nd.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 NAVER Corp. and Seoul National University R&DB Foundation 2 | All rights reserved. 3 | 4 | Author: jnhwkim@snu.ac.kr (Jin-Hwa Kim) 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. 
Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither the names of NAVER Corp. and Seoul National University R&DB 17 | Foundation nor the names of its contributors may be used to endorse or 18 | promote products derived from this software without specific prior 19 | written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 25 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2016 Element Inc (Nicholas Leonard) 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | 3. Neither the names of Element Inc. nor the names of its contributors may be 16 | used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /LinearNoBias.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ LinearNoBias ]]-- 3 | -- Subclass of nn.Linear with no bias term 4 | ------------------------------------------------------------------------ 5 | nn = require 'nn' 6 | local LinearNoBias, Linear = torch.class('nn.LinearNoBias', 'nn.Linear') 7 | 8 | function LinearNoBias:__init(inputSize, outputSize) 9 | nn.Module.__init(self) 10 | 11 | self.weight = torch.Tensor(outputSize, inputSize) 12 | self.gradWeight = torch.Tensor(outputSize, inputSize) 13 | 14 | self:reset() 15 | end 16 | 17 | function LinearNoBias:reset(stdv) 18 | if stdv then 19 | stdv = stdv * math.sqrt(3) 20 | else 21 | stdv = 1./math.sqrt(self.weight:size(2)) 22 | end 23 | if nn.oldSeed then 24 | for i=1,self.weight:size(1) do 25 | self.weight:select(1, i):apply(function() 26 | return torch.uniform(-stdv, stdv) 27 | end) 28 | end 29 | else 30 | self.weight:uniform(-stdv, stdv) 31 | end 32 | 33 | return self 34 | end 35 | 36 | function LinearNoBias:updateOutput(input) 37 | if input:dim() == 1 then 38 | self.output:resize(self.weight:size(1)) 39 | self.output:mv(self.weight, input) 40 | elseif input:dim() == 2 then 41 | local nframe = input:size(1) 42 | local nElement = self.output:nElement() 43 | self.output:resize(nframe, self.weight:size(1)) 44 | if self.output:nElement() ~= nElement then 45 | self.output:zero() 46 | end 47 | if not self.addBuffer or self.addBuffer:nElement() ~= nframe then 48 | self.addBuffer = input.new(nframe):fill(1) 49 | end 50 | self.output:addmm(0, self.output, 1, input, self.weight:t()) 51 | else 52 | error('input must be vector or matrix') 53 | end 54 | 55 | return self.output 56 | end 57 | 58 | function LinearNoBias:accGradParameters(input, gradOutput, scale) 59 | scale = scale or 1 60 | if input:dim() == 1 then 61 | self.gradWeight:addr(scale, gradOutput, input) 62 | elseif input:dim() == 2 then 63 | self.gradWeight:addmm(scale, gradOutput:t(), input) 64 | end 65 | end 66 | -------------------------------------------------------------------------------- /LookupTableMaskZero.lua: -------------------------------------------------------------------------------- 1 | local LookupTableMaskZero, parent = torch.class('nn.LookupTableMaskZero', 'nn.LookupTable') 2 | 3 | function LookupTableMaskZero:__init(nIndex, nOutput) 4 | parent.__init(self, nIndex + 1, nOutput) 5 | end 6 | 7 | function LookupTableMaskZero:updateOutput(input) 8 | self.weight[1]:zero() 9 | if self.__input and (torch.type(self.__input) ~= torch.type(input)) then 10 | self.__input = nil -- fixes old casting bug 11 | end 12 | self.__input = self.__input or input.new() 13 | self.__input:resizeAs(input):add(input, 1) 14 | return parent.updateOutput(self, self.__input) 15 | end 16 | 17 | function LookupTableMaskZero:accGradParameters(input, gradOutput, scale) 18 | parent.accGradParameters(self, self.__input, gradOutput, scale) 19 | end 20 | 21 | function LookupTableMaskZero:type(type, cache) 22 | self.__input = nil 23 | return parent.type(self, type, cache) 24 | end 25 | -------------------------------------------------------------------------------- /MaskZero.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ MaskZero ]]-- 3 | -- Decorator that zeroes the 
output rows of the encapsulated module 4 | -- for commensurate input rows which are tensors of zeros 5 | ------------------------------------------------------------------------ 6 | local MaskZero, parent = torch.class("nn.MaskZero", "nn.Decorator") 7 | 8 | function MaskZero:__init(module, nInputDim, silent) 9 | parent.__init(self, module) 10 | assert(torch.isTypeOf(module, 'nn.Module')) 11 | if torch.isTypeOf(module, 'nn.AbstractRecurrent') and not silent then 12 | print("Warning : you are most likely using MaskZero the wrong way. " 13 | .."You should probably use AbstractRecurrent:maskZero() so that " 14 | .."it wraps the internal AbstractRecurrent.recurrentModule instead of " 15 | .."wrapping the AbstractRecurrent module itself.") 16 | end 17 | assert(torch.type(nInputDim) == 'number', 'Expecting nInputDim number at arg 1') 18 | self.nInputDim = nInputDim 19 | end 20 | 21 | function MaskZero:recursiveGetFirst(input) 22 | if torch.type(input) == 'table' then 23 | return self:recursiveGetFirst(input[1]) 24 | else 25 | assert(torch.isTensor(input)) 26 | return input 27 | end 28 | end 29 | 30 | function MaskZero:recursiveMask(output, input, mask) 31 | if torch.type(input) == 'table' then 32 | output = torch.type(output) == 'table' and output or {} 33 | for k,v in ipairs(input) do 34 | output[k] = self:recursiveMask(output[k], v, mask) 35 | end 36 | else 37 | assert(torch.isTensor(input)) 38 | output = torch.isTensor(output) and output or input.new() 39 | 40 | -- make sure mask has the same dimension as the input tensor 41 | local inputSize = input:size():fill(1) 42 | if self.batchmode then 43 | inputSize[1] = input:size(1) 44 | end 45 | mask:resize(inputSize) 46 | -- build mask 47 | local zeroMask = mask:expandAs(input) 48 | output:resizeAs(input):copy(input) 49 | output:maskedFill(zeroMask, 0) 50 | end 51 | return output 52 | end 53 | 54 | function MaskZero:updateOutput(input) 55 | -- recurrent module input is always the first one 56 | local rmi = self:recursiveGetFirst(input):contiguous() 57 | if rmi:dim() == self.nInputDim then 58 | self.batchmode = false 59 | rmi = rmi:view(-1) -- collapse dims 60 | elseif rmi:dim() - 1 == self.nInputDim then 61 | self.batchmode = true 62 | rmi = rmi:view(rmi:size(1), -1) -- collapse non-batch dims 63 | else 64 | error("nInputDim error: "..rmi:dim()..", "..self.nInputDim) 65 | end 66 | 67 | -- build mask 68 | local vectorDim = rmi:dim() 69 | self._zeroMask = self._zeroMask or rmi.new() 70 | self._zeroMask:norm(rmi, 2, vectorDim) 71 | self.zeroMask = self.zeroMask or ( 72 | (torch.type(rmi) == 'torch.CudaTensor') and torch.CudaByteTensor() 73 | or (torch.type(rmi) == 'torch.ClTensor') and torch.ClTensor() 74 | or torch.ByteTensor() 75 | ) 76 | self._zeroMask.eq(self.zeroMask, self._zeroMask, 0) 77 | 78 | -- forward through decorated module 79 | local output = self.modules[1]:updateOutput(input) 80 | 81 | self.output = self:recursiveMask(self.output, output, self.zeroMask) 82 | return self.output 83 | end 84 | 85 | function MaskZero:updateGradInput(input, gradOutput) 86 | -- zero gradOutputs before backpropagating through decorated module 87 | self.gradOutput = self:recursiveMask(self.gradOutput, gradOutput, self.zeroMask) 88 | 89 | self.gradInput = self.modules[1]:updateGradInput(input, self.gradOutput) 90 | return self.gradInput 91 | end 92 | 93 | function MaskZero:type(type, ...) 94 | self.zeroMask = nil 95 | self._zeroMask = nil 96 | self._maskbyte = nil 97 | self._maskindices = nil 98 | return parent.type(self, type, ...) 
99 | end 100 | -------------------------------------------------------------------------------- /MaskZeroCriterion.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ MaskZeroCriterion ]]-- 3 | -- Decorator that zeros err and gradInputs of the encapsulated criterion 4 | -- for commensurate input rows which are tensors of zeros 5 | ------------------------------------------------------------------------ 6 | local MaskZeroCriterion, parent = torch.class("nn.MaskZeroCriterion", "nn.Criterion") 7 | 8 | function MaskZeroCriterion:__init(criterion, nInputDim) 9 | parent.__init(self) 10 | self.criterion = criterion 11 | assert(torch.isTypeOf(criterion, 'nn.Criterion')) 12 | assert(torch.type(nInputDim) == 'number', 'Expecting nInputDim number at arg 2') 13 | self.nInputDim = nInputDim 14 | end 15 | 16 | function MaskZeroCriterion:recursiveGetFirst(input) 17 | if torch.type(input) == 'table' then 18 | return self:recursiveGetFirst(input[1]) 19 | else 20 | assert(torch.isTensor(input)) 21 | return input 22 | end 23 | end 24 | 25 | function MaskZeroCriterion:recursiveMask(dst, src, mask) 26 | if torch.type(src) == 'table' then 27 | dst = torch.type(dst) == 'table' and dst or {} 28 | for k,v in ipairs(src) do 29 | dst[k] = self:recursiveMask(dst[k], v, mask) 30 | end 31 | else 32 | assert(torch.isTensor(src)) 33 | dst = torch.isTensor(dst) and dst or src.new() 34 | 35 | dst:index(src, 1, mask) 36 | end 37 | return dst 38 | end 39 | 40 | function MaskZeroCriterion:updateOutput(input, target) 41 | -- recurrent module input is always the first one 42 | local rmi = self:recursiveGetFirst(input):contiguous() 43 | if rmi:dim() == self.nInputDim then 44 | error("does not support online (i.e. 
non-batch) mode") 45 | elseif rmi:dim() - 1 == self.nInputDim then 46 | rmi = rmi:view(rmi:size(1), -1) -- collapse non-batch dims 47 | else 48 | error("nInputDim error: "..rmi:dim()..", "..self.nInputDim) 49 | end 50 | 51 | -- build mask 52 | local vectorDim = rmi:dim() 53 | self._zeroMask = self._zeroMask or rmi.new() 54 | self._zeroMask:norm(rmi, 2, vectorDim) 55 | local zeroMask = self._zeroMask 56 | if torch.isTypeOf(zeroMask, 'torch.CudaTensor') or 57 | torch.isTypeOf(zeroMask, 'torch.ClTensor') then 58 | self.__zeroMask = self.__zeroMask or torch.FloatTensor() 59 | self.__zeroMask:resize(self._zeroMask:size()):copy(self._zeroMask) 60 | zeroMask = self._zeroMask 61 | end 62 | 63 | self.zeroMask = self.zeroMask or torch.LongTensor() 64 | self.zeroMask:resize(self._zeroMask:size(1)):zero() 65 | 66 | local i, j = 0, 0 67 | zeroMask:apply(function(norm) 68 | i = i + 1 69 | if norm ~= 0 then 70 | j = j + 1 71 | self.zeroMask[j] = i 72 | end 73 | end) 74 | self.zeroMask:resize(j) 75 | 76 | if j > 0 then 77 | self.input = self:recursiveMask(self.input, input, self.zeroMask) 78 | self.target = self:recursiveMask(self.target, target, self.zeroMask) 79 | 80 | -- forward through decorated criterion 81 | self.output = self.criterion:updateOutput(self.input, self.target) 82 | else 83 | -- when all samples are masked, then loss is zero (issue 128) 84 | self.output = 0 85 | end 86 | 87 | return self.output 88 | end 89 | 90 | function MaskZeroCriterion:recursiveMaskGradInput(dst, mask, src, input) 91 | if torch.type(input) == 'table' then 92 | dst = (torch.type(dst) == 'table') and dst or {dst} 93 | src = (torch.type(src) == 'table') and src or {src} 94 | for key,_ in pairs(input) do 95 | dst[key] = self:recursiveMaskGradInput(dst[key], mask, src[key], input[key]) 96 | end 97 | for i=#input+1,#dst do 98 | dst[i] = nil 99 | end 100 | elseif torch.isTensor(input) then 101 | dst = torch.isTensor(dst) and dst or input.new() 102 | dst:resizeAs(input):zero() 103 | if mask:nElement() > 0 then 104 | assert(src) 105 | dst:indexCopy(1, mask, src) 106 | end 107 | else 108 | error("expecting nested tensors or tables. Got ".. 109 | torch.type(dst).." and "..torch.type(input).." instead") 110 | end 111 | return dst 112 | end 113 | 114 | function MaskZeroCriterion:updateGradInput(input, target) 115 | if self.zeroMask:nElement() > 0 then 116 | assert(self.input and self.target) 117 | self._gradInput = self.criterion:updateGradInput(self.input, self.target) 118 | end 119 | self.gradInput = self:recursiveMaskGradInput(self.gradInput, self.zeroMask, self._gradInput, input) 120 | return self.gradInput 121 | end 122 | 123 | function MaskZeroCriterion:type(type, ...) 124 | self.zeroMask = nil 125 | self._zeroMask = nil 126 | self.__zeroMask = nil 127 | self.input = nil 128 | self.target = nil 129 | self._gradInput = nil 130 | 131 | return parent.type(self, type, ...) 
132 | end 133 | -------------------------------------------------------------------------------- /Module.lua: -------------------------------------------------------------------------------- 1 | local Module = nn.Module 2 | 3 | -- You can use this to manually forget past memories in AbstractRecurrent instances 4 | function Module:forget() 5 | if self.modules then 6 | for i,module in ipairs(self.modules) do 7 | module:forget() 8 | end 9 | end 10 | return self 11 | end 12 | 13 | -- Used by nn.Sequencers 14 | function Module:remember(remember) 15 | if self.modules then 16 | for i, module in ipairs(self.modules) do 17 | module:remember(remember) 18 | end 19 | end 20 | return self 21 | end 22 | 23 | function Module:stepClone(shareParams, shareGradParams) 24 | return self:sharedClone(shareParams, shareGradParams, true) 25 | end 26 | 27 | function Module:backwardOnline() 28 | print("Deprecated Jan 6, 2016. By default rnn now uses backwardOnline, so no need to call this method") 29 | end 30 | 31 | -- calls setOutputStep on all component AbstractRecurrent modules 32 | -- used by Recursor() after calling stepClone. 33 | -- this solves a very annoying bug... 34 | function Module:setOutputStep(step) 35 | if self.modules then 36 | for i,module in ipairs(self.modules) do 37 | module:setOutputStep(step) 38 | end 39 | end 40 | end 41 | 42 | -- set the maximum number of backpropagation through time (BPTT) time-steps 43 | function Module:maxBPTTstep(rho) 44 | if self.modules then 45 | for i, module in ipairs(self.modules) do 46 | module:maxBPTTstep(rho) 47 | end 48 | end 49 | end 50 | 51 | function Module:getHiddenState(step) 52 | if self.modules then 53 | local hiddenState = {} 54 | for i, module in ipairs(self.modules) do 55 | hiddenState[i] = module:getHiddenState(step) 56 | end 57 | return hiddenState 58 | end 59 | end 60 | 61 | function Module:setHiddenState(step, hiddenState) 62 | if self.modules then 63 | assert(torch.type(hiddenState) == 'table') 64 | for i, module in ipairs(self.modules) do 65 | module:setHiddenState(step, hiddenState[i]) 66 | end 67 | end 68 | end 69 | 70 | function Module:getGradHiddenState(step) 71 | if self.modules then 72 | local gradHiddenState = {} 73 | for i, module in ipairs(self.modules) do 74 | gradHiddenState[i] = module:getGradHiddenState(step) 75 | end 76 | return gradHiddenState 77 | end 78 | end 79 | 80 | function Module:setGradHiddenState(step, gradHiddenState) 81 | if self.modules then 82 | assert(torch.type(gradHiddenState) == 'table') 83 | for i, module in ipairs(self.modules) do 84 | module:setGradHiddenState(step, gradHiddenState[i]) 85 | end 86 | end 87 | end -------------------------------------------------------------------------------- /Mufuru.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ MuFuRu - Multi-Function Recurrent Unit ]]-- 3 | -- Author: Jonathan Uesato 4 | -- License: LICENSE.2nd.txt 5 | 6 | -- Ref. 
A.: http://arxiv.org/pdf/1606.03002v1.pdf 7 | ------------------------------------------------------------------------ 8 | 9 | local MuFuRu, parent = torch.class('nn.MuFuRu', 'nn.GRU') 10 | 11 | local SqrtDiffLayer = nn.Sequential() 12 | :add(nn.CSubTable()) 13 | :add(nn.Abs()) 14 | :add(nn.Sqrt()) 15 | :add(nn.MulConstant(0.25)) 16 | 17 | local MaxLayer = nn.Sequential() 18 | :add(nn.MapTable(nn.Unsqueeze(1))) 19 | :add(nn.JoinTable(1)) 20 | :add(nn.Max(1)) 21 | 22 | local MinLayer = nn.Sequential() 23 | :add(nn.MapTable(nn.Unsqueeze(1))) 24 | :add(nn.JoinTable(1)) 25 | :add(nn.Min(1)) 26 | 27 | -- all operations take a table {oldState, newState} and return newState 28 | _operations = { 29 | max = MaxLayer, 30 | keep = nn.SelectTable(1), 31 | replace = nn.SelectTable(2), 32 | mul = nn.CMulTable(), 33 | min = MinLayer, 34 | diff = nn.CSubTable(), 35 | forget = nn.Sequential():add(nn.SelectTable(1)):add(nn.MulConstant(0.0)), 36 | sqrt_diff = SqrtDiffLayer 37 | } 38 | 39 | function MuFuRu:__init(inputSize, outputSize, ops, rho) 40 | -- Use all ops by default. To replicate GRU, use keep and replace only. 41 | self.ops = ops or {'keep', 'replace', 'mul', 'diff', 'forget', 'sqrt_diff', 'max', 'min'} 42 | self.num_ops = #self.ops 43 | self.operations = {} 44 | for i=1,self.num_ops do 45 | self.operations[i] = _operations[self.ops[i]] 46 | end 47 | self.inputSize = inputSize 48 | self.outputSize = outputSize 49 | parent.__init(self, inputSize, outputSize, rho or 9999) 50 | end 51 | 52 | -------------------------- factory methods ----------------------------- 53 | function MuFuRu:buildModel() 54 | -- input : {input, prevOutput} 55 | -- output : output 56 | 57 | local nonBatchDim = 2 58 | -- resetGate takes {input, prevOutput} to resetGate 59 | local resetGate = nn.Sequential() 60 | :add(nn.ParallelTable() 61 | :add(nn.Linear(self.inputSize, self.outputSize), false) 62 | :add(nn.Linear(self.outputSize, self.outputSize)) 63 | ) 64 | :add(nn.CAddTable()) 65 | :add(nn.Sigmoid()) 66 | 67 | -- Feature takes {input, prevOutput, reset} to feature 68 | local featureVec = nn.Sequential() 69 | :add(nn.ConcatTable() 70 | :add(nn.SelectTable(1)) 71 | :add(nn.Sequential() 72 | :add(nn.NarrowTable(2,2)) 73 | :add(nn.CMulTable()) 74 | ) 75 | ) 76 | :add(nn.JoinTable(nonBatchDim)) -- [x_t, r dot s_t-1] 77 | :add(nn.Linear(self.inputSize + self.outputSize, self.outputSize)) 78 | :add(nn.Sigmoid()) 79 | 80 | -- opWeights takes {input, prevOutput, reset} to opWeights. 
81 | -- Note that reset is not used 82 | local opWeights = nn.Sequential() 83 | :add(nn.NarrowTable(1,2)) 84 | :add(nn.JoinTable(nonBatchDim)) -- k_t 85 | :add(nn.Linear(self.inputSize + self.outputSize, self.num_ops * self.outputSize)) --p^_t 86 | :add(nn.View(self.num_ops, self.outputSize):setNumInputDims(1)) 87 | :add(nn.Transpose({1,2})) 88 | :add(nn.SoftMax()) --p_t 89 | 90 | -- all_ops takes {oldState, newState} to {newState1, newState2, ...newStateN} 91 | local all_ops = nn.ConcatTable() 92 | for i=1,self.num_ops do 93 | -- an operation is any layer taking {prevHidden, featureVec} to newState 94 | all_ops:add(self.operations[i]) 95 | end 96 | 97 | local all_op_activations = nn.Sequential() 98 | :add(nn.NarrowTable(1,2)) 99 | :add(all_ops) 100 | :add(nn.MapTable(nn.Unsqueeze(1))) 101 | :add(nn.JoinTable(1,3)) 102 | 103 | -- combine_ops takes {prevHidden, featureVec, opWeights} to nextHidden 104 | local combine_ops = nn.Sequential() 105 | :add(nn.ConcatTable() 106 | :add(all_op_activations) 107 | :add(nn.SelectTable(3)) 108 | ) 109 | :add(nn.CMulTable()) 110 | :add(nn.Sum(1,3)) 111 | 112 | local cell = nn.Sequential() 113 | :add(nn.ConcatTable() 114 | :add(nn.SelectTable(1)) 115 | :add(nn.SelectTable(2)) 116 | :add(resetGate) 117 | ) -- {input,prevOutput,reset} 118 | :add(nn.ConcatTable() 119 | :add(nn.SelectTable(2)) 120 | :add(featureVec) 121 | :add(opWeights) 122 | ) -- {prevOutput, v_t, opWeights} 123 | :add(combine_ops) 124 | return cell 125 | end 126 | 127 | -- Factory methods are inherited from GRU 128 | 129 | function MuFuRu:__tostring__() 130 | local op_str = '{ ' 131 | for i=1,self.num_ops do 132 | op_str = op_str .. self.ops[i] .. ' ' 133 | end 134 | op_str = op_str .. '}' 135 | return (string.format('%s(%d -> %d) ', torch.type(self), self.inputSize, self.outputSize)) .. op_str 136 | end 137 | 138 | function MuFuRu:migrate(params) 139 | error"Migrate not supported for MuFuRu" 140 | end 141 | -------------------------------------------------------------------------------- /NormStabilizer.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Norm Stabilization]] 3 | -- Regularizing RNNs by Stabilizing Activations 4 | -- Ref. A: http://arxiv.org/abs/1511.08400 5 | -- For training, this module only works in batch mode. 
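--
-- A minimal usage sketch (illustrative only; the wrapped module and the sizes
-- below are arbitrary choices, not prescribed by this file):
--   local rnn = nn.Sequencer(nn.Sequential()
--      :add(nn.FastLSTM(10, 10))
--      :add(nn.NormStabilizer(0.5)))  -- beta = 0.5 (defaults to 1)
--   -- After a forward/backward pass, the regularization term defined below
--   -- can be read with updateLoss(); the corresponding gradients are added
--   -- automatically during backward.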
6 | ------------------------------------------------------------------------ 7 | 8 | local NS, parent = torch.class("nn.NormStabilizer", "nn.AbstractRecurrent") 9 | 10 | function NS:__init(beta) 11 | parent.__init(self, 99999) 12 | 13 | self.beta = beta or 1 14 | self.recurrentModule = nn.CopyGrad() 15 | 16 | -- make it work with nn.Container 17 | self.modules[1] = self.recurrentModule 18 | self.sharedClones[1] = self.recurrentModule 19 | end 20 | 21 | function NS:_accGradParameters(input, gradOutput, scale) 22 | -- No parameters to update 23 | end 24 | 25 | function NS:updateOutput(input) 26 | assert(input:dim() == 2) 27 | local output 28 | if self.train ~= false then 29 | self:recycle() 30 | local rm = self:getStepModule(self.step) 31 | output = rm:updateOutput(input) 32 | -- in training mode, we also calculate norm of hidden state 33 | rm.norm = rm.norm or output.new() 34 | rm.norm:norm(output, 2, 2) 35 | else 36 | output = self.recurrentModule:updateOutput(input) 37 | end 38 | 39 | self.outputs[self.step] = output 40 | 41 | self.output = output 42 | self.step = self.step + 1 43 | self.gradPrevOutput = nil 44 | self.updateGradInputStep = nil 45 | self.accGradParametersStep = nil 46 | 47 | return self.output 48 | end 49 | 50 | -- returns norm-stabilizer loss as defined in ref. A 51 | function NS:updateLoss() 52 | self.loss = 0 53 | self._normsum = self._normsum or self.output.new() 54 | 55 | for step=2,self.step-1 do 56 | local rm1 = self:getStepModule(step-1) 57 | local rm2 = self:getStepModule(step) 58 | self._normsum:add(rm1.norm, rm2.norm) 59 | self._normsum:pow(2) 60 | local steploss = self._normsum:mean() -- sizeAverage 61 | self.loss = self.loss + steploss 62 | end 63 | 64 | -- the loss is divided by the number of time-steps (but not the gradients) 65 | self.loss = self.beta * self.loss / (self.step-1) 66 | return self.loss 67 | end 68 | 69 | function NS:_updateGradInput(input, gradOutput) 70 | -- First grab h[t] : 71 | -- backward propagate through this step 72 | local curStep = self.updateGradInputStep-1 73 | local hiddenModule = self:getStepModule(curStep) 74 | local gradInput = hiddenModule:updateGradInput(input, gradOutput) 75 | assert(curStep < self.step) 76 | 77 | -- buffers 78 | self._normsum = self._normsum or self.output.new() 79 | self._gradInput = self._gradInput or self.output.new() 80 | 81 | local batchSize = hiddenModule.output:size(1) 82 | 83 | -- Add gradient of norm stabilizer cost function directly to respective CopyGrad.gradInput tensors 84 | 85 | if curStep > 1 then 86 | -- then grab h[t-1] 87 | local prevHiddenModule = self:getStepModule(curStep - 1) 88 | 89 | self._normsum:resizeAs(hiddenModule.norm):copy(hiddenModule.norm) 90 | self._normsum:add(-1, prevHiddenModule.norm) 91 | self._normsum:mul(self.beta*2) 92 | self._normsum:cdiv(hiddenModule.norm) 93 | 94 | self._gradInput:mul(hiddenModule.output, 1/batchSize) 95 | self._gradInput:cmul(self._normsum:expandAs(self._gradInput)) 96 | hiddenModule.gradInput:add(self._gradInput) 97 | end 98 | 99 | if curStep < self.step-1 then 100 | local nextHiddenModule = self:getStepModule(curStep + 1) 101 | 102 | self._normsum:resizeAs(hiddenModule.norm):copy(hiddenModule.norm) 103 | self._normsum:add(-1, nextHiddenModule.norm) 104 | self._normsum:mul(self.beta*2) 105 | self._normsum:cdiv(hiddenModule.norm) 106 | 107 | self._gradInput:mul(hiddenModule.output, 1/batchSize) 108 | self._gradInput:cmul(self._normsum:expandAs(self._gradInput)) 109 | hiddenModule.gradInput:add(self._gradInput) 110 | end 111 | 112 | return 
hiddenModule.gradInput 113 | end 114 | 115 | function NS:__tostring__() 116 | return "nn.NormStabilizer" 117 | end 118 | -------------------------------------------------------------------------------- /Padding.lua: -------------------------------------------------------------------------------- 1 | local Padding, parent 2 | if nn.Padding then -- prevent name conflicts with nnx 3 | Padding, parent = nn.Padding, nn.Module 4 | else 5 | Padding, parent = torch.class('nn.Padding', 'nn.Module') 6 | end 7 | 8 | -- pad can be positive (right) negative (left) 9 | function Padding:__init(dim, pad, nInputDim, value) 10 | self.dim = dim 11 | self.pad = pad 12 | self.nInputDim = nInputDim 13 | self.value = value or 0 14 | self.outputSize = torch.LongStorage() 15 | parent.__init(self) 16 | end 17 | 18 | function Padding:updateOutput(input) 19 | self.outputSize:resize(input:dim()) 20 | self.outputSize:copy(input:size()) 21 | local dim = self.dim 22 | if self.nInputDim and input:dim() ~= self.nInputDim then 23 | dim = dim + 1 24 | end 25 | self.outputSize[dim] = self.outputSize[dim] + math.abs(self.pad) 26 | self.output:resize(self.outputSize) 27 | self.output:fill(self.value) 28 | local outputWindow 29 | if self.pad > 0 then 30 | outputWindow = self.output:narrow(dim, 1, input:size(dim)) 31 | else 32 | outputWindow = self.output:narrow(dim, 1 - self.pad, input:size(dim)) 33 | end 34 | outputWindow:copy(input) 35 | return self.output 36 | end 37 | 38 | function Padding:updateGradInput(input, gradOutput) 39 | self.gradInput:resizeAs(input) 40 | local dim = self.dim 41 | if self.nInputDim and input:dim() ~= self.nInputDim then 42 | dim = dim + 1 43 | end 44 | local gradOutputWindow 45 | if self.pad > 0 then 46 | gradOutputWindow = gradOutput:narrow(dim, 1, input:size(dim)) 47 | else 48 | gradOutputWindow = gradOutput:narrow(dim, 1 - self.pad, input:size(dim)) 49 | end 50 | self.gradInput:copy(gradOutputWindow:copy(input)) 51 | return self.gradInput 52 | end 53 | -------------------------------------------------------------------------------- /Recurrence.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Recurrence ]]-- 3 | -- A general container for implementing a recurrence. 4 | -- Unlike Recurrent, this module doesn't manage a separate input layer, 5 | -- nor does it have a startModule. Instead for the first step, it 6 | -- just forwards a zero tensor through the recurrent layer (like LSTM). 7 | -- The recurrentModule should output Tensor or table : output(t) 8 | -- given input table : {input(t), output(t-1)} 9 | ------------------------------------------------------------------------ 10 | local _ = require 'moses' 11 | local Recurrence, parent = torch.class('nn.Recurrence', 'nn.AbstractRecurrent') 12 | 13 | function Recurrence:__init(recurrentModule, outputSize, nInputDim, rho) 14 | parent.__init(self, rho or 9999) 15 | 16 | assert(_.contains({'table','torch.LongStorage','number'}, torch.type(outputSize)), "Unsupported size type") 17 | self.outputSize = torch.type(outputSize) == 'number' and {outputSize} or outputSize 18 | -- for table outputs, this is the number of dimensions in the first (left) tensor (depth-first). 
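-- An illustrative construction (the inner modules and sizes are arbitrary
-- examples, not requirements of this class):
--   local rm = nn.Sequential()             -- input is {x[t], h[t-1]}
--      :add(nn.ParallelTable()
--         :add(nn.Linear(10, 10))          -- transforms x[t]
--         :add(nn.Linear(10, 10)))         -- transforms h[t-1]
--      :add(nn.CAddTable())
--      :add(nn.Sigmoid())
--   local rnn = nn.Recurrence(rm, 10, 1)   -- outputSize = 10, nInputDim = 1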
19 | assert(torch.type(nInputDim) == 'number', "Expecting nInputDim number for arg 2") 20 | self.nInputDim = nInputDim 21 | assert(torch.isTypeOf(recurrentModule, 'nn.Module'), "Expecting recurrenModule nn.Module for arg 3") 22 | self.recurrentModule = recurrentModule 23 | 24 | -- make it work with nn.Container and nn.Decorator 25 | self.module = self.recurrentModule 26 | self.modules[1] = self.recurrentModule 27 | self.sharedClones[1] = self.recurrentModule 28 | 29 | -- just so we can know the type of this module 30 | self.typeTensor = torch.Tensor() 31 | end 32 | 33 | -- recursively creates a zero tensor (or table thereof) (or table thereof). 34 | -- This zero Tensor is forwarded as output(t=0). 35 | function Recurrence:recursiveResizeZero(tensor, size, batchSize) 36 | local isTable = torch.type(size) == 'table' 37 | if isTable and torch.type(size[1]) ~= 'number' then 38 | tensor = (torch.type(tensor) == 'table') and tensor or {} 39 | for k,v in ipairs(size) do 40 | tensor[k] = self:recursiveResizeZero(tensor[k], v, batchSize) 41 | end 42 | elseif torch.type(size) == 'torch.LongStorage' then 43 | local size_ = size:totable() 44 | tensor = torch.isTensor(tensor) and tensor or self.typeTensor.new() 45 | if batchSize then 46 | tensor:resize(batchSize, unpack(size_)) 47 | else 48 | tensor:resize(unpack(size_)) 49 | end 50 | tensor:zero() 51 | elseif isTable and torch.type(size[1]) == 'number' then 52 | tensor = torch.isTensor(tensor) and tensor or self.typeTensor.new() 53 | if batchSize then 54 | tensor:resize(batchSize, unpack(size)) 55 | else 56 | tensor:resize(unpack(size)) 57 | end 58 | tensor:zero() 59 | else 60 | error("Unknown size type : "..torch.type(size)) 61 | end 62 | return tensor 63 | end 64 | 65 | -- get the batch size. 66 | -- When input is a table, we use the first tensor (depth first). 67 | function Recurrence:getBatchSize(input, nInputDim) 68 | local nInputDim = nInputDim or self.nInputDim 69 | if torch.type(input) == 'table' then 70 | return self:getBatchSize(input[1]) 71 | else 72 | assert(torch.isTensor(input)) 73 | if input:dim() == nInputDim then 74 | return nil 75 | elseif input:dim() - 1 == nInputDim then 76 | return input:size(1) 77 | else 78 | error("inconsitent tensor dims "..input:dim()) 79 | end 80 | end 81 | end 82 | 83 | function Recurrence:getHiddenState(step, input) 84 | local prevOutput 85 | if step == 0 then 86 | if input then 87 | -- first previous output is zeros 88 | local batchSize = self:getBatchSize(input) 89 | self.zeroTensor = self:recursiveResizeZero(self.zeroTensor, self.outputSize, batchSize) 90 | end 91 | prevOutput = self.userPrevOutput or self.outputs[step] or self.zeroTensor 92 | else 93 | -- previous output of this module 94 | prevOutput = self.outputs[step] 95 | end 96 | -- call getHiddenState on recurrentModule as they may contain AbstractRecurrent instances... 97 | return {prevOutput, nn.Container.getHiddenState(self, step)} 98 | end 99 | 100 | function Recurrence:setHiddenState(step, hiddenState) 101 | assert(torch.type(hiddenState) == 'table') 102 | assert(#hiddenState >= 1) 103 | self.outputs[step] = hiddenState[1] 104 | 105 | if hiddenState[2] then 106 | -- call setHiddenState on recurrentModule as they may contain AbstractRecurrent instances... 
107 | nn.Container.setHiddenState(self, step, hiddenState[2]) 108 | end 109 | end 110 | 111 | function Recurrence:updateOutput(input) 112 | -- output(t-1) 113 | local prevOutput = self:getHiddenState(self.step-1, input)[1] 114 | 115 | -- output(t) = recurrentModule{input(t), output(t-1)} 116 | local output 117 | if self.train ~= false then 118 | self:recycle() 119 | local recurrentModule = self:getStepModule(self.step) 120 | -- the actual forward propagation 121 | output = recurrentModule:updateOutput{input, prevOutput} 122 | else 123 | output = self.recurrentModule:updateOutput{input, prevOutput} 124 | end 125 | 126 | self.outputs[self.step] = output 127 | 128 | self.output = output 129 | 130 | self.step = self.step + 1 131 | self.gradPrevOutput = nil 132 | self.updateGradInputStep = nil 133 | self.accGradParametersStep = nil 134 | 135 | return self.output 136 | end 137 | 138 | function Recurrence:getGradHiddenState(step) 139 | local gradOutput 140 | if step == self.step-1 then 141 | gradOutput = self.userNextGradOutput or self.gradOutputs[step] or self.zeroTensor 142 | else 143 | gradOutput = self.gradOutputs[step] 144 | end 145 | return {gradOutput, nn.Container.getGradHiddenState(self, step)} 146 | end 147 | 148 | function Recurrence:setGradHiddenState(step, gradHiddenState) 149 | assert(torch.type(gradHiddenState) == 'table') 150 | assert(#gradHiddenState >= 1) 151 | 152 | self.gradOutputs[step] = gradHiddenState[1] 153 | if gradHiddenState[2] then 154 | nn.Container.setGradHiddenState(self, step, gradHiddenState[2]) 155 | end 156 | end 157 | 158 | function Recurrence:_updateGradInput(input, gradOutput) 159 | assert(self.step > 1, "expecting at least one updateOutput") 160 | local step = self.updateGradInputStep - 1 161 | assert(step >= 1) 162 | 163 | -- set the output/gradOutput states of current Module 164 | local recurrentModule = self:getStepModule(step) 165 | 166 | -- backward propagate through this step 167 | local _gradOutput = self:getGradHiddenState(step)[1] 168 | self._gradOutputs[step] = nn.rnn.recursiveCopy(self._gradOutputs[step], _gradOutput) 169 | nn.rnn.recursiveAdd(self._gradOutputs[step], gradOutput) 170 | gradOutput = self._gradOutputs[step] 171 | 172 | local gradInputTable = recurrentModule:updateGradInput({input, self:getHiddenState(step-1)[1]}, gradOutput) 173 | 174 | local _ = require 'moses' 175 | self:setGradHiddenState(step-1, _.slice(gradInputTable, 2, #gradInputTable)) 176 | 177 | return gradInputTable[1] 178 | end 179 | 180 | function Recurrence:_accGradParameters(input, gradOutput, scale) 181 | local step = self.accGradParametersStep - 1 182 | assert(step >= 1) 183 | 184 | local recurrentModule = self:getStepModule(step) 185 | 186 | -- backward propagate through this step 187 | local gradOutput = self._gradOutputs[step] or self:getGradHiddenState(step)[1] 188 | recurrentModule:accGradParameters({input, self:getHiddenState(step-1)[1]}, gradOutput, scale) 189 | end 190 | 191 | Recurrence.__tostring__ = nn.Decorator.__tostring__ 192 | -------------------------------------------------------------------------------- /Recurrent.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Recurrent ]]-- 3 | -- Ref. A.: http://goo.gl/vtVGkO (Mikolov et al.) 4 | -- B. http://goo.gl/hu1Lqm 5 | -- Processes the sequence one timestep (forward/backward) at a time. 6 | -- A call to backward only keeps a log of the gradOutputs and scales. 
7 | -- Back-Propagation Through Time (BPTT) is done when updateParameters 8 | -- is called. The Module keeps a list of all previous representations 9 | -- (Module.outputs), including intermediate ones for BPTT. 10 | -- To use this module with batches, we suggest using different 11 | -- sequences of the same size within a batch and calling 12 | -- updateParameters() at the end of the Sequence. 13 | -- Note that this won't work with modules that use more than the 14 | -- output attribute to keep track of their internal state between 15 | -- forward and backward. 16 | ------------------------------------------------------------------------ 17 | assert(not nn.Recurrent, "update nnx package : luarocks install nnx") 18 | local Recurrent, parent = torch.class('nn.Recurrent', 'nn.AbstractRecurrent') 19 | 20 | function Recurrent:__init(start, input, feedback, transfer, rho, merge) 21 | parent.__init(self, rho) 22 | 23 | local ts = torch.type(start) 24 | if ts == 'torch.LongStorage' or ts == 'number' then 25 | start = nn.Add(start) 26 | elseif ts == 'table' then 27 | start = nn.Add(torch.LongStorage(start)) 28 | elseif not torch.isTypeOf(start, 'nn.Module') then 29 | error"Recurrent : expecting arg 1 of type nn.Module, torch.LongStorage, number or table" 30 | end 31 | 32 | self.startModule = start 33 | self.inputModule = input 34 | self.feedbackModule = feedback 35 | self.transferModule = transfer or nn.Sigmoid() 36 | self.mergeModule = merge or nn.CAddTable() 37 | 38 | self.modules = {self.startModule, self.inputModule, self.feedbackModule, self.transferModule, self.mergeModule} 39 | 40 | self:buildInitialModule() 41 | self:buildRecurrentModule() 42 | self.sharedClones[2] = self.recurrentModule 43 | end 44 | 45 | -- build module used for the first step (steps == 1) 46 | function Recurrent:buildInitialModule() 47 | self.initialModule = nn.Sequential() 48 | self.initialModule:add(self.inputModule:sharedClone()) 49 | self.initialModule:add(self.startModule) 50 | self.initialModule:add(self.transferModule:sharedClone()) 51 | end 52 | 53 | -- build module used for the other steps (steps > 1) 54 | function Recurrent:buildRecurrentModule() 55 | local parallelModule = nn.ParallelTable() 56 | parallelModule:add(self.inputModule) 57 | parallelModule:add(self.feedbackModule) 58 | self.recurrentModule = nn.Sequential() 59 | self.recurrentModule:add(parallelModule) 60 | self.recurrentModule:add(self.mergeModule) 61 | self.recurrentModule:add(self.transferModule) 62 | end 63 | 64 | function Recurrent:updateOutput(input) 65 | -- output(t) = transfer(feedback(output_(t-1)) + input(input_(t))) 66 | local output 67 | if self.step == 1 then 68 | output = self.initialModule:updateOutput(input) 69 | else 70 | if self.train ~= false then 71 | -- set/save the output states 72 | self:recycle() 73 | local recurrentModule = self:getStepModule(self.step) 74 | -- self.output is the previous output of this module 75 | output = recurrentModule:updateOutput{input, self.outputs[self.step-1]} 76 | else 77 | -- self.output is the previous output of this module 78 | output = self.recurrentModule:updateOutput{input, self.outputs[self.step-1]} 79 | end 80 | end 81 | 82 | self.outputs[self.step] = output 83 | self.output = output 84 | self.step = self.step + 1 85 | self.gradPrevOutput = nil 86 | self.updateGradInputStep = nil 87 | self.accGradParametersStep = nil 88 | return self.output 89 | end 90 | 91 | function Recurrent:_updateGradInput(input, gradOutput) 92 | assert(self.step > 1, "expecting at least one updateOutput") 93 | local 
step = self.updateGradInputStep - 1 94 | 95 | local gradInput 96 | 97 | if self.gradPrevOutput then 98 | self._gradOutputs[step] = nn.rnn.recursiveCopy(self._gradOutputs[step], self.gradPrevOutput) 99 | nn.rnn.recursiveAdd(self._gradOutputs[step], gradOutput) 100 | gradOutput = self._gradOutputs[step] 101 | end 102 | 103 | local output = self.outputs[step-1] 104 | if step > 1 then 105 | local recurrentModule = self:getStepModule(step) 106 | gradInput, self.gradPrevOutput = unpack(recurrentModule:updateGradInput({input, output}, gradOutput)) 107 | elseif step == 1 then 108 | gradInput = self.initialModule:updateGradInput(input, gradOutput) 109 | else 110 | error"non-positive time-step" 111 | end 112 | 113 | return gradInput 114 | end 115 | 116 | function Recurrent:_accGradParameters(input, gradOutput, scale) 117 | local step = self.accGradParametersStep - 1 118 | 119 | local gradOutput = (step == self.step-1) and gradOutput or self._gradOutputs[step] 120 | local output = self.outputs[step-1] 121 | 122 | if step > 1 then 123 | local recurrentModule = self:getStepModule(step) 124 | recurrentModule:accGradParameters({input, output}, gradOutput, scale) 125 | elseif step == 1 then 126 | self.initialModule:accGradParameters(input, gradOutput, scale) 127 | else 128 | error"non-positive time-step" 129 | end 130 | end 131 | 132 | function Recurrent:recycle() 133 | return parent.recycle(self, 1) 134 | end 135 | 136 | function Recurrent:forget() 137 | return parent.forget(self, 1) 138 | end 139 | 140 | function Recurrent:includingSharedClones(f) 141 | local modules = self.modules 142 | self.modules = {} 143 | local sharedClones = self.sharedClones 144 | self.sharedClones = nil 145 | local initModule = self.initialModule 146 | self.initialModule = nil 147 | for i,modules in ipairs{modules, sharedClones, {initModule}} do 148 | for j, module in pairs(modules) do 149 | table.insert(self.modules, module) 150 | end 151 | end 152 | local r = f() 153 | self.modules = modules 154 | self.sharedClones = sharedClones 155 | self.initialModule = initModule 156 | return r 157 | end 158 | 159 | function Recurrent:reinforce(reward) 160 | if torch.type(reward) == 'table' then 161 | -- multiple rewards, one per time-step 162 | local rewards = reward 163 | for step, reward in ipairs(rewards) do 164 | if step == 1 then 165 | self.initialModule:reinforce(reward) 166 | else 167 | local sm = self:getStepModule(step) 168 | sm:reinforce(reward) 169 | end 170 | end 171 | else 172 | -- one reward broadcast to all time-steps 173 | return self:includingSharedClones(function() 174 | return parent.reinforce(self, reward) 175 | end) 176 | end 177 | end 178 | 179 | function Recurrent:maskZero() 180 | error("Recurrent doesn't support maskZero as it uses a different ".. 181 | "module for the first time-step. Use nn.Recurrence instead.") 182 | end 183 | 184 | function Recurrent:trimZero() 185 | error("Recurrent doesn't support trimZero as it uses a different ".. 186 | "module for the first time-step. Use nn.Recurrence instead.") 187 | end 188 | 189 | function Recurrent:__tostring__() 190 | local tab = ' ' 191 | local line = '\n' 192 | local next = ' -> ' 193 | local str = torch.type(self) 194 | str = str .. ' {' .. line .. tab .. '[{input(t), output(t-1)}' 195 | for i=1,3 do 196 | str = str .. next .. '(' .. i .. ')' 197 | end 198 | str = str .. next .. 'output(t)]' 199 | 200 | local tab = ' ' 201 | local line = '\n ' 202 | local next = ' |`-> ' 203 | local ext = ' | ' 204 | local last = ' ... -> ' 205 | str = str .. line .. '(1): ' .. 
' {' .. line .. tab .. 'input(t)' 206 | str = str .. line .. tab .. next .. '(t==0): ' .. tostring(self.startModule):gsub('\n', '\n' .. tab .. ext) 207 | str = str .. line .. tab .. next .. '(t~=0): ' .. tostring(self.inputModule):gsub('\n', '\n' .. tab .. ext) 208 | str = str .. line .. tab .. 'output(t-1)' 209 | str = str .. line .. tab .. next .. tostring(self.feedbackModule):gsub('\n', line .. tab .. ext) 210 | str = str .. line .. "}" 211 | local tab = ' ' 212 | local line = '\n' 213 | local next = ' -> ' 214 | str = str .. line .. tab .. '(' .. 2 .. '): ' .. tostring(self.mergeModule):gsub(line, line .. tab) 215 | str = str .. line .. tab .. '(' .. 3 .. '): ' .. tostring(self.transferModule):gsub(line, line .. tab) 216 | str = str .. line .. '}' 217 | return str 218 | end 219 | -------------------------------------------------------------------------------- /RecurrentAttention.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ RecurrentAttention ]]-- 3 | -- Ref. A. http://papers.nips.cc/paper/5542-recurrent-models-of-visual-attention.pdf 4 | -- B. http://incompleteideas.net/sutton/williams-92.pdf 5 | -- module which takes an RNN as argument with other 6 | -- hyper-parameters such as the maximum number of steps, 7 | -- action (actions sampling module like ReinforceNormal) and 8 | ------------------------------------------------------------------------ 9 | local RecurrentAttention, parent = torch.class("nn.RecurrentAttention", "nn.AbstractSequencer") 10 | 11 | function RecurrentAttention:__init(rnn, action, nStep, hiddenSize) 12 | parent.__init(self) 13 | assert(torch.isTypeOf(action, 'nn.Module')) 14 | assert(torch.type(nStep) == 'number') 15 | assert(torch.type(hiddenSize) == 'table') 16 | assert(torch.type(hiddenSize[1]) == 'number', "Does not support table hidden layers" ) 17 | 18 | self.rnn = rnn 19 | -- we can decorate the module with a Recursor to make it AbstractRecurrent 20 | self.rnn = (not torch.isTypeOf(rnn, 'nn.AbstractRecurrent')) and nn.Recursor(rnn) or rnn 21 | 22 | -- samples an x,y actions for each example 23 | self.action = (not torch.isTypeOf(action, 'nn.AbstractRecurrent')) and nn.Recursor(action) or action 24 | self.hiddenSize = hiddenSize 25 | self.nStep = nStep 26 | 27 | self.modules = {self.rnn, self.action} 28 | 29 | self.output = {} -- rnn output 30 | self.actions = {} -- action output 31 | 32 | self.forwardActions = false 33 | 34 | self.gradHidden = {} 35 | end 36 | 37 | function RecurrentAttention:updateOutput(input) 38 | self.rnn:forget() 39 | self.action:forget() 40 | local nDim = input:dim() 41 | 42 | for step=1,self.nStep do 43 | 44 | if step == 1 then 45 | -- sample an initial starting actions by forwarding zeros through the action 46 | self._initInput = self._initInput or input.new() 47 | self._initInput:resize(input:size(1),table.unpack(self.hiddenSize)):zero() 48 | self.actions[1] = self.action:updateOutput(self._initInput) 49 | else 50 | -- sample actions from previous hidden activation (rnn output) 51 | self.actions[step] = self.action:updateOutput(self.output[step-1]) 52 | end 53 | 54 | -- rnn handles the recurrence internally 55 | local output = self.rnn:updateOutput{input, self.actions[step]} 56 | self.output[step] = self.forwardActions and {output, self.actions[step]} or output 57 | end 58 | 59 | return self.output 60 | end 61 | 62 | function RecurrentAttention:updateGradInput(input, gradOutput) 63 | assert(self.rnn.step - 1 
== self.nStep, "inconsistent rnn steps") 64 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 65 | assert(#gradOutput == self.nStep, "gradOutput should have nStep elements") 66 | 67 | -- back-propagate through time (BPTT) 68 | for step=self.nStep,1,-1 do 69 | -- 1. backward through the action layer 70 | local gradOutput_, gradAction_ = gradOutput[step] 71 | if self.forwardActions then 72 | gradOutput_, gradAction_ = unpack(gradOutput[step]) 73 | else 74 | -- Note : gradOutput is ignored by REINFORCE modules so we give a zero Tensor instead 75 | self._gradAction = self._gradAction or self.action.output.new() 76 | if not self._gradAction:isSameSizeAs(self.action.output) then 77 | self._gradAction:resizeAs(self.action.output):zero() 78 | end 79 | gradAction_ = self._gradAction 80 | end 81 | 82 | if step == self.nStep then 83 | self.gradHidden[step] = nn.rnn.recursiveCopy(self.gradHidden[step], gradOutput_) 84 | else 85 | -- gradHidden = gradOutput + gradAction 86 | nn.rnn.recursiveAdd(self.gradHidden[step], gradOutput_) 87 | end 88 | 89 | if step == 1 then 90 | -- backward through initial starting actions 91 | self.action:updateGradInput(self._initInput, gradAction_) 92 | else 93 | local gradAction = self.action:updateGradInput(self.output[step-1], gradAction_) 94 | self.gradHidden[step-1] = nn.rnn.recursiveCopy(self.gradHidden[step-1], gradAction) 95 | end 96 | 97 | -- 2. backward through the rnn layer 98 | local gradInput = self.rnn:updateGradInput({input, self.actions[step]}, self.gradHidden[step])[1] 99 | if step == self.nStep then 100 | self.gradInput:resizeAs(gradInput):copy(gradInput) 101 | else 102 | self.gradInput:add(gradInput) 103 | end 104 | end 105 | 106 | return self.gradInput 107 | end 108 | 109 | function RecurrentAttention:accGradParameters(input, gradOutput, scale) 110 | assert(self.rnn.step - 1 == self.nStep, "inconsistent rnn steps") 111 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 112 | assert(#gradOutput == self.nStep, "gradOutput should have nStep elements") 113 | 114 | -- back-propagate through time (BPTT) 115 | for step=self.nStep,1,-1 do 116 | -- 1. backward through the action layer 117 | local gradAction_ = self.forwardActions and gradOutput[step][2] or self._gradAction 118 | 119 | if step == 1 then 120 | -- backward through initial starting actions 121 | self.action:accGradParameters(self._initInput, gradAction_, scale) 122 | else 123 | self.action:accGradParameters(self.output[step-1], gradAction_, scale) 124 | end 125 | 126 | -- 2. backward through the rnn layer 127 | self.rnn:accGradParameters({input, self.actions[step]}, self.gradHidden[step], scale) 128 | end 129 | end 130 | 131 | function RecurrentAttention:accUpdateGradParameters(input, gradOutput, lr) 132 | assert(self.rnn.step - 1 == self.nStep, "inconsistent rnn steps") 133 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 134 | assert(#gradOutput == self.nStep, "gradOutput should have nStep elements") 135 | 136 | -- backward through the action layers 137 | for step=self.nStep,1,-1 do 138 | -- 1. 
backward through the action layer 139 | local gradAction_ = self.forwardActions and gradOutput[step][2] or self._gradAction 140 | 141 | if step == 1 then 142 | -- backward through initial starting actions 143 | self.action:accUpdateGradParameters(self._initInput, gradAction_, lr) 144 | else 145 | -- Note : gradOutput is ignored by REINFORCE modules so we give action.output as a dummy variable 146 | self.action:accUpdateGradParameters(self.output[step-1], gradAction_, lr) 147 | end 148 | 149 | -- 2. backward through the rnn layer 150 | self.rnn:accUpdateGradParameters({input, self.actions[step]}, self.gradHidden[step], lr) 151 | end 152 | end 153 | 154 | function RecurrentAttention:type(type) 155 | self._input = nil 156 | self._actions = nil 157 | self._crop = nil 158 | self._pad = nil 159 | self._byte = nil 160 | return parent.type(self, type) 161 | end 162 | 163 | function RecurrentAttention:__tostring__() 164 | local tab = ' ' 165 | local line = '\n' 166 | local ext = ' | ' 167 | local extlast = ' ' 168 | local last = ' ... -> ' 169 | local str = torch.type(self) 170 | str = str .. ' {' 171 | str = str .. line .. tab .. 'action : ' .. tostring(self.action):gsub(line, line .. tab .. ext) 172 | str = str .. line .. tab .. 'rnn : ' .. tostring(self.rnn):gsub(line, line .. tab .. ext) 173 | str = str .. line .. '}' 174 | return str 175 | end 176 | -------------------------------------------------------------------------------- /Recursor.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Recursor ]]-- 3 | -- Decorates module to be used within an AbstractSequencer. 4 | -- It does this by making the decorated module conform to the 5 | -- AbstractRecurrent interface (which is inherited by LSTM/Recurrent) 6 | ------------------------------------------------------------------------ 7 | local Recursor, parent = torch.class('nn.Recursor', 'nn.AbstractRecurrent') 8 | 9 | function Recursor:__init(module, rho) 10 | parent.__init(self, rho or 9999999) 11 | 12 | self.recurrentModule = module 13 | 14 | self.module = module 15 | self.modules = {module} 16 | self.sharedClones[1] = self.recurrentModule 17 | end 18 | 19 | function Recursor:updateOutput(input) 20 | local output 21 | if self.train ~= false then -- if self.train or self.train == nil then 22 | -- set/save the output states 23 | self:recycle() 24 | local recurrentModule = self:getStepModule(self.step) 25 | output = recurrentModule:updateOutput(input) 26 | else 27 | output = self.recurrentModule:updateOutput(input) 28 | end 29 | 30 | self.outputs[self.step] = output 31 | self.output = output 32 | self.step = self.step + 1 33 | self.updateGradInputStep = nil 34 | self.accGradParametersStep = nil 35 | return self.output 36 | end 37 | 38 | function Recursor:_updateGradInput(input, gradOutput) 39 | assert(self.step > 1, "expecting at least one updateOutput") 40 | local step = self.updateGradInputStep - 1 41 | assert(step >= 1) 42 | 43 | local recurrentModule = self:getStepModule(step) 44 | recurrentModule:setOutputStep(step) 45 | local gradInput = recurrentModule:updateGradInput(input, gradOutput) 46 | 47 | return gradInput 48 | end 49 | 50 | function Recursor:_accGradParameters(input, gradOutput, scale) 51 | local step = self.accGradParametersStep - 1 52 | assert(step >= 1) 53 | 54 | local recurrentModule = self:getStepModule(step) 55 | recurrentModule:setOutputStep(step) 56 | recurrentModule:accGradParameters(input, gradOutput, 
scale) 57 | end 58 | 59 | function Recursor:includingSharedClones(f) 60 | local modules = self.modules 61 | self.modules = {} 62 | local sharedClones = self.sharedClones 63 | self.sharedClones = nil 64 | for i,modules in ipairs{modules, sharedClones} do 65 | for j, module in pairs(modules) do 66 | table.insert(self.modules, module) 67 | end 68 | end 69 | local r = {f()} 70 | self.modules = modules 71 | self.sharedClones = sharedClones 72 | return unpack(r) 73 | end 74 | 75 | function Recursor:forget(offset) 76 | parent.forget(self, offset) 77 | nn.Module.forget(self) 78 | return self 79 | end 80 | 81 | function Recursor:maxBPTTstep(rho) 82 | self.rho = rho 83 | nn.Module.maxBPTTstep(self, rho) 84 | end 85 | 86 | function Recursor:getHiddenState(...) 87 | return self.modules[1]:getHiddenState(...) 88 | end 89 | 90 | function Recursor:setHiddenState(...) 91 | return self.modules[1]:setHiddenState(...) 92 | end 93 | 94 | function Recursor:getGradHiddenState(...) 95 | return self.modules[1]:getGradHiddenState(...) 96 | end 97 | 98 | function Recursor:setGradHiddenState(...) 99 | return self.modules[1]:setGradHiddenState(...) 100 | end 101 | 102 | Recursor.__tostring__ = nn.Decorator.__tostring__ 103 | -------------------------------------------------------------------------------- /Repeater.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Repeater ]]-- 3 | -- Encapsulates an AbstractRecurrent instance (rnn) which is repeatedly 4 | -- presented with the same input for rho time steps. 5 | -- The output is a table of rho outputs of the rnn. 6 | ------------------------------------------------------------------------ 7 | assert(not nn.Repeater, "update nnx package : luarocks install nnx") 8 | local Repeater, parent = torch.class('nn.Repeater', 'nn.AbstractSequencer') 9 | 10 | function Repeater:__init(module, rho) 11 | parent.__init(self) 12 | assert(torch.type(rho) == 'number', "expecting number value for arg 2") 13 | self.rho = rho 14 | self.module = (not torch.isTypeOf(module, 'nn.AbstractRecurrent')) and nn.Recursor(module) or module 15 | 16 | self.module:maxBPTTstep(rho) -- hijack rho (max number of time-steps for backprop) 17 | 18 | self.modules[1] = self.module 19 | self.output = {} 20 | end 21 | 22 | function Repeater:updateOutput(input) 23 | self.module = self.module or self.rnn -- backwards compatibility 24 | 25 | self.module:forget() 26 | -- TODO make copy outputs optional 27 | for step=1,self.rho do 28 | self.output[step] = nn.rnn.recursiveCopy(self.output[step], self.module:updateOutput(input)) 29 | end 30 | return self.output 31 | end 32 | 33 | function Repeater:updateGradInput(input, gradOutput) 34 | assert(self.module.step - 1 == self.rho, "inconsistent rnn steps") 35 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 36 | assert(#gradOutput == self.rho, "gradOutput should have rho elements") 37 | 38 | -- back-propagate through time (BPTT) 39 | for step=self.rho,1,-1 do 40 | local gradInput = self.module:updateGradInput(input, gradOutput[step]) 41 | if step == self.rho then 42 | self.gradInput = nn.rnn.recursiveCopy(self.gradInput, gradInput) 43 | else 44 | nn.rnn.recursiveAdd(self.gradInput, gradInput) 45 | end 46 | end 47 | 48 | return self.gradInput 49 | end 50 | 51 | function Repeater:accGradParameters(input, gradOutput, scale) 52 | assert(self.module.step - 1 == self.rho, "inconsistent rnn steps") 53 | assert(torch.type(gradOutput) == 
'table', "expecting gradOutput table") 54 | assert(#gradOutput == self.rho, "gradOutput should have rho elements") 55 | 56 | -- back-propagate through time (BPTT) 57 | for step=self.rho,1,-1 do 58 | self.module:accGradParameters(input, gradOutput[step], scale) 59 | end 60 | 61 | end 62 | 63 | function Repeater:maxBPTTstep(rho) 64 | self.rho = rho 65 | self.module:maxBPTTstep(rho) 66 | end 67 | 68 | function Repeater:accUpdateGradParameters(input, gradOutput, lr) 69 | assert(self.module.step - 1 == self.rho, "inconsistent rnn steps") 70 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 71 | assert(#gradOutput == self.rho, "gradOutput should have rho elements") 72 | 73 | -- back-propagate through time (BPTT) 74 | for step=self.rho,1,-1 do 75 | self.module:accUpdateGradParameters(input, gradOutput[step], lr) 76 | end 77 | end 78 | 79 | function Repeater:__tostring__() 80 | local tab = ' ' 81 | local line = '\n' 82 | local str = torch.type(self) .. ' {' .. line 83 | str = str .. tab .. '[ input, input, ..., input ]'.. line 84 | str = str .. tab .. ' V V V '.. line 85 | str = str .. tab .. tostring(self.modules[1]):gsub(line, line .. tab) .. line 86 | str = str .. tab .. ' V V V '.. line 87 | str = str .. tab .. '[output(1),output(2),...,output('..self.rho..')]' .. line 88 | str = str .. '}' 89 | return str 90 | end 91 | -------------------------------------------------------------------------------- /RepeaterCriterion.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ RepeaterCriterion ]]-- 3 | -- Applies a criterion to each of the inputs in a Table using the 4 | -- same target (the target is repeated). 5 | -- Useful for nn.Repeater and nn.Sequencer. 
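--
-- A minimal sketch (illustrative only; the decorated criterion and the shapes
-- of `outputs` and `target` are arbitrary examples):
--   local crit = nn.RepeaterCriterion(nn.ClassNLLCriterion())
--   -- `outputs` is the table of predictions produced by e.g. nn.Repeater,
--   -- `target` is a single target shared by every time-step:
--   local loss = crit:forward(outputs, target)
--   local gradOutputs = crit:backward(outputs, target)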
6 | ------------------------------------------------------------------------ 7 | assert(not nn.RepeaterCriterion, "update nnx package : luarocks install nnx") 8 | local RepeaterCriterion, parent = torch.class('nn.RepeaterCriterion', 'nn.Criterion') 9 | 10 | function RepeaterCriterion:__init(criterion) 11 | parent.__init(self) 12 | self.criterion = criterion 13 | self.gradInput = {} 14 | self.clones = {} 15 | end 16 | 17 | RepeaterCriterion.getStepCriterion = nn.SequencerCriterion.getStepCriterion 18 | 19 | function RepeaterCriterion:forward(input, target) 20 | self.output = 0 21 | local nStep 22 | if torch.isTensor(input) then 23 | nStep = input:size(1) 24 | else 25 | nStep = #input 26 | end 27 | 28 | 29 | for i=1,nStep do 30 | local criterion = self:getStepCriterion(i) 31 | self.output = self.output + criterion:forward(input[i], target) 32 | end 33 | 34 | return self.output 35 | end 36 | 37 | function RepeaterCriterion:backward(input, target) 38 | self.gradInput = {} 39 | if torch.isTensor(input) then 40 | nStep = input:size(1) 41 | else 42 | nStep = #input 43 | end 44 | 45 | local tableGradInput = {} 46 | for i=1,nStep do 47 | local criterion = self:getStepCriterion(i) 48 | tableGradInput[i] = criterion:backward(input[i], target) 49 | end 50 | 51 | if torch.isTensor(input) then 52 | self.gradInput = tableGradInput[1].new() 53 | self.gradInput:resize(nStep, unpack(tableGradInput[1]:size():totable())) 54 | for step=1,nStep do 55 | self.gradInput[step]:copy(tableGradInput[step]) 56 | end 57 | else 58 | self.gradInput = tableGradInput 59 | end 60 | 61 | return self.gradInput 62 | end 63 | -------------------------------------------------------------------------------- /SAdd.lua: -------------------------------------------------------------------------------- 1 | local SAdd, parent = torch.class('nn.SAdd', 'nn.Module') 2 | 3 | function SAdd:__init(addend, negate) 4 | parent.__init(self) 5 | 6 | self.addend = addend 7 | self.negate = (negate == nil) and false or negate 8 | end 9 | 10 | function SAdd:updateOutput(input) 11 | self.output:resizeAs(input):copy(input) 12 | self.output = self.output + self.addend 13 | if self.negate then 14 | self.output = -self.output 15 | end 16 | return self.output 17 | end 18 | 19 | function SAdd:updateGradInput(input, gradOutput) 20 | if self.gradInput then 21 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 22 | else 23 | self.gradInput = torch.Tensor():resizeAs(gradOutput):copy(gradOutput) 24 | end 25 | if self.negate then 26 | self.gradInput = -self.gradInput 27 | end 28 | return self.gradInput 29 | end -------------------------------------------------------------------------------- /SeqBRNN.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ SeqBRNN ]] -- 3 | -- Bi-directional RNN using two SeqLSTM modules. 4 | -- Input is a tensor e.g time x batch x inputdim. 5 | -- Output is a tensor of the same length e.g time x batch x outputdim. 6 | -- Applies a forward rnn to input tensor in forward order 7 | -- and applies a backward rnn in reverse order. 8 | -- Reversal of the sequence happens on the time dimension. 9 | -- For each step, the outputs of both rnn are merged together using 10 | -- the merge module (defaults to nn.CAddTable() which sums the activations). 
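--
-- A minimal sketch (illustrative only; the sizes below are arbitrary):
--   local brnn = nn.SeqBRNN(5, 10)        -- inputDim = 5, hiddenDim = 10
--   local input = torch.randn(7, 3, 5)    -- seqlen x batchsize x inputDim
--   local output = brnn:forward(input)    -- seqlen x batchsize x hiddenDim
--   -- pass batchFirst = true to use batchsize x seqlen x inputDim instead:
--   local brnn2 = nn.SeqBRNN(5, 10, true)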
11 | ------------------------------------------------------------------------ 12 | local SeqBRNN, parent = torch.class('nn.SeqBRNN', 'nn.Container') 13 | 14 | function SeqBRNN:__init(inputDim, hiddenDim, batchFirst, merge) 15 | self.forwardModule = nn.SeqLSTM(inputDim, hiddenDim) 16 | self.backwardModule = nn.SeqLSTM(inputDim, hiddenDim) 17 | self.merge = merge 18 | if not self.merge then 19 | self.merge = nn.CAddTable() 20 | end 21 | self.dim = 1 22 | local backward = nn.Sequential() 23 | backward:add(nn.SeqReverseSequence(self.dim)) -- reverse 24 | backward:add(self.backwardModule) 25 | backward:add(nn.SeqReverseSequence(self.dim)) -- unreverse 26 | 27 | local concat = nn.ConcatTable() 28 | concat:add(self.forwardModule):add(backward) 29 | 30 | local brnn = nn.Sequential() 31 | brnn:add(concat) 32 | brnn:add(self.merge) 33 | if(batchFirst) then 34 | -- Insert transposes before and after the brnn. 35 | brnn:insert(nn.Transpose({1, 2}), 1) 36 | brnn:insert(nn.Transpose({1, 2})) 37 | end 38 | 39 | parent.__init(self) 40 | 41 | self.output = torch.Tensor() 42 | self.gradInput = torch.Tensor() 43 | 44 | self.module = brnn 45 | -- so that it can be handled like a Container 46 | self.modules[1] = brnn 47 | end 48 | 49 | function SeqBRNN:updateOutput(input) 50 | self.output = self.module:updateOutput(input) 51 | return self.output 52 | end 53 | 54 | function SeqBRNN:updateGradInput(input, gradOutput) 55 | self.gradInput = self.module:updateGradInput(input, gradOutput) 56 | return self.gradInput 57 | end 58 | 59 | function SeqBRNN:accGradParameters(input, gradOutput, scale) 60 | self.module:accGradParameters(input, gradOutput, scale) 61 | end 62 | 63 | function SeqBRNN:accUpdateGradParameters(input, gradOutput, lr) 64 | self.module:accUpdateGradParameters(input, gradOutput, lr) 65 | end 66 | 67 | function SeqBRNN:sharedAccUpdateGradParameters(input, gradOutput, lr) 68 | self.module:sharedAccUpdateGradParameters(input, gradOutput, lr) 69 | end 70 | 71 | function SeqBRNN:__tostring__() 72 | if self.module.__tostring__ then 73 | return torch.type(self) .. ' @ ' .. self.module:__tostring__() 74 | else 75 | return torch.type(self) .. ' @ ' .. 
torch.type(self.module) 76 | end 77 | end -------------------------------------------------------------------------------- /SeqLSTMP.lua: -------------------------------------------------------------------------------- 1 | local SeqLSTMP, parent = torch.class('nn.SeqLSTMP', 'nn.SeqLSTM') 2 | 3 | SeqLSTMP.dpnn_parameters = {'weight', 'bias', 'weightO'} 4 | SeqLSTMP.dpnn_gradParameters = {'gradWeight', 'gradBias', 'gradWeightO'} 5 | 6 | function SeqLSTMP:__init(inputsize, hiddensize, outputsize) 7 | assert(inputsize and hiddensize and outputsize, "Expecting input, hidden and output size") 8 | local D, H, R = inputsize, hiddensize, outputsize 9 | 10 | self.weightO = torch.Tensor(H, R) 11 | self.gradWeightO = torch.Tensor(H, R) 12 | 13 | parent.__init(self, inputsize, hiddensize, outputsize) 14 | end 15 | 16 | function SeqLSTMP:reset(std) 17 | self.bias:zero() 18 | self.bias[{{self.outputsize + 1, 2 * self.outputsize}}]:fill(1) 19 | if not std then 20 | self.weight:normal(0, 1.0 / math.sqrt(self.hiddensize + self.inputsize)) 21 | self.weightO:normal(0, 1.0 / math.sqrt(self.outputsize + self.hiddensize)) 22 | else 23 | self.weight:normal(0, std) 24 | self.weightO:normal(0, std) 25 | end 26 | return self 27 | end 28 | 29 | function SeqLSTMP:adapter(t) 30 | local T, N = self._output:size(1), self._output:size(2) 31 | self._hidden = self._hidden or self.next_h.new() 32 | self._hidden:resize(T, N, self.hiddensize) 33 | 34 | self._hidden[t]:copy(self.next_h) 35 | self.next_h:resize(N,self.outputsize) 36 | self.next_h:mm(self._hidden[t], self.weightO) 37 | end 38 | 39 | function SeqLSTMP:gradAdapter(scale, t) 40 | self.buffer3:resizeAs(self.grad_next_h):copy(self.grad_next_h) 41 | 42 | self.gradWeightO:addmm(scale, self._hidden[t]:t(), self.grad_next_h) 43 | self.grad_next_h:resize(self._output:size(2), self.hiddensize) 44 | self.grad_next_h:mm(self.buffer3, self.weightO:t()) 45 | end 46 | 47 | function SeqLSTMP:parameters() 48 | return {self.weight, self.bias, self.weightO}, {self.gradWeight, self.gradBias, self.gradWeightO} 49 | end 50 | 51 | function SeqLSTMP:accUpdateGradParameters(input, gradOutput, lr) 52 | error"accUpdateGradParameters not implemented for SeqLSTMP" 53 | end 54 | 55 | function SeqLSTMP:toFastLSTM() 56 | error"toFastLSTM not supported for SeqLSTMP" 57 | end 58 | -------------------------------------------------------------------------------- /SeqReverseSequence.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ SeqReverseSequence ]] -- 3 | -- Reverses a sequence on a given dimension. 
4 | -- Example: Given a tensor of torch.Tensor({{1,2,3,4,5}, {6,7,8,9,10}}) 5 | -- nn.SeqReverseSequence(1):forward(tensor) would give: torch.Tensor({{6,7,8,9,10},{1,2,3,4,5}}) 6 | ------------------------------------------------------------------------ 7 | local SeqReverseSequence, parent = torch.class("nn.SeqReverseSequence", "nn.Module") 8 | 9 | function SeqReverseSequence:__init(dim) 10 | parent.__init(self) 11 | self.output = torch.Tensor() 12 | self.gradInput = torch.Tensor() 13 | assert(dim, "Must specify dimension to reverse sequence over") 14 | assert(dim <= 3, "Dimension has to be no greater than 3 (Only supports up to a 3D Tensor).") 15 | self.dim = dim 16 | end 17 | 18 | function SeqReverseSequence:reverseOutput(input) 19 | self.output:resizeAs(input) 20 | self.outputIndices = self.outputIndices or ((torch.type(input) == 'torch.CudaTensor') and torch.CudaTensor() or (torch.type(input) == 'torch.ClTensor') and torch.ClTensor() or torch.LongTensor()) 21 | self.outputIndices:resize(input:size()) 22 | local T = input:size(1) 23 | for x = 1, T do 24 | self.outputIndices:narrow(1, x, 1):fill(T - x + 1) 25 | end 26 | self.output:gather(input, 1, self.outputIndices) 27 | end 28 | 29 | function SeqReverseSequence:updateOutput(input) 30 | if (self.dim == 1) then 31 | self:reverseOutput(input) 32 | end 33 | if (self.dim == 2) then 34 | input = input:transpose(1, 2) 35 | self:reverseOutput(input) 36 | self.output = self.output:transpose(1, 2) 37 | end 38 | if (self.dim == 3) then 39 | input = input:transpose(1, 3) 40 | self:reverseOutput(input) 41 | self.output = self.output:transpose(1, 3) 42 | end 43 | return self.output 44 | end 45 | 46 | function SeqReverseSequence:reverseGradOutput(gradOutput) 47 | self.gradInput:resizeAs(gradOutput) 48 | self.gradIndices = self.gradIndices or ((torch.type(gradOutput) == 'torch.CudaTensor') and torch.CudaTensor() or (torch.type(gradOutput) == 'torch.ClTensor') and torch.ClTensor() or torch.LongTensor()) 49 | self.gradIndices:resize(gradOutput:size()) 50 | local T = gradOutput:size(1) 51 | for x = 1, T do 52 | self.gradIndices:narrow(1, x, 1):fill(T - x + 1) 53 | end 54 | self.gradInput:gather(gradOutput, 1, self.gradIndices) 55 | end 56 | 57 | function SeqReverseSequence:updateGradInput(inputTable, gradOutput) 58 | if (self.dim == 1) then 59 | self:reverseGradOutput(gradOutput) 60 | end 61 | if (self.dim == 2) then 62 | gradOutput = gradOutput:transpose(1, 2) 63 | self:reverseGradOutput(gradOutput) 64 | self.gradInput = self.gradInput:transpose(1, 2) 65 | end 66 | if (self.dim == 3) then 67 | gradOutput = gradOutput:transpose(1, 3) 68 | self:reverseGradOutput(gradOutput) 69 | self.gradInput = self.gradInput:transpose(1, 3) 70 | end 71 | return self.gradInput 72 | end 73 | 74 | function SeqReverseSequence:type(type, typecache) 75 | if type then 76 | self.outputIndices = nil 77 | self.gradIndices = nil 78 | end 79 | return parent.type(self, type, typecache) 80 | end 81 | 82 | function SeqReverseSequence:clearState() 83 | self.output:set() 84 | self.gradInput:set() 85 | self.outputIndices = nil 86 | self.gradIndices = nil 87 | end 88 | -------------------------------------------------------------------------------- /Sequencer.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Sequencer ]]-- 3 | -- Encapsulates a Module. 4 | -- Input is a sequence (a table) of tensors. 5 | -- Output is a sequence (a table) of tensors of the same length. 
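-- A minimal sketch (illustrative only; the module choice and sizes below are
-- arbitrary examples):
--   local seq = nn.Sequencer(nn.FastLSTM(5, 10))
--   local inputs = {torch.randn(3, 5), torch.randn(3, 5)}  -- 2 steps, batch of 3
--   local outputs = seq:forward(inputs)  -- table of 2 tensors of size 3x10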
6 | -- Applies the module to each element in the sequence. 7 | -- Handles both recurrent modules and non-recurrent modules. 8 | -- The sequences in a batch must have the same size. 9 | -- But the sequence length of each batch can vary. 10 | ------------------------------------------------------------------------ 11 | assert(not nn.Sequencer, "update nnx package : luarocks install nnx") 12 | local Sequencer, parent = torch.class('nn.Sequencer', 'nn.AbstractSequencer') 13 | local _ = require 'moses' 14 | 15 | function Sequencer:__init(module) 16 | parent.__init(self) 17 | if not torch.isTypeOf(module, 'nn.Module') then 18 | error"Sequencer: expecting nn.Module instance at arg 1" 19 | end 20 | 21 | -- we can decorate the module with a Recursor to make it AbstractRecurrent 22 | self.module = (not torch.isTypeOf(module, 'nn.AbstractRecurrent')) and nn.Recursor(module) or module 23 | -- backprop through time (BPTT) will be done online (in reverse order of forward) 24 | self.modules = {self.module} 25 | 26 | self.output = {} 27 | self.tableoutput = {} 28 | self.tablegradInput = {} 29 | 30 | -- table of buffers used for evaluation 31 | self._output = {} 32 | -- so that these buffers aren't serialized : 33 | local _ = require 'moses' 34 | self.dpnn_mediumEmpty = _.clone(self.dpnn_mediumEmpty) 35 | table.insert(self.dpnn_mediumEmpty, '_output') 36 | -- default is to forget previous inputs before each forward() 37 | self._remember = 'neither' 38 | end 39 | 40 | function Sequencer:updateOutput(input) 41 | local nStep 42 | if torch.isTensor(input) then 43 | nStep = input:size(1) 44 | else 45 | assert(torch.type(input) == 'table', "expecting input table") 46 | nStep = #input 47 | end 48 | 49 | -- Note that the Sequencer hijacks the rho attribute of the rnn 50 | self.module:maxBPTTstep(nStep) 51 | if self.train ~= false then 52 | -- TRAINING 53 | if not (self._remember == 'train' or self._remember == 'both') then 54 | self.module:forget() 55 | end 56 | 57 | self.tableoutput = {} 58 | for step=1,nStep do 59 | self.tableoutput[step] = self.module:updateOutput(input[step]) 60 | end 61 | 62 | if torch.isTensor(input) then 63 | self.output = torch.isTensor(self.output) and self.output or self.tableoutput[1].new() 64 | self.output:resize(nStep, unpack(self.tableoutput[1]:size():totable())) 65 | for step=1,nStep do 66 | self.output[step]:copy(self.tableoutput[step]) 67 | end 68 | else 69 | self.output = self.tableoutput 70 | end 71 | else 72 | -- EVALUATION 73 | if not (self._remember == 'eval' or self._remember == 'both') then 74 | self.module:forget() 75 | end 76 | -- during evaluation, recurrent modules reuse memory (i.e. 
outputs) 77 | -- so we need to copy each output into our own table or tensor 78 | if torch.isTensor(input) then 79 | for step=1,nStep do 80 | local output = self.module:updateOutput(input[step]) 81 | if step == 1 then 82 | self.output = torch.isTensor(self.output) and self.output or output.new() 83 | self.output:resize(nStep, unpack(output:size():totable())) 84 | end 85 | self.output[step]:copy(output) 86 | end 87 | else 88 | for step=1,nStep do 89 | self.tableoutput[step] = nn.rnn.recursiveCopy( 90 | self.tableoutput[step] or table.remove(self._output, 1), 91 | self.module:updateOutput(input[step]) 92 | ) 93 | end 94 | -- remove extra output tensors (save for later) 95 | for i=nStep+1,#self.tableoutput do 96 | table.insert(self._output, self.tableoutput[i]) 97 | self.tableoutput[i] = nil 98 | end 99 | self.output = self.tableoutput 100 | end 101 | end 102 | 103 | return self.output 104 | end 105 | 106 | function Sequencer:updateGradInput(input, gradOutput) 107 | local nStep 108 | if torch.isTensor(input) then 109 | assert(torch.isTensor(gradOutput), "expecting gradOutput Tensor since input is a Tensor") 110 | assert(gradOutput:size(1) == input:size(1), "gradOutput should have as many elements as input") 111 | nStep = input:size(1) 112 | else 113 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 114 | assert(#gradOutput == #input, "gradOutput should have as many elements as input") 115 | nStep = #input 116 | end 117 | 118 | -- back-propagate through time 119 | self.tablegradinput = {} 120 | for step=nStep,1,-1 do 121 | self.tablegradinput[step] = self.module:updateGradInput(input[step], gradOutput[step]) 122 | end 123 | 124 | if torch.isTensor(input) then 125 | self.gradInput = torch.isTensor(self.gradInput) and self.gradInput or self.tablegradinput[1].new() 126 | self.gradInput:resize(nStep, unpack(self.tablegradinput[1]:size():totable())) 127 | for step=1,nStep do 128 | self.gradInput[step]:copy(self.tablegradinput[step]) 129 | end 130 | else 131 | self.gradInput = self.tablegradinput 132 | end 133 | 134 | return self.gradInput 135 | end 136 | 137 | function Sequencer:accGradParameters(input, gradOutput, scale) 138 | local nStep 139 | if torch.isTensor(input) then 140 | assert(torch.isTensor(gradOutput), "expecting gradOutput Tensor since input is a Tensor") 141 | assert(gradOutput:size(1) == input:size(1), "gradOutput should have as many elements as input") 142 | nStep = input:size(1) 143 | else 144 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 145 | assert(#gradOutput == #input, "gradOutput should have as many elements as input") 146 | nStep = #input 147 | end 148 | 149 | -- back-propagate through time 150 | for step=nStep,1,-1 do 151 | self.module:accGradParameters(input[step], gradOutput[step], scale) 152 | end 153 | end 154 | 155 | function Sequencer:accUpdateGradParameters(inputTable, gradOutputTable, lr) 156 | error"Not Implemented" 157 | end 158 | 159 | function Sequencer:training() 160 | if self.train == false then 161 | -- forget at the start of each training 162 | self:forget() 163 | -- empty temporary output table 164 | self._output = {} 165 | -- empty output table (tensor mem was managed by seq) 166 | self.tableoutput = nil 167 | end 168 | parent.training(self) 169 | end 170 | 171 | function Sequencer:evaluate() 172 | if self.train ~= false then 173 | -- forget at the start of each evaluation 174 | self:forget() 175 | -- empty output table (tensor mem was managed by rnn) 176 | self.tableoutput = {} 177 | end 178 | 
parent.evaluate(self) 179 | assert(self.train == false) 180 | end 181 | 182 | function Sequencer:clearState() 183 | if torch.isTensor(self.output) then 184 | self.output:set() 185 | self.gradInput:set() 186 | else 187 | self.output = {} 188 | self.gradInput = {} 189 | end 190 | self._output = {} 191 | self.tableoutput = {} 192 | self.tablegradinput = {} 193 | self.module:clearState() 194 | end 195 | 196 | Sequencer.__tostring__ = nn.Decorator.__tostring__ 197 | -------------------------------------------------------------------------------- /SequencerCriterion.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ SequencerCriterion ]]-- 3 | -- Applies a criterion to each of the inputs and targets in the 4 | -- corresponding input and target Tables. 5 | -- Useful for nn.Repeater and nn.Sequencer. 6 | -- WARNING : assumes that the decorated criterion is stateless, i.e. 7 | -- the backward doesn't need to be preceded by a commensurate forward. 8 | ------------------------------------------------------------------------ 9 | local SequencerCriterion, parent = torch.class('nn.SequencerCriterion', 'nn.Criterion') 10 | 11 | function SequencerCriterion:__init(criterion, sizeAverage) 12 | parent.__init(self) 13 | self.criterion = criterion 14 | if torch.isTypeOf(criterion, 'nn.ModuleCriterion') then 15 | error("SequencerCriterion shouldn't decorate a ModuleCriterion. ".. 16 | "Instead, try the other way around : ".. 17 | "ModuleCriterion decorates a SequencerCriterion. ".. 18 | "Its modules can also be similarly decorated with a Sequencer.") 19 | end 20 | if sizeAverage ~= nil then 21 | self.sizeAverage = sizeAverage 22 | else 23 | self.sizeAverage = false 24 | end 25 | self.clones = {} 26 | self.gradInput = {} 27 | end 28 | 29 | function SequencerCriterion:getStepCriterion(step) 30 | assert(step, "expecting step at arg 1") 31 | local criterion = self.clones[step] 32 | if not criterion then 33 | criterion = self.criterion:clone() 34 | self.clones[step] = criterion 35 | end 36 | return criterion 37 | end 38 | 39 | function SequencerCriterion:updateOutput(input, target) 40 | self.output = 0 41 | local nStep 42 | if torch.isTensor(input) then 43 | assert(torch.isTensor(target), "expecting target Tensor since input is a Tensor") 44 | assert(target:size(1) == input:size(1), "target should have as many elements as input") 45 | nStep = input:size(1) 46 | else 47 | assert(torch.type(target) == 'table', "expecting target table") 48 | assert(#target == #input, "target should have as many elements as input") 49 | nStep = #input 50 | end 51 | 52 | 53 | for i=1,nStep do 54 | local criterion = self:getStepCriterion(i) 55 | self.output = self.output + criterion:forward(input[i], target[i]) 56 | end 57 | 58 | if self.sizeAverage then 59 | self.output = self.output / nStep 60 | end 61 | 62 | return self.output 63 | end 64 | 65 | function SequencerCriterion:updateGradInput(input, target) 66 | self.gradInput = {} 67 | local nStep 68 | if torch.isTensor(input) then 69 | assert(torch.isTensor(target), "expecting target Tensor since input is a Tensor") 70 | assert(target:size(1) == input:size(1), "target should have as many elements as input") 71 | nStep = input:size(1) 72 | else 73 | assert(torch.type(target) == 'table', "expecting gradOutput table") 74 | assert(#target == #input, "target should have as many elements as input") 75 | nStep = #input 76 | end 77 | 78 | local tableGradInput = {} 79 | for 
i=1,nStep do 80 | local criterion = self:getStepCriterion(i) 81 | tableGradInput[i] = criterion:backward(input[i], target[i]) 82 | 83 | if self.sizeAverage then 84 | local function table_div(output, scalar) 85 | if torch.type(output) == 'table' then 86 | for j=1,#output do 87 | table_div(output[j], scalar) 88 | end 89 | else 90 | output:div(scalar) 91 | end 92 | end 93 | table_div(tableGradInput[i], nStep) 94 | end 95 | end 96 | 97 | if torch.isTensor(input) then 98 | self.gradInput = tableGradInput[1].new() 99 | self.gradInput:resize(nStep, unpack(tableGradInput[1]:size():totable())) 100 | for step=1,nStep do 101 | self.gradInput[step]:copy(tableGradInput[step]) 102 | end 103 | else 104 | self.gradInput = tableGradInput 105 | end 106 | 107 | return self.gradInput 108 | end 109 | -------------------------------------------------------------------------------- /TrimZero.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ TrimZero ]]-- 3 | -- Author: Jin-Hwa Kim 4 | -- License: LICENSE.2nd.txt 5 | 6 | -- Decorator that zeroes the output rows of the encapsulated module 7 | -- for commensurate input rows which are tensors of zeros 8 | 9 | -- The only difference from `MaskZero` is that it reduces computational costs 10 | -- by varying the batch size, if any, when sequences of varying length 11 | -- are provided in the input. Notice that when the lengths are consistent, 12 | -- `MaskZero` will be faster, because `TrimZero` adds overhead of its own. 13 | 14 | -- In short, the result is the same as `MaskZero`'s; however, `TrimZero` is 15 | -- faster than `MaskZero` only when the sequence lengths vary considerably. 16 | -- In practice, e.g. in language modeling, `TrimZero` is expected to be about 17 | -- 30% faster than `MaskZero`. (You can test this using `test/test_trimzero.lua`.)
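-- A rough usage sketch (sizes are hypothetical; TrimZero follows the same calling
-- convention as `MaskZero`, i.e. nn.TrimZero(module, nInputDim)) :
--
--    rnn = nn.Sequencer(nn.TrimZero(nn.FastLSTM(10, 10), 1))
--
-- Rows of zeros in the input (e.g. right-padded variable-length sequences) still
-- produce rows of zeros in the output, but those rows are trimmed from the batch
-- before the decorated module's forward/backward instead of merely masked after.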
18 | ------------------------------------------------------------------------ 19 | local TrimZero, parent = torch.class("nn.TrimZero", "nn.MaskZero") 20 | 21 | require 'torchx' 22 | 23 | function TrimZero:__init(module, nInputDim, silent) 24 | parent.__init(self, module, nInputDim, silent) 25 | if (torch.typename(module)=='nn.GRU' or torch.typename(module)=='nn.LSTM' or torch.typename(module)=='nn.FastLSTM') and module.p ~= 0 then 26 | assert(module.mono, 'TrimZero + Bayesian RNN needs `mono` option!') 27 | end 28 | self.temp = torch.Tensor() 29 | self.gradTemp = torch.Tensor() 30 | end 31 | 32 | function TrimZero:recursiveMask(output, input, mask) 33 | if torch.type(input) == 'table' then 34 | output = torch.type(output) == 'table' and output or {} 35 | for k,v in ipairs(input) do 36 | output[k], mask = self:recursiveMask(output[k], v, mask) 37 | end 38 | else 39 | assert(torch.isTensor(input)) 40 | output = torch.isTensor(output) and output or input.new() 41 | 42 | -- make sure mask has the same dimension as the input tensor 43 | if torch.type(mask) ~= 'torch.LongTensor' then 44 | local inputSize = input:size():fill(1) 45 | assert(self.nInputDim) 46 | if self.batchmode then 47 | inputSize[1] = input:size(1) 48 | end 49 | mask:resize(inputSize) 50 | end 51 | 52 | -- build mask 53 | if self.batchmode then 54 | assert(torch.find, 'install torchx package : luarocks install torchx') 55 | -- use torch.find to convert mask from onehot to indices 56 | if torch.type(mask) ~= 'torch.LongTensor' then 57 | if torch.type(mask) == 'torch.CudaTensor' then 58 | self._maskbyte = self._maskbyte or torch.ByteTensor() 59 | self._maskbyte:resize(mask:size()):copy(mask) 60 | mask = self._maskbyte 61 | end 62 | mask = torch.LongTensor(torch.find(mask, 0)) 63 | end 64 | self._maskindices = mask 65 | if mask:dim() > 0 then 66 | output:index(input, 1, mask) 67 | else 68 | output:index(input, 1, torch.LongTensor{1}):zero() 69 | end 70 | else 71 | if mask:dim() == 0 or mask:view(-1)[1] == 1 then 72 | output:resize(input:size()):zero() 73 | else 74 | output:resize(input:size()):copy(input) 75 | end 76 | end 77 | end 78 | return output, mask 79 | end 80 | 81 | function TrimZero:recursiveUnMask(output, input, mask) 82 | if torch.type(input) == 'table' then 83 | output = torch.type(output) == 'table' and output or {} 84 | for k,v in ipairs(input) do 85 | output[k] = self:recursiveUnMask(output[k], v, mask) 86 | end 87 | else 88 | assert(torch.isTensor(input)) 89 | output = torch.isTensor(output) and output or input.new() 90 | 91 | -- make sure output has the same dimension as the mask 92 | local inputSize = input:size() 93 | if self.batchmode then 94 | inputSize[1] = mask:size(1) 95 | end 96 | output:resize(inputSize):zero() 97 | 98 | -- build mask 99 | if self.batchmode then 100 | assert(self._maskindices) 101 | mask = self._maskindices 102 | if mask:dim() > 0 then 103 | output:indexCopy(1, mask, input) 104 | end 105 | else 106 | if mask:view(-1)[1] == 0 then 107 | output:copy(input) 108 | end 109 | end 110 | end 111 | return output 112 | end 113 | 114 | function TrimZero:updateOutput(input) 115 | -- recurrent module input is always the first one 116 | local rmi = self:recursiveGetFirst(input):contiguous() 117 | if rmi:dim() == self.nInputDim then 118 | self.batchmode = false 119 | rmi = rmi:view(-1) -- collapse dims 120 | elseif rmi:dim() - 1 == self.nInputDim then 121 | self.batchmode = true 122 | rmi = rmi:view(rmi:size(1), -1) -- collapse non-batch dims 123 | else 124 | error("nInputDim error: "..rmi:dim()..", 
"..self.nInputDim) 125 | end 126 | 127 | -- build mask 128 | local vectorDim = rmi:dim() 129 | self._zeroMask = self._zeroMask or rmi.new() 130 | self._zeroMask:norm(rmi, 2, vectorDim) 131 | self.zeroMask = self.zeroMask or ((torch.type(rmi) == 'torch.CudaTensor') and torch.CudaTensor() or torch.ByteTensor()) 132 | self._zeroMask.eq(self.zeroMask, self._zeroMask, 0) 133 | 134 | -- forward through decorated module 135 | self.temp = self:recursiveMask(self.temp, input, self.zeroMask) 136 | output = self.modules[1]:updateOutput(self.temp) 137 | self.output = self:recursiveUnMask(self.output, output, self.zeroMask, true) 138 | 139 | return self.output 140 | end 141 | 142 | function TrimZero:updateGradInput(input, gradOutput) 143 | self.temp = self:recursiveMask(self.temp, input, self.zeroMask) 144 | self.gradTemp = self:recursiveMask(self.gradTemp, gradOutput, self.zeroMask) 145 | 146 | local gradInput = self.modules[1]:updateGradInput(self.temp, self.gradTemp) 147 | 148 | self.gradInput = self:recursiveUnMask(self.gradInput, gradInput, self.zeroMask) 149 | 150 | return self.gradInput 151 | end 152 | 153 | function TrimZero:accGradParameters(input, gradOutput, scale) 154 | self.temp = self:recursiveMask(self.temp, input, self.zeroMask) 155 | self.modules[1]:accGradParameters(self.temp, gradOutput, scale) 156 | end 157 | -------------------------------------------------------------------------------- /ZeroGrad.lua: -------------------------------------------------------------------------------- 1 | local ZeroGrad, parent 2 | if nn.ZeroGrad then -- prevent name conflicts with nnx 3 | ZeroGrad, parent = nn.ZeroGrad, nn.Module 4 | else 5 | ZeroGrad, parent = torch.class('nn.ZeroGrad', 'nn.Module') 6 | end 7 | 8 | local function recursiveZero(t1,t2) 9 | if torch.type(t2) == 'table' then 10 | t1 = (torch.type(t1) == 'table') and t1 or {t1} 11 | for key,_ in pairs(t2) do 12 | t1[key], t2[key] = recursiveZero(t1[key], t2[key]) 13 | end 14 | elseif torch.isTensor(t2) then 15 | t1 = torch.isTensor(t1) and t1 or t2.new() 16 | t1:resizeAs(t2):zero() 17 | else 18 | error("expecting nested tensors or tables. Got ".. 19 | torch.type(t1).." and "..torch.type(t2).." instead") 20 | end 21 | return t1, t2 22 | end 23 | 24 | function ZeroGrad:updateOutput(input) 25 | self.output:set(input) 26 | return self.output 27 | end 28 | 29 | -- the gradient is simply zeroed. 30 | -- useful when you don't want to backpropgate through certain paths. 
31 | function ZeroGrad:updateGradInput(input, gradOutput) 32 | self.gradInput = recursiveZero(self.gradInput, gradOutput) 33 | return self.gradInput 34 | end 35 | -------------------------------------------------------------------------------- /doc/article/ff-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}input\ =\ \{torch.\textbf{randn}(3,4),\ torch.\textbf{randn}(3,4),\ torch.\textbf{randn}(3,4)\} \\ 4 | \mbox{}\textbf{rnn:forward}(input[1]) \\ 5 | \mbox{}\textbf{rnn:forward}(input[2]) \\ 6 | \mbox{}\textbf{rnn:forward}(input[3]) 7 | -------------------------------------------------------------------------------- /doc/article/ff.lua: -------------------------------------------------------------------------------- 1 | input = {torch.randn(3,4), torch.randn(3,4), torch.randn(3,4)} 2 | rnn:forward(input[1]) 3 | rnn:forward(input[2]) 4 | rnn:forward(input[3]) -------------------------------------------------------------------------------- /doc/article/ff2-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}seq\ =\ nn.\textbf{Sequencer}(rnn) \\ 4 | \mbox{}\textbf{seq:forward}(input) 5 | -------------------------------------------------------------------------------- /doc/article/ff2.lua: -------------------------------------------------------------------------------- 1 | seq = nn.Sequencer(rnn) 2 | seq:forward(input) -------------------------------------------------------------------------------- /doc/article/lm-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}input\ =\ \{\} \\ 4 | \mbox{}\textbf{for}\ i=1,rho\ \textbf{do} \\ 5 | \mbox{}\ \ \ table.\textbf{insert}(input,\ torch.\textbf{Tensor}(batchSize):\textbf{random}(1,nIndex)) \\ 6 | \mbox{}\textbf{end} \\ 7 | \mbox{}output\ =\ \textbf{rnn:forward}(input) \\ 8 | \mbox{}\textbf{assert}(\#output\ ==\ \#input) 9 | -------------------------------------------------------------------------------- /doc/article/lm.lua: -------------------------------------------------------------------------------- 1 | input = {} 2 | for i=1,rho do 3 | table.insert(input, torch.Tensor(batchSize):random(1,nIndex)) 4 | end 5 | output = rnn:forward(input) 6 | assert(#output == #input) -------------------------------------------------------------------------------- /doc/article/lstm-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}module\ =\ nn.\textbf{LSTM}(inputSize,\ outputSize,\ [rho]) 4 | -------------------------------------------------------------------------------- /doc/article/lstm.lua: -------------------------------------------------------------------------------- 1 | module = nn.LSTM(inputSize, outputSize, [rho]) -------------------------------------------------------------------------------- /doc/article/mlp-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | 
\noindent 3 | \mbox{}mlp\ =\ nn.\textbf{Sequential}() \\ 4 | \mbox{}\textbf{mlp:add}(nn.\textbf{Convert}(\texttt{'bchw'},\ \texttt{'bf'}))\ \textit{-\/-\ collapse\ 3D\ to\ 1D} \\ 5 | \mbox{}\textbf{mlp:add}(nn.\textbf{Linear}(1*28*28,\ 200)) \\ 6 | \mbox{}\textbf{mlp:add}(nn.\textbf{Tanh}()) \\ 7 | \mbox{}\textbf{mlp:add}(nn.\textbf{Linear}(200,\ 200)) \\ 8 | \mbox{}\textbf{mlp:add}(nn.\textbf{Tanh}())\ \\ 9 | \mbox{}\textbf{mlp:add}(nn.\textbf{Linear}(200,\ 10)) \\ 10 | \mbox{}\textbf{mlp:add}(nn.\textbf{LogSoftMax}())\ \textit{-\/-\ for\ classification\ problems} 11 | -------------------------------------------------------------------------------- /doc/article/mlp.lua: -------------------------------------------------------------------------------- 1 | mlp = nn.Sequential() 2 | mlp:add(nn.Convert('bchw', 'bf')) -- collapse 3D to 1D 3 | mlp:add(nn.Linear(1*28*28, 200)) 4 | mlp:add(nn.Tanh()) 5 | mlp:add(nn.Linear(200, 200)) 6 | mlp:add(nn.Tanh()) 7 | mlp:add(nn.Linear(200, 10)) 8 | mlp:add(nn.LogSoftMax()) -- for classification problems -------------------------------------------------------------------------------- /doc/article/nll-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}nll\ =\ nn.\textbf{ClassNLLCriterion}() 4 | -------------------------------------------------------------------------------- /doc/article/nll.lua: -------------------------------------------------------------------------------- 1 | nll = nn.ClassNLLCriterion() -------------------------------------------------------------------------------- /doc/article/ram-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}ram\ =\ nn.\textbf{RecurrentAttention}(rnn,\ action,\ nStep,\ hiddenSize) 4 | -------------------------------------------------------------------------------- /doc/article/ram.lua: -------------------------------------------------------------------------------- 1 | ram = nn.RecurrentAttention(rnn, action, nStep, hiddenSize) -------------------------------------------------------------------------------- /doc/article/rec-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}rec\ =\ nn.\textbf{Recursor}(module[,\ rho]) 4 | -------------------------------------------------------------------------------- /doc/article/rec.lua: -------------------------------------------------------------------------------- 1 | rec = nn.Recursor(module[, rho]) -------------------------------------------------------------------------------- /doc/article/rec2-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}lstm\ =\ nn.\textbf{Sequential}() \\ 4 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{LSTM}(100,100))) \\ 5 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{LSTM}(100,100))) 6 | -------------------------------------------------------------------------------- /doc/article/rec2.lua: -------------------------------------------------------------------------------- 
1 | lstm = nn.Sequential() 2 | :add(nn.Sequencer(nn.LSTM(100,100))) 3 | :add(nn.Sequencer(nn.LSTM(100,100))) -------------------------------------------------------------------------------- /doc/article/rec3-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}lstm\ =\ nn.\textbf{Sequencer}( \\ 4 | \mbox{}\ \ \ nn.\textbf{Recursor}( \\ 5 | \mbox{}\ \ \ \ \ \ nn.\textbf{Sequential}() \\ 6 | \mbox{}\ \ \ \ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 7 | \mbox{}\ \ \ \ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 8 | \mbox{}\ \ \ \ \ \ ) \\ 9 | \mbox{}\ \ \ ) 10 | -------------------------------------------------------------------------------- /doc/article/rec3.lua: -------------------------------------------------------------------------------- 1 | lstm = nn.Sequencer( 2 | nn.Recursor( 3 | nn.Sequential() 4 | :add(nn.LSTM(100,100)) 5 | :add(nn.LSTM(100,100)) 6 | ) 7 | ) -------------------------------------------------------------------------------- /doc/article/rec4-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}lstm\ =\ nn.\textbf{Sequencer}( \\ 4 | \mbox{}\ \ \ nn.\textbf{Sequential}() \\ 5 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 6 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 7 | \mbox{}\ \ \ ) 8 | -------------------------------------------------------------------------------- /doc/article/rec4.lua: -------------------------------------------------------------------------------- 1 | lstm = nn.Sequencer( 2 | nn.Sequential() 3 | :add(nn.LSTM(100,100)) 4 | :add(nn.LSTM(100,100)) 5 | ) -------------------------------------------------------------------------------- /doc/article/rec5-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}lstm\ =\ nn.\textbf{Sequencer}( \\ 4 | \mbox{}\ \ \ nn.\textbf{Sequential}() \\ 5 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 6 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{Linear}(100,100)) \\ 7 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 8 | \mbox{}\ \ \ ) 9 | -------------------------------------------------------------------------------- /doc/article/rec5.lua: -------------------------------------------------------------------------------- 1 | lstm = nn.Sequencer( 2 | nn.Sequential() 3 | :add(nn.LSTM(100,100)) 4 | :add(nn.Linear(100,100)) 5 | :add(nn.LSTM(100,100)) 6 | ) -------------------------------------------------------------------------------- /doc/article/recurrence-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}rnn\ =\ nn.\textbf{Recurrence}(module,\ outputSize,\ nInputDim,\ [rho]) 4 | -------------------------------------------------------------------------------- /doc/article/recurrence.lua: -------------------------------------------------------------------------------- 1 | rnn = nn.Recurrence(module, outputSize, nInputDim, [rho]) 
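-- A minimal sketch of a concrete `module` argument (hypothetical sizes), mirroring
-- the recurrent module used in rnnlm.lua further on : it maps the table
-- {input[t], output[t-1]} to output[t].
nIndex, hiddenSize = 10000, 100
rm = nn.Sequential()
   :add(nn.ParallelTable()
      :add(nn.LookupTable(nIndex, hiddenSize))
      :add(nn.Linear(hiddenSize, hiddenSize)))
   :add(nn.CAddTable())
   :add(nn.Sigmoid())
rnn = nn.Recurrence(rm, hiddenSize, 1)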
-------------------------------------------------------------------------------- /doc/article/repeater-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}r\ =\ nn.\textbf{Repeater}(module,\ nStep) 4 | -------------------------------------------------------------------------------- /doc/article/repeater.lua: -------------------------------------------------------------------------------- 1 | r = nn.Repeater(module, nStep) -------------------------------------------------------------------------------- /doc/article/rnn-example-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}\textit{-\/-\ generate\ some\ dummy\ inputs\ and\ gradOutputs\ sequences} \\ 4 | \mbox{}inputs,\ gradOutputs\ =\ \{\},\ \{\} \\ 5 | \mbox{}\textbf{for}\ step=1,rho\ \textbf{do} \\ 6 | \mbox{}\ \ \ inputs[step]\ =\ torch.\textbf{randn}(batchSize,inputSize) \\ 7 | \mbox{}\ \ \ gradOutputs[step]\ =\ torch.\textbf{randn}(batchSize,inputSize) \\ 8 | \mbox{}\textbf{end} \\ 9 | \mbox{} \\ 10 | \mbox{}\textit{-\/-\ an\ AbstractRecurrent\ instance} \\ 11 | \mbox{}rnn\ =\ nn.\textbf{Recurrent}( \\ 12 | \mbox{}\ \ \ hiddenSize,\ \textit{-\/-\ size\ of\ the\ input\ layer} \\ 13 | \mbox{}\ \ \ nn.\textbf{Linear}(inputSize,outputSize),\ \textit{-\/-\ input\ layer} \\ 14 | \mbox{}\ \ \ nn.\textbf{Linear}(outputSize,\ outputSize),\ \textit{-\/-\ recurrent\ layer} \\ 15 | \mbox{}\ \ \ nn.\textbf{Sigmoid}(),\ \textit{-\/-\ transfer\ function} \\ 16 | \mbox{}\ \ \ rho\ \textit{-\/-\ maximum\ number\ of\ time-steps\ for\ BPTT} \\ 17 | \mbox{}) \\ 18 | \mbox{} \\ 19 | \mbox{}\textit{-\/-\ feed-forward\ and\ backpropagate\ through\ time\ like\ this\ :} \\ 20 | \mbox{}\textbf{for}\ step=1,rho\ \textbf{do} \\ 21 | \mbox{}\ \ \ \textbf{rnn:forward}(inputs[step]) \\ 22 | \mbox{}\ \ \ \textbf{rnn:backward}(inputs[step],\ gradOutputs[step]) \\ 23 | \mbox{}\textbf{end} \\ 24 | \mbox{}\textbf{rnn:backwardThroughTime}()\ \textit{-\/-\ call\ backward\ on\ the\ internal\ modules} \\ 25 | \mbox{}gradInputs\ =\ rnn.gradInputs \\ 26 | \mbox{}\textbf{rnn:updateParameters}(0.1) \\ 27 | \mbox{}\textbf{rnn:forget}()\ \textit{-\/-\ resets\ the\ time-step\ counter} 28 | -------------------------------------------------------------------------------- /doc/article/rnn-example.lua: -------------------------------------------------------------------------------- 1 | -- generate some dummy inputs and gradOutputs sequences 2 | inputs, gradOutputs = {}, {} 3 | for step=1,rho do 4 | inputs[step] = torch.randn(batchSize,inputSize) 5 | gradOutputs[step] = torch.randn(batchSize,inputSize) 6 | end 7 | 8 | -- an AbstractRecurrent instance 9 | rnn = nn.Recurrent( 10 | hiddenSize, -- size of the input layer 11 | nn.Linear(inputSize,outputSize), -- input layer 12 | nn.Linear(outputSize, outputSize), -- recurrent layer 13 | nn.Sigmoid(), -- transfer function 14 | rho -- maximum number of time-steps for BPTT 15 | ) 16 | 17 | -- feed-forward and backpropagate through time like this : 18 | for step=1,rho do 19 | rnn:forward(inputs[step]) 20 | rnn:backward(inputs[step], gradOutputs[step]) 21 | end 22 | rnn:backwardThroughTime() -- call backward on the internal modules 23 | gradInputs = rnn.gradInputs 24 | rnn:updateParameters(0.1) 25 | rnn:forget() -- resets 
the time-step counter -------------------------------------------------------------------------------- /doc/article/rnn2-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}\textit{-\/-\ recurrent\ module} \\ 4 | \mbox{}rm\ =\ nn.\textbf{Sequential}() \\ 5 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{ParallelTable}() \\ 6 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LookupTable}(nIndex,\ hiddenSize)) \\ 7 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{Linear}(hiddenSize,\ hiddenSize))) \\ 8 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{CAddTable}()) \\ 9 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sigmoid}()) \\ 10 | \mbox{}\textit{-\/-\ full\ RNN} \\ 11 | \mbox{}rnn\ =\ nn.\textbf{Sequential}() \\ 12 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{Recurrence}(rm,\ hiddenSize,\ 1))) \\ 13 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{SelectTable}(-1))\ \textit{-\/-select\ last\ element} \\ 14 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Linear}(hiddenSize,\ nSentiment)) \\ 15 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{LogSoftMax}()) \\ 16 | \mbox{}) 17 | -------------------------------------------------------------------------------- /doc/article/rnn2.lua: -------------------------------------------------------------------------------- 1 | -- recurrent module 2 | rm = nn.Sequential() 3 | :add(nn.ParallelTable() 4 | :add(nn.LookupTable(nIndex, hiddenSize)) 5 | :add(nn.Linear(hiddenSize, hiddenSize))) 6 | :add(nn.CAddTable()) 7 | :add(nn.Sigmoid()) 8 | -- full RNN 9 | rnn = nn.Sequential() 10 | :add(nn.Sequencer(nn.Recurrence(rm, hiddenSize, 1))) 11 | :add(nn.SelectTable(-1)) --select last element 12 | :add(nn.Linear(hiddenSize, nSentiment)) 13 | :add(nn.LogSoftMax()) 14 | ) -------------------------------------------------------------------------------- /doc/article/rnn_library.bbl: -------------------------------------------------------------------------------- 1 | \begin{thebibliography}{10} 2 | 3 | \bibitem{boden2001guide} 4 | M.~Boden. 5 | \newblock A guide to recurrent neural networks and backpropagation. 6 | \newblock 2001. 7 | 8 | \bibitem{collobert2011torch7} 9 | R.~Collobert, K.~Kavukcuoglu, and C.~Farabet. 10 | \newblock Torch7: A matlab-like environment for machine learning. 11 | \newblock In {\em BigLearn, NIPS Workshop}, number EPFL-CONF-192376, 2011. 12 | 13 | \bibitem{graves2013speech} 14 | A.~Graves, A.-r. Mohamed, and G.~Hinton. 15 | \newblock Speech recognition with deep recurrent neural networks. 16 | \newblock In {\em Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE 17 | International Conference on}, pages 6645--6649. IEEE, 2013. 18 | 19 | \bibitem{greff2015lstm} 20 | K.~Greff, R.~K. Srivastava, J.~Koutn{\'\i}k, B.~R. Steunebrink, and 21 | J.~Schmidhuber. 22 | \newblock Lstm: A search space odyssey. 23 | \newblock {\em arXiv preprint arXiv:1503.04069}, 2015. 24 | 25 | \bibitem{hinton2012improving} 26 | G.~E. Hinton, N.~Srivastava, A.~Krizhevsky, I.~Sutskever, and R.~R. 27 | Salakhutdinov. 28 | \newblock Improving neural networks by preventing co-adaptation of feature 29 | detectors. 30 | \newblock {\em arXiv preprint arXiv:1207.0580}, 2012. 31 | 32 | \bibitem{hochreiter1997long} 33 | S.~Hochreiter and J.~Schmidhuber. 34 | \newblock Long short-term memory. 35 | \newblock {\em Neural computation}, 9(8):1735--1780, 1997. 36 | 37 | \bibitem{ierusalimschy1996lua} 38 | R.~Ierusalimschy, L.~H. 
De~Figueiredo, and W.~Celes~Filho. 39 | \newblock Lua-an extensible extension language. 40 | \newblock {\em Softw., Pract. Exper.}, 26(6):635--652, 1996. 41 | 42 | \bibitem{lecun1998mnist} 43 | Y.~LeCun, C.~Cortes, and C.~J. Burges. 44 | \newblock The mnist database of handwritten digits, 1998. 45 | 46 | \bibitem{marcus1993building} 47 | M.~P. Marcus, M.~A. Marcinkiewicz, and B.~Santorini. 48 | \newblock Building a large annotated corpus of english: The penn treebank. 49 | \newblock {\em Computational linguistics}, 19(2):313--330, 1993. 50 | 51 | \bibitem{mikolov2012statistical} 52 | T.~Mikolov. 53 | \newblock Statistical language models based on neural networks. 54 | \newblock {\em Presentation at Google, Mountain View, 2nd April}, 2012. 55 | 56 | \bibitem{mnih2014recurrent} 57 | V.~Mnih, N.~Heess, A.~Graves, et~al. 58 | \newblock Recurrent models of visual attention. 59 | \newblock In {\em Advances in Neural Information Processing Systems}, pages 60 | 2204--2212, 2014. 61 | 62 | \bibitem{pang2008opinion} 63 | B.~Pang and L.~Lee. 64 | \newblock Opinion mining and sentiment analysis. 65 | \newblock {\em Foundations and trends in information retrieval}, 2(1-2):1--135, 66 | 2008. 67 | 68 | \bibitem{pinheiro2013recurrent} 69 | P.~H. Pinheiro and R.~Collobert. 70 | \newblock Recurrent convolutional neural networks for scene parsing. 71 | \newblock {\em arXiv preprint arXiv:1306.2795}, 2013. 72 | 73 | \bibitem{rumelhart2002learning} 74 | D.~E. Rumelhart, G.~E. Hinton, and R.~J. Williams. 75 | \newblock Learning representations by back-propagating errors. 76 | \newblock {\em Cognitive modeling}, 1:213, 2002. 77 | 78 | \bibitem{sutskever2013training} 79 | I.~Sutskever. 80 | \newblock {\em Training recurrent neural networks}. 81 | \newblock PhD thesis, University of Toronto, 2013. 82 | 83 | \bibitem{williams1992simple} 84 | R.~J. Williams. 85 | \newblock Simple statistical gradient-following algorithms for connectionist 86 | reinforcement learning. 87 | \newblock {\em Machine learning}, 8(3-4):229--256, 1992. 88 | 89 | \bibitem{zaremba2014recurrent} 90 | W.~Zaremba, I.~Sutskever, and O.~Vinyals. 91 | \newblock Recurrent neural network regularization. 92 | \newblock {\em arXiv preprint arXiv:1409.2329}, 2014. 93 | 94 | \end{thebibliography} 95 | -------------------------------------------------------------------------------- /doc/article/rnn_library.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{rumelhart2002learning, 3 | title={Learning representations by back-propagating errors}, 4 | author={Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J}, 5 | journal={Cognitive modeling}, 6 | volume={1}, 7 | pages={213}, 8 | year={2002} 9 | } 10 | 11 | @inproceedings{collobert2011torch7, 12 | title={Torch7: A matlab-like environment for machine learning}, 13 | author={Collobert, Ronan and Kavukcuoglu, Koray and Farabet, Cl{\'e}ment}, 14 | booktitle={BigLearn, NIPS Workshop}, 15 | number={EPFL-CONF-192376}, 16 | year={2011} 17 | } 18 | 19 | @inproceedings{mnih2014recurrent, 20 | title={Recurrent models of visual attention}, 21 | author={Mnih, Volodymyr and Heess, Nicolas and Graves, Alex and others}, 22 | booktitle={Advances in Neural Information Processing Systems}, 23 | pages={2204--2212}, 24 | year={2014} 25 | } 26 | 27 | @article{ierusalimschy1996lua, 28 | title={Lua-an extensible extension language}, 29 | author={Ierusalimschy, Roberto and De Figueiredo, Luiz Henrique and Celes Filho, Waldemar}, 30 | journal={Softw., Pract. 
Exper.}, 31 | volume={26}, 32 | number={6}, 33 | pages={635--652}, 34 | year={1996}, 35 | publisher={Citeseer} 36 | } 37 | 38 | @phdthesis{sutskever2013training, 39 | title={Training recurrent neural networks}, 40 | author={Sutskever, Ilya}, 41 | year={2013}, 42 | school={University of Toronto} 43 | } 44 | 45 | @article{mikolov2012statistical, 46 | title={Statistical language models based on neural networks}, 47 | author={Mikolov, Tom{\'a}{\v{s}}}, 48 | journal={Presentation at Google, Mountain View, 2nd April}, 49 | year={2012} 50 | } 51 | 52 | @article{boden2001guide, 53 | title={A guide to recurrent neural networks and backpropagation}, 54 | author={Boden, Mikael}, 55 | year={2001} 56 | } 57 | 58 | @article{zaremba2014recurrent, 59 | title={Recurrent neural network regularization}, 60 | author={Zaremba, Wojciech and Sutskever, Ilya and Vinyals, Oriol}, 61 | journal={arXiv preprint arXiv:1409.2329}, 62 | year={2014} 63 | } 64 | 65 | @inproceedings{graves2013speech, 66 | title={Speech recognition with deep recurrent neural networks}, 67 | author={Graves, Alan and Mohamed, Abdel-rahman and Hinton, Geoffrey}, 68 | booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on}, 69 | pages={6645--6649}, 70 | year={2013}, 71 | organization={IEEE} 72 | } 73 | 74 | @article{greff2015lstm, 75 | title={LSTM: A Search Space Odyssey}, 76 | author={Greff, Klaus and Srivastava, Rupesh Kumar and Koutn{\'\i}k, Jan and Steunebrink, Bas R and Schmidhuber, J{\"u}rgen}, 77 | journal={arXiv preprint arXiv:1503.04069}, 78 | year={2015} 79 | } 80 | 81 | @article{hochreiter1997long, 82 | title={Long short-term memory}, 83 | author={Hochreiter, Sepp and Schmidhuber, J{\"u}rgen}, 84 | journal={Neural computation}, 85 | volume={9}, 86 | number={8}, 87 | pages={1735--1780}, 88 | year={1997}, 89 | publisher={MIT Press} 90 | } 91 | 92 | @article{pinheiro2013recurrent, 93 | title={Recurrent convolutional neural networks for scene parsing}, 94 | author={Pinheiro, Pedro HO and Collobert, Ronan}, 95 | journal={arXiv preprint arXiv:1306.2795}, 96 | year={2013} 97 | } 98 | 99 | @article{williams1992simple, 100 | title={Simple statistical gradient-following algorithms for connectionist reinforcement learning}, 101 | author={Williams, Ronald J}, 102 | journal={Machine learning}, 103 | volume={8}, 104 | number={3-4}, 105 | pages={229--256}, 106 | year={1992}, 107 | publisher={Springer} 108 | } 109 | 110 | @article{pang2008opinion, 111 | title={Opinion mining and sentiment analysis}, 112 | author={Pang, Bo and Lee, Lillian}, 113 | journal={Foundations and trends in information retrieval}, 114 | volume={2}, 115 | number={1-2}, 116 | pages={1--135}, 117 | year={2008}, 118 | publisher={Now Publishers Inc.} 119 | } 120 | 121 | @article{hinton2012improving, 122 | title={Improving neural networks by preventing co-adaptation of feature detectors}, 123 | author={Hinton, Geoffrey E and Srivastava, Nitish and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan R}, 124 | journal={arXiv preprint arXiv:1207.0580}, 125 | year={2012} 126 | } 127 | 128 | @article{marcus1993building, 129 | title={Building a large annotated corpus of English: The Penn Treebank}, 130 | author={Marcus, Mitchell P and Marcinkiewicz, Mary Ann and Santorini, Beatrice}, 131 | journal={Computational linguistics}, 132 | volume={19}, 133 | number={2}, 134 | pages={313--330}, 135 | year={1993}, 136 | publisher={MIT Press} 137 | } 138 | 139 | @misc{lecun1998mnist, 140 | title={The MNIST database of handwritten digits}, 141 | 
author={LeCun, Yann and Cortes, Corinna and Burges, Christopher JC}, 142 | year={1998} 143 | } 144 | -------------------------------------------------------------------------------- /doc/article/rnn_library.blg: -------------------------------------------------------------------------------- 1 | This is BibTeX, Version 0.99c (TeX Live 2009/Debian) 2 | The top-level auxiliary file: rnn_library.aux 3 | The style file: abbrv.bst 4 | Database file #1: rnn_library.bib 5 | Warning--empty journal in boden2001guide 6 | Warning--there's a number but no series in collobert2011torch7 7 | You've used 17 entries, 8 | 2118 wiz_defined-function locations, 9 | 596 strings with 6441 characters, 10 | and the built_in function-call counts, 5471 in all, are: 11 | = -- 535 12 | > -- 272 13 | < -- 3 14 | + -- 109 15 | - -- 90 16 | * -- 368 17 | := -- 932 18 | add.period$ -- 51 19 | call.type$ -- 17 20 | change.case$ -- 94 21 | chr.to.int$ -- 0 22 | cite$ -- 19 23 | duplicate$ -- 206 24 | empty$ -- 412 25 | format.name$ -- 90 26 | if$ -- 1126 27 | int.to.chr$ -- 0 28 | int.to.str$ -- 17 29 | missing$ -- 15 30 | newline$ -- 87 31 | num.names$ -- 34 32 | pop$ -- 97 33 | preamble$ -- 1 34 | purify$ -- 78 35 | quote$ -- 0 36 | skip$ -- 152 37 | stack$ -- 0 38 | substring$ -- 301 39 | swap$ -- 42 40 | text.length$ -- 3 41 | text.prefix$ -- 0 42 | top$ -- 0 43 | type$ -- 68 44 | warning$ -- 2 45 | while$ -- 51 46 | width$ -- 19 47 | write$ -- 180 48 | (There were 2 warnings) 49 | -------------------------------------------------------------------------------- /doc/article/rnn_library.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/article/rnn_library.log -------------------------------------------------------------------------------- /doc/article/rnn_library.out: -------------------------------------------------------------------------------- 1 | \BOOKMARK [1][-]{section.1}{Introduction}{} 2 | \BOOKMARK [1][-]{section.2}{Torch}{} 3 | \BOOKMARK [2][-]{subsection.2.1}{torch7}{section.2} 4 | \BOOKMARK [2][-]{subsection.2.2}{nn}{section.2} 5 | \BOOKMARK [1][-]{section.3}{Package Components}{} 6 | \BOOKMARK [2][-]{subsection.3.1}{First Iteration : Recurrent module}{section.3} 7 | \BOOKMARK [2][-]{subsection.3.2}{Second Iteration : Sequencer and LSTM}{section.3} 8 | \BOOKMARK [3][-]{subsubsection.3.2.1}{Sequencer}{subsection.3.2} 9 | \BOOKMARK [3][-]{subsubsection.3.2.2}{LSTM}{subsection.3.2} 10 | \BOOKMARK [3][-]{subsubsection.3.2.3}{Repeater}{subsection.3.2} 11 | \BOOKMARK [2][-]{subsection.3.3}{Third Iteration}{section.3} 12 | \BOOKMARK [3][-]{subsubsection.3.3.1}{RecurrentAttention}{subsection.3.3} 13 | \BOOKMARK [3][-]{subsubsection.3.3.2}{Recursor}{subsection.3.3} 14 | \BOOKMARK [3][-]{subsubsection.3.3.3}{Recurrence}{subsection.3.3} 15 | \BOOKMARK [1][-]{section.4}{Development Principles}{} 16 | \BOOKMARK [2][-]{subsection.4.1}{Unit Testing}{section.4} 17 | \BOOKMARK [2][-]{subsection.4.2}{Backward Compatibility}{section.4} 18 | \BOOKMARK [2][-]{subsection.4.3}{Supporting Material}{section.4} 19 | \BOOKMARK [2][-]{subsection.4.4}{Core Extensions}{section.4} 20 | \BOOKMARK [1][-]{section.5}{Results}{} 21 | \BOOKMARK [1][-]{section.6}{Conclusion}{} 22 | -------------------------------------------------------------------------------- /doc/article/rnn_library.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/article/rnn_library.pdf -------------------------------------------------------------------------------- /doc/article/rnn_library.synctex.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/article/rnn_library.synctex.gz -------------------------------------------------------------------------------- /doc/article/rnnlm-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}\textit{-\/-\ recurrent\ module} \\ 4 | \mbox{}rm\ =\ nn.\textbf{Sequential}() \\ 5 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{ParallelTable}() \\ 6 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LookupTable}(nIndex,\ hiddenSize)) \\ 7 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{Linear}(hiddenSize,\ hiddenSize))) \\ 8 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{CAddTable}()) \\ 9 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sigmoid}()) \\ 10 | \mbox{} \\ 11 | \mbox{}rnn\ =\ nn.\textbf{Sequencer}( \\ 12 | \mbox{}\ \ \ nn.\textbf{Sequential}() \\ 13 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{Recurrence}(rm,\ hiddenSize,\ 1)) \\ 14 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{Linear}(hiddenSize,\ nIndex)) \\ 15 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LogSoftMax}()) \\ 16 | \mbox{}) 17 | -------------------------------------------------------------------------------- /doc/article/rnnlm.lua: -------------------------------------------------------------------------------- 1 | -- recurrent module 2 | rm = nn.Sequential() 3 | :add(nn.ParallelTable() 4 | :add(nn.LookupTable(nIndex, hiddenSize)) 5 | :add(nn.Linear(hiddenSize, hiddenSize))) 6 | :add(nn.CAddTable()) 7 | :add(nn.Sigmoid()) 8 | 9 | rnn = nn.Sequencer( 10 | nn.Sequential() 11 | :add(nn.Recurrence(rm, hiddenSize, 1)) 12 | :add(nn.Linear(hiddenSize, nIndex)) 13 | :add(nn.LogSoftMax()) 14 | ) -------------------------------------------------------------------------------- /doc/article/sequencer-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}seq\ =\ nn.\textbf{Sequencer}(module) 4 | -------------------------------------------------------------------------------- /doc/article/sequencer.lua: -------------------------------------------------------------------------------- 1 | seq = nn.Sequencer(module) -------------------------------------------------------------------------------- /doc/article/srnn-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}rnn\ =\ nn.\textbf{Sequential}() \\ 4 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{Linear}(inputSize,\ hiddenSize))) \\ 5 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{LSTM}(hiddenSize,\ hiddenSize))) \\ 6 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{LSTM}(hiddenSize,\ hiddenSize))) \\ 7 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{Linear}(hiddenSize,\ outputSize))) \\ 8 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{LogSoftMax}())) 9 | 
-------------------------------------------------------------------------------- /doc/article/srnn.lua: -------------------------------------------------------------------------------- 1 | rnn = nn.Sequential() 2 | :add(nn.Sequencer(nn.Linear(inputSize, hiddenSize))) 3 | :add(nn.Sequencer(nn.LSTM(hiddenSize, hiddenSize))) 4 | :add(nn.Sequencer(nn.LSTM(hiddenSize, hiddenSize))) 5 | :add(nn.Sequencer(nn.Linear(hiddenSize, outputSize))) 6 | :add(nn.Sequencer(nn.LogSoftMax())) -------------------------------------------------------------------------------- /doc/article/trainEpoch-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}\textbf{function}\ \textbf{trainEpoch}(module,\ criterion,\ inputs,\ targets) \\ 4 | \mbox{}\ \ \ \textbf{for}\ i=1,\textbf{inputs:size}(1)\ \textbf{do} \\ 5 | \mbox{}\ \ \ \ \ \ \textbf{local}\ idx\ =\ math.\textbf{random}(1,\textbf{inputs:size}(1)) \\ 6 | \mbox{}\ \ \ \ \ \ \textbf{local}\ input,\ target\ =\ inputs[idx],\ \textbf{targets:narrow}(1,idx,1) \\ 7 | \mbox{}\ \ \ \ \ \ \textit{-\/-\ forward} \\ 8 | \mbox{}\ \ \ \ \ \ \textbf{local}\ output\ =\ \textbf{module:forward}(input) \\ 9 | \mbox{}\ \ \ \ \ \ \textbf{local}\ loss\ =\ \textbf{criterion:forward}(output,\ target) \\ 10 | \mbox{}\ \ \ \ \ \ \textit{-\/-\ backward} \\ 11 | \mbox{}\ \ \ \ \ \ \textbf{local}\ gradOutput\ =\ \textbf{criterion:backward}(output,\ target) \\ 12 | \mbox{}\ \ \ \ \ \ \textbf{module:zeroGradParameters}() \\ 13 | \mbox{}\ \ \ \ \ \ \textbf{local}\ gradInput\ =\ \textbf{module:backward}(input,\ gradOutput) \\ 14 | \mbox{}\ \ \ \ \ \ \textit{-\/-\ update} \\ 15 | \mbox{}\ \ \ \ \ \ \textbf{module:updateParameters}(0.1)\ \textit{-\/-\ W\ =\ W\ -\ 0.1*dL/dW} \\ 16 | \mbox{}\ \ \ \textbf{end} \\ 17 | \mbox{}\textbf{end} 18 | -------------------------------------------------------------------------------- /doc/article/trainEpoch.lua: -------------------------------------------------------------------------------- 1 | function trainEpoch(module, criterion, inputs, targets) 2 | for i=1,inputs:size(1) do 3 | local idx = math.random(1,inputs:size(1)) 4 | local input, target = inputs[idx], targets:narrow(1,idx,1) 5 | -- forward 6 | local output = module:forward(input) 7 | local loss = criterion:forward(output, target) 8 | -- backward 9 | local gradOutput = criterion:backward(output, target) 10 | module:zeroGradParameters() 11 | local gradInput = module:backward(input, gradOutput) 12 | -- update 13 | module:updateParameters(0.1) -- W = W - 0.1*dL/dW 14 | end 15 | end -------------------------------------------------------------------------------- /doc/image/LSTM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/LSTM.png -------------------------------------------------------------------------------- /doc/image/bgru-benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/bgru-benchmark.png -------------------------------------------------------------------------------- /doc/image/bidirectionallm.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/bidirectionallm.png -------------------------------------------------------------------------------- /doc/image/gru-benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/gru-benchmark.png -------------------------------------------------------------------------------- /doc/image/hellofuzzy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/hellofuzzy.png -------------------------------------------------------------------------------- /doc/image/sequence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/sequence.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This directory contains various training scripts. 4 | 5 | Torch blog posts 6 | * The torch.ch blog contains detailed posts about the *rnn* package. 7 | 1. [recurrent-visual-attention.lua](recurrent-visual-attention.lua): training script used in [Recurrent Model for Visual Attention](http://torch.ch/blog/2015/09/21/rmva.html). Implements the REINFORCE learning rule to learn an attention mechanism for classifying MNIST digits, sometimes translated. 8 | 2. [noise-contrastive-esimate.lua](noise-contrastive-estimate.lua): one of two training scripts used in [Language modeling a billion words](http://torch.ch/blog/2016/07/25/nce.html). Single-GPU script for training recurrent language models on the Google billion words dataset. 9 | 3. [multigpu-nce-rnnlm.lua](multigpu-nce-rnnlm.lua) : 4-GPU version of `noise-contrastive-estimate.lua` for training larger multi-GPU models. Two of two training scripts used in the [Language modeling a billion words](http://torch.ch/blog/2016/07/25/nce.html). 10 | 11 | Simple training scripts. 12 | * Showcases the fundamental principles of the package. In chronological order of introduction date. 13 | 1. [simple-recurrent-network.lua](simple-recurrent-network.lua): uses the `nn.Recurrent` module to instantiate a Simple RNN. Illustrates the first AbstractRecurrent instance in action. It has since been surpassed by the more flexible `nn.Recursor` and `nn.Recurrence`. The `nn.Recursor` class decorates any module to make it conform to the nn.AbstractRecurrent interface. The `nn.Recurrence` implements the recursive `h[t] <- forward(h[t-1], x[t])`. Together, `nn.Recursor` and `nn.Recurrence` can be used to implement a wide range of experimental recurrent architectures. 14 | 2. [simple-sequencer-network.lua](simple-sequencer-network.lua): uses the `nn.Sequencer` module to accept a batch of sequences as `input` of size `seqlen x batchsize x ...`. Both tables and tensors are accepted as input and produce the same type of output (table->table, tensor->tensor). The `Sequencer` class abstract away the implementation of back-propagation through time. It also provides a `remember(['neither','both'])` method for triggering what the `Sequencer` remembers between iterations (forward,backward,update). 15 | 3. 
[simple-recurrence-network.lua](simple-recurrence-network.lua): uses the `nn.Recurrence` module to define the h[t] <- sigmoid(h[t-1], x[t]) Simple RNN. Decorates it using `nn.Sequencer` so that an entire batch of sequences (`input`) can forward and backward propagated per update. 16 | -------------------------------------------------------------------------------- /examples/encoder-decoder-coupling.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Example of "coupled" separate encoder and decoder networks, e.g. for sequence-to-sequence networks. 4 | 5 | ]]-- 6 | 7 | require 'rnn' 8 | 9 | version = 1.4 -- Uses [get,set]GradHiddenState for LSTM 10 | 11 | local opt = {} 12 | opt.learningRate = 0.1 13 | opt.hiddenSize = 6 14 | opt.numLayers = 1 15 | opt.useSeqLSTM = true -- faster implementation of LSTM + Sequencer 16 | opt.vocabSize = 7 17 | opt.seqLen = 7 -- length of the encoded sequence (with padding) 18 | opt.niter = 1000 19 | 20 | --[[ Forward coupling: Copy encoder cell and output to decoder LSTM ]]-- 21 | function forwardConnect(enc, dec) 22 | for i=1,#enc.lstmLayers do 23 | if opt.useSeqLSTM then 24 | dec.lstmLayers[i].userPrevOutput = enc.lstmLayers[i].output[opt.seqLen] 25 | dec.lstmLayers[i].userPrevCell = enc.lstmLayers[i].cell[opt.seqLen] 26 | else 27 | dec.lstmLayers[i].userPrevOutput = nn.rnn.recursiveCopy(dec.lstmLayers[i].userPrevOutput, enc.lstmLayers[i].outputs[opt.seqLen]) 28 | dec.lstmLayers[i].userPrevCell = nn.rnn.recursiveCopy(dec.lstmLayers[i].userPrevCell, enc.lstmLayers[i].cells[opt.seqLen]) 29 | end 30 | end 31 | end 32 | 33 | --[[ Backward coupling: Copy decoder gradients to encoder LSTM ]]-- 34 | function backwardConnect(enc, dec) 35 | for i=1,#enc.lstmLayers do 36 | if opt.useSeqLSTM then 37 | enc.lstmLayers[i].userNextGradCell = dec.lstmLayers[i].userGradPrevCell 38 | enc.lstmLayers[i].gradPrevOutput = dec.lstmLayers[i].userGradPrevOutput 39 | else 40 | enc:setGradHiddenState(opt.seqLen, dec:getGradHiddenState(0)) 41 | end 42 | end 43 | end 44 | 45 | -- Encoder 46 | local enc = nn.Sequential() 47 | enc:add(nn.LookupTableMaskZero(opt.vocabSize, opt.hiddenSize)) 48 | enc.lstmLayers = {} 49 | for i=1,opt.numLayers do 50 | if opt.useSeqLSTM then 51 | enc.lstmLayers[i] = nn.SeqLSTM(opt.hiddenSize, opt.hiddenSize) 52 | enc.lstmLayers[i]:maskZero() 53 | enc:add(enc.lstmLayers[i]) 54 | else 55 | enc.lstmLayers[i] = nn.LSTM(opt.hiddenSize, opt.hiddenSize):maskZero(1) 56 | enc:add(nn.Sequencer(enc.lstmLayers[i])) 57 | end 58 | end 59 | enc:add(nn.Select(1, -1)) 60 | 61 | -- Decoder 62 | local dec = nn.Sequential() 63 | dec:add(nn.LookupTableMaskZero(opt.vocabSize, opt.hiddenSize)) 64 | dec.lstmLayers = {} 65 | for i=1,opt.numLayers do 66 | if opt.useSeqLSTM then 67 | dec.lstmLayers[i] = nn.SeqLSTM(opt.hiddenSize, opt.hiddenSize) 68 | dec.lstmLayers[i]:maskZero() 69 | dec:add(dec.lstmLayers[i]) 70 | else 71 | dec.lstmLayers[i] = nn.LSTM(opt.hiddenSize, opt.hiddenSize):maskZero(1) 72 | dec:add(nn.Sequencer(dec.lstmLayers[i])) 73 | end 74 | end 75 | dec:add(nn.Sequencer(nn.MaskZero(nn.Linear(opt.hiddenSize, opt.vocabSize), 1))) 76 | dec:add(nn.Sequencer(nn.MaskZero(nn.LogSoftMax(), 1))) 77 | 78 | local criterion = nn.SequencerCriterion(nn.MaskZeroCriterion(nn.ClassNLLCriterion(),1)) 79 | 80 | -- Some example data (batchsize = 2) with variable length input and output sequences 81 | 82 | -- The input sentences to the encoder, padded with zeros from the left 83 | local encInSeq = 
torch.Tensor({{0,0,0,0,1,2,3},{0,0,0,4,3,2,1}}):t() 84 | -- The input sentences to the decoder, padded with zeros from the right. 85 | -- Label '6' represents the start of a sentence (GO). 86 | local decInSeq = torch.Tensor({{6,1,2,3,4,0,0,0},{6,5,4,3,2,1,0,0}}):t() 87 | 88 | -- The expected output from the decoder (it will return one character per time-step), 89 | -- padded with zeros from the right 90 | -- Label '7' represents the end of sentence (EOS). 91 | local decOutSeq = torch.Tensor({{1,2,3,4,7,0,0,0},{5,4,3,2,1,7,0,0}}):t() 92 | 93 | for i=1,opt.niter do 94 | enc:zeroGradParameters() 95 | dec:zeroGradParameters() 96 | 97 | -- Forward pass 98 | local encOut = enc:forward(encInSeq) 99 | forwardConnect(enc, dec) 100 | local decOut = dec:forward(decInSeq) 101 | --print(decOut) 102 | local err = criterion:forward(decOut, decOutSeq) 103 | 104 | print(string.format("Iteration %d ; NLL err = %f ", i, err)) 105 | 106 | -- Backward pass 107 | local gradOutput = criterion:backward(decOut, decOutSeq) 108 | dec:backward(decInSeq, gradOutput) 109 | backwardConnect(enc, dec) 110 | local zeroTensor = torch.Tensor(encOut):zero() 111 | enc:backward(encInSeq, zeroTensor) 112 | 113 | dec:updateParameters(opt.learningRate) 114 | enc:updateParameters(opt.learningRate) 115 | end 116 | -------------------------------------------------------------------------------- /examples/nested-recurrence-lstm.lua: -------------------------------------------------------------------------------- 1 | -- The example demonstates the ability to nest AbstractRecurrent instances. 2 | -- In this case, an FastLSTM is nested withing a Recurrence. 3 | require 'rnn' 4 | 5 | -- hyper-parameters 6 | batchSize = 8 7 | rho = 5 -- sequence length 8 | hiddenSize = 7 9 | nIndex = 10 10 | lr = 0.1 11 | 12 | -- Recurrence.recurrentModule 13 | local rm = nn.Sequential() 14 | :add(nn.ParallelTable() 15 | :add(nn.LookupTable(nIndex, hiddenSize)) 16 | :add(nn.Linear(hiddenSize, hiddenSize))) 17 | :add(nn.CAddTable()) 18 | :add(nn.Sigmoid()) 19 | :add(nn.FastLSTM(hiddenSize,hiddenSize)) -- an AbstractRecurrent instance 20 | :add(nn.Linear(hiddenSize,hiddenSize)) 21 | :add(nn.Sigmoid()) 22 | 23 | local rnn = nn.Sequential() 24 | :add(nn.Recurrence(rm, hiddenSize, 0)) -- another AbstractRecurrent instance 25 | :add(nn.Linear(hiddenSize, nIndex)) 26 | :add(nn.LogSoftMax()) 27 | 28 | -- all following code is exactly the same as the simple-sequencer-network.lua script 29 | -- internally, rnn will be wrapped into a Recursor to make it an AbstractRecurrent instance. 30 | rnn = nn.Sequencer(rnn) 31 | 32 | print(rnn) 33 | 34 | -- build criterion 35 | 36 | criterion = nn.SequencerCriterion(nn.ClassNLLCriterion()) 37 | 38 | -- build dummy dataset (task is to predict next item, given previous) 39 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 40 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 41 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 42 | 43 | offsets = {} 44 | for i=1,batchSize do 45 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 46 | end 47 | offsets = torch.LongTensor(offsets) 48 | 49 | -- training 50 | local iteration = 1 51 | while true do 52 | -- 1. 
create a sequence of rho time-steps 53 | 54 | local inputs, targets = {}, {} 55 | for step=1,rho do 56 | -- a batch of inputs 57 | inputs[step] = sequence:index(1, offsets) 58 | -- incement indices 59 | offsets:add(1) 60 | for j=1,batchSize do 61 | if offsets[j] > sequence:size(1) then 62 | offsets[j] = 1 63 | end 64 | end 65 | targets[step] = sequence:index(1, offsets) 66 | end 67 | 68 | -- 2. forward sequence through rnn 69 | 70 | rnn:zeroGradParameters() 71 | 72 | local outputs = rnn:forward(inputs) 73 | local err = criterion:forward(outputs, targets) 74 | 75 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 76 | 77 | -- 3. backward sequence through rnn (i.e. backprop through time) 78 | 79 | local gradOutputs = criterion:backward(outputs, targets) 80 | local gradInputs = rnn:backward(inputs, gradOutputs) 81 | 82 | -- 4. update 83 | 84 | rnn:updateParameters(lr) 85 | 86 | iteration = iteration + 1 87 | end 88 | -------------------------------------------------------------------------------- /examples/recurrent-time-series.lua: -------------------------------------------------------------------------------- 1 | -- Multi-variate time-series example 2 | 3 | require 'rnn' 4 | 5 | cmd = torch.CmdLine() 6 | cmd:text() 7 | cmd:text('Train a multivariate time-series model using RNN') 8 | cmd:option('--rho', 5, 'maximum number of time steps for back-propagate through time (BPTT)') 9 | cmd:option('--multiSize', 6, 'number of random variables as input and output') 10 | cmd:option('--hiddenSize', 10, 'number of hidden units used at output of the recurrent layer') 11 | cmd:option('--dataSize', 100, 'total number of time-steps in dataset') 12 | cmd:option('--batchSize', 8, 'number of training samples per batch') 13 | cmd:option('--nIterations', 1000, 'max number of training iterations') 14 | cmd:option('--learningRate', 0.001, 'learning rate') 15 | cmd:option('--plot', false, 'plot the errors during training?') 16 | cmd:text() 17 | local opt = cmd:parse(arg or {}) 18 | 19 | if opt.plot then 20 | require 'optim' 21 | logger = optim.Logger(paths.concat('outputs', 'rects_log.txt')) 22 | end 23 | -- For simplicity, the multi-variate dataset in this example is independently distributed. 24 | -- Toy dataset (task is to predict next vector, given previous vectors) following the normal distribution . 25 | -- Generated by sampling a separate normal distribution for each random variable. 
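-- (Concretely, evalPDF below overwrites each entry x of vX with the Gaussian
--  density exp(-((x - mean)/sigma)^2 / 2) / (sigma * sqrt(2*pi)).)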
26 | -- note: vX is used as both input X and output Y to save memory 27 | local function evalPDF(vMean, vSigma, vX) 28 | for i=1,vMean:size(1) do 29 | local b = (vX[i]-vMean[i])/vSigma[i] 30 | vX[i] = math.exp(-b*b/2)/(vSigma[i]*math.sqrt(2*math.pi)) 31 | end 32 | return vX 33 | end 34 | 35 | assert(opt.multiSize > 1, "Multi-variate time-series") 36 | 37 | vBias = torch.randn(opt.multiSize) 38 | vMean = torch.Tensor(opt.multiSize):fill(5) 39 | vSigma = torch.linspace(1,opt.multiSize,opt.multiSize) 40 | sequence = torch.Tensor(opt.dataSize, opt.multiSize) 41 | 42 | j = 0 43 | for i=1,opt.dataSize do 44 | sequence[{i,{}}]:fill(j) 45 | evalPDF(vMean, vSigma, sequence[{i,{}}]) 46 | sequence[{i,{}}]:add(vBias) 47 | j = j + 1 48 | if j>10 then j = 0 end 49 | end 50 | print('Sequence:'); print(sequence) 51 | 52 | -- batch mode 53 | 54 | offsets = torch.LongTensor(opt.batchSize):random(1,opt.dataSize) 55 | 56 | -- RNN 57 | r = nn.Recurrent( 58 | opt.hiddenSize, -- size of output 59 | nn.Linear(opt.multiSize, opt.hiddenSize), -- input layer 60 | nn.Linear(opt.hiddenSize, opt.hiddenSize), -- recurrent layer 61 | nn.Sigmoid(), -- transfer function 62 | opt.rho 63 | ) 64 | 65 | rnn = nn.Sequential() 66 | :add(r) 67 | :add(nn.Linear(opt.hiddenSize, opt.multiSize)) 68 | 69 | criterion = nn.MSECriterion() 70 | 71 | -- use Sequencer for better data handling 72 | rnn = nn.Sequencer(rnn) 73 | 74 | criterion = nn.SequencerCriterion(criterion) 75 | print("Model :") 76 | print(rnn) 77 | 78 | -- train rnn model 79 | minErr = opt.multiSize -- report min error 80 | minK = 0 81 | avgErrs = torch.Tensor(opt.nIterations):fill(0) 82 | for k = 1, opt.nIterations do 83 | 84 | -- 1. create a sequence of rho time-steps 85 | 86 | local inputs, targets = {}, {} 87 | for step = 1, opt.rho do 88 | -- batch of inputs 89 | inputs[step] = inputs[step] or sequence.new() 90 | inputs[step]:index(sequence, 1, offsets) 91 | -- batch of targets 92 | offsets:add(1) -- increase indices by 1 93 | offsets[offsets:gt(opt.dataSize)] = 1 94 | targets[step] = targets[step] or sequence.new() 95 | targets[step]:index(sequence, 1, offsets) 96 | end 97 | 98 | -- 2. forward sequence through rnn 99 | 100 | local outputs = rnn:forward(inputs) 101 | local err = criterion:forward(outputs, targets) 102 | 103 | -- report errors 104 | 105 | print('Iter: ' .. k .. ' Err: ' .. err) 106 | if opt.plot then 107 | logger:add{['Err'] = err} 108 | logger:style{['Err'] = '-'} 109 | logger:plot() 110 | end 111 | 112 | avgErrs[k] = err 113 | if avgErrs[k] < minErr then 114 | minErr = avgErrs[k] 115 | minK = k 116 | end 117 | 118 | -- 3. backward sequence through rnn (i.e. backprop through time) 119 | 120 | rnn:zeroGradParameters() 121 | 122 | local gradOutputs = criterion:backward(outputs, targets) 123 | local gradInputs = rnn:backward(inputs, gradOutputs) 124 | 125 | -- 4. updates parameters 126 | 127 | rnn:updateParameters(opt.learningRate) 128 | end 129 | 130 | print('min err: ' .. minErr .. ' on iteration ' .. 
minK) 131 | -------------------------------------------------------------------------------- /examples/sequence-to-one.lua: -------------------------------------------------------------------------------- 1 | require 'rnn' 2 | 3 | -- hyper-parameters 4 | batchSize = 8 5 | rho = 10 -- sequence length 6 | hiddenSize = 100 7 | nIndex = 100 -- input words 8 | nClass = 7 -- output classes 9 | lr = 0.1 10 | 11 | 12 | -- build simple recurrent neural network 13 | r = nn.Recurrent( 14 | hiddenSize, nn.Identity(), 15 | nn.Linear(hiddenSize, hiddenSize), nn.Sigmoid(), 16 | rho 17 | ) 18 | 19 | rnn = nn.Sequential() 20 | :add(nn.LookupTable(nIndex, hiddenSize)) 21 | :add(nn.SplitTable(1,2)) 22 | :add(nn.Sequencer(r)) 23 | :add(nn.SelectTable(-1)) -- this selects the last time-step of the rnn output sequence 24 | :add(nn.Linear(hiddenSize, nClass)) 25 | :add(nn.LogSoftMax()) 26 | 27 | -- build criterion 28 | 29 | criterion = nn.ClassNLLCriterion() 30 | 31 | -- build dummy dataset (task is to predict class given rho words) 32 | -- similar to sentiment analysis datasets 33 | ds = {} 34 | ds.size = 1000 35 | ds.input = torch.LongTensor(ds.size,rho) 36 | ds.target = torch.LongTensor(ds.size):random(nClass) 37 | 38 | -- this will make the inputs somewhat correlate with the targets, 39 | -- such that the reduction in training error should be more obvious 40 | local correlate = torch.LongTensor(nClass, rho*3):random(nClass) 41 | local indices = torch.LongTensor(rho) 42 | local buffer = torch.LongTensor() 43 | local sortVal, sortIdx = torch.LongTensor(), torch.LongTensor() 44 | for i=1,ds.size do 45 | indices:random(1,rho*3) 46 | buffer:index(correlate[ds.target[i]], 1, indices) 47 | sortVal:sort(sortIdx, buffer, 1) 48 | ds.input[i]:copy(sortVal:view(-1)) 49 | end 50 | 51 | 52 | indices:resize(batchSize) 53 | 54 | -- training 55 | local inputs, targets = torch.LongTensor(), torch.LongTensor() 56 | for iteration = 1, 1000 do 57 | -- 1. create a sequence of rho time-steps 58 | 59 | indices:random(1,ds.size) -- choose some random samples 60 | inputs:index(ds.input, 1,indices) 61 | targets:index(ds.target, 1,indices) 62 | 63 | -- 2. forward sequence through rnn 64 | 65 | rnn:zeroGradParameters() 66 | 67 | local outputs = rnn:forward(inputs) 68 | local err = criterion:forward(outputs, targets) 69 | 70 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 71 | 72 | -- 3. backward sequence through rnn (i.e. backprop through time) 73 | 74 | local gradOutputs = criterion:backward(outputs, targets) 75 | local gradInputs = rnn:backward(inputs, gradOutputs) 76 | 77 | -- 4. update 78 | 79 | rnn:updateParameters(lr) 80 | end 81 | -------------------------------------------------------------------------------- /examples/simple-bisequencer-network-variable.lua: -------------------------------------------------------------------------------- 1 | -- Example BLSTM for variable-length sequences 2 | require 'rnn' 3 | 4 | torch.manualSeed(0) 5 | math.randomseed(0) 6 | 7 | -- hyper-parameters 8 | batchSize = 8 9 | rho = 10 -- sequence length 10 | hiddenSize = 5 11 | nIndex = 10 12 | lr = 0.1 13 | maxIter = 100 14 | 15 | local sharedLookupTable = nn.LookupTableMaskZero(nIndex, hiddenSize) 16 | 17 | -- forward rnn 18 | local fwd = nn.Sequential() 19 | :add(sharedLookupTable) 20 | :add(nn.FastLSTM(hiddenSize, hiddenSize):maskZero(1)) 21 | 22 | -- internally, rnn will be wrapped into a Recursor to make it an AbstractRecurrent instance. 
23 | fwdSeq = nn.Sequencer(fwd) 24 | 25 | -- backward rnn (will be applied in reverse order of input sequence) 26 | local bwd = nn.Sequential() 27 | :add(sharedLookupTable:sharedClone()) 28 | :add(nn.FastLSTM(hiddenSize, hiddenSize):maskZero(1)) 29 | bwdSeq = nn.Sequencer(bwd) 30 | 31 | -- merges the output of one time-step of fwd and bwd rnns. 32 | -- You could also try nn.AddTable(), nn.Identity(), etc. 33 | local merge = nn.JoinTable(1, 1) 34 | mergeSeq = nn.Sequencer(merge) 35 | 36 | -- Assume that two input sequences are given (original and reverse, both are right-padded). 37 | -- Instead of ConcatTable, we use ParallelTable here. 38 | local parallel = nn.ParallelTable() 39 | parallel:add(fwdSeq):add(bwdSeq) 40 | local brnn = nn.Sequential() 41 | :add(parallel) 42 | :add(nn.ZipTable()) 43 | :add(mergeSeq) 44 | 45 | local rnn = nn.Sequential() 46 | :add(brnn) 47 | :add(nn.Sequencer(nn.MaskZero(nn.Linear(hiddenSize*2, nIndex), 1))) -- times two due to JoinTable 48 | :add(nn.Sequencer(nn.MaskZero(nn.LogSoftMax(), 1))) 49 | 50 | print(rnn) 51 | 52 | -- build criterion 53 | 54 | criterion = nn.SequencerCriterion(nn.MaskZeroCriterion(nn.ClassNLLCriterion(), 1)) 55 | 56 | -- build dummy dataset (task is to predict next item, given previous) 57 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 58 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 59 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 60 | 61 | offsets = {} 62 | maxStep = {} 63 | for i=1,batchSize do 64 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 65 | -- variable length for each sample 66 | table.insert(maxStep, math.random(rho)) 67 | end 68 | offsets = torch.LongTensor(offsets) 69 | 70 | -- training 71 | for iteration = 1, maxIter do 72 | -- 1. create a sequence of rho time-steps 73 | 74 | local inputs, inputs_rev, targets = {}, {}, {} 75 | for step=1,rho do 76 | -- a batch of inputs 77 | inputs[step] = sequence:index(1, offsets) 78 | -- increment indices 79 | offsets:add(1) 80 | for j=1,batchSize do 81 | if offsets[j] > sequence:size(1) then 82 | offsets[j] = 1 83 | end 84 | end 85 | targets[step] = sequence:index(1, offsets) 86 | -- padding 87 | for j=1,batchSize do 88 | if step > maxStep[j] then 89 | inputs[step][j] = 0 90 | targets[step][j] = 0 91 | end 92 | end 93 | end 94 | 95 | -- reverse 96 | for step=1,rho do 97 | inputs_rev[step] = torch.LongTensor(batchSize) 98 | for j=1,batchSize do 99 | if step <= maxStep[j] then 100 | inputs_rev[step][j] = inputs[maxStep[j]-step+1][j] 101 | else 102 | inputs_rev[step][j] = 0 103 | end 104 | end 105 | end 106 | 107 | -- 2. forward sequence through rnn 108 | 109 | rnn:zeroGradParameters() 110 | 111 | local outputs = rnn:forward({inputs, inputs_rev}) 112 | local err = criterion:forward(outputs, targets) 113 | 114 | local correct = 0 115 | local total = 0 116 | for step=1,rho do 117 | probs = outputs[step] 118 | _, preds = probs:max(2) 119 | for j=1,batchSize do 120 | local cur_x = inputs[step][j] 121 | local cur_y = targets[step][j] 122 | local cur_t = preds[j][1] 123 | -- print(string.format("x=%d ; y=%d ; pred=%d", cur_x, cur_y, cur_t)) 124 | if step <= maxStep[j] then 125 | if cur_y == cur_t then correct = correct + 1 end 126 | total = total + 1 127 | end 128 | end 129 | end 130 | 131 | local acc = correct*1.0/total 132 | print(string.format("Iteration %d ; NLL err = %f ; ACC = %.2f ", iteration, err, acc)) 133 | 134 | -- 3. backward sequence through rnn (i.e. 
backprop through time) 135 | 136 | local gradOutputs = criterion:backward(outputs, targets) 137 | local gradInputs = rnn:backward({inputs, inputs_rev}, gradOutputs) 138 | 139 | -- 4. update 140 | 141 | rnn:updateParameters(lr) 142 | 143 | end 144 | -------------------------------------------------------------------------------- /examples/simple-bisequencer-network.lua: -------------------------------------------------------------------------------- 1 | require 'rnn' 2 | 3 | -- hyper-parameters 4 | batchSize = 8 5 | rho = 5 -- sequence length 6 | hiddenSize = 7 7 | nIndex = 10 8 | lr = 0.1 9 | 10 | 11 | -- forward rnn 12 | -- build simple recurrent neural network 13 | local fwd = nn.Recurrent( 14 | hiddenSize, nn.LookupTable(nIndex, hiddenSize), 15 | nn.Linear(hiddenSize, hiddenSize), nn.Sigmoid(), 16 | rho 17 | ) 18 | 19 | -- backward rnn (will be applied in reverse order of input sequence) 20 | local bwd = fwd:clone() 21 | bwd:reset() -- reinitializes parameters 22 | 23 | -- merges the output of one time-step of fwd and bwd rnns. 24 | -- You could also try nn.AddTable(), nn.Identity(), etc. 25 | local merge = nn.JoinTable(1, 1) 26 | 27 | -- we use BiSequencerLM because this is a language model (previous and next words to predict current word). 28 | -- If we used BiSequencer, x[t] would be used to predict y[t] = x[t] (which is cheating). 29 | -- Note that bwd and merge argument are optional and will default to the above. 30 | local brnn = nn.BiSequencerLM(fwd, bwd, merge) 31 | 32 | local rnn = nn.Sequential() 33 | :add(brnn) 34 | :add(nn.Sequencer(nn.Linear(hiddenSize*2, nIndex))) -- times two due to JoinTable 35 | :add(nn.Sequencer(nn.LogSoftMax())) 36 | 37 | print(rnn) 38 | 39 | -- build criterion 40 | 41 | criterion = nn.SequencerCriterion(nn.ClassNLLCriterion()) 42 | 43 | -- build dummy dataset (task is to predict next item, given previous) 44 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 45 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 46 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 47 | 48 | offsets = {} 49 | for i=1,batchSize do 50 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 51 | end 52 | offsets = torch.LongTensor(offsets) 53 | 54 | -- training 55 | local iteration = 1 56 | while true do 57 | -- 1. create a sequence of rho time-steps 58 | 59 | local inputs, targets = {}, {} 60 | for step=1,rho do 61 | -- a batch of inputs 62 | inputs[step] = sequence:index(1, offsets) 63 | -- incement indices 64 | offsets:add(1) 65 | for j=1,batchSize do 66 | if offsets[j] > sequence:size(1) then 67 | offsets[j] = 1 68 | end 69 | end 70 | targets[step] = sequence:index(1, offsets) 71 | end 72 | 73 | -- 2. forward sequence through rnn 74 | 75 | rnn:zeroGradParameters() 76 | 77 | local outputs = rnn:forward(inputs) 78 | local err = criterion:forward(outputs, targets) 79 | 80 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 81 | 82 | -- 3. backward sequence through rnn (i.e. backprop through time) 83 | 84 | local gradOutputs = criterion:backward(outputs, targets) 85 | local gradInputs = rnn:backward(inputs, gradOutputs) 86 | 87 | -- 4. 
update 88 | 89 | rnn:updateParameters(lr) 90 | 91 | iteration = iteration + 1 92 | end 93 | -------------------------------------------------------------------------------- /examples/simple-recurrence-network.lua: -------------------------------------------------------------------------------- 1 | -- example use of nn.Recurrence 2 | require 'rnn' 3 | 4 | -- hyper-parameters 5 | batchSize = 8 6 | rho = 5 -- sequence length 7 | hiddenSize = 7 8 | nIndex = 10 9 | lr = 0.1 10 | 11 | -- the internal recurrentModule used by Recurrence 12 | local rm = nn.Sequential() -- input is {x[t], h[t-1]} 13 | :add(nn.ParallelTable() 14 | :add(nn.LookupTable(nIndex, hiddenSize)) -- input layer 15 | :add(nn.Linear(hiddenSize, hiddenSize))) -- recurrent layer 16 | :add(nn.CAddTable()) -- merge 17 | :add(nn.Sigmoid()) -- transfer 18 | 19 | local rnn = nn.Sequential() 20 | :add(nn.Recurrence(rm, hiddenSize, 0)) -- similar to nn.Recurrent, but more general, and no startModule 21 | :add(nn.Linear(hiddenSize, nIndex)) 22 | :add(nn.LogSoftMax()) 23 | 24 | -- all following code is exactly the same as the simple-sequencer-network.lua script 25 | -- internally, rnn will be wrapped into a Recursor to make it an AbstractRecurrent instance. 26 | rnn = nn.Sequencer(rnn) 27 | 28 | print(rnn) 29 | 30 | -- build criterion 31 | 32 | criterion = nn.SequencerCriterion(nn.ClassNLLCriterion()) 33 | 34 | -- build dummy dataset (task is to predict next item, given previous) 35 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 36 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 37 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 38 | 39 | offsets = {} 40 | for i=1,batchSize do 41 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 42 | end 43 | offsets = torch.LongTensor(offsets) 44 | 45 | -- training 46 | local iteration = 1 47 | while true do 48 | -- 1. create a sequence of rho time-steps 49 | 50 | local inputs, targets = {}, {} 51 | for step=1,rho do 52 | -- a batch of inputs 53 | inputs[step] = sequence:index(1, offsets) 54 | -- incement indices 55 | offsets:add(1) 56 | for j=1,batchSize do 57 | if offsets[j] > sequence:size(1) then 58 | offsets[j] = 1 59 | end 60 | end 61 | targets[step] = sequence:index(1, offsets) 62 | end 63 | 64 | -- 2. forward sequence through rnn 65 | 66 | rnn:zeroGradParameters() 67 | 68 | local outputs = rnn:forward(inputs) 69 | local err = criterion:forward(outputs, targets) 70 | 71 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 72 | 73 | -- 3. backward sequence through rnn (i.e. backprop through time) 74 | 75 | local gradOutputs = criterion:backward(outputs, targets) 76 | local gradInputs = rnn:backward(inputs, gradOutputs) 77 | 78 | -- 4. 
update 79 | 80 | rnn:updateParameters(lr) 81 | 82 | iteration = iteration + 1 83 | end 84 | -------------------------------------------------------------------------------- /examples/simple-recurrent-network.lua: -------------------------------------------------------------------------------- 1 | require 'rnn' 2 | 3 | -- hyper-parameters 4 | batchSize = 8 5 | rho = 5 -- sequence length 6 | hiddenSize = 7 7 | nIndex = 10 8 | lr = 0.1 9 | 10 | 11 | -- build simple recurrent neural network 12 | local r = nn.Recurrent( 13 | hiddenSize, nn.LookupTable(nIndex, hiddenSize), 14 | nn.Linear(hiddenSize, hiddenSize), nn.Sigmoid(), 15 | rho 16 | ) 17 | 18 | local rnn = nn.Sequential() 19 | :add(r) 20 | :add(nn.Linear(hiddenSize, nIndex)) 21 | :add(nn.LogSoftMax()) 22 | 23 | -- wrap the non-recurrent module (Sequential) in Recursor. 24 | -- This makes it a recurrent module 25 | -- i.e. Recursor is an AbstractRecurrent instance 26 | rnn = nn.Recursor(rnn, rho) 27 | 28 | print(rnn) 29 | 30 | -- build criterion 31 | 32 | criterion = nn.ClassNLLCriterion() 33 | 34 | -- build dummy dataset (task is to predict next item, given previous) 35 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 36 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 37 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 38 | 39 | offsets = {} 40 | for i=1,batchSize do 41 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 42 | end 43 | offsets = torch.LongTensor(offsets) 44 | 45 | -- training 46 | local iteration = 1 47 | while true do 48 | -- 1. create a sequence of rho time-steps 49 | 50 | local inputs, targets = {}, {} 51 | for step=1,rho do 52 | -- a batch of inputs 53 | inputs[step] = sequence:index(1, offsets) 54 | -- incement indices 55 | offsets:add(1) 56 | for j=1,batchSize do 57 | if offsets[j] > sequence:size(1) then 58 | offsets[j] = 1 59 | end 60 | end 61 | targets[step] = sequence:index(1, offsets) 62 | end 63 | 64 | -- 2. forward sequence through rnn 65 | 66 | rnn:zeroGradParameters() 67 | rnn:forget() -- forget all past time-steps 68 | 69 | local outputs, err = {}, 0 70 | for step=1,rho do 71 | outputs[step] = rnn:forward(inputs[step]) 72 | err = err + criterion:forward(outputs[step], targets[step]) 73 | end 74 | 75 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 76 | 77 | -- 3. backward sequence through rnn (i.e. backprop through time) 78 | 79 | local gradOutputs, gradInputs = {}, {} 80 | for step=rho,1,-1 do -- reverse order of forward calls 81 | gradOutputs[step] = criterion:backward(outputs[step], targets[step]) 82 | gradInputs[step] = rnn:backward(inputs[step], gradOutputs[step]) 83 | end 84 | 85 | -- 4. 
update 86 | 87 | rnn:updateParameters(lr) 88 | 89 | iteration = iteration + 1 90 | end 91 | -------------------------------------------------------------------------------- /examples/simple-sequencer-network.lua: -------------------------------------------------------------------------------- 1 | require 'rnn' 2 | 3 | -- hyper-parameters 4 | batchSize = 8 5 | rho = 5 -- sequence length 6 | hiddenSize = 7 7 | nIndex = 10 8 | lr = 0.1 9 | 10 | 11 | -- build simple recurrent neural network 12 | local r = nn.Recurrent( 13 | hiddenSize, nn.LookupTable(nIndex, hiddenSize), 14 | nn.Linear(hiddenSize, hiddenSize), nn.Sigmoid(), 15 | rho 16 | ) 17 | 18 | local rnn = nn.Sequential() 19 | :add(r) 20 | :add(nn.Linear(hiddenSize, nIndex)) 21 | :add(nn.LogSoftMax()) 22 | 23 | -- internally, rnn will be wrapped into a Recursor to make it an AbstractRecurrent instance. 24 | rnn = nn.Sequencer(rnn) 25 | 26 | print(rnn) 27 | 28 | -- build criterion 29 | 30 | criterion = nn.SequencerCriterion(nn.ClassNLLCriterion()) 31 | 32 | -- build dummy dataset (task is to predict next item, given previous) 33 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 34 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 35 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 36 | 37 | offsets = {} 38 | for i=1,batchSize do 39 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 40 | end 41 | offsets = torch.LongTensor(offsets) 42 | 43 | -- training 44 | local iteration = 1 45 | while true do 46 | -- 1. create a sequence of rho time-steps 47 | 48 | local inputs, targets = {}, {} 49 | for step=1,rho do 50 | -- a batch of inputs 51 | inputs[step] = sequence:index(1, offsets) 52 | -- incement indices 53 | offsets:add(1) 54 | for j=1,batchSize do 55 | if offsets[j] > sequence:size(1) then 56 | offsets[j] = 1 57 | end 58 | end 59 | targets[step] = sequence:index(1, offsets) 60 | end 61 | 62 | -- 2. forward sequence through rnn 63 | 64 | rnn:zeroGradParameters() 65 | 66 | local outputs = rnn:forward(inputs) 67 | local err = criterion:forward(outputs, targets) 68 | 69 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 70 | 71 | -- 3. backward sequence through rnn (i.e. backprop through time) 72 | 73 | local gradOutputs = criterion:backward(outputs, targets) 74 | local gradInputs = rnn:backward(inputs, gradOutputs) 75 | 76 | -- 4. 
update 77 | 78 | rnn:updateParameters(lr) 79 | 80 | iteration = iteration + 1 81 | end 82 | -------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 1 | require 'dpnn' 2 | require 'torchx' 3 | dpnn.version = dpnn.version or 0 4 | assert(dpnn.version > 1, "Please update dpnn : luarocks install dpnn") 5 | 6 | -- create global rnn table: 7 | rnn = {} 8 | rnn.version = 2 9 | rnn.version = 2.1 -- [get,set][Grad]HiddenState(step) 10 | 11 | unpack = unpack or table.unpack 12 | 13 | torch.include('rnn', 'recursiveUtils.lua') 14 | 15 | -- extensions to nn.Module 16 | torch.include('rnn', 'Module.lua') 17 | 18 | -- override nn.Dropout 19 | torch.include('rnn', 'Dropout.lua') 20 | 21 | -- for testing: 22 | torch.include('rnn', 'test/test.lua') 23 | torch.include('rnn', 'test/bigtest.lua') 24 | 25 | -- support modules 26 | torch.include('rnn', 'ZeroGrad.lua') 27 | torch.include('rnn', 'LinearNoBias.lua') 28 | torch.include('rnn', 'SAdd.lua') 29 | torch.include('rnn', 'CopyGrad.lua') 30 | 31 | -- recurrent modules 32 | torch.include('rnn', 'LookupTableMaskZero.lua') 33 | torch.include('rnn', 'MaskZero.lua') 34 | torch.include('rnn', 'TrimZero.lua') 35 | torch.include('rnn', 'AbstractRecurrent.lua') 36 | torch.include('rnn', 'Recurrent.lua') 37 | torch.include('rnn', 'LSTM.lua') 38 | torch.include('rnn', 'FastLSTM.lua') 39 | torch.include('rnn', 'GRU.lua') 40 | torch.include('rnn', 'Mufuru.lua') 41 | torch.include('rnn', 'Recursor.lua') 42 | torch.include('rnn', 'Recurrence.lua') 43 | torch.include('rnn', 'NormStabilizer.lua') 44 | 45 | -- sequencer modules 46 | torch.include('rnn', 'AbstractSequencer.lua') 47 | torch.include('rnn', 'Repeater.lua') 48 | torch.include('rnn', 'Sequencer.lua') 49 | torch.include('rnn', 'BiSequencer.lua') 50 | torch.include('rnn', 'BiSequencerLM.lua') 51 | torch.include('rnn', 'RecurrentAttention.lua') 52 | 53 | -- sequencer + recurrent modules 54 | torch.include('rnn', 'SeqLSTM.lua') 55 | torch.include('rnn', 'SeqLSTMP.lua') 56 | torch.include('rnn', 'SeqGRU.lua') 57 | torch.include('rnn', 'SeqReverseSequence.lua') 58 | torch.include('rnn', 'SeqBRNN.lua') 59 | 60 | -- recurrent criterions: 61 | torch.include('rnn', 'SequencerCriterion.lua') 62 | torch.include('rnn', 'RepeaterCriterion.lua') 63 | torch.include('rnn', 'MaskZeroCriterion.lua') 64 | 65 | -- prevent likely name conflicts 66 | nn.rnn = rnn 67 | -------------------------------------------------------------------------------- /recursiveUtils.lua: -------------------------------------------------------------------------------- 1 | 2 | function rnn.recursiveResizeAs(t1,t2) 3 | if torch.type(t2) == 'table' then 4 | t1 = (torch.type(t1) == 'table') and t1 or {t1} 5 | for key,_ in pairs(t2) do 6 | t1[key], t2[key] = rnn.recursiveResizeAs(t1[key], t2[key]) 7 | end 8 | elseif torch.isTensor(t2) then 9 | t1 = torch.isTensor(t1) and t1 or t2.new() 10 | t1:resizeAs(t2) 11 | else 12 | error("expecting nested tensors or tables. Got ".. 13 | torch.type(t1).." and "..torch.type(t2).." 
instead") 14 | end 15 | return t1, t2 16 | end 17 | 18 | function rnn.recursiveSet(t1,t2) 19 | if torch.type(t2) == 'table' then 20 | t1 = (torch.type(t1) == 'table') and t1 or {t1} 21 | for key,_ in pairs(t2) do 22 | t1[key], t2[key] = rnn.recursiveSet(t1[key], t2[key]) 23 | end 24 | elseif torch.isTensor(t2) then 25 | t1 = torch.isTensor(t1) and t1 or t2.new() 26 | t1:set(t2) 27 | else 28 | error("expecting nested tensors or tables. Got ".. 29 | torch.type(t1).." and "..torch.type(t2).." instead") 30 | end 31 | return t1, t2 32 | end 33 | 34 | function rnn.recursiveCopy(t1,t2) 35 | if torch.type(t2) == 'table' then 36 | t1 = (torch.type(t1) == 'table') and t1 or {t1} 37 | for key,_ in pairs(t2) do 38 | t1[key], t2[key] = rnn.recursiveCopy(t1[key], t2[key]) 39 | end 40 | elseif torch.isTensor(t2) then 41 | t1 = torch.isTensor(t1) and t1 or t2.new() 42 | t1:resizeAs(t2):copy(t2) 43 | else 44 | error("expecting nested tensors or tables. Got ".. 45 | torch.type(t1).." and "..torch.type(t2).." instead") 46 | end 47 | return t1, t2 48 | end 49 | 50 | function rnn.recursiveAdd(t1, t2) 51 | if torch.type(t2) == 'table' then 52 | t1 = (torch.type(t1) == 'table') and t1 or {t1} 53 | for key,_ in pairs(t2) do 54 | t1[key], t2[key] = rnn.recursiveAdd(t1[key], t2[key]) 55 | end 56 | elseif torch.isTensor(t1) and torch.isTensor(t2) then 57 | t1:add(t2) 58 | else 59 | error("expecting nested tensors or tables. Got ".. 60 | torch.type(t1).." and "..torch.type(t2).." instead") 61 | end 62 | return t1, t2 63 | end 64 | 65 | function rnn.recursiveTensorEq(t1, t2) 66 | if torch.type(t2) == 'table' then 67 | local isEqual = true 68 | if torch.type(t1) ~= 'table' then 69 | return false 70 | end 71 | for key,_ in pairs(t2) do 72 | isEqual = isEqual and rnn.recursiveTensorEq(t1[key], t2[key]) 73 | end 74 | return isEqual 75 | elseif torch.isTensor(t1) and torch.isTensor(t2) then 76 | local diff = t1-t2 77 | local err = diff:abs():max() 78 | return err < 0.00001 79 | else 80 | error("expecting nested tensors or tables. Got ".. 81 | torch.type(t1).." and "..torch.type(t2).." instead") 82 | end 83 | end 84 | 85 | function rnn.recursiveNormal(t2) 86 | if torch.type(t2) == 'table' then 87 | for key,_ in pairs(t2) do 88 | t2[key] = rnn.recursiveNormal(t2[key]) 89 | end 90 | elseif torch.isTensor(t2) then 91 | t2:normal() 92 | else 93 | error("expecting tensor or table thereof. Got " 94 | ..torch.type(t2).." instead") 95 | end 96 | return t2 97 | end 98 | 99 | function rnn.recursiveFill(t2, val) 100 | if torch.type(t2) == 'table' then 101 | for key,_ in pairs(t2) do 102 | t2[key] = rnn.recursiveFill(t2[key], val) 103 | end 104 | elseif torch.isTensor(t2) then 105 | t2:fill(val) 106 | else 107 | error("expecting tensor or table thereof. Got " 108 | ..torch.type(t2).." instead") 109 | end 110 | return t2 111 | end 112 | 113 | function rnn.recursiveType(param, type_str) 114 | if torch.type(param) == 'table' then 115 | for i = 1, #param do 116 | param[i] = rnn.recursiveType(param[i], type_str) 117 | end 118 | else 119 | if torch.typename(param) and 120 | torch.typename(param):find('torch%..+Tensor') then 121 | param = param:type(type_str) 122 | end 123 | end 124 | return param 125 | end 126 | 127 | function rnn.recursiveSum(t2) 128 | local sum = 0 129 | if torch.type(t2) == 'table' then 130 | for key,_ in pairs(t2) do 131 | sum = sum + rnn.recursiveSum(t2[key], val) 132 | end 133 | elseif torch.isTensor(t2) then 134 | return t2:sum() 135 | else 136 | error("expecting tensor or table thereof. 
Got " 137 | ..torch.type(t2).." instead") 138 | end 139 | return sum 140 | end 141 | 142 | function rnn.recursiveNew(t2) 143 | if torch.type(t2) == 'table' then 144 | local t1 = {} 145 | for key,_ in pairs(t2) do 146 | t1[key] = rnn.recursiveNew(t2[key]) 147 | end 148 | return t1 149 | elseif torch.isTensor(t2) then 150 | return t2.new() 151 | else 152 | error("expecting tensor or table thereof. Got " 153 | ..torch.type(t2).." instead") 154 | end 155 | end 156 | -------------------------------------------------------------------------------- /rocks/rnn-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "rnn" 2 | version = "scm-1" 3 | 4 | source = { 5 | url = "git://github.com/Element-Research/rnn", 6 | tag = "master" 7 | } 8 | 9 | description = { 10 | summary = "A Recurrent Neural Network library that extends Torch's nn", 11 | detailed = [[ 12 | A library to build RNNs, LSTMs, GRUs, BRNNs, BLSTMs, and so forth and so on. 13 | ]], 14 | homepage = "https://github.com/Element-Research/rnn", 15 | license = "BSD" 16 | } 17 | 18 | dependencies = { 19 | "torch >= 7.0", 20 | "nn >= 1.0", 21 | "dpnn >= 1.0", 22 | "torchx >= 1.0" 23 | } 24 | 25 | build = { 26 | type = "command", 27 | build_command = [[ 28 | cmake -E make_directory build; 29 | cd build; 30 | cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)"; 31 | $(MAKE) 32 | ]], 33 | install_command = "cd build && $(MAKE) install" 34 | } 35 | -------------------------------------------------------------------------------- /scripts/evaluate-rnnlm.lua: -------------------------------------------------------------------------------- 1 | require 'nngraph' 2 | require 'rnn' 3 | local dl = require 'dataload' 4 | 5 | 6 | --[[command line arguments]]-- 7 | cmd = torch.CmdLine() 8 | cmd:text() 9 | cmd:text('Evaluate a RNNLM') 10 | cmd:text('Options:') 11 | cmd:option('--xplogpath', '', 'path to a previously saved xplog containing model') 12 | cmd:option('--cuda', false, 'model was saved with cuda') 13 | cmd:option('--device', 1, 'which GPU device to use') 14 | cmd:option('--nsample', -1, 'sample this many words from the language model') 15 | cmd:option('--temperature', 1, 'temperature of multinomial. Increase to sample wildly, reduce to be more deterministic.') 16 | cmd:option('--dumpcsv', false, 'dump training and validation error to CSV file') 17 | cmd:text() 18 | local opt = cmd:parse(arg or {}) 19 | 20 | assert(opt.temperature > 0) 21 | 22 | -- check that saved model exists 23 | assert(paths.filep(opt.xplogpath), opt.xplogpath..' 
does not exist') 24 | 25 | if opt.cuda then 26 | require 'cunn' 27 | cutorch.setDevice(opt.device) 28 | end 29 | 30 | local xplog = torch.load(opt.xplogpath) 31 | local lm = xplog.model 32 | local criterion = xplog.criterion 33 | local targetmodule = xplog.targetmodule 34 | 35 | print("Hyper-parameters (xplog.opt):") 36 | print(xplog.opt) 37 | 38 | local trainerr = xplog.trainnceloss or xplog.trainppl 39 | local validerr = xplog.valnceloss or xplog.valppl 40 | 41 | print(string.format("Error (epoch=%d): training=%f; validation=%f", xplog.epoch, trainerr[#trainerr], validerr[#validerr])) 42 | 43 | if opt.dumpcsv then 44 | local csvfile = opt.xplogpath:match('([^/]+)[.]t7$')..'.csv' 45 | paths.mkdir('learningcurves') 46 | csvpath = paths.concat('learningcurves', csvfile) 47 | 48 | local file = io.open(csvpath, 'w') 49 | file:write("epoch,trainerr,validerr\n") 50 | for i=1,#trainerr do 51 | file:write(string.format('%d,%f,%f\n', i, trainerr[i], validerr[i])) 52 | end 53 | file:close() 54 | 55 | print("CSV file saved to "..csvpath) 56 | os.exit() 57 | end 58 | 59 | local trainset, validset, testset 60 | if xplog.dataset == 'PennTreeBank' then 61 | print"Loading Penn Tree Bank test set" 62 | trainset, validset, testset = dl.loadPTB({50, 1, 1}) 63 | assert(trainset.vocab['the'] == xplog.vocab['the']) 64 | elseif xplog.dataset == 'GoogleBillionWords' then 65 | print"Loading Google Billion Words test set" 66 | trainset, validset, testset = dl.loadGBW({50,1,1}, 'train_tiny.th7') 67 | else 68 | error"Unrecognized dataset" 69 | end 70 | 71 | 72 | for i,nce in ipairs(lm:findModules('nn.NCEModule')) do 73 | nce.normalized = true 74 | nce.logsoftmax = true 75 | if not opt.nce then 76 | print"Found NCEModule" 77 | criterion = nn.SequencerCriterion(nn.MaskZeroCriterion(nn.ClassNLLCriterion(), 1)) 78 | if opt.cuda then criterion:cuda() end 79 | opt.nce = true 80 | end 81 | end 82 | 83 | print(lm) 84 | 85 | lm:forget() 86 | lm:evaluate() 87 | 88 | if opt.nsample > 0 then 89 | if xplog.dataset == 'GoogleBillionWords' then 90 | local sampletext = {''} 91 | local prevword = trainset.vocab[''] 92 | assert(prevword) 93 | local inputs = torch.LongTensor(1,1) -- seqlen x batchsize 94 | local targets = opt.cuda and torch.CudaTensor(1) or torch.LongTensor(1) -- dummy tensor 95 | local buffer = torch.FloatTensor() 96 | for i=1,opt.nsample do 97 | inputs:fill(prevword) 98 | local output = lm:forward({inputs,{targets}})[1][1] 99 | buffer:resize(output:size()):copy(output) 100 | buffer:div(opt.temperature) 101 | buffer:exp() 102 | local sample = torch.multinomial(buffer, 1, true) 103 | local currentword = trainset.ivocab[sample[1]] 104 | table.insert(sampletext, currentword) 105 | if currentword == '' then 106 | -- sentences were trained independently, so we explicitly tell it to start a new sentence 107 | lm:forget() 108 | prevword = trainset.vocab[''] 109 | table.insert(sampletext, '\n') 110 | else 111 | prevword = sample[1] 112 | end 113 | end 114 | print(table.concat(sampletext, ' ')) 115 | else 116 | local sampletext = {} 117 | local prevword = trainset.vocab[''] 118 | assert(prevword) 119 | local inputs = torch.LongTensor(1,1) -- seqlen x batchsize 120 | if opt.cuda then inputs = inputs:cuda() end 121 | local buffer = torch.FloatTensor() 122 | for i=1,opt.nsample do 123 | inputs:fill(prevword) 124 | local output = lm:forward(inputs)[1][1] 125 | buffer:resize(output:size()):copy(output) 126 | buffer:div(opt.temperature) 127 | buffer:exp() 128 | local sample = torch.multinomial(buffer, 1, true) 129 | local 
currentword = trainset.ivocab[sample[1]] 130 | table.insert(sampletext, currentword) 131 | prevword = sample[1] 132 | end 133 | print(table.concat(sampletext, ' ')) 134 | end 135 | else 136 | local sumErr, count = 0, 0 137 | 138 | for i, inputs, targets in testset:subiter(xplog.opt.seqlen or 100) do 139 | inputs:apply(function(x) 140 | if x > 0 then 141 | count = count + 1 142 | end 143 | end) 144 | local targets = targetmodule:forward(targets) 145 | local inputs = opt.nce and {inputs, targets} or inputs 146 | local outputs = lm:forward(inputs) 147 | local err = criterion:forward(outputs, targets) 148 | sumErr = sumErr + err 149 | end 150 | 151 | if count ~= testset:size() then 152 | local meanseqlen = testset:size()/(testset:size() - count) 153 | print("mean sequence length : "..meanseqlen) 154 | end 155 | 156 | local ppl = torch.exp(sumErr/count) 157 | print("Test PPL : "..ppl) 158 | end 159 | 160 | -------------------------------------------------------------------------------- /scripts/evaluate-rva.lua: -------------------------------------------------------------------------------- 1 | require 'dp' 2 | require 'rnn' 3 | require 'optim' 4 | 5 | -- References : 6 | -- A. http://papers.nips.cc/paper/5542-recurrent-models-of-visual-attention.pdf 7 | -- B. http://incompleteideas.net/sutton/williams-92.pdf 8 | 9 | --[[command line arguments]]-- 10 | cmd = torch.CmdLine() 11 | cmd:text() 12 | cmd:text('Evaluate a Recurrent Model for Visual Attention') 13 | cmd:text('Options:') 14 | cmd:option('--xpPath', '', 'path to a previously saved model') 15 | cmd:option('--cuda', false, 'model was saved with cuda') 16 | cmd:option('--evalTest', false, 'model was saved with cuda') 17 | cmd:option('--stochastic', false, 'evaluate the model stochatically. Generate glimpses stochastically') 18 | cmd:option('--dataset', 'Mnist', 'which dataset to use : Mnist | TranslattedMnist | etc') 19 | cmd:option('--overwrite', false, 'overwrite checkpoint') 20 | cmd:text() 21 | local opt = cmd:parse(arg or {}) 22 | 23 | -- check that saved model exists 24 | assert(paths.filep(opt.xpPath), opt.xpPath..' does not exist') 25 | 26 | if opt.cuda then 27 | require 'cunn' 28 | end 29 | 30 | xp = torch.load(opt.xpPath) 31 | model = xp:model().module 32 | tester = xp:tester() or xp:validator() -- dp.Evaluator 33 | tester:sampler()._epoch_size = nil 34 | conf = tester:feedback() -- dp.Confusion 35 | cm = conf._cm -- optim.ConfusionMatrix 36 | 37 | print("Last evaluation of "..(xp:tester() and 'test' or 'valid').." set :") 38 | print(cm) 39 | 40 | if opt.dataset == 'TranslatedMnist' then 41 | ds = torch.checkpoint( 42 | paths.concat(dp.DATA_DIR, 'checkpoint/dp.TranslatedMnist_test.t7'), 43 | function() 44 | local ds = dp[opt.dataset]{load_all=false} 45 | ds:loadTest() 46 | return ds 47 | end, 48 | opt.overwrite 49 | ) 50 | else 51 | ds = dp[opt.dataset]() 52 | end 53 | 54 | ra = model:findModules('nn.RecurrentAttention')[1] 55 | sg = model:findModules('nn.SpatialGlimpse')[1] 56 | 57 | -- stochastic or deterministic 58 | for i=1,#ra.actions do 59 | local rn = ra.action:getStepModule(i):findModules('nn.ReinforceNormal')[1] 60 | rn.stochastic = opt.stochastic 61 | end 62 | 63 | if opt.evalTest then 64 | conf:reset() 65 | tester:propagateEpoch(ds:testSet()) 66 | 67 | print((opt.stochastic and "Stochastic" or "Deterministic") .. 
"evaluation of test set :") 68 | print(cm) 69 | end 70 | 71 | inputs = ds:get('test','inputs') 72 | targets = ds:get('test','targets', 'b') 73 | 74 | input = inputs:narrow(1,1,10) 75 | model:training() -- otherwise the rnn doesn't save intermediate time-step states 76 | if not opt.stochastic then 77 | for i=1,#ra.actions do 78 | local rn = ra.action:getStepModule(i):findModules('nn.ReinforceNormal')[1] 79 | rn.stdev = 0 -- deterministic 80 | end 81 | end 82 | output = model:forward(input) 83 | 84 | function drawBox(img, bbox, channel) 85 | channel = channel or 1 86 | 87 | local x1, y1 = torch.round(bbox[1]), torch.round(bbox[2]) 88 | local x2, y2 = torch.round(bbox[1] + bbox[3]), torch.round(bbox[2] + bbox[4]) 89 | 90 | x1, y1 = math.max(1, x1), math.max(1, y1) 91 | x2, y2 = math.min(img:size(3), x2), math.min(img:size(2), y2) 92 | 93 | local max = img:max() 94 | 95 | for i=x1,x2 do 96 | img[channel][y1][i] = max 97 | img[channel][y2][i] = max 98 | end 99 | for i=y1,y2 do 100 | img[channel][i][x1] = max 101 | img[channel][i][x2] = max 102 | end 103 | 104 | return img 105 | end 106 | 107 | locations = ra.actions 108 | 109 | input = nn.Convert(ds:ioShapes(),'bchw'):forward(input) 110 | glimpses = {} 111 | patches = {} 112 | 113 | params = nil 114 | for i=1,input:size(1) do 115 | local img = input[i] 116 | for j,location in ipairs(locations) do 117 | local glimpse = glimpses[j] or {} 118 | glimpses[j] = glimpse 119 | local patch = patches[j] or {} 120 | patches[j] = patch 121 | 122 | local xy = location[i] 123 | -- (-1,-1) top left corner, (1,1) bottom right corner of image 124 | local x, y = xy:select(1,1), xy:select(1,2) 125 | -- (0,0), (1,1) 126 | x, y = (x+1)/2, (y+1)/2 127 | -- (1,1), (input:size(3), input:size(4)) 128 | x, y = x*(input:size(3)-1)+1, y*(input:size(4)-1)+1 129 | 130 | local gimg = img:clone() 131 | for d=1,sg.depth do 132 | local size = sg.height*(sg.scale^(d-1)) 133 | local bbox = {y-size/2, x-size/2, size, size} 134 | drawBox(gimg, bbox, 1) 135 | end 136 | glimpse[i] = gimg 137 | 138 | local sg_, ps 139 | if j == 1 then 140 | sg_ = ra.rnn.initialModule:findModules('nn.SpatialGlimpse')[1] 141 | else 142 | sg_ = ra.rnn.sharedClones[j]:findModules('nn.SpatialGlimpse')[1] 143 | end 144 | patch[i] = image.scale(img:clone():float(), sg_.output[i]:narrow(1,1,1):float()) 145 | 146 | collectgarbage() 147 | end 148 | end 149 | 150 | paths.mkdir('glimpse') 151 | for j,glimpse in ipairs(glimpses) do 152 | local g = image.toDisplayTensor{input=glimpse,nrow=10,padding=3} 153 | local p = image.toDisplayTensor{input=patches[j],nrow=10,padding=3} 154 | image.save("glimpse/glimpse"..j..".png", g) 155 | image.save("glimpse/patch"..j..".png", p) 156 | end 157 | 158 | 159 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | FILE(GLOB luasrc *.lua) 2 | ADD_TORCH_PACKAGE(rnn/test "${src}" "${luasrc}") 3 | -------------------------------------------------------------------------------- /test/GRU_test.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'nn' 3 | 4 | require 'GRU' 5 | local gradcheck = require 'util.gradcheck' 6 | local tests = {} 7 | local tester = torch.Tester() 8 | 9 | 10 | local function check_size(x, dims) 11 | tester:assert(x:dim() == #dims) 12 | for i, d in ipairs(dims) do 13 | tester:assert(x:size(i) == d) 14 | end 15 | end 16 | 17 | 18 | function tests.testForward() 19 | 
local N, T, D, H = 3, 4, 5, 6 20 | 21 | local h0 = torch.randn(N, H) 22 | local x = torch.randn(N, T, D) 23 | 24 | local gru = nn.GRU(D, H) 25 | local h = gru:forward{h0, x} 26 | 27 | -- Do a naive forward pass 28 | local naive_h = torch.Tensor(N, T, H) 29 | 30 | 31 | -- Unpack weight, bias for each gate 32 | local Wxu = gru.weight[{{1, D}, {1, H}}] 33 | local Wxr = gru.weight[{{1, D}, {H + 1, 2 * H}}] 34 | local Wxhc = gru.weight[{{1, D}, {2 * H + 1, 3 * H}}] 35 | 36 | 37 | local Whu = gru.weight[{{D + 1, D + H}, {1, H}}] 38 | local Whr = gru.weight[{{D + 1, D + H}, {H + 1, 2 * H}}] 39 | local Whhc = gru.weight[{{D + 1, D + H}, {2 * H + 1, 3 * H}}] 40 | 41 | 42 | local bu = gru.bias[{{1, H}}]:view(1, H):expand(N, H) 43 | local br = gru.bias[{{H + 1, 2 * H}}]:view(1, H):expand(N, H) 44 | local bhc = gru.bias[{{2 * H + 1, 3 * H}}]:view(1, H):expand(N, H) 45 | 46 | 47 | local prev_h = h0:clone() 48 | for t = 1, T do 49 | local xt = x[{{}, t}] 50 | local u = torch.sigmoid(torch.mm(xt, Wxu) + torch.mm(prev_h, Whu) + bu) 51 | local r = torch.sigmoid(torch.mm(xt, Wxr) + torch.mm(prev_h, Whr) + br) 52 | local hc = torch.tanh(torch.mm(xt, Wxhc) + torch.mm(torch.cmul(prev_h,r), Whhc) + bhc) 53 | local next_h = torch.cmul(hc, u) + prev_h - torch.cmul(prev_h, u) 54 | 55 | naive_h[{{}, t}] = next_h 56 | 57 | prev_h = next_h 58 | end 59 | 60 | tester:assertTensorEq(naive_h, h, 1e-10) 61 | end 62 | 63 | 64 | function tests.gradcheck() 65 | local N, T, D, H = 2, 3, 4, 5 66 | 67 | local x = torch.randn(N, T, D) 68 | local h0 = torch.randn(N, H) 69 | 70 | 71 | local gru = nn.GRU(D, H) 72 | local h = gru:forward{h0, x} 73 | 74 | local dh = torch.randn(#h) 75 | 76 | gru:zeroGradParameters() 77 | local dh0, dx = unpack(gru:backward({h0, x}, dh)) 78 | local dw = gru.gradWeight:clone() 79 | local db = gru.gradBias:clone() 80 | 81 | local function fx(x) return gru:forward{h0, x} end 82 | local function fh0(h0) return gru:forward{h0, x} end 83 | 84 | local function fw(w) 85 | local old_w = gru.weight 86 | gru.weight = w 87 | local out = gru:forward{ h0, x} 88 | gru.weight = old_w 89 | return out 90 | end 91 | 92 | local function fb(b) 93 | local old_b = gru.bias 94 | gru.bias = b 95 | local out = gru:forward{h0, x} 96 | gru.bias = old_b 97 | return out 98 | end 99 | 100 | local dx_num = gradcheck.numeric_gradient(fx, x, dh) 101 | local dh0_num = gradcheck.numeric_gradient(fh0, h0, dh) 102 | 103 | local dw_num = gradcheck.numeric_gradient(fw, gru.weight, dh) 104 | local db_num = gradcheck.numeric_gradient(fb, gru.bias, dh) 105 | 106 | local dx_error = gradcheck.relative_error(dx_num, dx) 107 | local dh0_error = gradcheck.relative_error(dh0_num, dh0) 108 | 109 | local dw_error = gradcheck.relative_error(dw_num, dw) 110 | local db_error = gradcheck.relative_error(db_num, db) 111 | 112 | tester:assertle(dh0_error, 1e-4) 113 | 114 | tester:assertle(dx_error, 1e-5) 115 | tester:assertle(dw_error, 1e-4) 116 | tester:assertle(db_error, 1e-5) 117 | end 118 | 119 | 120 | -- Make sure that everything works correctly when we don't pass an initial cell 121 | -- state; in this case we do pass an initial hidden state and an input sequence 122 | function tests.noCellTest() 123 | local N, T, D, H = 4, 5, 6, 7 124 | local gru = nn.GRU(D, H) 125 | 126 | for t = 1, 3 do 127 | local x = torch.randn(N, T, D) 128 | local h0 = torch.randn(N, H) 129 | local dout = torch.randn(N, T, H) 130 | 131 | local out = gru:forward{h0, x} 132 | local din = gru:backward({h0, x}, dout) 133 | 134 | tester:assert(torch.type(din) == 'table') 135 | 
tester:assert(#din == 2) 136 | check_size(din[1], {N, H}) 137 | check_size(din[2], {N, T, D}) 138 | 139 | -- Make sure the initial cell state got reset to zero 140 | --tester:assertTensorEq(gru.c0, torch.zeros(N, H), 0) 141 | end 142 | end 143 | 144 | 145 | -- Make sure that everything works when we don't pass initial hidden or initial 146 | -- cell state; in this case we only pass input sequence of vectors 147 | function tests.noHiddenTest() 148 | local N, T, D, H = 4, 5, 6, 7 149 | local gru = nn.GRU(D, H) 150 | 151 | for t = 1, 3 do 152 | local x = torch.randn(N, T, D) 153 | local dout = torch.randn(N, T, H) 154 | 155 | local out = gru:forward(x) 156 | local din = gru:backward(x, dout) 157 | 158 | tester:assert(torch.isTensor(din)) 159 | check_size(din, {N, T, D}) 160 | 161 | -- Make sure the initial cell state and initial hidden state are zero 162 | --tester:assertTensorEq(gru.c0, torch.zeros(N, H), 0) 163 | tester:assertTensorEq(gru.h0, torch.zeros(N, H), 0) 164 | end 165 | end 166 | 167 | 168 | function tests.rememberStatesTest() 169 | local N, T, D, H = 5, 6, 7, 8 170 | local gru = nn.GRU(D, H) 171 | gru.remember_states = true 172 | 173 | local final_h = nil 174 | for t = 1, 4 do 175 | local x = torch.randn(N, T, D) 176 | local dout = torch.randn(N, T, H) 177 | local out = gru:forward(x) 178 | local din = gru:backward(x, dout) 179 | 180 | if t == 1 then 181 | tester:assertTensorEq(gru.h0, torch.zeros(N, H), 0) 182 | elseif t > 1 then 183 | tester:assertTensorEq(gru.h0, final_h, 0) 184 | end 185 | final_h = out[{{}, T}]:clone() 186 | end 187 | 188 | -- Initial states should reset to zero after we call resetStates 189 | gru:resetStates() 190 | local x = torch.randn(N, T, D) 191 | local dout = torch.randn(N, T, H) 192 | gru:forward(x) 193 | gru:backward(x, dout) 194 | tester:assertTensorEq(gru.h0, torch.zeros(N, H), 0) 195 | end 196 | 197 | 198 | tester:add(tests) 199 | tester:run() 200 | -------------------------------------------------------------------------------- /test/bigtest.lua: -------------------------------------------------------------------------------- 1 | local _ = require 'moses' 2 | local rnnbigtest = {} 3 | local precision = 1e-5 4 | local mytester 5 | 6 | function rnnbigtest.NCE_nan() 7 | local success, dl = pcall(function() return require 'dataload' end) 8 | if not success then 9 | return 10 | end 11 | if not pcall(function() require 'cunn' end) then 12 | return 13 | end 14 | 15 | local datapath = paths.concat(dl.DATA_PATH, 'BillionWords') 16 | local wordfreq = torch.load(paths.concat(datapath, 'word_freq.th7')) 17 | local unigram = wordfreq:float()--:add(0.0000001):log() 18 | print("U", unigram:min(), unigram:mean(), unigram:std(), unigram:max()) 19 | 20 | local batchsize = 128 21 | local seqlen = 50 22 | local hiddensize = 200 23 | local vocabsize = unigram:size(1) 24 | local k = 400 25 | 26 | local tinyset = dl.MultiSequenceGBW(datapath, 'train_tiny.th7', batchsize, verbose) 27 | 28 | local lm = nn.Sequential() 29 | local lookup = nn.LookupTableMaskZero(vocabsize, hiddensize) 30 | lm:add(lookup) 31 | 32 | for i=1,2 do 33 | local rnn = nn.SeqLSTM(hiddensize, hiddensize) 34 | rnn.maskzero = true 35 | lm:add(rnn) 36 | end 37 | 38 | lm:add(nn.SplitTable(1)) 39 | 40 | local ncemodule = nn.NCEModule(hiddensize, vocabsize, k, unigram, 1) 41 | 42 | lm = nn.Sequential() 43 | :add(nn.ParallelTable() 44 | :add(lm):add(nn.Identity())) 45 | :add(nn.ZipTable()) 46 | 47 | lm:add(nn.Sequencer(nn.MaskZero(ncemodule, 1))) 48 | lm:remember() 49 | 50 | local crit = 
nn.MaskZeroCriterion(nn.NCECriterion(), 0) 51 | local targetmodule = nn.Sequential():add(nn.Convert()):add(nn.SplitTable(1)) 52 | local criterion = nn.SequencerCriterion(crit) 53 | 54 | for k,param in ipairs(lm:parameters()) do 55 | param:uniform(-0.1, 0.1) 56 | end 57 | 58 | -- comment this out to see the difference 59 | ncemodule:reset() 60 | 61 | lm:training() 62 | 63 | lm:cuda() 64 | criterion:cuda() 65 | targetmodule:cuda() 66 | 67 | local sumErr = 0 68 | local _ = require 'moses' 69 | for k,inputs, targets in tinyset:subiter(seqlen, 512) do 70 | local targets = targetmodule:forward(targets) 71 | local inputs = {inputs, targets} 72 | -- forward 73 | local outputs = lm:forward(inputs) 74 | for i,output in ipairs(outputs) do 75 | assert(not _.isNaN(output[1]:sum()), tostring(i)) 76 | assert(not _.isNaN(output[2]:sum()), tostring(i)) 77 | assert(not _.isNaN(output[3]:sum()), tostring(i)) 78 | assert(not _.isNaN(output[4]:sum()), tostring(i)) 79 | end 80 | local err = criterion:forward(outputs, targets) 81 | assert(not _.isNaN(err)) 82 | sumErr = sumErr + err 83 | -- backward 84 | local gradOutputs = criterion:backward(outputs, targets) 85 | 86 | for i,gradOutput in ipairs(gradOutputs) do 87 | assert(not _.isNaN(gradOutput[1]:sum()), tostring(i)) 88 | assert(not _.isNaN(gradOutput[2]:sum()), tostring(i)) 89 | end 90 | lm:zeroGradParameters() 91 | lm:backward(inputs, gradOutputs) 92 | lm:updateParameters(0.7) 93 | local params, gradParams = lm:parameters() 94 | 95 | for i,param in ipairs(params) do 96 | assert(not _.isNaN(param:sum()), tostring(i)) 97 | assert(not _.isNaN(gradParams[i]:sum()), tostring(i)) 98 | end 99 | 100 | local counts = {} 101 | inputs[1]:float():apply(function(x) 102 | counts[x] = (counts[x] or 0) + 1 103 | end) 104 | 105 | print("Top freqs", unpack(_.last(_.sort(_.values(counts)), 5))) 106 | print("Batch : "..k..", err="..err) 107 | for name,module in pairs{LT=lookup, NCE=ncemodule} do 108 | print(name..".gradWeight : "..module.gradWeight:norm()..", .weight : "..module.weight:norm()) 109 | if name == 'NCE' then 110 | print(name..".gradBias : "..module.gradBias:norm()..", .bias : "..module.bias:norm()) 111 | end 112 | end 113 | end 114 | 115 | end 116 | 117 | function rnn.bigtest(tests) 118 | mytester = torch.Tester() 119 | mytester:add(rnnbigtest) 120 | math.randomseed(os.time()) 121 | mytester:run(tests) 122 | end 123 | -------------------------------------------------------------------------------- /test/mnistsample.t7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/test/mnistsample.t7 -------------------------------------------------------------------------------- /test/test_trimzero.lua: -------------------------------------------------------------------------------- 1 | require 'rnn' 2 | require 'dp' 3 | require 'sys' 4 | 5 | torch.manualSeed(123) 6 | 7 | batch_size = 200 8 | sentence_length = 26 9 | vocabulary_size = 1000 10 | word_embedding_size = 200 11 | rnn_size = 300 12 | 13 | x = torch.ceil(torch.rand(batch_size,sentence_length)*vocabulary_size) 14 | t = torch.ceil(torch.rand(batch_size)*10) 15 | 16 | -- variable sentence lengths 17 | for i=1,batch_size do 18 | idx = torch.floor(torch.rand(1)[1]*(sentence_length)) 19 | if idx > 0 then x[i][{{1,idx}}]:fill(0) end 20 | end 21 | 22 | rnns = {'FastLSTM','GRU'} 23 | methods = {'maskZero', 'trimZero'} 24 | 25 | for ir,arch in pairs(rnns) do 26 | local rnn = 
nn[arch](word_embedding_size, rnn_size) 27 | local model = nn.Sequential() 28 | :add(nn.LookupTableMaskZero(vocabulary_size, word_embedding_size)) 29 | :add(nn.SplitTable(2)) 30 | :add(nn.Sequencer(rnn)) 31 | :add(nn.SelectTable(sentence_length)) 32 | :add(nn.Linear(rnn_size, 10)) 33 | model:getParameters():uniform(-0.1, 0.1) 34 | collectgarbage() 35 | criterion = nn.CrossEntropyCriterion() 36 | local models = {} 37 | for j=1,#methods do 38 | table.insert(models, model:clone()) 39 | end 40 | collectgarbage() 41 | for im,method in pairs(methods) do 42 | print('-- '..arch..' with '..method) 43 | model = models[im] 44 | rnn = model:get(3).module 45 | rnn[method](rnn, 1) -- decorate the recurrent module with maskZero or trimZero (nInputDim=1) 46 | sys.tic() 47 | for i=1,3 do 48 | model:zeroGradParameters() 49 | y = model:forward(x) 50 | loss = criterion:forward(y,t) 51 | print('loss:', loss) 52 | collectgarbage() 53 | dy = criterion:backward(y,t) 54 | model:backward(x, dy) 55 | w,dw = model:parameters() 56 | model:updateParameters(.5) 57 | collectgarbage() 58 | end 59 | elapse = sys.toc() -- seconds elapsed over the 3 forward/backward passes 60 | print('elapsed time:', elapse) 61 | end 62 | end 63 | --------------------------------------------------------------------------------
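-- The example below is a minimal, self-contained sketch of the zero-masking pattern used in
-- examples/simple-bisequencer-network-variable.lua and benchmarked in test/test_trimzero.lua:
-- wrap the recurrent module with maskZero(1) and the criterion with MaskZeroCriterion so that
-- zero-padded steps of variable-length sequences produce neither outputs nor gradients.
-- The hyper-parameter values and tensor shapes chosen below are illustrative assumptions,
-- not taken from any file in this repository.

require 'rnn'

local nIndex, hiddenSize, batchSize, seqLen = 10, 8, 4, 5

-- one time-step of the model : masked lookup -> masked LSTM -> masked linear classifier
local step = nn.Sequential()
   :add(nn.LookupTableMaskZero(nIndex, hiddenSize))       -- index 0 maps to a zero embedding
   :add(nn.FastLSTM(hiddenSize, hiddenSize):maskZero(1))  -- zero input rows produce zero output rows
   :add(nn.MaskZero(nn.Linear(hiddenSize, nIndex), 1))
   :add(nn.MaskZero(nn.LogSoftMax(), 1))

-- Sequencer applies the step module over a table of time-steps (wrapping it in a Recursor internally)
local model = nn.Sequencer(step)
local criterion = nn.SequencerCriterion(nn.MaskZeroCriterion(nn.ClassNLLCriterion(), 1))

-- build a right-padded batch : index 0 marks padding (here the last step is padding for every sample)
local inputs, targets = {}, {}
for t = 1, seqLen do
   inputs[t] = torch.LongTensor(batchSize):random(1, nIndex)
   targets[t] = torch.LongTensor(batchSize):random(1, nIndex)
end
inputs[seqLen]:fill(0)
targets[seqLen]:fill(0)

-- one training step : padded positions contribute nothing to the error or the gradients
model:zeroGradParameters()
local outputs = model:forward(inputs)
local err = criterion:forward(outputs, targets)
local gradOutputs = criterion:backward(outputs, targets)
model:backward(inputs, gradOutputs)
model:updateParameters(0.1)
print(string.format("masked NLL err = %f", err))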