├── .gitignore ├── AbstractRecurrent.lua ├── AbstractSequencer.lua ├── BiSequencer.lua ├── BiSequencerLM.lua ├── CMakeLists.txt ├── CopyGrad.lua ├── Dropout.lua ├── ExpandAs.lua ├── FastLSTM.lua ├── GRU.lua ├── LICENSE.2nd.txt ├── LICENSE.txt ├── LSTM.lua ├── LinearNoBias.lua ├── LookupTableMaskZero.lua ├── MaskZero.lua ├── MaskZeroCriterion.lua ├── Module.lua ├── Mufuru.lua ├── NormStabilizer.lua ├── Padding.lua ├── README.md ├── Recurrence.lua ├── Recurrent.lua ├── RecurrentAttention.lua ├── Recursor.lua ├── Repeater.lua ├── RepeaterCriterion.lua ├── SAdd.lua ├── SeqBRNN.lua ├── SeqGRU.lua ├── SeqLSTM.lua ├── SeqLSTMP.lua ├── SeqReverseSequence.lua ├── Sequencer.lua ├── SequencerCriterion.lua ├── TrimZero.lua ├── ZeroGrad.lua ├── doc ├── article │ ├── ff-lua.tex │ ├── ff.lua │ ├── ff2-lua.tex │ ├── ff2.lua │ ├── lm-lua.tex │ ├── lm.lua │ ├── lstm-lua.tex │ ├── lstm.lua │ ├── mlp-lua.tex │ ├── mlp.lua │ ├── nips15submit_e.sty │ ├── nll-lua.tex │ ├── nll.lua │ ├── ram-lua.tex │ ├── ram.lua │ ├── rec-lua.tex │ ├── rec.lua │ ├── rec2-lua.tex │ ├── rec2.lua │ ├── rec3-lua.tex │ ├── rec3.lua │ ├── rec4-lua.tex │ ├── rec4.lua │ ├── rec5-lua.tex │ ├── rec5.lua │ ├── recurrence-lua.tex │ ├── recurrence.lua │ ├── repeater-lua.tex │ ├── repeater.lua │ ├── rnn-example-lua.tex │ ├── rnn-example.lua │ ├── rnn2-lua.tex │ ├── rnn2.lua │ ├── rnn_library.bbl │ ├── rnn_library.bib │ ├── rnn_library.blg │ ├── rnn_library.log │ ├── rnn_library.out │ ├── rnn_library.pdf │ ├── rnn_library.synctex.gz │ ├── rnn_library.tex │ ├── rnnlm-lua.tex │ ├── rnnlm.lua │ ├── sequencer-lua.tex │ ├── sequencer.lua │ ├── srnn-lua.tex │ ├── srnn.lua │ ├── trainEpoch-lua.tex │ └── trainEpoch.lua └── image │ ├── LSTM.png │ ├── bgru-benchmark.png │ ├── bidirectionallm.png │ ├── gru-benchmark.png │ ├── hellofuzzy.png │ └── sequence.png ├── examples ├── README.md ├── encoder-decoder-coupling.lua ├── multigpu-nce-rnnlm.lua ├── nested-recurrence-lstm.lua ├── noise-contrastive-estimate.lua ├── recurrent-language-model.lua ├── recurrent-time-series.lua ├── recurrent-visual-attention.lua ├── sequence-to-one.lua ├── simple-bisequencer-network-variable.lua ├── simple-bisequencer-network.lua ├── simple-recurrence-network.lua ├── simple-recurrent-network.lua ├── simple-sequencer-network.lua └── twitter_sentiment_rnn.lua ├── init.lua ├── recursiveUtils.lua ├── rocks └── rnn-scm-1.rockspec ├── scripts ├── evaluate-rnnlm.lua └── evaluate-rva.lua └── test ├── CMakeLists.txt ├── GRU_test.lua ├── bigtest.lua ├── mnistsample.t7 ├── test.lua └── test_trimzero.lua /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | -------------------------------------------------------------------------------- /AbstractSequencer.lua: -------------------------------------------------------------------------------- 1 | local AbstractSequencer, parent = torch.class("nn.AbstractSequencer", "nn.Container") 2 | 3 | function AbstractSequencer:getStepModule(step) 4 | error"DEPRECATED 27 Oct 2015. Wrap your internal modules into a Recursor instead" 5 | end 6 | 7 | function AbstractSequencer:sharedClone(shareParams, shareGradParams, clones, pointers, stepClone) 8 | -- stepClone is ignored (always false, i.e. uses sharedClone) 9 | return parent.sharedClone(self, shareParams, shareGradParams, clones, pointers) 10 | end 11 | 12 | -- AbstractSequence handles its own rho internally (dynamically) 13 | function AbstractSequencer:maxBPTTstep(rho) 14 | end 15 | 16 | -- Toggle to feed long sequences using multiple forwards. 
17 | -- 'eval' only affects evaluation (recommended for RNNs) 18 | -- 'train' only affects training 19 | -- 'neither' affects neither training nor evaluation 20 | -- 'both' affects both training and evaluation (recommended for LSTMs) 21 | -- Essentially, forget() isn't called on rnn module when remember is on 22 | function AbstractSequencer:remember(remember) 23 | self._remember = (remember == nil) and 'both' or remember 24 | local _ = require 'moses' 25 | assert(_.contains({'both','eval','train','neither'}, self._remember), 26 | "AbstractSequencer : unrecognized value for remember : "..self._remember) 27 | return self 28 | end 29 | 30 | function AbstractSequencer:hasMemory() 31 | local _ = require 'moses' 32 | if (self.train ~= false) and _.contains({'both','train'}, self._remember) then -- train (defaults to nil...) 33 | return true 34 | elseif (self.train == false) and _.contains({'both','eval'}, self._remember) then -- evaluate 35 | return true 36 | else 37 | return false 38 | end 39 | end 40 | 41 | -------------------------------------------------------------------------------- /BiSequencer.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ BiSequencer ]]-- 3 | -- Encapsulates forward, backward and merge modules. 4 | -- Input is a sequence (a table) of tensors. 5 | -- Output is a sequence (a table) of tensors of the same length. 6 | -- Applies a forward rnn to each element in the sequence in 7 | -- forward order and applies a backward rnn in reverse order. 8 | -- For each step, the outputs of both rnn are merged together using 9 | -- the merge module (defaults to nn.JoinTable(1,1)). 10 | -- The sequences in a batch must have the same size. 11 | -- But the sequence length of each batch can vary. 12 | -- It is implemented by decorating a structure of modules that makes 13 | -- use of 3 Sequencers for the forward, backward and merge modules. 
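-- A minimal usage sketch (assumes `require 'rnn'`; module and batch sizes are illustrative):
--   local fwd = nn.LSTM(10, 10)
--   local brnn = nn.BiSequencer(fwd) -- bwd defaults to a reset() clone of fwd
--   local inputs = {torch.randn(8, 10), torch.randn(8, 10), torch.randn(8, 10)} -- 3 steps, batch of 8
--   local outputs = brnn:forward(inputs) -- table of 3 tensors, each 8 x 20 (fwd and bwd outputs joined)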
14 | ------------------------------------------------------------------------ 15 | local BiSequencer, parent = torch.class('nn.BiSequencer', 'nn.AbstractSequencer') 16 | 17 | function BiSequencer:__init(forward, backward, merge) 18 | 19 | if not torch.isTypeOf(forward, 'nn.Module') then 20 | error"BiSequencer: expecting nn.Module instance at arg 1" 21 | end 22 | self.forwardModule = forward 23 | 24 | self.backwardModule = backward 25 | if not self.backwardModule then 26 | self.backwardModule = forward:clone() 27 | self.backwardModule:reset() 28 | end 29 | if not torch.isTypeOf(self.backwardModule, 'nn.Module') then 30 | error"BiSequencer: expecting nn.Module instance at arg 2" 31 | end 32 | 33 | if torch.type(merge) == 'number' then 34 | self.mergeModule = nn.JoinTable(1, merge) 35 | elseif merge == nil then 36 | self.mergeModule = nn.JoinTable(1, 1) 37 | elseif torch.isTypeOf(merge, 'nn.Module') then 38 | self.mergeModule = merge 39 | else 40 | error"BiSequencer: expecting nn.Module or number instance at arg 3" 41 | end 42 | 43 | self.fwdSeq = nn.Sequencer(self.forwardModule) 44 | self.bwdSeq = nn.Sequencer(self.backwardModule) 45 | self.mergeSeq = nn.Sequencer(self.mergeModule) 46 | 47 | local backward = nn.Sequential() 48 | backward:add(nn.ReverseTable()) -- reverse 49 | backward:add(self.bwdSeq) 50 | backward:add(nn.ReverseTable()) -- unreverse 51 | 52 | local concat = nn.ConcatTable() 53 | concat:add(self.fwdSeq):add(backward) 54 | 55 | local brnn = nn.Sequential() 56 | brnn:add(concat) 57 | brnn:add(nn.ZipTable()) 58 | brnn:add(self.mergeSeq) 59 | 60 | parent.__init(self) 61 | 62 | self.output = {} 63 | self.gradInput = {} 64 | 65 | self.module = brnn 66 | -- so that it can be handled like a Container 67 | self.modules[1] = brnn 68 | end 69 | 70 | -- multiple-inheritance 71 | nn.Decorator.decorate(BiSequencer) 72 | -------------------------------------------------------------------------------- /BiSequencerLM.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ BiSequencerLM ]]-- 3 | -- Encapsulates forward, backward and merge modules. 4 | -- Input is a sequence (a table) of tensors. 5 | -- Output is a sequence (a table) of tensors of the same length. 6 | -- Applies a `fwd` rnn instance to the first `N-1` elements in the 7 | -- sequence in forward order. 8 | -- Applies the `bwd` rnn in reverse order to the last `N-1` elements 9 | -- (from second-to-last element to first element). 10 | -- Note : you shouldn't stack these for language modeling. 11 | -- Instead, stack each fwd/bwd seqs and encapsulate these. 
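-- A minimal usage sketch (assumes `require 'rnn'`; sizes are illustrative):
--   local fwd = nn.LSTM(10, 10)
--   local bwd = nn.LSTM(10, 10)
--   local blm = nn.BiSequencerLM(fwd, bwd) -- default merge is nn.JoinTable(1,1)
--   local inputs = {torch.randn(8, 10), torch.randn(8, 10), torch.randn(8, 10)} -- N = 3 steps, batch of 8
--   local outputs = blm:forward(inputs) -- 3 tensors of size 8 x 20; the fwd half of step 1 and the
--                                       -- bwd half of step N are zero-padded (see updateOutput below)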
12 | ------------------------------------------------------------------------ 13 | local _ = require 'moses' 14 | local BiSequencerLM, parent = torch.class('nn.BiSequencerLM', 'nn.AbstractSequencer') 15 | 16 | function BiSequencerLM:__init(forward, backward, merge) 17 | 18 | if not torch.isTypeOf(forward, 'nn.Module') then 19 | error"BiSequencerLM: expecting nn.Module instance at arg 1" 20 | end 21 | self.forwardModule = forward 22 | 23 | self.backwardModule = backward 24 | if not self.backwardModule then 25 | self.backwardModule = forward:clone() 26 | self.backwardModule:reset() 27 | end 28 | if not torch.isTypeOf(self.backwardModule, 'nn.Module') then 29 | error"BiSequencerLM: expecting nn.Module instance at arg 2" 30 | end 31 | 32 | if torch.type(merge) == 'number' then 33 | self.mergeModule = nn.JoinTable(1, merge) 34 | elseif merge == nil then 35 | self.mergeModule = nn.JoinTable(1, 1) 36 | elseif torch.isTypeOf(merge, 'nn.Module') then 37 | self.mergeModule = merge 38 | else 39 | error"BiSequencerLM: expecting nn.Module or number instance at arg 3" 40 | end 41 | 42 | if torch.isTypeOf(self.forwardModule, 'nn.AbstractRecurrent') then 43 | self.fwdSeq = nn.Sequencer(self.forwardModule) 44 | else -- assumes a nn.Sequencer or stack thereof 45 | self.fwdSeq = self.forwardModule 46 | end 47 | 48 | if torch.isTypeOf(self.backwardModule, 'nn.AbstractRecurrent') then 49 | self.bwdSeq = nn.Sequencer(self.backwardModule) 50 | else 51 | self.bwdSeq = self.backwardModule 52 | end 53 | self.mergeSeq = nn.Sequencer(self.mergeModule) 54 | 55 | self._fwd = self.fwdSeq 56 | 57 | self._bwd = nn.Sequential() 58 | self._bwd:add(nn.ReverseTable()) 59 | self._bwd:add(self.bwdSeq) 60 | self._bwd:add(nn.ReverseTable()) 61 | 62 | self._merge = nn.Sequential() 63 | self._merge:add(nn.ZipTable()) 64 | self._merge:add(self.mergeSeq) 65 | 66 | 67 | parent.__init(self) 68 | 69 | self.modules = {self._fwd, self._bwd, self._merge} 70 | 71 | self.output = {} 72 | self.gradInput = {} 73 | end 74 | 75 | function BiSequencerLM:updateOutput(input) 76 | assert(torch.type(input) == 'table', 'Expecting table at arg 1') 77 | local nStep = #input 78 | assert(nStep > 1, "Expecting at least 2 elements in table") 79 | 80 | -- forward through fwd and bwd rnn in fwd and reverse order 81 | self._fwdOutput = self._fwd:updateOutput(_.first(input, nStep - 1)) 82 | self._bwdOutput = self._bwd:updateOutput(_.last(input, nStep - 1)) 83 | 84 | -- empty outputs 85 | for k,v in ipairs(self.output) do self.output[k] = nil end 86 | 87 | -- padding for first and last elements of fwd and bwd outputs, respectively 88 | self._firstStep = nn.rnn.recursiveResizeAs(self._firstStep, self._fwdOutput[1]) 89 | nn.rnn.recursiveFill(self._firstStep, 0) 90 | self._lastStep = nn.rnn.recursiveResizeAs(self._lastStep, self._bwdOutput[1]) 91 | nn.rnn.recursiveFill(self._lastStep, 0) 92 | 93 | -- { { zeros, fwd1, fwd2, ..., fwdN}, {bwd1, bwd2, ..., bwdN, zeros} } 94 | self._mergeInput = {_.clone(self._fwdOutput), _.clone(self._bwdOutput)} 95 | table.insert(self._mergeInput[1], 1, self._firstStep) 96 | table.insert(self._mergeInput[2], self._lastStep) 97 | assert(#self._mergeInput[1] == #self._mergeInput[2]) 98 | 99 | self.output = self._merge:updateOutput(self._mergeInput) 100 | 101 | return self.output 102 | end 103 | 104 | function BiSequencerLM:updateGradInput(input, gradOutput) 105 | local nStep = #input 106 | 107 | self._mergeGradInput = self._merge:updateGradInput(self._mergeInput, gradOutput) 108 | self._fwdGradInput = 
self._fwd:updateGradInput(_.first(input, nStep - 1), _.last(self._mergeGradInput[1], nStep - 1)) 109 | self._bwdGradInput = self._bwd:updateGradInput(_.last(input, nStep - 1), _.first(self._mergeGradInput[2], nStep - 1)) 110 | 111 | -- add fwd rnn gradInputs to bwd rnn gradInputs 112 | for i=1,nStep do 113 | if i == 1 then 114 | self.gradInput[1] = self._fwdGradInput[1] 115 | elseif i == nStep then 116 | self.gradInput[nStep] = self._bwdGradInput[nStep-1] 117 | else 118 | self.gradInput[i] = nn.rnn.recursiveCopy(self.gradInput[i], self._fwdGradInput[i]) 119 | nn.rnn.recursiveAdd(self.gradInput[i], self._bwdGradInput[i-1]) 120 | end 121 | end 122 | 123 | return self.gradInput 124 | end 125 | 126 | function BiSequencerLM:accGradParameters(input, gradOutput, scale) 127 | local nStep = #input 128 | 129 | self._merge:accGradParameters(self._mergeInput, gradOutput, scale) 130 | self._fwd:accGradParameters(_.first(input, nStep - 1), _.last(self._mergeGradInput[1], nStep - 1), scale) 131 | self._bwd:accGradParameters(_.last(input, nStep - 1), _.first(self._mergeGradInput[2], nStep - 1), scale) 132 | end 133 | 134 | function BiSequencerLM:accUpdateGradParameters(input, gradOutput, lr) 135 | local nStep = #input 136 | 137 | self._merge:accUpdateGradParameters(self._mergeInput, gradOutput, lr) 138 | self._fwd:accUpdateGradParameters(_.first(input, nStep - 1), _.last(self._mergeGradInput[1], nStep - 1), lr) 139 | self._bwd:accUpdateGradParameters(_.last(input, nStep - 1), _.first(self._mergeGradInput[2], nStep - 1), lr) 140 | end 141 | 142 | function BiSequencerLM:__tostring__() 143 | local tab = ' ' 144 | local line = '\n' 145 | local ext = ' | ' 146 | local extlast = ' ' 147 | local last = ' ... -> ' 148 | local str = torch.type(self) 149 | str = str .. ' {' 150 | str = str .. line .. tab .. '( fwd ): ' .. tostring(self._fwd):gsub(line, line .. tab .. ext) 151 | str = str .. line .. tab .. '( bwd ): ' .. tostring(self._bwd):gsub(line, line .. tab .. ext) 152 | str = str .. line .. tab .. '( merge ): ' .. tostring(self._merge):gsub(line, line .. tab .. ext) 153 | str = str .. line .. 
'}' 154 | return str 155 | end 156 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) 3 | CMAKE_POLICY(VERSION 2.6) 4 | IF(LUAROCKS_PREFIX) 5 | MESSAGE(STATUS "Installing Torch through Luarocks") 6 | STRING(REGEX REPLACE "(.*)lib/luarocks/rocks.*" "\\1" CMAKE_INSTALL_PREFIX "${LUAROCKS_PREFIX}") 7 | MESSAGE(STATUS "Prefix inferred from Luarocks: ${CMAKE_INSTALL_PREFIX}") 8 | ENDIF() 9 | FIND_PACKAGE(Torch REQUIRED) 10 | 11 | SET(src) 12 | FILE(GLOB luasrc *.lua) 13 | SET(luasrc ${luasrc}) 14 | ADD_SUBDIRECTORY(test) 15 | ADD_TORCH_PACKAGE(rnn "${src}" "${luasrc}" "Recurrent Neural Networks") 16 | -------------------------------------------------------------------------------- /CopyGrad.lua: -------------------------------------------------------------------------------- 1 | local CopyGrad, _ = torch.class('nn.CopyGrad', 'nn.Identity') 2 | 3 | function CopyGrad:updateGradInput(input, gradOutput) 4 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 5 | return self.gradInput 6 | end 7 | -------------------------------------------------------------------------------- /Dropout.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Dropout ]]-- 3 | 4 | -- Implementation of Lazy Dropout. 5 | -- `lazy` option is used to to only resample after backward is called. 6 | -- This mechanism is used by Bayesian GRUs to use the same dropout mask 7 | -- for each sequence, not for each word. 8 | -- See GRU part in README.md (Ref. E & F) 9 | ------------------------------------------------------------------------ 10 | local Dropout, Parent = nn.Dropout, nn.Module 11 | 12 | function Dropout:__init(p,v1,inplace,lazy,mono) 13 | Parent.__init(self) 14 | self.p = p or 0.5 15 | self.train = true 16 | self.inplace = inplace 17 | self.lazy = lazy or false 18 | self.mono = mono or false -- used by trimZero, single sample for a batch 19 | self.flag = true -- used by lazy noise 20 | -- version 2 scales output during training instead of evaluation 21 | self.v2 = not v1 22 | if self.p >= 1 or self.p < 0 then 23 | error(' illegal percentage, must be 0 <= p < 1') 24 | end 25 | self.noise = torch.Tensor() 26 | end 27 | 28 | function Dropout:updateOutput(input) 29 | if self.inplace then 30 | self.output = input 31 | else 32 | self.output:resizeAs(input):copy(input) 33 | end 34 | if self.p > 0 then 35 | if self.train then 36 | if not self.lazy or self.flag then 37 | local noiseSize = input:size() 38 | if self.mono then noiseSize[1] = 1 end 39 | self.noise:resize(noiseSize) 40 | self.noise:bernoulli(1-self.p) 41 | if self.v2 then 42 | self.noise:div(1-self.p) 43 | end 44 | self.flag = false 45 | end 46 | if self.mono and self.noise:size(1) ~= input:size(1) then 47 | self.noise = self.noise:narrow(1,1,1):expandAs(input) 48 | end 49 | self.output:cmul(self.noise) 50 | elseif not self.v2 then 51 | self.output:mul(1-self.p) 52 | end 53 | end 54 | return self.output 55 | end 56 | 57 | function Dropout:updateGradInput(input, gradOutput) 58 | if self.lazy then 59 | self.flag = true 60 | end 61 | if self.train then 62 | if self.inplace then 63 | self.gradInput = gradOutput 64 | else 65 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 66 | end 67 | if self.p > 0 then 68 | self.gradInput:cmul(self.noise) -- simply mask the 
gradients with the noise vector 69 | end 70 | else 71 | if self.inplace then 72 | self.gradInput = gradOutput 73 | else 74 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 75 | end 76 | if not self.v2 and self.p > 0 then 77 | self.gradInput:cdiv(1-self.p) 78 | end 79 | end 80 | return self.gradInput 81 | end 82 | 83 | function Dropout:__tostring__() 84 | return string.format('%s(%.1f, %s)', torch.type(self), self.p, self.lazy and 'lazy' or 'busy') 85 | end 86 | 87 | function Dropout:clearState() 88 | if self.noise then 89 | self.noise:set() 90 | end 91 | self.flag = true 92 | return Parent.clearState(self) 93 | end 94 | -------------------------------------------------------------------------------- /ExpandAs.lua: -------------------------------------------------------------------------------- 1 | local ExpandAs, parent = torch.class('nn.ExpandAs', 'nn.Module') 2 | -- expands the second input to match the first 3 | 4 | function ExpandAs:__init() 5 | parent.__init(self) 6 | self.output = {} 7 | self.gradInput = {} 8 | 9 | self.sum1 = torch.Tensor() 10 | self.sum2 = torch.Tensor() 11 | end 12 | 13 | function ExpandAs:updateOutput(input) 14 | self.output[1] = input[1] 15 | self.output[2] = input[2]:expandAs(input[1]) 16 | return self.output 17 | end 18 | 19 | function ExpandAs:updateGradInput(input, gradOutput) 20 | local b, db = input[2], gradOutput[2] 21 | local s1, s2 = self.sum1, self.sum2 22 | local sumSrc, sumDst = db, s1 23 | 24 | for i=1,b:dim() do 25 | if b:size(i) ~= db:size(i) then 26 | sumDst:sum(sumSrc, i) 27 | sumSrc = sumSrc == s1 and s2 or s1 28 | sumDst = sumDst == s1 and s2 or s1 29 | end 30 | end 31 | 32 | self.gradInput[1] = gradOutput[1] 33 | self.gradInput[2] = sumSrc 34 | 35 | return self.gradInput 36 | end 37 | -------------------------------------------------------------------------------- /FastLSTM.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ LSTM ]]-- 3 | -- Long Short Term Memory architecture. 4 | -- Ref. A.: http://arxiv.org/pdf/1303.5778v1 (blueprint for this module) 5 | -- B. http://web.eecs.utk.edu/~itamar/courses/ECE-692/Bobby_paper1.pdf 6 | -- C. http://arxiv.org/pdf/1503.04069v1.pdf 7 | -- D. https://github.com/wojzaremba/lstm 8 | -- Expects 1D or 2D input. 9 | -- The first input in sequence uses zero value for cell and hidden state 10 | 11 | -- For p > 0, it becomes Bayesian GRUs [Gal, 2015]. 12 | -- In this case, please do not dropout on input as BGRUs handle the input with 13 | -- its own dropouts. First, try 0.25 for p as Gal (2016) suggested, 14 | -- presumably, because of summations of two parts in GRUs connections. 
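-- A minimal usage sketch (assumes `require 'rnn'`; sizes are illustrative):
--   local lstm = nn.FastLSTM(100, 50) -- inputSize -> outputSize; all 4 gates computed in one go
--   local rnn = nn.Sequencer(lstm) -- feed it a table of time-steps
--   local outputs = rnn:forward({torch.randn(8, 100), torch.randn(8, 100)}) -- 2 steps, batch of 8
-- Setting nn.FastLSTM.usenngraph = true before construction builds the nngraph version
-- (the flag is defined just below).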
15 | ------------------------------------------------------------------------ 16 | local FastLSTM, parent = torch.class("nn.FastLSTM", "nn.LSTM") 17 | 18 | -- set this to true to have it use nngraph instead of nn 19 | -- setting this to true can make your next FastLSTM significantly faster 20 | FastLSTM.usenngraph = false 21 | FastLSTM.bn = false 22 | 23 | function FastLSTM:__init(inputSize, outputSize, rho, eps, momentum, affine, p, mono) 24 | -- when FastLSTM.bn=true, the default values of eps and momentum are set by nn.BatchNormalization 25 | self.eps = eps 26 | self.momentum = momentum 27 | self.affine = affine == nil and true or affine 28 | self.p = p or 0 29 | if p and p ~= 0 then 30 | assert(nn.Dropout(p,false,false,true).lazy, 'only work with Lazy Dropout!') 31 | end 32 | self.mono = mono or false 33 | 34 | parent.__init(self, inputSize, outputSize, rho, nil, p, mono) 35 | end 36 | 37 | function FastLSTM:buildModel() 38 | -- input : {input, prevOutput, prevCell} 39 | -- output : {output, cell} 40 | 41 | -- Calculate all four gates in one go : input, hidden, forget, output 42 | if self.p ~= 0 then 43 | self.i2g = nn.Sequential() 44 | :add(nn.ConcatTable() 45 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 46 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 47 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 48 | :add(nn.Dropout(self.p,false,false,true,self.mono))) 49 | :add(nn.ParallelTable() 50 | :add(nn.Linear(self.inputSize, self.outputSize)) 51 | :add(nn.Linear(self.inputSize, self.outputSize)) 52 | :add(nn.Linear(self.inputSize, self.outputSize)) 53 | :add(nn.Linear(self.inputSize, self.outputSize))) 54 | :add(nn.JoinTable(2)) 55 | self.o2g = nn.Sequential() 56 | :add(nn.ConcatTable() 57 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 58 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 59 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 60 | :add(nn.Dropout(self.p,false,false,true,self.mono))) 61 | :add(nn.ParallelTable() 62 | :add(nn.LinearNoBias(self.outputSize, self.outputSize)) 63 | :add(nn.LinearNoBias(self.outputSize, self.outputSize)) 64 | :add(nn.LinearNoBias(self.outputSize, self.outputSize)) 65 | :add(nn.LinearNoBias(self.outputSize, self.outputSize))) 66 | :add(nn.JoinTable(2)) 67 | else 68 | self.i2g = nn.Linear(self.inputSize, 4*self.outputSize) 69 | self.o2g = nn.LinearNoBias(self.outputSize, 4*self.outputSize) 70 | end 71 | 72 | if self.usenngraph or self.bn then 73 | require 'nngraph' 74 | return self:nngraphModel() 75 | end 76 | 77 | local para = nn.ParallelTable():add(self.i2g):add(self.o2g) 78 | local gates = nn.Sequential() 79 | gates:add(nn.NarrowTable(1,2)) 80 | gates:add(para) 81 | gates:add(nn.CAddTable()) 82 | 83 | -- Reshape to (batch_size, n_gates, hid_size) 84 | -- Then slize the n_gates dimension, i.e dimension 2 85 | gates:add(nn.Reshape(4,self.outputSize)) 86 | gates:add(nn.SplitTable(1,2)) 87 | local transfer = nn.ParallelTable() 88 | transfer:add(nn.Sigmoid()):add(nn.Tanh()):add(nn.Sigmoid()):add(nn.Sigmoid()) 89 | gates:add(transfer) 90 | 91 | local concat = nn.ConcatTable() 92 | concat:add(gates):add(nn.SelectTable(3)) 93 | local seq = nn.Sequential() 94 | seq:add(concat) 95 | seq:add(nn.FlattenTable()) -- input, hidden, forget, output, cell 96 | 97 | -- input gate * hidden state 98 | local hidden = nn.Sequential() 99 | hidden:add(nn.NarrowTable(1,2)) 100 | hidden:add(nn.CMulTable()) 101 | 102 | -- forget gate * cell 103 | local cell = nn.Sequential() 104 | local concat = nn.ConcatTable() 105 | 
concat:add(nn.SelectTable(3)):add(nn.SelectTable(5)) 106 | cell:add(concat) 107 | cell:add(nn.CMulTable()) 108 | 109 | local nextCell = nn.Sequential() 110 | local concat = nn.ConcatTable() 111 | concat:add(hidden):add(cell) 112 | nextCell:add(concat) 113 | nextCell:add(nn.CAddTable()) 114 | 115 | local concat = nn.ConcatTable() 116 | concat:add(nextCell):add(nn.SelectTable(4)) 117 | seq:add(concat) 118 | seq:add(nn.FlattenTable()) -- nextCell, outputGate 119 | 120 | local cellAct = nn.Sequential() 121 | cellAct:add(nn.SelectTable(1)) 122 | cellAct:add(nn.Tanh()) 123 | local concat = nn.ConcatTable() 124 | concat:add(cellAct):add(nn.SelectTable(2)) 125 | local output = nn.Sequential() 126 | output:add(concat) 127 | output:add(nn.CMulTable()) 128 | 129 | local concat = nn.ConcatTable() 130 | concat:add(output):add(nn.SelectTable(1)) 131 | seq:add(concat) 132 | 133 | return seq 134 | end 135 | 136 | function FastLSTM:nngraphModel() 137 | assert(nngraph, "Missing nngraph package") 138 | 139 | local inputs = {} 140 | table.insert(inputs, nn.Identity()()) -- x 141 | table.insert(inputs, nn.Identity()()) -- prev_h[L] 142 | table.insert(inputs, nn.Identity()()) -- prev_c[L] 143 | 144 | local x, prev_h, prev_c = unpack(inputs) 145 | 146 | local bn_wx, bn_wh, bn_c 147 | local i2h, h2h 148 | if self.bn then 149 | -- apply recurrent batch normalization 150 | -- http://arxiv.org/pdf/1502.03167v3.pdf 151 | -- normalize recurrent terms W_h*h_{t-1} and W_x*x_t separately 152 | -- Olalekan Ogunmolu 153 | 154 | bn_wx = nn.BatchNormalization(4*self.outputSize, self.eps, self.momentum, self.affine) 155 | bn_wh = nn.BatchNormalization(4*self.outputSize, self.eps, self.momentum, self.affine) 156 | bn_c = nn.BatchNormalization(self.outputSize, self.eps, self.momentum, self.affine) 157 | 158 | -- initialize gamma (the weight) to the recommended value 159 | -- (https://github.com/torch/nn/blob/master/lib/THNN/generic/BatchNormalization.c#L61) 160 | bn_wx.weight:fill(0.1) 161 | bn_wh.weight:fill(0.1) 162 | bn_c.weight:fill(0.1) 163 | 164 | -- evaluate the input sums at once for efficiency 165 | i2h = bn_wx(self.i2g(x):annotate{name='i2h'}):annotate {name='bn_wx'} 166 | h2h = bn_wh(self.o2g(prev_h):annotate{name='h2h'}):annotate {name = 'bn_wh'} 167 | 168 | -- add bias after BN as per paper 169 | h2h = nn.Add(4*self.outputSize)(h2h) 170 | else 171 | -- evaluate the input sums at once for efficiency 172 | i2h = self.i2g(x):annotate{name='i2h'} 173 | h2h = self.o2g(prev_h):annotate{name='h2h'} 174 | end 175 | local all_input_sums = nn.CAddTable()({i2h, h2h}) 176 | 177 | local reshaped = nn.Reshape(4, self.outputSize)(all_input_sums) 178 | -- input, hidden, forget, output 179 | local n1, n2, n3, n4 = nn.SplitTable(2)(reshaped):split(4) 180 | local in_gate = nn.Sigmoid()(n1) 181 | local in_transform = nn.Tanh()(n2) 182 | local forget_gate = nn.Sigmoid()(n3) 183 | local out_gate = nn.Sigmoid()(n4) 184 | 185 | -- perform the LSTM update 186 | local next_c = nn.CAddTable()({ 187 | nn.CMulTable()({forget_gate, prev_c}), 188 | nn.CMulTable()({in_gate, in_transform}) 189 | }) 190 | local next_h 191 | if self.bn then 192 | -- gated cells form the output 193 | next_h = nn.CMulTable()({out_gate, nn.Tanh()(bn_c(next_c):annotate {name = 'bn_c'}) }) 194 | else 195 | -- gated cells form the output 196 | next_h = nn.CMulTable()({out_gate, nn.Tanh()(next_c)}) 197 | end 198 | 199 | local outputs = {next_h, next_c} 200 | 201 | nngraph.annotateNodes() 202 | 203 | return nn.gModule(inputs, outputs) 204 | end 205 | 206 | function 
FastLSTM:buildGate() 207 | error"Not Implemented" 208 | end 209 | 210 | function FastLSTM:buildInputGate() 211 | error"Not Implemented" 212 | end 213 | 214 | function FastLSTM:buildForgetGate() 215 | error"Not Implemented" 216 | end 217 | 218 | function FastLSTM:buildHidden() 219 | error"Not Implemented" 220 | end 221 | 222 | function FastLSTM:buildCell() 223 | error"Not Implemented" 224 | end 225 | 226 | function FastLSTM:buildOutputGate() 227 | error"Not Implemented" 228 | end 229 | -------------------------------------------------------------------------------- /GRU.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ GRU ]]-- 3 | -- Author: Jin-Hwa Kim 4 | -- License: LICENSE.2nd.txt 5 | 6 | -- Gated Recurrent Units architecture. 7 | -- http://www.wildml.com/2015/10/recurrent-neural-network-tutorial-part-4-implementing-a-grulstm-rnn-with-python-and-theano/ 8 | -- Expects 1D or 2D input. 9 | -- The first input in sequence uses zero value for cell and hidden state 10 | -- 11 | -- For p > 0, it becomes Bayesian GRUs [Moon et al., 2015; Gal, 2015]. 12 | -- In this case, please do not dropout on input as BGRUs handle the input with 13 | -- its own dropouts. First, try 0.25 for p as Gal (2016) suggested, presumably, 14 | -- because of summations of two parts in GRUs connections. 15 | ------------------------------------------------------------------------ 16 | local GRU, parent = torch.class('nn.GRU', 'nn.AbstractRecurrent') 17 | 18 | function GRU:__init(inputSize, outputSize, rho, p, mono) 19 | parent.__init(self, rho or 9999) 20 | self.p = p or 0 21 | if p and p ~= 0 then 22 | assert(nn.Dropout(p,false,false,true).lazy, 'only work with Lazy Dropout!') 23 | end 24 | self.mono = mono or false 25 | self.inputSize = inputSize 26 | self.outputSize = outputSize 27 | -- build the model 28 | self.recurrentModule = self:buildModel() 29 | -- make it work with nn.Container 30 | self.modules[1] = self.recurrentModule 31 | self.sharedClones[1] = self.recurrentModule 32 | 33 | -- for output(0), cell(0) and gradCell(T) 34 | self.zeroTensor = torch.Tensor() 35 | 36 | self.cells = {} 37 | self.gradCells = {} 38 | end 39 | 40 | -------------------------- factory methods ----------------------------- 41 | function GRU:buildModel() 42 | -- input : {input, prevOutput} 43 | -- output : {output} 44 | 45 | -- Calculate all four gates in one go : input, hidden, forget, output 46 | if self.p ~= 0 then 47 | self.i2g = nn.Sequential() 48 | :add(nn.ConcatTable() 49 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 50 | :add(nn.Dropout(self.p,false,false,true,self.mono))) 51 | :add(nn.ParallelTable() 52 | :add(nn.Linear(self.inputSize, self.outputSize)) 53 | :add(nn.Linear(self.inputSize, self.outputSize))) 54 | :add(nn.JoinTable(2)) 55 | self.o2g = nn.Sequential() 56 | :add(nn.ConcatTable() 57 | :add(nn.Dropout(self.p,false,false,true,self.mono)) 58 | :add(nn.Dropout(self.p,false,false,true,self.mono))) 59 | :add(nn.ParallelTable() 60 | :add(nn.LinearNoBias(self.outputSize, self.outputSize)) 61 | :add(nn.LinearNoBias(self.outputSize, self.outputSize))) 62 | :add(nn.JoinTable(2)) 63 | else 64 | self.i2g = nn.Linear(self.inputSize, 2*self.outputSize) 65 | self.o2g = nn.LinearNoBias(self.outputSize, 2*self.outputSize) 66 | end 67 | 68 | local para = nn.ParallelTable():add(self.i2g):add(self.o2g) 69 | local gates = nn.Sequential() 70 | gates:add(para) 71 | gates:add(nn.CAddTable()) 72 | 73 | -- 
Reshape to (batch_size, n_gates, hid_size) 74 | -- Then slize the n_gates dimension, i.e dimension 2 75 | gates:add(nn.Reshape(2,self.outputSize)) 76 | gates:add(nn.SplitTable(1,2)) 77 | local transfer = nn.ParallelTable() 78 | transfer:add(nn.Sigmoid()):add(nn.Sigmoid()) 79 | gates:add(transfer) 80 | 81 | local concat = nn.ConcatTable():add(nn.Identity()):add(gates) 82 | local seq = nn.Sequential() 83 | seq:add(concat) 84 | seq:add(nn.FlattenTable()) -- x(t), s(t-1), r, z 85 | 86 | -- Rearrange to x(t), s(t-1), r, z, s(t-1) 87 | local concat = nn.ConcatTable() -- 88 | concat:add(nn.NarrowTable(1,4)):add(nn.SelectTable(2)) 89 | seq:add(concat):add(nn.FlattenTable()) 90 | 91 | -- h 92 | local hidden = nn.Sequential() 93 | local concat = nn.ConcatTable() 94 | local t1 = nn.Sequential() 95 | t1:add(nn.SelectTable(1)) 96 | local t2 = nn.Sequential() 97 | t2:add(nn.NarrowTable(2,2)):add(nn.CMulTable()) 98 | if self.p ~= 0 then 99 | t1:add(nn.Dropout(self.p,false,false,true,self.mono)) 100 | t2:add(nn.Dropout(self.p,false,false,true,self.mono)) 101 | end 102 | t1:add(nn.Linear(self.inputSize, self.outputSize)) 103 | t2:add(nn.LinearNoBias(self.outputSize, self.outputSize)) 104 | 105 | concat:add(t1):add(t2) 106 | hidden:add(concat):add(nn.CAddTable()):add(nn.Tanh()) 107 | 108 | local z1 = nn.Sequential() 109 | z1:add(nn.SelectTable(4)) 110 | z1:add(nn.SAdd(-1, true)) -- Scalar add & negation 111 | 112 | local z2 = nn.Sequential() 113 | z2:add(nn.NarrowTable(4,2)) 114 | z2:add(nn.CMulTable()) 115 | 116 | local o1 = nn.Sequential() 117 | local concat = nn.ConcatTable() 118 | concat:add(hidden):add(z1) 119 | o1:add(concat):add(nn.CMulTable()) 120 | 121 | local o2 = nn.Sequential() 122 | local concat = nn.ConcatTable() 123 | concat:add(o1):add(z2) 124 | o2:add(concat):add(nn.CAddTable()) 125 | 126 | seq:add(o2) 127 | 128 | return seq 129 | end 130 | 131 | function GRU:getHiddenState(step, input) 132 | local prevOutput 133 | if step == 0 then 134 | prevOutput = self.userPrevOutput or self.outputs[step] or self.zeroTensor 135 | if input then 136 | if input:dim() == 2 then 137 | self.zeroTensor:resize(input:size(1), self.outputSize):zero() 138 | else 139 | self.zeroTensor:resize(self.outputSize):zero() 140 | end 141 | end 142 | else 143 | -- previous output and cell of this module 144 | prevOutput = self.outputs[step] 145 | end 146 | return prevOutput 147 | end 148 | 149 | 150 | function GRU:setHiddenState(step, hiddenState) 151 | assert(torch.isTensor(hiddenState)) 152 | self.outputs[step] = hiddenState 153 | end 154 | 155 | ------------------------- forward backward ----------------------------- 156 | function GRU:updateOutput(input) 157 | local prevOutput = self:getHiddenState(self.step-1, input) 158 | 159 | -- output(t) = gru{input(t), output(t-1)} 160 | local output 161 | if self.train ~= false then 162 | self:recycle() 163 | local recurrentModule = self:getStepModule(self.step) 164 | -- the actual forward propagation 165 | output = recurrentModule:updateOutput{input, prevOutput} 166 | else 167 | output = self.recurrentModule:updateOutput{input, prevOutput} 168 | end 169 | 170 | self.outputs[self.step] = output 171 | 172 | self.output = output 173 | 174 | self.step = self.step + 1 175 | self.gradPrevOutput = nil 176 | self.updateGradInputStep = nil 177 | self.accGradParametersStep = nil 178 | -- note that we don't return the cell, just the output 179 | return self.output 180 | end 181 | 182 | 183 | function GRU:getGradHiddenState(step) 184 | local gradOutput 185 | if step == self.step-1 then 186 | 
gradOutput = self.userNextGradOutput or self.gradOutputs[step] or self.zeroTensor 187 | else 188 | gradOutput = self.gradOutputs[step] 189 | end 190 | return gradOutput 191 | end 192 | 193 | function GRU:setGradHiddenState(step, gradHiddenState) 194 | assert(torch.isTensor(gradHiddenState)) 195 | self.gradOutputs[step] = gradHiddenState 196 | end 197 | 198 | function GRU:_updateGradInput(input, gradOutput) 199 | assert(self.step > 1, "expecting at least one updateOutput") 200 | local step = self.updateGradInputStep - 1 201 | assert(step >= 1) 202 | 203 | -- set the output/gradOutput states of current Module 204 | local recurrentModule = self:getStepModule(step) 205 | 206 | -- backward propagate through this step 207 | local _gradOutput = self:getGradHiddenState(step) 208 | assert(_gradOutput) 209 | self._gradOutputs[step] = nn.rnn.recursiveCopy(self._gradOutputs[step], _gradOutput) 210 | nn.rnn.recursiveAdd(self._gradOutputs[step], gradOutput) 211 | gradOutput = self._gradOutputs[step] 212 | 213 | local gradInputTable = recurrentModule:updateGradInput({input, self:getHiddenState(step-1)}, gradOutput) 214 | 215 | self:setGradHiddenState(step-1, gradInputTable[2]) 216 | 217 | return gradInputTable[1] 218 | end 219 | 220 | function GRU:_accGradParameters(input, gradOutput, scale) 221 | local step = self.accGradParametersStep - 1 222 | assert(step >= 1) 223 | 224 | -- set the output/gradOutput states of current Module 225 | local recurrentModule = self:getStepModule(step) 226 | 227 | -- backward propagate through this step 228 | local gradOutput = self._gradOutputs[step] or self:getGradHiddenState(step) 229 | recurrentModule:accGradParameters({input, self:getHiddenState(step-1)}, gradOutput, scale) 230 | end 231 | 232 | function GRU:__tostring__() 233 | return string.format('%s(%d -> %d, %.2f)', torch.type(self), self.inputSize, self.outputSize, self.p) 234 | end 235 | 236 | -- migrate GRUs params to BGRUs params 237 | function GRU:migrate(params) 238 | local _params = self:parameters() 239 | assert(self.p ~= 0, 'only support for BGRUs.') 240 | assert(#params == 6, '# of source params should be 6.') 241 | assert(#_params == 9, '# of destination params should be 9.') 242 | _params[1]:copy(params[1]:narrow(1,1,self.outputSize)) 243 | _params[2]:copy(params[2]:narrow(1,1,self.outputSize)) 244 | _params[3]:copy(params[1]:narrow(1,self.outputSize+1,self.outputSize)) 245 | _params[4]:copy(params[2]:narrow(1,self.outputSize+1,self.outputSize)) 246 | _params[5]:copy(params[3]:narrow(1,1,self.outputSize)) 247 | _params[6]:copy(params[3]:narrow(1,self.outputSize+1,self.outputSize)) 248 | _params[7]:copy(params[4]) 249 | _params[8]:copy(params[5]) 250 | _params[9]:copy(params[6]) 251 | end 252 | -------------------------------------------------------------------------------- /LICENSE.2nd.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 NAVER Corp. and Seoul National University R&DB Foundation 2 | All rights reserved. 3 | 4 | Author: jnhwkim@snu.ac.kr (Jin-Hwa Kim) 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. 
Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither the names of NAVER Corp. and Seoul National University R&DB 17 | Foundation nor the names of its contributors may be used to endorse or 18 | promote products derived from this software without specific prior 19 | written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 25 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2016 Element Inc (Nicholas Leonard) 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | 3. Neither the names of Element Inc. nor the names of its contributors may be 16 | used to endorse or promote products derived from this software without 17 | specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /LinearNoBias.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ LinearNoBias ]]-- 3 | -- Subclass of nn.Linear with no bias term 4 | ------------------------------------------------------------------------ 5 | nn = require 'nn' 6 | local LinearNoBias, Linear = torch.class('nn.LinearNoBias', 'nn.Linear') 7 | 8 | function LinearNoBias:__init(inputSize, outputSize) 9 | nn.Module.__init(self) 10 | 11 | self.weight = torch.Tensor(outputSize, inputSize) 12 | self.gradWeight = torch.Tensor(outputSize, inputSize) 13 | 14 | self:reset() 15 | end 16 | 17 | function LinearNoBias:reset(stdv) 18 | if stdv then 19 | stdv = stdv * math.sqrt(3) 20 | else 21 | stdv = 1./math.sqrt(self.weight:size(2)) 22 | end 23 | if nn.oldSeed then 24 | for i=1,self.weight:size(1) do 25 | self.weight:select(1, i):apply(function() 26 | return torch.uniform(-stdv, stdv) 27 | end) 28 | end 29 | else 30 | self.weight:uniform(-stdv, stdv) 31 | end 32 | 33 | return self 34 | end 35 | 36 | function LinearNoBias:updateOutput(input) 37 | if input:dim() == 1 then 38 | self.output:resize(self.weight:size(1)) 39 | self.output:mv(self.weight, input) 40 | elseif input:dim() == 2 then 41 | local nframe = input:size(1) 42 | local nElement = self.output:nElement() 43 | self.output:resize(nframe, self.weight:size(1)) 44 | if self.output:nElement() ~= nElement then 45 | self.output:zero() 46 | end 47 | if not self.addBuffer or self.addBuffer:nElement() ~= nframe then 48 | self.addBuffer = input.new(nframe):fill(1) 49 | end 50 | self.output:addmm(0, self.output, 1, input, self.weight:t()) 51 | else 52 | error('input must be vector or matrix') 53 | end 54 | 55 | return self.output 56 | end 57 | 58 | function LinearNoBias:accGradParameters(input, gradOutput, scale) 59 | scale = scale or 1 60 | if input:dim() == 1 then 61 | self.gradWeight:addr(scale, gradOutput, input) 62 | elseif input:dim() == 2 then 63 | self.gradWeight:addmm(scale, gradOutput:t(), input) 64 | end 65 | end 66 | -------------------------------------------------------------------------------- /LookupTableMaskZero.lua: -------------------------------------------------------------------------------- 1 | local LookupTableMaskZero, parent = torch.class('nn.LookupTableMaskZero', 'nn.LookupTable') 2 | 3 | function LookupTableMaskZero:__init(nIndex, nOutput) 4 | parent.__init(self, nIndex + 1, nOutput) 5 | end 6 | 7 | function LookupTableMaskZero:updateOutput(input) 8 | self.weight[1]:zero() 9 | if self.__input and (torch.type(self.__input) ~= torch.type(input)) then 10 | self.__input = nil -- fixes old casting bug 11 | end 12 | self.__input = self.__input or input.new() 13 | self.__input:resizeAs(input):add(input, 1) 14 | return parent.updateOutput(self, self.__input) 15 | end 16 | 17 | function LookupTableMaskZero:accGradParameters(input, gradOutput, scale) 18 | parent.accGradParameters(self, self.__input, gradOutput, scale) 19 | end 20 | 21 | function LookupTableMaskZero:type(type, cache) 22 | self.__input = nil 23 | return parent.type(self, type, cache) 24 | end 25 | -------------------------------------------------------------------------------- /MaskZero.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ MaskZero ]]-- 3 | -- Decorator that zeroes the 
output rows of the encapsulated module 4 | -- for commensurate input rows which are tensors of zeros 5 | ------------------------------------------------------------------------ 6 | local MaskZero, parent = torch.class("nn.MaskZero", "nn.Decorator") 7 | 8 | function MaskZero:__init(module, nInputDim, silent) 9 | parent.__init(self, module) 10 | assert(torch.isTypeOf(module, 'nn.Module')) 11 | if torch.isTypeOf(module, 'nn.AbstractRecurrent') and not silent then 12 | print("Warning : you are most likely using MaskZero the wrong way. " 13 | .."You should probably use AbstractRecurrent:maskZero() so that " 14 | .."it wraps the internal AbstractRecurrent.recurrentModule instead of " 15 | .."wrapping the AbstractRecurrent module itself.") 16 | end 17 | assert(torch.type(nInputDim) == 'number', 'Expecting nInputDim number at arg 1') 18 | self.nInputDim = nInputDim 19 | end 20 | 21 | function MaskZero:recursiveGetFirst(input) 22 | if torch.type(input) == 'table' then 23 | return self:recursiveGetFirst(input[1]) 24 | else 25 | assert(torch.isTensor(input)) 26 | return input 27 | end 28 | end 29 | 30 | function MaskZero:recursiveMask(output, input, mask) 31 | if torch.type(input) == 'table' then 32 | output = torch.type(output) == 'table' and output or {} 33 | for k,v in ipairs(input) do 34 | output[k] = self:recursiveMask(output[k], v, mask) 35 | end 36 | else 37 | assert(torch.isTensor(input)) 38 | output = torch.isTensor(output) and output or input.new() 39 | 40 | -- make sure mask has the same dimension as the input tensor 41 | local inputSize = input:size():fill(1) 42 | if self.batchmode then 43 | inputSize[1] = input:size(1) 44 | end 45 | mask:resize(inputSize) 46 | -- build mask 47 | local zeroMask = mask:expandAs(input) 48 | output:resizeAs(input):copy(input) 49 | output:maskedFill(zeroMask, 0) 50 | end 51 | return output 52 | end 53 | 54 | function MaskZero:updateOutput(input) 55 | -- recurrent module input is always the first one 56 | local rmi = self:recursiveGetFirst(input):contiguous() 57 | if rmi:dim() == self.nInputDim then 58 | self.batchmode = false 59 | rmi = rmi:view(-1) -- collapse dims 60 | elseif rmi:dim() - 1 == self.nInputDim then 61 | self.batchmode = true 62 | rmi = rmi:view(rmi:size(1), -1) -- collapse non-batch dims 63 | else 64 | error("nInputDim error: "..rmi:dim()..", "..self.nInputDim) 65 | end 66 | 67 | -- build mask 68 | local vectorDim = rmi:dim() 69 | self._zeroMask = self._zeroMask or rmi.new() 70 | self._zeroMask:norm(rmi, 2, vectorDim) 71 | self.zeroMask = self.zeroMask or ( 72 | (torch.type(rmi) == 'torch.CudaTensor') and torch.CudaByteTensor() 73 | or (torch.type(rmi) == 'torch.ClTensor') and torch.ClTensor() 74 | or torch.ByteTensor() 75 | ) 76 | self._zeroMask.eq(self.zeroMask, self._zeroMask, 0) 77 | 78 | -- forward through decorated module 79 | local output = self.modules[1]:updateOutput(input) 80 | 81 | self.output = self:recursiveMask(self.output, output, self.zeroMask) 82 | return self.output 83 | end 84 | 85 | function MaskZero:updateGradInput(input, gradOutput) 86 | -- zero gradOutputs before backpropagating through decorated module 87 | self.gradOutput = self:recursiveMask(self.gradOutput, gradOutput, self.zeroMask) 88 | 89 | self.gradInput = self.modules[1]:updateGradInput(input, self.gradOutput) 90 | return self.gradInput 91 | end 92 | 93 | function MaskZero:type(type, ...) 94 | self.zeroMask = nil 95 | self._zeroMask = nil 96 | self._maskbyte = nil 97 | self._maskindices = nil 98 | return parent.type(self, type, ...) 
99 | end 100 | -------------------------------------------------------------------------------- /MaskZeroCriterion.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ MaskZeroCriterion ]]-- 3 | -- Decorator that zeros err and gradInputs of the encapsulated criterion 4 | -- for commensurate input rows which are tensors of zeros 5 | ------------------------------------------------------------------------ 6 | local MaskZeroCriterion, parent = torch.class("nn.MaskZeroCriterion", "nn.Criterion") 7 | 8 | function MaskZeroCriterion:__init(criterion, nInputDim) 9 | parent.__init(self) 10 | self.criterion = criterion 11 | assert(torch.isTypeOf(criterion, 'nn.Criterion')) 12 | assert(torch.type(nInputDim) == 'number', 'Expecting nInputDim number at arg 2') 13 | self.nInputDim = nInputDim 14 | end 15 | 16 | function MaskZeroCriterion:recursiveGetFirst(input) 17 | if torch.type(input) == 'table' then 18 | return self:recursiveGetFirst(input[1]) 19 | else 20 | assert(torch.isTensor(input)) 21 | return input 22 | end 23 | end 24 | 25 | function MaskZeroCriterion:recursiveMask(dst, src, mask) 26 | if torch.type(src) == 'table' then 27 | dst = torch.type(dst) == 'table' and dst or {} 28 | for k,v in ipairs(src) do 29 | dst[k] = self:recursiveMask(dst[k], v, mask) 30 | end 31 | else 32 | assert(torch.isTensor(src)) 33 | dst = torch.isTensor(dst) and dst or src.new() 34 | 35 | dst:index(src, 1, mask) 36 | end 37 | return dst 38 | end 39 | 40 | function MaskZeroCriterion:updateOutput(input, target) 41 | -- recurrent module input is always the first one 42 | local rmi = self:recursiveGetFirst(input):contiguous() 43 | if rmi:dim() == self.nInputDim then 44 | error("does not support online (i.e. 
non-batch) mode") 45 | elseif rmi:dim() - 1 == self.nInputDim then 46 | rmi = rmi:view(rmi:size(1), -1) -- collapse non-batch dims 47 | else 48 | error("nInputDim error: "..rmi:dim()..", "..self.nInputDim) 49 | end 50 | 51 | -- build mask 52 | local vectorDim = rmi:dim() 53 | self._zeroMask = self._zeroMask or rmi.new() 54 | self._zeroMask:norm(rmi, 2, vectorDim) 55 | local zeroMask = self._zeroMask 56 | if torch.isTypeOf(zeroMask, 'torch.CudaTensor') or 57 | torch.isTypeOf(zeroMask, 'torch.ClTensor') then 58 | self.__zeroMask = self.__zeroMask or torch.FloatTensor() 59 | self.__zeroMask:resize(self._zeroMask:size()):copy(self._zeroMask) 60 | zeroMask = self._zeroMask 61 | end 62 | 63 | self.zeroMask = self.zeroMask or torch.LongTensor() 64 | self.zeroMask:resize(self._zeroMask:size(1)):zero() 65 | 66 | local i, j = 0, 0 67 | zeroMask:apply(function(norm) 68 | i = i + 1 69 | if norm ~= 0 then 70 | j = j + 1 71 | self.zeroMask[j] = i 72 | end 73 | end) 74 | self.zeroMask:resize(j) 75 | 76 | if j > 0 then 77 | self.input = self:recursiveMask(self.input, input, self.zeroMask) 78 | self.target = self:recursiveMask(self.target, target, self.zeroMask) 79 | 80 | -- forward through decorated criterion 81 | self.output = self.criterion:updateOutput(self.input, self.target) 82 | else 83 | -- when all samples are masked, then loss is zero (issue 128) 84 | self.output = 0 85 | end 86 | 87 | return self.output 88 | end 89 | 90 | function MaskZeroCriterion:recursiveMaskGradInput(dst, mask, src, input) 91 | if torch.type(input) == 'table' then 92 | dst = (torch.type(dst) == 'table') and dst or {dst} 93 | src = (torch.type(src) == 'table') and src or {src} 94 | for key,_ in pairs(input) do 95 | dst[key] = self:recursiveMaskGradInput(dst[key], mask, src[key], input[key]) 96 | end 97 | for i=#input+1,#dst do 98 | dst[i] = nil 99 | end 100 | elseif torch.isTensor(input) then 101 | dst = torch.isTensor(dst) and dst or input.new() 102 | dst:resizeAs(input):zero() 103 | if mask:nElement() > 0 then 104 | assert(src) 105 | dst:indexCopy(1, mask, src) 106 | end 107 | else 108 | error("expecting nested tensors or tables. Got ".. 109 | torch.type(dst).." and "..torch.type(input).." instead") 110 | end 111 | return dst 112 | end 113 | 114 | function MaskZeroCriterion:updateGradInput(input, target) 115 | if self.zeroMask:nElement() > 0 then 116 | assert(self.input and self.target) 117 | self._gradInput = self.criterion:updateGradInput(self.input, self.target) 118 | end 119 | self.gradInput = self:recursiveMaskGradInput(self.gradInput, self.zeroMask, self._gradInput, input) 120 | return self.gradInput 121 | end 122 | 123 | function MaskZeroCriterion:type(type, ...) 124 | self.zeroMask = nil 125 | self._zeroMask = nil 126 | self.__zeroMask = nil 127 | self.input = nil 128 | self.target = nil 129 | self._gradInput = nil 130 | 131 | return parent.type(self, type, ...) 
132 | end 133 | -------------------------------------------------------------------------------- /Module.lua: -------------------------------------------------------------------------------- 1 | local Module = nn.Module 2 | 3 | -- You can use this to manually forget past memories in AbstractRecurrent instances 4 | function Module:forget() 5 | if self.modules then 6 | for i,module in ipairs(self.modules) do 7 | module:forget() 8 | end 9 | end 10 | return self 11 | end 12 | 13 | -- Used by nn.Sequencers 14 | function Module:remember(remember) 15 | if self.modules then 16 | for i, module in ipairs(self.modules) do 17 | module:remember(remember) 18 | end 19 | end 20 | return self 21 | end 22 | 23 | function Module:stepClone(shareParams, shareGradParams) 24 | return self:sharedClone(shareParams, shareGradParams, true) 25 | end 26 | 27 | function Module:backwardOnline() 28 | print("Deprecated Jan 6, 2016. By default rnn now uses backwardOnline, so no need to call this method") 29 | end 30 | 31 | -- calls setOutputStep on all component AbstractRecurrent modules 32 | -- used by Recursor() after calling stepClone. 33 | -- this solves a very annoying bug... 34 | function Module:setOutputStep(step) 35 | if self.modules then 36 | for i,module in ipairs(self.modules) do 37 | module:setOutputStep(step) 38 | end 39 | end 40 | end 41 | 42 | -- set the maximum number of backpropagation through time (BPTT) time-steps 43 | function Module:maxBPTTstep(rho) 44 | if self.modules then 45 | for i, module in ipairs(self.modules) do 46 | module:maxBPTTstep(rho) 47 | end 48 | end 49 | end 50 | 51 | function Module:getHiddenState(step) 52 | if self.modules then 53 | local hiddenState = {} 54 | for i, module in ipairs(self.modules) do 55 | hiddenState[i] = module:getHiddenState(step) 56 | end 57 | return hiddenState 58 | end 59 | end 60 | 61 | function Module:setHiddenState(step, hiddenState) 62 | if self.modules then 63 | assert(torch.type(hiddenState) == 'table') 64 | for i, module in ipairs(self.modules) do 65 | module:setHiddenState(step, hiddenState[i]) 66 | end 67 | end 68 | end 69 | 70 | function Module:getGradHiddenState(step) 71 | if self.modules then 72 | local gradHiddenState = {} 73 | for i, module in ipairs(self.modules) do 74 | gradHiddenState[i] = module:getGradHiddenState(step) 75 | end 76 | return gradHiddenState 77 | end 78 | end 79 | 80 | function Module:setGradHiddenState(step, gradHiddenState) 81 | if self.modules then 82 | assert(torch.type(gradHiddenState) == 'table') 83 | for i, module in ipairs(self.modules) do 84 | module:setGradHiddenState(step, gradHiddenState[i]) 85 | end 86 | end 87 | end -------------------------------------------------------------------------------- /Mufuru.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ MuFuRu - Multi-Function Recurrent Unit ]]-- 3 | -- Author: Jonathan Uesato 4 | -- License: LICENSE.2nd.txt 5 | 6 | -- Ref. 
A.: http://arxiv.org/pdf/1606.03002v1.pdf 7 | ------------------------------------------------------------------------ 8 | 9 | local MuFuRu, parent = torch.class('nn.MuFuRu', 'nn.GRU') 10 | 11 | local SqrtDiffLayer = nn.Sequential() 12 | :add(nn.CSubTable()) 13 | :add(nn.Abs()) 14 | :add(nn.Sqrt()) 15 | :add(nn.MulConstant(0.25)) 16 | 17 | local MaxLayer = nn.Sequential() 18 | :add(nn.MapTable(nn.Unsqueeze(1))) 19 | :add(nn.JoinTable(1)) 20 | :add(nn.Max(1)) 21 | 22 | local MinLayer = nn.Sequential() 23 | :add(nn.MapTable(nn.Unsqueeze(1))) 24 | :add(nn.JoinTable(1)) 25 | :add(nn.Min(1)) 26 | 27 | -- all operations take a table {oldState, newState} and return newState 28 | _operations = { 29 | max = MaxLayer, 30 | keep = nn.SelectTable(1), 31 | replace = nn.SelectTable(2), 32 | mul = nn.CMulTable(), 33 | min = MinLayer, 34 | diff = nn.CSubTable(), 35 | forget = nn.Sequential():add(nn.SelectTable(1)):add(nn.MulConstant(0.0)), 36 | sqrt_diff = SqrtDiffLayer 37 | } 38 | 39 | function MuFuRu:__init(inputSize, outputSize, ops, rho) 40 | -- Use all ops by default. To replicate GRU, use keep and replace only. 41 | self.ops = ops or {'keep', 'replace', 'mul', 'diff', 'forget', 'sqrt_diff', 'max', 'min'} 42 | self.num_ops = #self.ops 43 | self.operations = {} 44 | for i=1,self.num_ops do 45 | self.operations[i] = _operations[self.ops[i]] 46 | end 47 | self.inputSize = inputSize 48 | self.outputSize = outputSize 49 | parent.__init(self, inputSize, outputSize, rho or 9999) 50 | end 51 | 52 | -------------------------- factory methods ----------------------------- 53 | function MuFuRu:buildModel() 54 | -- input : {input, prevOutput} 55 | -- output : output 56 | 57 | local nonBatchDim = 2 58 | -- resetGate takes {input, prevOutput} to resetGate 59 | local resetGate = nn.Sequential() 60 | :add(nn.ParallelTable() 61 | :add(nn.Linear(self.inputSize, self.outputSize), false) 62 | :add(nn.Linear(self.outputSize, self.outputSize)) 63 | ) 64 | :add(nn.CAddTable()) 65 | :add(nn.Sigmoid()) 66 | 67 | -- Feature takes {input, prevOutput, reset} to feature 68 | local featureVec = nn.Sequential() 69 | :add(nn.ConcatTable() 70 | :add(nn.SelectTable(1)) 71 | :add(nn.Sequential() 72 | :add(nn.NarrowTable(2,2)) 73 | :add(nn.CMulTable()) 74 | ) 75 | ) 76 | :add(nn.JoinTable(nonBatchDim)) -- [x_t, r dot s_t-1] 77 | :add(nn.Linear(self.inputSize + self.outputSize, self.outputSize)) 78 | :add(nn.Sigmoid()) 79 | 80 | -- opWeights takes {input, prevOutput, reset} to opWeights. 
81 | -- Note that reset is not used 82 | local opWeights = nn.Sequential() 83 | :add(nn.NarrowTable(1,2)) 84 | :add(nn.JoinTable(nonBatchDim)) -- k_t 85 | :add(nn.Linear(self.inputSize + self.outputSize, self.num_ops * self.outputSize)) --p^_t 86 | :add(nn.View(self.num_ops, self.outputSize):setNumInputDims(1)) 87 | :add(nn.Transpose({1,2})) 88 | :add(nn.SoftMax()) --p_t 89 | 90 | -- all_ops takes {oldState, newState} to {newState1, newState2, ...newStateN} 91 | local all_ops = nn.ConcatTable() 92 | for i=1,self.num_ops do 93 | -- an operation is any layer taking {prevHidden, featureVec} to newState 94 | all_ops:add(self.operations[i]) 95 | end 96 | 97 | local all_op_activations = nn.Sequential() 98 | :add(nn.NarrowTable(1,2)) 99 | :add(all_ops) 100 | :add(nn.MapTable(nn.Unsqueeze(1))) 101 | :add(nn.JoinTable(1,3)) 102 | 103 | -- combine_ops takes {prevHidden, featureVec, opWeights} to nextHidden 104 | local combine_ops = nn.Sequential() 105 | :add(nn.ConcatTable() 106 | :add(all_op_activations) 107 | :add(nn.SelectTable(3)) 108 | ) 109 | :add(nn.CMulTable()) 110 | :add(nn.Sum(1,3)) 111 | 112 | local cell = nn.Sequential() 113 | :add(nn.ConcatTable() 114 | :add(nn.SelectTable(1)) 115 | :add(nn.SelectTable(2)) 116 | :add(resetGate) 117 | ) -- {input,prevOutput,reset} 118 | :add(nn.ConcatTable() 119 | :add(nn.SelectTable(2)) 120 | :add(featureVec) 121 | :add(opWeights) 122 | ) -- {prevOutput, v_t, opWeights} 123 | :add(combine_ops) 124 | return cell 125 | end 126 | 127 | -- Factory methods are inherited from GRU 128 | 129 | function MuFuRu:__tostring__() 130 | local op_str = '{ ' 131 | for i=1,self.num_ops do 132 | op_str = op_str .. self.ops[i] .. ' ' 133 | end 134 | op_str = op_str .. '}' 135 | return (string.format('%s(%d -> %d) ', torch.type(self), self.inputSize, self.outputSize)) .. op_str 136 | end 137 | 138 | function MuFuRu:migrate(params) 139 | error"Migrate not supported for MuFuRu" 140 | end 141 | -------------------------------------------------------------------------------- /NormStabilizer.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Norm Stabilization]] 3 | -- Regularizing RNNs by Stabilizing Activations 4 | -- Ref. A: http://arxiv.org/abs/1511.08400 5 | -- For training, this module only works in batch mode. 
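--
-- A minimal usage sketch (illustrative only; the wrapped module and the sizes
-- below are arbitrary choices, not prescribed by this file):
--   local rnn = nn.Sequencer(nn.Sequential()
--      :add(nn.FastLSTM(10, 10))
--      :add(nn.NormStabilizer(0.5)))  -- beta = 0.5 (defaults to 1)
--   -- After a forward/backward pass, the regularization term defined below
--   -- can be read with updateLoss(); the corresponding gradients are added
--   -- automatically during backward.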
6 | ------------------------------------------------------------------------ 7 | 8 | local NS, parent = torch.class("nn.NormStabilizer", "nn.AbstractRecurrent") 9 | 10 | function NS:__init(beta) 11 | parent.__init(self, 99999) 12 | 13 | self.beta = beta or 1 14 | self.recurrentModule = nn.CopyGrad() 15 | 16 | -- make it work with nn.Container 17 | self.modules[1] = self.recurrentModule 18 | self.sharedClones[1] = self.recurrentModule 19 | end 20 | 21 | function NS:_accGradParameters(input, gradOutput, scale) 22 | -- No parameters to update 23 | end 24 | 25 | function NS:updateOutput(input) 26 | assert(input:dim() == 2) 27 | local output 28 | if self.train ~= false then 29 | self:recycle() 30 | local rm = self:getStepModule(self.step) 31 | output = rm:updateOutput(input) 32 | -- in training mode, we also calculate norm of hidden state 33 | rm.norm = rm.norm or output.new() 34 | rm.norm:norm(output, 2, 2) 35 | else 36 | output = self.recurrentModule:updateOutput(input) 37 | end 38 | 39 | self.outputs[self.step] = output 40 | 41 | self.output = output 42 | self.step = self.step + 1 43 | self.gradPrevOutput = nil 44 | self.updateGradInputStep = nil 45 | self.accGradParametersStep = nil 46 | 47 | return self.output 48 | end 49 | 50 | -- returns norm-stabilizer loss as defined in ref. A 51 | function NS:updateLoss() 52 | self.loss = 0 53 | self._normsum = self._normsum or self.output.new() 54 | 55 | for step=2,self.step-1 do 56 | local rm1 = self:getStepModule(step-1) 57 | local rm2 = self:getStepModule(step) 58 | self._normsum:add(rm1.norm, rm2.norm) 59 | self._normsum:pow(2) 60 | local steploss = self._normsum:mean() -- sizeAverage 61 | self.loss = self.loss + steploss 62 | end 63 | 64 | -- the loss is divided by the number of time-steps (but not the gradients) 65 | self.loss = self.beta * self.loss / (self.step-1) 66 | return self.loss 67 | end 68 | 69 | function NS:_updateGradInput(input, gradOutput) 70 | -- First grab h[t] : 71 | -- backward propagate through this step 72 | local curStep = self.updateGradInputStep-1 73 | local hiddenModule = self:getStepModule(curStep) 74 | local gradInput = hiddenModule:updateGradInput(input, gradOutput) 75 | assert(curStep < self.step) 76 | 77 | -- buffers 78 | self._normsum = self._normsum or self.output.new() 79 | self._gradInput = self._gradInput or self.output.new() 80 | 81 | local batchSize = hiddenModule.output:size(1) 82 | 83 | -- Add gradient of norm stabilizer cost function directly to respective CopyGrad.gradInput tensors 84 | 85 | if curStep > 1 then 86 | -- then grab h[t-1] 87 | local prevHiddenModule = self:getStepModule(curStep - 1) 88 | 89 | self._normsum:resizeAs(hiddenModule.norm):copy(hiddenModule.norm) 90 | self._normsum:add(-1, prevHiddenModule.norm) 91 | self._normsum:mul(self.beta*2) 92 | self._normsum:cdiv(hiddenModule.norm) 93 | 94 | self._gradInput:mul(hiddenModule.output, 1/batchSize) 95 | self._gradInput:cmul(self._normsum:expandAs(self._gradInput)) 96 | hiddenModule.gradInput:add(self._gradInput) 97 | end 98 | 99 | if curStep < self.step-1 then 100 | local nextHiddenModule = self:getStepModule(curStep + 1) 101 | 102 | self._normsum:resizeAs(hiddenModule.norm):copy(hiddenModule.norm) 103 | self._normsum:add(-1, nextHiddenModule.norm) 104 | self._normsum:mul(self.beta*2) 105 | self._normsum:cdiv(hiddenModule.norm) 106 | 107 | self._gradInput:mul(hiddenModule.output, 1/batchSize) 108 | self._gradInput:cmul(self._normsum:expandAs(self._gradInput)) 109 | hiddenModule.gradInput:add(self._gradInput) 110 | end 111 | 112 | return 
hiddenModule.gradInput 113 | end 114 | 115 | function NS:__tostring__() 116 | return "nn.NormStabilizer" 117 | end 118 | -------------------------------------------------------------------------------- /Padding.lua: -------------------------------------------------------------------------------- 1 | local Padding, parent 2 | if nn.Padding then -- prevent name conflicts with nnx 3 | Padding, parent = nn.Padding, nn.Module 4 | else 5 | Padding, parent = torch.class('nn.Padding', 'nn.Module') 6 | end 7 | 8 | -- pad can be positive (right) negative (left) 9 | function Padding:__init(dim, pad, nInputDim, value) 10 | self.dim = dim 11 | self.pad = pad 12 | self.nInputDim = nInputDim 13 | self.value = value or 0 14 | self.outputSize = torch.LongStorage() 15 | parent.__init(self) 16 | end 17 | 18 | function Padding:updateOutput(input) 19 | self.outputSize:resize(input:dim()) 20 | self.outputSize:copy(input:size()) 21 | local dim = self.dim 22 | if self.nInputDim and input:dim() ~= self.nInputDim then 23 | dim = dim + 1 24 | end 25 | self.outputSize[dim] = self.outputSize[dim] + math.abs(self.pad) 26 | self.output:resize(self.outputSize) 27 | self.output:fill(self.value) 28 | local outputWindow 29 | if self.pad > 0 then 30 | outputWindow = self.output:narrow(dim, 1, input:size(dim)) 31 | else 32 | outputWindow = self.output:narrow(dim, 1 - self.pad, input:size(dim)) 33 | end 34 | outputWindow:copy(input) 35 | return self.output 36 | end 37 | 38 | function Padding:updateGradInput(input, gradOutput) 39 | self.gradInput:resizeAs(input) 40 | local dim = self.dim 41 | if self.nInputDim and input:dim() ~= self.nInputDim then 42 | dim = dim + 1 43 | end 44 | local gradOutputWindow 45 | if self.pad > 0 then 46 | gradOutputWindow = gradOutput:narrow(dim, 1, input:size(dim)) 47 | else 48 | gradOutputWindow = gradOutput:narrow(dim, 1 - self.pad, input:size(dim)) 49 | end 50 | self.gradInput:copy(gradOutputWindow:copy(input)) 51 | return self.gradInput 52 | end 53 | -------------------------------------------------------------------------------- /Recurrence.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Recurrence ]]-- 3 | -- A general container for implementing a recurrence. 4 | -- Unlike Recurrent, this module doesn't manage a separate input layer, 5 | -- nor does it have a startModule. Instead for the first step, it 6 | -- just forwards a zero tensor through the recurrent layer (like LSTM). 7 | -- The recurrentModule should output Tensor or table : output(t) 8 | -- given input table : {input(t), output(t-1)} 9 | ------------------------------------------------------------------------ 10 | local _ = require 'moses' 11 | local Recurrence, parent = torch.class('nn.Recurrence', 'nn.AbstractRecurrent') 12 | 13 | function Recurrence:__init(recurrentModule, outputSize, nInputDim, rho) 14 | parent.__init(self, rho or 9999) 15 | 16 | assert(_.contains({'table','torch.LongStorage','number'}, torch.type(outputSize)), "Unsupported size type") 17 | self.outputSize = torch.type(outputSize) == 'number' and {outputSize} or outputSize 18 | -- for table outputs, this is the number of dimensions in the first (left) tensor (depth-first). 
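-- An illustrative construction (the inner modules and sizes are arbitrary
-- examples, not requirements of this class):
--   local rm = nn.Sequential()             -- input is {x[t], h[t-1]}
--      :add(nn.ParallelTable()
--         :add(nn.Linear(10, 10))          -- transforms x[t]
--         :add(nn.Linear(10, 10)))         -- transforms h[t-1]
--      :add(nn.CAddTable())
--      :add(nn.Sigmoid())
--   local rnn = nn.Recurrence(rm, 10, 1)   -- outputSize = 10, nInputDim = 1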
19 | assert(torch.type(nInputDim) == 'number', "Expecting nInputDim number for arg 2") 20 | self.nInputDim = nInputDim 21 | assert(torch.isTypeOf(recurrentModule, 'nn.Module'), "Expecting recurrenModule nn.Module for arg 3") 22 | self.recurrentModule = recurrentModule 23 | 24 | -- make it work with nn.Container and nn.Decorator 25 | self.module = self.recurrentModule 26 | self.modules[1] = self.recurrentModule 27 | self.sharedClones[1] = self.recurrentModule 28 | 29 | -- just so we can know the type of this module 30 | self.typeTensor = torch.Tensor() 31 | end 32 | 33 | -- recursively creates a zero tensor (or table thereof) (or table thereof). 34 | -- This zero Tensor is forwarded as output(t=0). 35 | function Recurrence:recursiveResizeZero(tensor, size, batchSize) 36 | local isTable = torch.type(size) == 'table' 37 | if isTable and torch.type(size[1]) ~= 'number' then 38 | tensor = (torch.type(tensor) == 'table') and tensor or {} 39 | for k,v in ipairs(size) do 40 | tensor[k] = self:recursiveResizeZero(tensor[k], v, batchSize) 41 | end 42 | elseif torch.type(size) == 'torch.LongStorage' then 43 | local size_ = size:totable() 44 | tensor = torch.isTensor(tensor) and tensor or self.typeTensor.new() 45 | if batchSize then 46 | tensor:resize(batchSize, unpack(size_)) 47 | else 48 | tensor:resize(unpack(size_)) 49 | end 50 | tensor:zero() 51 | elseif isTable and torch.type(size[1]) == 'number' then 52 | tensor = torch.isTensor(tensor) and tensor or self.typeTensor.new() 53 | if batchSize then 54 | tensor:resize(batchSize, unpack(size)) 55 | else 56 | tensor:resize(unpack(size)) 57 | end 58 | tensor:zero() 59 | else 60 | error("Unknown size type : "..torch.type(size)) 61 | end 62 | return tensor 63 | end 64 | 65 | -- get the batch size. 66 | -- When input is a table, we use the first tensor (depth first). 67 | function Recurrence:getBatchSize(input, nInputDim) 68 | local nInputDim = nInputDim or self.nInputDim 69 | if torch.type(input) == 'table' then 70 | return self:getBatchSize(input[1]) 71 | else 72 | assert(torch.isTensor(input)) 73 | if input:dim() == nInputDim then 74 | return nil 75 | elseif input:dim() - 1 == nInputDim then 76 | return input:size(1) 77 | else 78 | error("inconsitent tensor dims "..input:dim()) 79 | end 80 | end 81 | end 82 | 83 | function Recurrence:getHiddenState(step, input) 84 | local prevOutput 85 | if step == 0 then 86 | if input then 87 | -- first previous output is zeros 88 | local batchSize = self:getBatchSize(input) 89 | self.zeroTensor = self:recursiveResizeZero(self.zeroTensor, self.outputSize, batchSize) 90 | end 91 | prevOutput = self.userPrevOutput or self.outputs[step] or self.zeroTensor 92 | else 93 | -- previous output of this module 94 | prevOutput = self.outputs[step] 95 | end 96 | -- call getHiddenState on recurrentModule as they may contain AbstractRecurrent instances... 97 | return {prevOutput, nn.Container.getHiddenState(self, step)} 98 | end 99 | 100 | function Recurrence:setHiddenState(step, hiddenState) 101 | assert(torch.type(hiddenState) == 'table') 102 | assert(#hiddenState >= 1) 103 | self.outputs[step] = hiddenState[1] 104 | 105 | if hiddenState[2] then 106 | -- call setHiddenState on recurrentModule as they may contain AbstractRecurrent instances... 
107 | nn.Container.setHiddenState(self, step, hiddenState[2]) 108 | end 109 | end 110 | 111 | function Recurrence:updateOutput(input) 112 | -- output(t-1) 113 | local prevOutput = self:getHiddenState(self.step-1, input)[1] 114 | 115 | -- output(t) = recurrentModule{input(t), output(t-1)} 116 | local output 117 | if self.train ~= false then 118 | self:recycle() 119 | local recurrentModule = self:getStepModule(self.step) 120 | -- the actual forward propagation 121 | output = recurrentModule:updateOutput{input, prevOutput} 122 | else 123 | output = self.recurrentModule:updateOutput{input, prevOutput} 124 | end 125 | 126 | self.outputs[self.step] = output 127 | 128 | self.output = output 129 | 130 | self.step = self.step + 1 131 | self.gradPrevOutput = nil 132 | self.updateGradInputStep = nil 133 | self.accGradParametersStep = nil 134 | 135 | return self.output 136 | end 137 | 138 | function Recurrence:getGradHiddenState(step) 139 | local gradOutput 140 | if step == self.step-1 then 141 | gradOutput = self.userNextGradOutput or self.gradOutputs[step] or self.zeroTensor 142 | else 143 | gradOutput = self.gradOutputs[step] 144 | end 145 | return {gradOutput, nn.Container.getGradHiddenState(self, step)} 146 | end 147 | 148 | function Recurrence:setGradHiddenState(step, gradHiddenState) 149 | assert(torch.type(gradHiddenState) == 'table') 150 | assert(#gradHiddenState >= 1) 151 | 152 | self.gradOutputs[step] = gradHiddenState[1] 153 | if gradHiddenState[2] then 154 | nn.Container.setGradHiddenState(self, step, gradHiddenState[2]) 155 | end 156 | end 157 | 158 | function Recurrence:_updateGradInput(input, gradOutput) 159 | assert(self.step > 1, "expecting at least one updateOutput") 160 | local step = self.updateGradInputStep - 1 161 | assert(step >= 1) 162 | 163 | -- set the output/gradOutput states of current Module 164 | local recurrentModule = self:getStepModule(step) 165 | 166 | -- backward propagate through this step 167 | local _gradOutput = self:getGradHiddenState(step)[1] 168 | self._gradOutputs[step] = nn.rnn.recursiveCopy(self._gradOutputs[step], _gradOutput) 169 | nn.rnn.recursiveAdd(self._gradOutputs[step], gradOutput) 170 | gradOutput = self._gradOutputs[step] 171 | 172 | local gradInputTable = recurrentModule:updateGradInput({input, self:getHiddenState(step-1)[1]}, gradOutput) 173 | 174 | local _ = require 'moses' 175 | self:setGradHiddenState(step-1, _.slice(gradInputTable, 2, #gradInputTable)) 176 | 177 | return gradInputTable[1] 178 | end 179 | 180 | function Recurrence:_accGradParameters(input, gradOutput, scale) 181 | local step = self.accGradParametersStep - 1 182 | assert(step >= 1) 183 | 184 | local recurrentModule = self:getStepModule(step) 185 | 186 | -- backward propagate through this step 187 | local gradOutput = self._gradOutputs[step] or self:getGradHiddenState(step)[1] 188 | recurrentModule:accGradParameters({input, self:getHiddenState(step-1)[1]}, gradOutput, scale) 189 | end 190 | 191 | Recurrence.__tostring__ = nn.Decorator.__tostring__ 192 | -------------------------------------------------------------------------------- /Recurrent.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Recurrent ]]-- 3 | -- Ref. A.: http://goo.gl/vtVGkO (Mikolov et al.) 4 | -- B. http://goo.gl/hu1Lqm 5 | -- Processes the sequence one timestep (forward/backward) at a time. 6 | -- A call to backward only keeps a log of the gradOutputs and scales. 
7 | -- Back-Propagation Through Time (BPTT) is done when updateParameters 8 | -- is called. The Module keeps a list of all previous representations 9 | -- (Module.outputs), including intermediate ones for BPTT. 10 | -- To use this module with batches, we suggest using different 11 | -- sequences of the same size within a batch and calling 12 | -- updateParameters() at the end of the Sequence. 13 | -- Note that this won't work with modules that use more than the 14 | -- output attribute to keep track of their internal state between 15 | -- forward and backward. 16 | ------------------------------------------------------------------------ 17 | assert(not nn.Recurrent, "update nnx package : luarocks install nnx") 18 | local Recurrent, parent = torch.class('nn.Recurrent', 'nn.AbstractRecurrent') 19 | 20 | function Recurrent:__init(start, input, feedback, transfer, rho, merge) 21 | parent.__init(self, rho) 22 | 23 | local ts = torch.type(start) 24 | if ts == 'torch.LongStorage' or ts == 'number' then 25 | start = nn.Add(start) 26 | elseif ts == 'table' then 27 | start = nn.Add(torch.LongStorage(start)) 28 | elseif not torch.isTypeOf(start, 'nn.Module') then 29 | error"Recurrent : expecting arg 1 of type nn.Module, torch.LongStorage, number or table" 30 | end 31 | 32 | self.startModule = start 33 | self.inputModule = input 34 | self.feedbackModule = feedback 35 | self.transferModule = transfer or nn.Sigmoid() 36 | self.mergeModule = merge or nn.CAddTable() 37 | 38 | self.modules = {self.startModule, self.inputModule, self.feedbackModule, self.transferModule, self.mergeModule} 39 | 40 | self:buildInitialModule() 41 | self:buildRecurrentModule() 42 | self.sharedClones[2] = self.recurrentModule 43 | end 44 | 45 | -- build module used for the first step (steps == 1) 46 | function Recurrent:buildInitialModule() 47 | self.initialModule = nn.Sequential() 48 | self.initialModule:add(self.inputModule:sharedClone()) 49 | self.initialModule:add(self.startModule) 50 | self.initialModule:add(self.transferModule:sharedClone()) 51 | end 52 | 53 | -- build module used for the other steps (steps > 1) 54 | function Recurrent:buildRecurrentModule() 55 | local parallelModule = nn.ParallelTable() 56 | parallelModule:add(self.inputModule) 57 | parallelModule:add(self.feedbackModule) 58 | self.recurrentModule = nn.Sequential() 59 | self.recurrentModule:add(parallelModule) 60 | self.recurrentModule:add(self.mergeModule) 61 | self.recurrentModule:add(self.transferModule) 62 | end 63 | 64 | function Recurrent:updateOutput(input) 65 | -- output(t) = transfer(feedback(output_(t-1)) + input(input_(t))) 66 | local output 67 | if self.step == 1 then 68 | output = self.initialModule:updateOutput(input) 69 | else 70 | if self.train ~= false then 71 | -- set/save the output states 72 | self:recycle() 73 | local recurrentModule = self:getStepModule(self.step) 74 | -- self.output is the previous output of this module 75 | output = recurrentModule:updateOutput{input, self.outputs[self.step-1]} 76 | else 77 | -- self.output is the previous output of this module 78 | output = self.recurrentModule:updateOutput{input, self.outputs[self.step-1]} 79 | end 80 | end 81 | 82 | self.outputs[self.step] = output 83 | self.output = output 84 | self.step = self.step + 1 85 | self.gradPrevOutput = nil 86 | self.updateGradInputStep = nil 87 | self.accGradParametersStep = nil 88 | return self.output 89 | end 90 | 91 | function Recurrent:_updateGradInput(input, gradOutput) 92 | assert(self.step > 1, "expecting at least one updateOutput") 93 | local 
step = self.updateGradInputStep - 1 94 | 95 | local gradInput 96 | 97 | if self.gradPrevOutput then 98 | self._gradOutputs[step] = nn.rnn.recursiveCopy(self._gradOutputs[step], self.gradPrevOutput) 99 | nn.rnn.recursiveAdd(self._gradOutputs[step], gradOutput) 100 | gradOutput = self._gradOutputs[step] 101 | end 102 | 103 | local output = self.outputs[step-1] 104 | if step > 1 then 105 | local recurrentModule = self:getStepModule(step) 106 | gradInput, self.gradPrevOutput = unpack(recurrentModule:updateGradInput({input, output}, gradOutput)) 107 | elseif step == 1 then 108 | gradInput = self.initialModule:updateGradInput(input, gradOutput) 109 | else 110 | error"non-positive time-step" 111 | end 112 | 113 | return gradInput 114 | end 115 | 116 | function Recurrent:_accGradParameters(input, gradOutput, scale) 117 | local step = self.accGradParametersStep - 1 118 | 119 | local gradOutput = (step == self.step-1) and gradOutput or self._gradOutputs[step] 120 | local output = self.outputs[step-1] 121 | 122 | if step > 1 then 123 | local recurrentModule = self:getStepModule(step) 124 | recurrentModule:accGradParameters({input, output}, gradOutput, scale) 125 | elseif step == 1 then 126 | self.initialModule:accGradParameters(input, gradOutput, scale) 127 | else 128 | error"non-positive time-step" 129 | end 130 | end 131 | 132 | function Recurrent:recycle() 133 | return parent.recycle(self, 1) 134 | end 135 | 136 | function Recurrent:forget() 137 | return parent.forget(self, 1) 138 | end 139 | 140 | function Recurrent:includingSharedClones(f) 141 | local modules = self.modules 142 | self.modules = {} 143 | local sharedClones = self.sharedClones 144 | self.sharedClones = nil 145 | local initModule = self.initialModule 146 | self.initialModule = nil 147 | for i,modules in ipairs{modules, sharedClones, {initModule}} do 148 | for j, module in pairs(modules) do 149 | table.insert(self.modules, module) 150 | end 151 | end 152 | local r = f() 153 | self.modules = modules 154 | self.sharedClones = sharedClones 155 | self.initialModule = initModule 156 | return r 157 | end 158 | 159 | function Recurrent:reinforce(reward) 160 | if torch.type(reward) == 'table' then 161 | -- multiple rewards, one per time-step 162 | local rewards = reward 163 | for step, reward in ipairs(rewards) do 164 | if step == 1 then 165 | self.initialModule:reinforce(reward) 166 | else 167 | local sm = self:getStepModule(step) 168 | sm:reinforce(reward) 169 | end 170 | end 171 | else 172 | -- one reward broadcast to all time-steps 173 | return self:includingSharedClones(function() 174 | return parent.reinforce(self, reward) 175 | end) 176 | end 177 | end 178 | 179 | function Recurrent:maskZero() 180 | error("Recurrent doesn't support maskZero as it uses a different ".. 181 | "module for the first time-step. Use nn.Recurrence instead.") 182 | end 183 | 184 | function Recurrent:trimZero() 185 | error("Recurrent doesn't support trimZero as it uses a different ".. 186 | "module for the first time-step. Use nn.Recurrence instead.") 187 | end 188 | 189 | function Recurrent:__tostring__() 190 | local tab = ' ' 191 | local line = '\n' 192 | local next = ' -> ' 193 | local str = torch.type(self) 194 | str = str .. ' {' .. line .. tab .. '[{input(t), output(t-1)}' 195 | for i=1,3 do 196 | str = str .. next .. '(' .. i .. ')' 197 | end 198 | str = str .. next .. 'output(t)]' 199 | 200 | local tab = ' ' 201 | local line = '\n ' 202 | local next = ' |`-> ' 203 | local ext = ' | ' 204 | local last = ' ... -> ' 205 | str = str .. line .. '(1): ' .. 
' {' .. line .. tab .. 'input(t)' 206 | str = str .. line .. tab .. next .. '(t==0): ' .. tostring(self.startModule):gsub('\n', '\n' .. tab .. ext) 207 | str = str .. line .. tab .. next .. '(t~=0): ' .. tostring(self.inputModule):gsub('\n', '\n' .. tab .. ext) 208 | str = str .. line .. tab .. 'output(t-1)' 209 | str = str .. line .. tab .. next .. tostring(self.feedbackModule):gsub('\n', line .. tab .. ext) 210 | str = str .. line .. "}" 211 | local tab = ' ' 212 | local line = '\n' 213 | local next = ' -> ' 214 | str = str .. line .. tab .. '(' .. 2 .. '): ' .. tostring(self.mergeModule):gsub(line, line .. tab) 215 | str = str .. line .. tab .. '(' .. 3 .. '): ' .. tostring(self.transferModule):gsub(line, line .. tab) 216 | str = str .. line .. '}' 217 | return str 218 | end 219 | -------------------------------------------------------------------------------- /RecurrentAttention.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ RecurrentAttention ]]-- 3 | -- Ref. A. http://papers.nips.cc/paper/5542-recurrent-models-of-visual-attention.pdf 4 | -- B. http://incompleteideas.net/sutton/williams-92.pdf 5 | -- module which takes an RNN as argument with other 6 | -- hyper-parameters such as the maximum number of steps, 7 | -- action (actions sampling module like ReinforceNormal) and 8 | ------------------------------------------------------------------------ 9 | local RecurrentAttention, parent = torch.class("nn.RecurrentAttention", "nn.AbstractSequencer") 10 | 11 | function RecurrentAttention:__init(rnn, action, nStep, hiddenSize) 12 | parent.__init(self) 13 | assert(torch.isTypeOf(action, 'nn.Module')) 14 | assert(torch.type(nStep) == 'number') 15 | assert(torch.type(hiddenSize) == 'table') 16 | assert(torch.type(hiddenSize[1]) == 'number', "Does not support table hidden layers" ) 17 | 18 | self.rnn = rnn 19 | -- we can decorate the module with a Recursor to make it AbstractRecurrent 20 | self.rnn = (not torch.isTypeOf(rnn, 'nn.AbstractRecurrent')) and nn.Recursor(rnn) or rnn 21 | 22 | -- samples an x,y actions for each example 23 | self.action = (not torch.isTypeOf(action, 'nn.AbstractRecurrent')) and nn.Recursor(action) or action 24 | self.hiddenSize = hiddenSize 25 | self.nStep = nStep 26 | 27 | self.modules = {self.rnn, self.action} 28 | 29 | self.output = {} -- rnn output 30 | self.actions = {} -- action output 31 | 32 | self.forwardActions = false 33 | 34 | self.gradHidden = {} 35 | end 36 | 37 | function RecurrentAttention:updateOutput(input) 38 | self.rnn:forget() 39 | self.action:forget() 40 | local nDim = input:dim() 41 | 42 | for step=1,self.nStep do 43 | 44 | if step == 1 then 45 | -- sample an initial starting actions by forwarding zeros through the action 46 | self._initInput = self._initInput or input.new() 47 | self._initInput:resize(input:size(1),table.unpack(self.hiddenSize)):zero() 48 | self.actions[1] = self.action:updateOutput(self._initInput) 49 | else 50 | -- sample actions from previous hidden activation (rnn output) 51 | self.actions[step] = self.action:updateOutput(self.output[step-1]) 52 | end 53 | 54 | -- rnn handles the recurrence internally 55 | local output = self.rnn:updateOutput{input, self.actions[step]} 56 | self.output[step] = self.forwardActions and {output, self.actions[step]} or output 57 | end 58 | 59 | return self.output 60 | end 61 | 62 | function RecurrentAttention:updateGradInput(input, gradOutput) 63 | assert(self.rnn.step - 1 
== self.nStep, "inconsistent rnn steps") 64 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 65 | assert(#gradOutput == self.nStep, "gradOutput should have nStep elements") 66 | 67 | -- back-propagate through time (BPTT) 68 | for step=self.nStep,1,-1 do 69 | -- 1. backward through the action layer 70 | local gradOutput_, gradAction_ = gradOutput[step] 71 | if self.forwardActions then 72 | gradOutput_, gradAction_ = unpack(gradOutput[step]) 73 | else 74 | -- Note : gradOutput is ignored by REINFORCE modules so we give a zero Tensor instead 75 | self._gradAction = self._gradAction or self.action.output.new() 76 | if not self._gradAction:isSameSizeAs(self.action.output) then 77 | self._gradAction:resizeAs(self.action.output):zero() 78 | end 79 | gradAction_ = self._gradAction 80 | end 81 | 82 | if step == self.nStep then 83 | self.gradHidden[step] = nn.rnn.recursiveCopy(self.gradHidden[step], gradOutput_) 84 | else 85 | -- gradHidden = gradOutput + gradAction 86 | nn.rnn.recursiveAdd(self.gradHidden[step], gradOutput_) 87 | end 88 | 89 | if step == 1 then 90 | -- backward through initial starting actions 91 | self.action:updateGradInput(self._initInput, gradAction_) 92 | else 93 | local gradAction = self.action:updateGradInput(self.output[step-1], gradAction_) 94 | self.gradHidden[step-1] = nn.rnn.recursiveCopy(self.gradHidden[step-1], gradAction) 95 | end 96 | 97 | -- 2. backward through the rnn layer 98 | local gradInput = self.rnn:updateGradInput({input, self.actions[step]}, self.gradHidden[step])[1] 99 | if step == self.nStep then 100 | self.gradInput:resizeAs(gradInput):copy(gradInput) 101 | else 102 | self.gradInput:add(gradInput) 103 | end 104 | end 105 | 106 | return self.gradInput 107 | end 108 | 109 | function RecurrentAttention:accGradParameters(input, gradOutput, scale) 110 | assert(self.rnn.step - 1 == self.nStep, "inconsistent rnn steps") 111 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 112 | assert(#gradOutput == self.nStep, "gradOutput should have nStep elements") 113 | 114 | -- back-propagate through time (BPTT) 115 | for step=self.nStep,1,-1 do 116 | -- 1. backward through the action layer 117 | local gradAction_ = self.forwardActions and gradOutput[step][2] or self._gradAction 118 | 119 | if step == 1 then 120 | -- backward through initial starting actions 121 | self.action:accGradParameters(self._initInput, gradAction_, scale) 122 | else 123 | self.action:accGradParameters(self.output[step-1], gradAction_, scale) 124 | end 125 | 126 | -- 2. backward through the rnn layer 127 | self.rnn:accGradParameters({input, self.actions[step]}, self.gradHidden[step], scale) 128 | end 129 | end 130 | 131 | function RecurrentAttention:accUpdateGradParameters(input, gradOutput, lr) 132 | assert(self.rnn.step - 1 == self.nStep, "inconsistent rnn steps") 133 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 134 | assert(#gradOutput == self.nStep, "gradOutput should have nStep elements") 135 | 136 | -- backward through the action layers 137 | for step=self.nStep,1,-1 do 138 | -- 1. 
backward through the action layer 139 | local gradAction_ = self.forwardActions and gradOutput[step][2] or self._gradAction 140 | 141 | if step == 1 then 142 | -- backward through initial starting actions 143 | self.action:accUpdateGradParameters(self._initInput, gradAction_, lr) 144 | else 145 | -- Note : gradOutput is ignored by REINFORCE modules so we give action.output as a dummy variable 146 | self.action:accUpdateGradParameters(self.output[step-1], gradAction_, lr) 147 | end 148 | 149 | -- 2. backward through the rnn layer 150 | self.rnn:accUpdateGradParameters({input, self.actions[step]}, self.gradHidden[step], lr) 151 | end 152 | end 153 | 154 | function RecurrentAttention:type(type) 155 | self._input = nil 156 | self._actions = nil 157 | self._crop = nil 158 | self._pad = nil 159 | self._byte = nil 160 | return parent.type(self, type) 161 | end 162 | 163 | function RecurrentAttention:__tostring__() 164 | local tab = ' ' 165 | local line = '\n' 166 | local ext = ' | ' 167 | local extlast = ' ' 168 | local last = ' ... -> ' 169 | local str = torch.type(self) 170 | str = str .. ' {' 171 | str = str .. line .. tab .. 'action : ' .. tostring(self.action):gsub(line, line .. tab .. ext) 172 | str = str .. line .. tab .. 'rnn : ' .. tostring(self.rnn):gsub(line, line .. tab .. ext) 173 | str = str .. line .. '}' 174 | return str 175 | end 176 | -------------------------------------------------------------------------------- /Recursor.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Recursor ]]-- 3 | -- Decorates module to be used within an AbstractSequencer. 4 | -- It does this by making the decorated module conform to the 5 | -- AbstractRecurrent interface (which is inherited by LSTM/Recurrent) 6 | ------------------------------------------------------------------------ 7 | local Recursor, parent = torch.class('nn.Recursor', 'nn.AbstractRecurrent') 8 | 9 | function Recursor:__init(module, rho) 10 | parent.__init(self, rho or 9999999) 11 | 12 | self.recurrentModule = module 13 | 14 | self.module = module 15 | self.modules = {module} 16 | self.sharedClones[1] = self.recurrentModule 17 | end 18 | 19 | function Recursor:updateOutput(input) 20 | local output 21 | if self.train ~= false then -- if self.train or self.train == nil then 22 | -- set/save the output states 23 | self:recycle() 24 | local recurrentModule = self:getStepModule(self.step) 25 | output = recurrentModule:updateOutput(input) 26 | else 27 | output = self.recurrentModule:updateOutput(input) 28 | end 29 | 30 | self.outputs[self.step] = output 31 | self.output = output 32 | self.step = self.step + 1 33 | self.updateGradInputStep = nil 34 | self.accGradParametersStep = nil 35 | return self.output 36 | end 37 | 38 | function Recursor:_updateGradInput(input, gradOutput) 39 | assert(self.step > 1, "expecting at least one updateOutput") 40 | local step = self.updateGradInputStep - 1 41 | assert(step >= 1) 42 | 43 | local recurrentModule = self:getStepModule(step) 44 | recurrentModule:setOutputStep(step) 45 | local gradInput = recurrentModule:updateGradInput(input, gradOutput) 46 | 47 | return gradInput 48 | end 49 | 50 | function Recursor:_accGradParameters(input, gradOutput, scale) 51 | local step = self.accGradParametersStep - 1 52 | assert(step >= 1) 53 | 54 | local recurrentModule = self:getStepModule(step) 55 | recurrentModule:setOutputStep(step) 56 | recurrentModule:accGradParameters(input, gradOutput, 
scale) 57 | end 58 | 59 | function Recursor:includingSharedClones(f) 60 | local modules = self.modules 61 | self.modules = {} 62 | local sharedClones = self.sharedClones 63 | self.sharedClones = nil 64 | for i,modules in ipairs{modules, sharedClones} do 65 | for j, module in pairs(modules) do 66 | table.insert(self.modules, module) 67 | end 68 | end 69 | local r = {f()} 70 | self.modules = modules 71 | self.sharedClones = sharedClones 72 | return unpack(r) 73 | end 74 | 75 | function Recursor:forget(offset) 76 | parent.forget(self, offset) 77 | nn.Module.forget(self) 78 | return self 79 | end 80 | 81 | function Recursor:maxBPTTstep(rho) 82 | self.rho = rho 83 | nn.Module.maxBPTTstep(self, rho) 84 | end 85 | 86 | function Recursor:getHiddenState(...) 87 | return self.modules[1]:getHiddenState(...) 88 | end 89 | 90 | function Recursor:setHiddenState(...) 91 | return self.modules[1]:setHiddenState(...) 92 | end 93 | 94 | function Recursor:getGradHiddenState(...) 95 | return self.modules[1]:getGradHiddenState(...) 96 | end 97 | 98 | function Recursor:setGradHiddenState(...) 99 | return self.modules[1]:setGradHiddenState(...) 100 | end 101 | 102 | Recursor.__tostring__ = nn.Decorator.__tostring__ 103 | -------------------------------------------------------------------------------- /Repeater.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Repeater ]]-- 3 | -- Encapsulates an AbstractRecurrent instance (rnn) which is repeatedly 4 | -- presented with the same input for rho time steps. 5 | -- The output is a table of rho outputs of the rnn. 6 | ------------------------------------------------------------------------ 7 | assert(not nn.Repeater, "update nnx package : luarocks install nnx") 8 | local Repeater, parent = torch.class('nn.Repeater', 'nn.AbstractSequencer') 9 | 10 | function Repeater:__init(module, rho) 11 | parent.__init(self) 12 | assert(torch.type(rho) == 'number', "expecting number value for arg 2") 13 | self.rho = rho 14 | self.module = (not torch.isTypeOf(module, 'nn.AbstractRecurrent')) and nn.Recursor(module) or module 15 | 16 | self.module:maxBPTTstep(rho) -- hijack rho (max number of time-steps for backprop) 17 | 18 | self.modules[1] = self.module 19 | self.output = {} 20 | end 21 | 22 | function Repeater:updateOutput(input) 23 | self.module = self.module or self.rnn -- backwards compatibility 24 | 25 | self.module:forget() 26 | -- TODO make copy outputs optional 27 | for step=1,self.rho do 28 | self.output[step] = nn.rnn.recursiveCopy(self.output[step], self.module:updateOutput(input)) 29 | end 30 | return self.output 31 | end 32 | 33 | function Repeater:updateGradInput(input, gradOutput) 34 | assert(self.module.step - 1 == self.rho, "inconsistent rnn steps") 35 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 36 | assert(#gradOutput == self.rho, "gradOutput should have rho elements") 37 | 38 | -- back-propagate through time (BPTT) 39 | for step=self.rho,1,-1 do 40 | local gradInput = self.module:updateGradInput(input, gradOutput[step]) 41 | if step == self.rho then 42 | self.gradInput = nn.rnn.recursiveCopy(self.gradInput, gradInput) 43 | else 44 | nn.rnn.recursiveAdd(self.gradInput, gradInput) 45 | end 46 | end 47 | 48 | return self.gradInput 49 | end 50 | 51 | function Repeater:accGradParameters(input, gradOutput, scale) 52 | assert(self.module.step - 1 == self.rho, "inconsistent rnn steps") 53 | assert(torch.type(gradOutput) == 
'table', "expecting gradOutput table") 54 | assert(#gradOutput == self.rho, "gradOutput should have rho elements") 55 | 56 | -- back-propagate through time (BPTT) 57 | for step=self.rho,1,-1 do 58 | self.module:accGradParameters(input, gradOutput[step], scale) 59 | end 60 | 61 | end 62 | 63 | function Repeater:maxBPTTstep(rho) 64 | self.rho = rho 65 | self.module:maxBPTTstep(rho) 66 | end 67 | 68 | function Repeater:accUpdateGradParameters(input, gradOutput, lr) 69 | assert(self.module.step - 1 == self.rho, "inconsistent rnn steps") 70 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 71 | assert(#gradOutput == self.rho, "gradOutput should have rho elements") 72 | 73 | -- back-propagate through time (BPTT) 74 | for step=self.rho,1,-1 do 75 | self.module:accUpdateGradParameters(input, gradOutput[step], lr) 76 | end 77 | end 78 | 79 | function Repeater:__tostring__() 80 | local tab = ' ' 81 | local line = '\n' 82 | local str = torch.type(self) .. ' {' .. line 83 | str = str .. tab .. '[ input, input, ..., input ]'.. line 84 | str = str .. tab .. ' V V V '.. line 85 | str = str .. tab .. tostring(self.modules[1]):gsub(line, line .. tab) .. line 86 | str = str .. tab .. ' V V V '.. line 87 | str = str .. tab .. '[output(1),output(2),...,output('..self.rho..')]' .. line 88 | str = str .. '}' 89 | return str 90 | end 91 | -------------------------------------------------------------------------------- /RepeaterCriterion.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ RepeaterCriterion ]]-- 3 | -- Applies a criterion to each of the inputs in a Table using the 4 | -- same target (the target is repeated). 5 | -- Useful for nn.Repeater and nn.Sequencer. 
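--
-- A minimal sketch (illustrative only; the decorated criterion and the shapes
-- of `outputs` and `target` are arbitrary examples):
--   local crit = nn.RepeaterCriterion(nn.ClassNLLCriterion())
--   -- `outputs` is the table of predictions produced by e.g. nn.Repeater,
--   -- `target` is a single target shared by every time-step:
--   local loss = crit:forward(outputs, target)
--   local gradOutputs = crit:backward(outputs, target)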
6 | ------------------------------------------------------------------------ 7 | assert(not nn.RepeaterCriterion, "update nnx package : luarocks install nnx") 8 | local RepeaterCriterion, parent = torch.class('nn.RepeaterCriterion', 'nn.Criterion') 9 | 10 | function RepeaterCriterion:__init(criterion) 11 | parent.__init(self) 12 | self.criterion = criterion 13 | self.gradInput = {} 14 | self.clones = {} 15 | end 16 | 17 | RepeaterCriterion.getStepCriterion = nn.SequencerCriterion.getStepCriterion 18 | 19 | function RepeaterCriterion:forward(input, target) 20 | self.output = 0 21 | local nStep 22 | if torch.isTensor(input) then 23 | nStep = input:size(1) 24 | else 25 | nStep = #input 26 | end 27 | 28 | 29 | for i=1,nStep do 30 | local criterion = self:getStepCriterion(i) 31 | self.output = self.output + criterion:forward(input[i], target) 32 | end 33 | 34 | return self.output 35 | end 36 | 37 | function RepeaterCriterion:backward(input, target) 38 | self.gradInput = {} 39 | if torch.isTensor(input) then 40 | nStep = input:size(1) 41 | else 42 | nStep = #input 43 | end 44 | 45 | local tableGradInput = {} 46 | for i=1,nStep do 47 | local criterion = self:getStepCriterion(i) 48 | tableGradInput[i] = criterion:backward(input[i], target) 49 | end 50 | 51 | if torch.isTensor(input) then 52 | self.gradInput = tableGradInput[1].new() 53 | self.gradInput:resize(nStep, unpack(tableGradInput[1]:size():totable())) 54 | for step=1,nStep do 55 | self.gradInput[step]:copy(tableGradInput[step]) 56 | end 57 | else 58 | self.gradInput = tableGradInput 59 | end 60 | 61 | return self.gradInput 62 | end 63 | -------------------------------------------------------------------------------- /SAdd.lua: -------------------------------------------------------------------------------- 1 | local SAdd, parent = torch.class('nn.SAdd', 'nn.Module') 2 | 3 | function SAdd:__init(addend, negate) 4 | parent.__init(self) 5 | 6 | self.addend = addend 7 | self.negate = (negate == nil) and false or negate 8 | end 9 | 10 | function SAdd:updateOutput(input) 11 | self.output:resizeAs(input):copy(input) 12 | self.output = self.output + self.addend 13 | if self.negate then 14 | self.output = -self.output 15 | end 16 | return self.output 17 | end 18 | 19 | function SAdd:updateGradInput(input, gradOutput) 20 | if self.gradInput then 21 | self.gradInput:resizeAs(gradOutput):copy(gradOutput) 22 | else 23 | self.gradInput = torch.Tensor():resizeAs(gradOutput):copy(gradOutput) 24 | end 25 | if self.negate then 26 | self.gradInput = -self.gradInput 27 | end 28 | return self.gradInput 29 | end -------------------------------------------------------------------------------- /SeqBRNN.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ SeqBRNN ]] -- 3 | -- Bi-directional RNN using two SeqLSTM modules. 4 | -- Input is a tensor e.g time x batch x inputdim. 5 | -- Output is a tensor of the same length e.g time x batch x outputdim. 6 | -- Applies a forward rnn to input tensor in forward order 7 | -- and applies a backward rnn in reverse order. 8 | -- Reversal of the sequence happens on the time dimension. 9 | -- For each step, the outputs of both rnn are merged together using 10 | -- the merge module (defaults to nn.CAddTable() which sums the activations). 
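--
-- A minimal sketch (illustrative only; the sizes below are arbitrary):
--   local brnn = nn.SeqBRNN(5, 10)        -- inputDim = 5, hiddenDim = 10
--   local input = torch.randn(7, 3, 5)    -- seqlen x batchsize x inputDim
--   local output = brnn:forward(input)    -- seqlen x batchsize x hiddenDim
--   -- pass batchFirst = true to use batchsize x seqlen x inputDim instead:
--   local brnn2 = nn.SeqBRNN(5, 10, true)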
11 | ------------------------------------------------------------------------ 12 | local SeqBRNN, parent = torch.class('nn.SeqBRNN', 'nn.Container') 13 | 14 | function SeqBRNN:__init(inputDim, hiddenDim, batchFirst, merge) 15 | self.forwardModule = nn.SeqLSTM(inputDim, hiddenDim) 16 | self.backwardModule = nn.SeqLSTM(inputDim, hiddenDim) 17 | self.merge = merge 18 | if not self.merge then 19 | self.merge = nn.CAddTable() 20 | end 21 | self.dim = 1 22 | local backward = nn.Sequential() 23 | backward:add(nn.SeqReverseSequence(self.dim)) -- reverse 24 | backward:add(self.backwardModule) 25 | backward:add(nn.SeqReverseSequence(self.dim)) -- unreverse 26 | 27 | local concat = nn.ConcatTable() 28 | concat:add(self.forwardModule):add(backward) 29 | 30 | local brnn = nn.Sequential() 31 | brnn:add(concat) 32 | brnn:add(self.merge) 33 | if(batchFirst) then 34 | -- Insert transposes before and after the brnn. 35 | brnn:insert(nn.Transpose({1, 2}), 1) 36 | brnn:insert(nn.Transpose({1, 2})) 37 | end 38 | 39 | parent.__init(self) 40 | 41 | self.output = torch.Tensor() 42 | self.gradInput = torch.Tensor() 43 | 44 | self.module = brnn 45 | -- so that it can be handled like a Container 46 | self.modules[1] = brnn 47 | end 48 | 49 | function SeqBRNN:updateOutput(input) 50 | self.output = self.module:updateOutput(input) 51 | return self.output 52 | end 53 | 54 | function SeqBRNN:updateGradInput(input, gradOutput) 55 | self.gradInput = self.module:updateGradInput(input, gradOutput) 56 | return self.gradInput 57 | end 58 | 59 | function SeqBRNN:accGradParameters(input, gradOutput, scale) 60 | self.module:accGradParameters(input, gradOutput, scale) 61 | end 62 | 63 | function SeqBRNN:accUpdateGradParameters(input, gradOutput, lr) 64 | self.module:accUpdateGradParameters(input, gradOutput, lr) 65 | end 66 | 67 | function SeqBRNN:sharedAccUpdateGradParameters(input, gradOutput, lr) 68 | self.module:sharedAccUpdateGradParameters(input, gradOutput, lr) 69 | end 70 | 71 | function SeqBRNN:__tostring__() 72 | if self.module.__tostring__ then 73 | return torch.type(self) .. ' @ ' .. self.module:__tostring__() 74 | else 75 | return torch.type(self) .. ' @ ' .. 
torch.type(self.module) 76 | end 77 | end -------------------------------------------------------------------------------- /SeqLSTMP.lua: -------------------------------------------------------------------------------- 1 | local SeqLSTMP, parent = torch.class('nn.SeqLSTMP', 'nn.SeqLSTM') 2 | 3 | SeqLSTMP.dpnn_parameters = {'weight', 'bias', 'weightO'} 4 | SeqLSTMP.dpnn_gradParameters = {'gradWeight', 'gradBias', 'gradWeightO'} 5 | 6 | function SeqLSTMP:__init(inputsize, hiddensize, outputsize) 7 | assert(inputsize and hiddensize and outputsize, "Expecting input, hidden and output size") 8 | local D, H, R = inputsize, hiddensize, outputsize 9 | 10 | self.weightO = torch.Tensor(H, R) 11 | self.gradWeightO = torch.Tensor(H, R) 12 | 13 | parent.__init(self, inputsize, hiddensize, outputsize) 14 | end 15 | 16 | function SeqLSTMP:reset(std) 17 | self.bias:zero() 18 | self.bias[{{self.outputsize + 1, 2 * self.outputsize}}]:fill(1) 19 | if not std then 20 | self.weight:normal(0, 1.0 / math.sqrt(self.hiddensize + self.inputsize)) 21 | self.weightO:normal(0, 1.0 / math.sqrt(self.outputsize + self.hiddensize)) 22 | else 23 | self.weight:normal(0, std) 24 | self.weightO:normal(0, std) 25 | end 26 | return self 27 | end 28 | 29 | function SeqLSTMP:adapter(t) 30 | local T, N = self._output:size(1), self._output:size(2) 31 | self._hidden = self._hidden or self.next_h.new() 32 | self._hidden:resize(T, N, self.hiddensize) 33 | 34 | self._hidden[t]:copy(self.next_h) 35 | self.next_h:resize(N,self.outputsize) 36 | self.next_h:mm(self._hidden[t], self.weightO) 37 | end 38 | 39 | function SeqLSTMP:gradAdapter(scale, t) 40 | self.buffer3:resizeAs(self.grad_next_h):copy(self.grad_next_h) 41 | 42 | self.gradWeightO:addmm(scale, self._hidden[t]:t(), self.grad_next_h) 43 | self.grad_next_h:resize(self._output:size(2), self.hiddensize) 44 | self.grad_next_h:mm(self.buffer3, self.weightO:t()) 45 | end 46 | 47 | function SeqLSTMP:parameters() 48 | return {self.weight, self.bias, self.weightO}, {self.gradWeight, self.gradBias, self.gradWeightO} 49 | end 50 | 51 | function SeqLSTMP:accUpdateGradParameters(input, gradOutput, lr) 52 | error"accUpdateGradParameters not implemented for SeqLSTMP" 53 | end 54 | 55 | function SeqLSTMP:toFastLSTM() 56 | error"toFastLSTM not supported for SeqLSTMP" 57 | end 58 | -------------------------------------------------------------------------------- /SeqReverseSequence.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ SeqReverseSequence ]] -- 3 | -- Reverses a sequence on a given dimension. 
4 | -- Example: Given a tensor of torch.Tensor({{1,2,3,4,5}, {6,7,8,9,10}}) 5 | -- nn.SeqReverseSequence(1):forward(tensor) would give: torch.Tensor({{6,7,8,9,10},{1,2,3,4,5}}) 6 | ------------------------------------------------------------------------ 7 | local SeqReverseSequence, parent = torch.class("nn.SeqReverseSequence", "nn.Module") 8 | 9 | function SeqReverseSequence:__init(dim) 10 | parent.__init(self) 11 | self.output = torch.Tensor() 12 | self.gradInput = torch.Tensor() 13 | assert(dim, "Must specify dimension to reverse sequence over") 14 | assert(dim <= 3, "Dimension has to be no greater than 3 (Only supports up to a 3D Tensor).") 15 | self.dim = dim 16 | end 17 | 18 | function SeqReverseSequence:reverseOutput(input) 19 | self.output:resizeAs(input) 20 | self.outputIndices = self.outputIndices or ((torch.type(input) == 'torch.CudaTensor') and torch.CudaTensor() or (torch.type(input) == 'torch.ClTensor') and torch.ClTensor() or torch.LongTensor()) 21 | self.outputIndices:resize(input:size()) 22 | local T = input:size(1) 23 | for x = 1, T do 24 | self.outputIndices:narrow(1, x, 1):fill(T - x + 1) 25 | end 26 | self.output:gather(input, 1, self.outputIndices) 27 | end 28 | 29 | function SeqReverseSequence:updateOutput(input) 30 | if (self.dim == 1) then 31 | self:reverseOutput(input) 32 | end 33 | if (self.dim == 2) then 34 | input = input:transpose(1, 2) 35 | self:reverseOutput(input) 36 | self.output = self.output:transpose(1, 2) 37 | end 38 | if (self.dim == 3) then 39 | input = input:transpose(1, 3) 40 | self:reverseOutput(input) 41 | self.output = self.output:transpose(1, 3) 42 | end 43 | return self.output 44 | end 45 | 46 | function SeqReverseSequence:reverseGradOutput(gradOutput) 47 | self.gradInput:resizeAs(gradOutput) 48 | self.gradIndices = self.gradIndices or ((torch.type(gradOutput) == 'torch.CudaTensor') and torch.CudaTensor() or (torch.type(gradOutput) == 'torch.ClTensor') and torch.ClTensor() or torch.LongTensor()) 49 | self.gradIndices:resize(gradOutput:size()) 50 | local T = gradOutput:size(1) 51 | for x = 1, T do 52 | self.gradIndices:narrow(1, x, 1):fill(T - x + 1) 53 | end 54 | self.gradInput:gather(gradOutput, 1, self.gradIndices) 55 | end 56 | 57 | function SeqReverseSequence:updateGradInput(inputTable, gradOutput) 58 | if (self.dim == 1) then 59 | self:reverseGradOutput(gradOutput) 60 | end 61 | if (self.dim == 2) then 62 | gradOutput = gradOutput:transpose(1, 2) 63 | self:reverseGradOutput(gradOutput) 64 | self.gradInput = self.gradInput:transpose(1, 2) 65 | end 66 | if (self.dim == 3) then 67 | gradOutput = gradOutput:transpose(1, 3) 68 | self:reverseGradOutput(gradOutput) 69 | self.gradInput = self.gradInput:transpose(1, 3) 70 | end 71 | return self.gradInput 72 | end 73 | 74 | function SeqReverseSequence:type(type, typecache) 75 | if type then 76 | self.outputIndices = nil 77 | self.gradIndices = nil 78 | end 79 | return parent.type(self, type, typecache) 80 | end 81 | 82 | function SeqReverseSequence:clearState() 83 | self.output:set() 84 | self.gradInput:set() 85 | self.outputIndices = nil 86 | self.gradIndices = nil 87 | end 88 | -------------------------------------------------------------------------------- /Sequencer.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ Sequencer ]]-- 3 | -- Encapsulates a Module. 4 | -- Input is a sequence (a table) of tensors. 5 | -- Output is a sequence (a table) of tensors of the same length. 
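-- A minimal sketch (illustrative only; the module choice and sizes below are
-- arbitrary examples):
--   local seq = nn.Sequencer(nn.FastLSTM(5, 10))
--   local inputs = {torch.randn(3, 5), torch.randn(3, 5)}  -- 2 steps, batch of 3
--   local outputs = seq:forward(inputs)  -- table of 2 tensors of size 3x10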
6 | -- Applies the module to each element in the sequence. 7 | -- Handles both recurrent modules and non-recurrent modules. 8 | -- The sequences in a batch must have the same size. 9 | -- But the sequence length of each batch can vary. 10 | ------------------------------------------------------------------------ 11 | assert(not nn.Sequencer, "update nnx package : luarocks install nnx") 12 | local Sequencer, parent = torch.class('nn.Sequencer', 'nn.AbstractSequencer') 13 | local _ = require 'moses' 14 | 15 | function Sequencer:__init(module) 16 | parent.__init(self) 17 | if not torch.isTypeOf(module, 'nn.Module') then 18 | error"Sequencer: expecting nn.Module instance at arg 1" 19 | end 20 | 21 | -- we can decorate the module with a Recursor to make it AbstractRecurrent 22 | self.module = (not torch.isTypeOf(module, 'nn.AbstractRecurrent')) and nn.Recursor(module) or module 23 | -- backprop through time (BPTT) will be done online (in reverse order of forward) 24 | self.modules = {self.module} 25 | 26 | self.output = {} 27 | self.tableoutput = {} 28 | self.tablegradInput = {} 29 | 30 | -- table of buffers used for evaluation 31 | self._output = {} 32 | -- so that these buffers aren't serialized : 33 | local _ = require 'moses' 34 | self.dpnn_mediumEmpty = _.clone(self.dpnn_mediumEmpty) 35 | table.insert(self.dpnn_mediumEmpty, '_output') 36 | -- default is to forget previous inputs before each forward() 37 | self._remember = 'neither' 38 | end 39 | 40 | function Sequencer:updateOutput(input) 41 | local nStep 42 | if torch.isTensor(input) then 43 | nStep = input:size(1) 44 | else 45 | assert(torch.type(input) == 'table', "expecting input table") 46 | nStep = #input 47 | end 48 | 49 | -- Note that the Sequencer hijacks the rho attribute of the rnn 50 | self.module:maxBPTTstep(nStep) 51 | if self.train ~= false then 52 | -- TRAINING 53 | if not (self._remember == 'train' or self._remember == 'both') then 54 | self.module:forget() 55 | end 56 | 57 | self.tableoutput = {} 58 | for step=1,nStep do 59 | self.tableoutput[step] = self.module:updateOutput(input[step]) 60 | end 61 | 62 | if torch.isTensor(input) then 63 | self.output = torch.isTensor(self.output) and self.output or self.tableoutput[1].new() 64 | self.output:resize(nStep, unpack(self.tableoutput[1]:size():totable())) 65 | for step=1,nStep do 66 | self.output[step]:copy(self.tableoutput[step]) 67 | end 68 | else 69 | self.output = self.tableoutput 70 | end 71 | else 72 | -- EVALUATION 73 | if not (self._remember == 'eval' or self._remember == 'both') then 74 | self.module:forget() 75 | end 76 | -- during evaluation, recurrent modules reuse memory (i.e. 
outputs) 77 | -- so we need to copy each output into our own table or tensor 78 | if torch.isTensor(input) then 79 | for step=1,nStep do 80 | local output = self.module:updateOutput(input[step]) 81 | if step == 1 then 82 | self.output = torch.isTensor(self.output) and self.output or output.new() 83 | self.output:resize(nStep, unpack(output:size():totable())) 84 | end 85 | self.output[step]:copy(output) 86 | end 87 | else 88 | for step=1,nStep do 89 | self.tableoutput[step] = nn.rnn.recursiveCopy( 90 | self.tableoutput[step] or table.remove(self._output, 1), 91 | self.module:updateOutput(input[step]) 92 | ) 93 | end 94 | -- remove extra output tensors (save for later) 95 | for i=nStep+1,#self.tableoutput do 96 | table.insert(self._output, self.tableoutput[i]) 97 | self.tableoutput[i] = nil 98 | end 99 | self.output = self.tableoutput 100 | end 101 | end 102 | 103 | return self.output 104 | end 105 | 106 | function Sequencer:updateGradInput(input, gradOutput) 107 | local nStep 108 | if torch.isTensor(input) then 109 | assert(torch.isTensor(gradOutput), "expecting gradOutput Tensor since input is a Tensor") 110 | assert(gradOutput:size(1) == input:size(1), "gradOutput should have as many elements as input") 111 | nStep = input:size(1) 112 | else 113 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 114 | assert(#gradOutput == #input, "gradOutput should have as many elements as input") 115 | nStep = #input 116 | end 117 | 118 | -- back-propagate through time 119 | self.tablegradinput = {} 120 | for step=nStep,1,-1 do 121 | self.tablegradinput[step] = self.module:updateGradInput(input[step], gradOutput[step]) 122 | end 123 | 124 | if torch.isTensor(input) then 125 | self.gradInput = torch.isTensor(self.gradInput) and self.gradInput or self.tablegradinput[1].new() 126 | self.gradInput:resize(nStep, unpack(self.tablegradinput[1]:size():totable())) 127 | for step=1,nStep do 128 | self.gradInput[step]:copy(self.tablegradinput[step]) 129 | end 130 | else 131 | self.gradInput = self.tablegradinput 132 | end 133 | 134 | return self.gradInput 135 | end 136 | 137 | function Sequencer:accGradParameters(input, gradOutput, scale) 138 | local nStep 139 | if torch.isTensor(input) then 140 | assert(torch.isTensor(gradOutput), "expecting gradOutput Tensor since input is a Tensor") 141 | assert(gradOutput:size(1) == input:size(1), "gradOutput should have as many elements as input") 142 | nStep = input:size(1) 143 | else 144 | assert(torch.type(gradOutput) == 'table', "expecting gradOutput table") 145 | assert(#gradOutput == #input, "gradOutput should have as many elements as input") 146 | nStep = #input 147 | end 148 | 149 | -- back-propagate through time 150 | for step=nStep,1,-1 do 151 | self.module:accGradParameters(input[step], gradOutput[step], scale) 152 | end 153 | end 154 | 155 | function Sequencer:accUpdateGradParameters(inputTable, gradOutputTable, lr) 156 | error"Not Implemented" 157 | end 158 | 159 | function Sequencer:training() 160 | if self.train == false then 161 | -- forget at the start of each training 162 | self:forget() 163 | -- empty temporary output table 164 | self._output = {} 165 | -- empty output table (tensor mem was managed by seq) 166 | self.tableoutput = nil 167 | end 168 | parent.training(self) 169 | end 170 | 171 | function Sequencer:evaluate() 172 | if self.train ~= false then 173 | -- forget at the start of each evaluation 174 | self:forget() 175 | -- empty output table (tensor mem was managed by rnn) 176 | self.tableoutput = {} 177 | end 178 | 
parent.evaluate(self) 179 | assert(self.train == false) 180 | end 181 | 182 | function Sequencer:clearState() 183 | if torch.isTensor(self.output) then 184 | self.output:set() 185 | self.gradInput:set() 186 | else 187 | self.output = {} 188 | self.gradInput = {} 189 | end 190 | self._output = {} 191 | self.tableoutput = {} 192 | self.tablegradinput = {} 193 | self.module:clearState() 194 | end 195 | 196 | Sequencer.__tostring__ = nn.Decorator.__tostring__ 197 | -------------------------------------------------------------------------------- /SequencerCriterion.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ SequencerCriterion ]]-- 3 | -- Applies a criterion to each of the inputs and targets in the 4 | -- corresponding input and target Tables. 5 | -- Useful for nn.Repeater and nn.Sequencer. 6 | -- WARNING : assumes that the decorated criterion is stateless, i.e. 7 | -- the backward doesn't need to be preceded by a commensurate forward. 8 | ------------------------------------------------------------------------ 9 | local SequencerCriterion, parent = torch.class('nn.SequencerCriterion', 'nn.Criterion') 10 | 11 | function SequencerCriterion:__init(criterion, sizeAverage) 12 | parent.__init(self) 13 | self.criterion = criterion 14 | if torch.isTypeOf(criterion, 'nn.ModuleCriterion') then 15 | error("SequencerCriterion shouldn't decorate a ModuleCriterion. ".. 16 | "Instead, try the other way around : ".. 17 | "ModuleCriterion decorates a SequencerCriterion. ".. 18 | "Its modules can also be similarly decorated with a Sequencer.") 19 | end 20 | if sizeAverage ~= nil then 21 | self.sizeAverage = sizeAverage 22 | else 23 | self.sizeAverage = false 24 | end 25 | self.clones = {} 26 | self.gradInput = {} 27 | end 28 | 29 | function SequencerCriterion:getStepCriterion(step) 30 | assert(step, "expecting step at arg 1") 31 | local criterion = self.clones[step] 32 | if not criterion then 33 | criterion = self.criterion:clone() 34 | self.clones[step] = criterion 35 | end 36 | return criterion 37 | end 38 | 39 | function SequencerCriterion:updateOutput(input, target) 40 | self.output = 0 41 | local nStep 42 | if torch.isTensor(input) then 43 | assert(torch.isTensor(target), "expecting target Tensor since input is a Tensor") 44 | assert(target:size(1) == input:size(1), "target should have as many elements as input") 45 | nStep = input:size(1) 46 | else 47 | assert(torch.type(target) == 'table', "expecting target table") 48 | assert(#target == #input, "target should have as many elements as input") 49 | nStep = #input 50 | end 51 | 52 | 53 | for i=1,nStep do 54 | local criterion = self:getStepCriterion(i) 55 | self.output = self.output + criterion:forward(input[i], target[i]) 56 | end 57 | 58 | if self.sizeAverage then 59 | self.output = self.output / nStep 60 | end 61 | 62 | return self.output 63 | end 64 | 65 | function SequencerCriterion:updateGradInput(input, target) 66 | self.gradInput = {} 67 | local nStep 68 | if torch.isTensor(input) then 69 | assert(torch.isTensor(target), "expecting target Tensor since input is a Tensor") 70 | assert(target:size(1) == input:size(1), "target should have as many elements as input") 71 | nStep = input:size(1) 72 | else 73 | assert(torch.type(target) == 'table', "expecting gradOutput table") 74 | assert(#target == #input, "target should have as many elements as input") 75 | nStep = #input 76 | end 77 | 78 | local tableGradInput = {} 79 | for 
i=1,nStep do 80 | local criterion = self:getStepCriterion(i) 81 | tableGradInput[i] = criterion:backward(input[i], target[i]) 82 | 83 | if self.sizeAverage then 84 | local function table_div(output, scalar) 85 | if torch.type(output) == 'table' then 86 | for j=1,#output do 87 | table_div(output[j], scalar) 88 | end 89 | else 90 | output:div(scalar) 91 | end 92 | end 93 | table_div(tableGradInput[i], nStep) 94 | end 95 | end 96 | 97 | if torch.isTensor(input) then 98 | self.gradInput = tableGradInput[1].new() 99 | self.gradInput:resize(nStep, unpack(tableGradInput[1]:size():totable())) 100 | for step=1,nStep do 101 | self.gradInput[step]:copy(tableGradInput[step]) 102 | end 103 | else 104 | self.gradInput = tableGradInput 105 | end 106 | 107 | return self.gradInput 108 | end 109 | -------------------------------------------------------------------------------- /TrimZero.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | --[[ TrimZero ]]-- 3 | -- Author: Jin-Hwa Kim 4 | -- License: LICENSE.2nd.txt 5 | 6 | -- Decorator that zeroes the output rows of the encapsulated module 7 | -- for commensurate input rows which are tensors of zeros 8 | 9 | -- The only difference from `MaskZero` is that it reduces computational costs 10 | -- by varying the batch size, if any, when sequences of varying length 11 | -- are provided in the input. Notice that when the lengths are consistent, 12 | -- `MaskZero` will be faster, because `TrimZero` adds overhead of its own. 13 | 14 | -- In short, the result is the same as `MaskZero`'s; however, `TrimZero` is 15 | -- faster than `MaskZero` only when the sequence lengths vary considerably. 16 | -- In practice, e.g. in language modeling, `TrimZero` is expected to be about 17 | -- 30% faster than `MaskZero`. (You can test this using `test/test_trimzero.lua`.)
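-- A rough usage sketch (sizes are hypothetical; TrimZero follows the same calling
-- convention as `MaskZero`, i.e. nn.TrimZero(module, nInputDim)) :
--
--    rnn = nn.Sequencer(nn.TrimZero(nn.FastLSTM(10, 10), 1))
--
-- Rows of zeros in the input (e.g. right-padded variable-length sequences) still
-- produce rows of zeros in the output, but those rows are trimmed from the batch
-- before the decorated module's forward/backward instead of merely masked after.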
18 | ------------------------------------------------------------------------ 19 | local TrimZero, parent = torch.class("nn.TrimZero", "nn.MaskZero") 20 | 21 | require 'torchx' 22 | 23 | function TrimZero:__init(module, nInputDim, silent) 24 | parent.__init(self, module, nInputDim, silent) 25 | if (torch.typename(module)=='nn.GRU' or torch.typename(module)=='nn.LSTM' or torch.typename(module)=='nn.FastLSTM') and module.p ~= 0 then 26 | assert(module.mono, 'TrimZero + Bayesian RNN needs `mono` option!') 27 | end 28 | self.temp = torch.Tensor() 29 | self.gradTemp = torch.Tensor() 30 | end 31 | 32 | function TrimZero:recursiveMask(output, input, mask) 33 | if torch.type(input) == 'table' then 34 | output = torch.type(output) == 'table' and output or {} 35 | for k,v in ipairs(input) do 36 | output[k], mask = self:recursiveMask(output[k], v, mask) 37 | end 38 | else 39 | assert(torch.isTensor(input)) 40 | output = torch.isTensor(output) and output or input.new() 41 | 42 | -- make sure mask has the same dimension as the input tensor 43 | if torch.type(mask) ~= 'torch.LongTensor' then 44 | local inputSize = input:size():fill(1) 45 | assert(self.nInputDim) 46 | if self.batchmode then 47 | inputSize[1] = input:size(1) 48 | end 49 | mask:resize(inputSize) 50 | end 51 | 52 | -- build mask 53 | if self.batchmode then 54 | assert(torch.find, 'install torchx package : luarocks install torchx') 55 | -- use torch.find to convert mask from onehot to indices 56 | if torch.type(mask) ~= 'torch.LongTensor' then 57 | if torch.type(mask) == 'torch.CudaTensor' then 58 | self._maskbyte = self._maskbyte or torch.ByteTensor() 59 | self._maskbyte:resize(mask:size()):copy(mask) 60 | mask = self._maskbyte 61 | end 62 | mask = torch.LongTensor(torch.find(mask, 0)) 63 | end 64 | self._maskindices = mask 65 | if mask:dim() > 0 then 66 | output:index(input, 1, mask) 67 | else 68 | output:index(input, 1, torch.LongTensor{1}):zero() 69 | end 70 | else 71 | if mask:dim() == 0 or mask:view(-1)[1] == 1 then 72 | output:resize(input:size()):zero() 73 | else 74 | output:resize(input:size()):copy(input) 75 | end 76 | end 77 | end 78 | return output, mask 79 | end 80 | 81 | function TrimZero:recursiveUnMask(output, input, mask) 82 | if torch.type(input) == 'table' then 83 | output = torch.type(output) == 'table' and output or {} 84 | for k,v in ipairs(input) do 85 | output[k] = self:recursiveUnMask(output[k], v, mask) 86 | end 87 | else 88 | assert(torch.isTensor(input)) 89 | output = torch.isTensor(output) and output or input.new() 90 | 91 | -- make sure output has the same dimension as the mask 92 | local inputSize = input:size() 93 | if self.batchmode then 94 | inputSize[1] = mask:size(1) 95 | end 96 | output:resize(inputSize):zero() 97 | 98 | -- build mask 99 | if self.batchmode then 100 | assert(self._maskindices) 101 | mask = self._maskindices 102 | if mask:dim() > 0 then 103 | output:indexCopy(1, mask, input) 104 | end 105 | else 106 | if mask:view(-1)[1] == 0 then 107 | output:copy(input) 108 | end 109 | end 110 | end 111 | return output 112 | end 113 | 114 | function TrimZero:updateOutput(input) 115 | -- recurrent module input is always the first one 116 | local rmi = self:recursiveGetFirst(input):contiguous() 117 | if rmi:dim() == self.nInputDim then 118 | self.batchmode = false 119 | rmi = rmi:view(-1) -- collapse dims 120 | elseif rmi:dim() - 1 == self.nInputDim then 121 | self.batchmode = true 122 | rmi = rmi:view(rmi:size(1), -1) -- collapse non-batch dims 123 | else 124 | error("nInputDim error: "..rmi:dim()..", 
"..self.nInputDim) 125 | end 126 | 127 | -- build mask 128 | local vectorDim = rmi:dim() 129 | self._zeroMask = self._zeroMask or rmi.new() 130 | self._zeroMask:norm(rmi, 2, vectorDim) 131 | self.zeroMask = self.zeroMask or ((torch.type(rmi) == 'torch.CudaTensor') and torch.CudaTensor() or torch.ByteTensor()) 132 | self._zeroMask.eq(self.zeroMask, self._zeroMask, 0) 133 | 134 | -- forward through decorated module 135 | self.temp = self:recursiveMask(self.temp, input, self.zeroMask) 136 | output = self.modules[1]:updateOutput(self.temp) 137 | self.output = self:recursiveUnMask(self.output, output, self.zeroMask, true) 138 | 139 | return self.output 140 | end 141 | 142 | function TrimZero:updateGradInput(input, gradOutput) 143 | self.temp = self:recursiveMask(self.temp, input, self.zeroMask) 144 | self.gradTemp = self:recursiveMask(self.gradTemp, gradOutput, self.zeroMask) 145 | 146 | local gradInput = self.modules[1]:updateGradInput(self.temp, self.gradTemp) 147 | 148 | self.gradInput = self:recursiveUnMask(self.gradInput, gradInput, self.zeroMask) 149 | 150 | return self.gradInput 151 | end 152 | 153 | function TrimZero:accGradParameters(input, gradOutput, scale) 154 | self.temp = self:recursiveMask(self.temp, input, self.zeroMask) 155 | self.modules[1]:accGradParameters(self.temp, gradOutput, scale) 156 | end 157 | -------------------------------------------------------------------------------- /ZeroGrad.lua: -------------------------------------------------------------------------------- 1 | local ZeroGrad, parent 2 | if nn.ZeroGrad then -- prevent name conflicts with nnx 3 | ZeroGrad, parent = nn.ZeroGrad, nn.Module 4 | else 5 | ZeroGrad, parent = torch.class('nn.ZeroGrad', 'nn.Module') 6 | end 7 | 8 | local function recursiveZero(t1,t2) 9 | if torch.type(t2) == 'table' then 10 | t1 = (torch.type(t1) == 'table') and t1 or {t1} 11 | for key,_ in pairs(t2) do 12 | t1[key], t2[key] = recursiveZero(t1[key], t2[key]) 13 | end 14 | elseif torch.isTensor(t2) then 15 | t1 = torch.isTensor(t1) and t1 or t2.new() 16 | t1:resizeAs(t2):zero() 17 | else 18 | error("expecting nested tensors or tables. Got ".. 19 | torch.type(t1).." and "..torch.type(t2).." instead") 20 | end 21 | return t1, t2 22 | end 23 | 24 | function ZeroGrad:updateOutput(input) 25 | self.output:set(input) 26 | return self.output 27 | end 28 | 29 | -- the gradient is simply zeroed. 30 | -- useful when you don't want to backpropgate through certain paths. 
31 | function ZeroGrad:updateGradInput(input, gradOutput) 32 | self.gradInput = recursiveZero(self.gradInput, gradOutput) 33 | return self.gradInput 34 | end 35 | -------------------------------------------------------------------------------- /doc/article/ff-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}input\ =\ \{torch.\textbf{randn}(3,4),\ torch.\textbf{randn}(3,4),\ torch.\textbf{randn}(3,4)\} \\ 4 | \mbox{}\textbf{rnn:forward}(input[1]) \\ 5 | \mbox{}\textbf{rnn:forward}(input[2]) \\ 6 | \mbox{}\textbf{rnn:forward}(input[3]) 7 | -------------------------------------------------------------------------------- /doc/article/ff.lua: -------------------------------------------------------------------------------- 1 | input = {torch.randn(3,4), torch.randn(3,4), torch.randn(3,4)} 2 | rnn:forward(input[1]) 3 | rnn:forward(input[2]) 4 | rnn:forward(input[3]) -------------------------------------------------------------------------------- /doc/article/ff2-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}seq\ =\ nn.\textbf{Sequencer}(rnn) \\ 4 | \mbox{}\textbf{seq:forward}(input) 5 | -------------------------------------------------------------------------------- /doc/article/ff2.lua: -------------------------------------------------------------------------------- 1 | seq = nn.Sequencer(rnn) 2 | seq:forward(input) -------------------------------------------------------------------------------- /doc/article/lm-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}input\ =\ \{\} \\ 4 | \mbox{}\textbf{for}\ i=1,rho\ \textbf{do} \\ 5 | \mbox{}\ \ \ table.\textbf{insert}(input,\ torch.\textbf{Tensor}(batchSize):\textbf{random}(1,nIndex)) \\ 6 | \mbox{}\textbf{end} \\ 7 | \mbox{}output\ =\ \textbf{rnn:forward}(input) \\ 8 | \mbox{}\textbf{assert}(\#output\ ==\ \#input) 9 | -------------------------------------------------------------------------------- /doc/article/lm.lua: -------------------------------------------------------------------------------- 1 | input = {} 2 | for i=1,rho do 3 | table.insert(input, torch.Tensor(batchSize):random(1,nIndex)) 4 | end 5 | output = rnn:forward(input) 6 | assert(#output == #input) -------------------------------------------------------------------------------- /doc/article/lstm-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}module\ =\ nn.\textbf{LSTM}(inputSize,\ outputSize,\ [rho]) 4 | -------------------------------------------------------------------------------- /doc/article/lstm.lua: -------------------------------------------------------------------------------- 1 | module = nn.LSTM(inputSize, outputSize, [rho]) -------------------------------------------------------------------------------- /doc/article/mlp-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | 
\noindent 3 | \mbox{}mlp\ =\ nn.\textbf{Sequential}() \\ 4 | \mbox{}\textbf{mlp:add}(nn.\textbf{Convert}(\texttt{'bchw'},\ \texttt{'bf'}))\ \textit{-\/-\ collapse\ 3D\ to\ 1D} \\ 5 | \mbox{}\textbf{mlp:add}(nn.\textbf{Linear}(1*28*28,\ 200)) \\ 6 | \mbox{}\textbf{mlp:add}(nn.\textbf{Tanh}()) \\ 7 | \mbox{}\textbf{mlp:add}(nn.\textbf{Linear}(200,\ 200)) \\ 8 | \mbox{}\textbf{mlp:add}(nn.\textbf{Tanh}())\ \\ 9 | \mbox{}\textbf{mlp:add}(nn.\textbf{Linear}(200,\ 10)) \\ 10 | \mbox{}\textbf{mlp:add}(nn.\textbf{LogSoftMax}())\ \textit{-\/-\ for\ classification\ problems} 11 | -------------------------------------------------------------------------------- /doc/article/mlp.lua: -------------------------------------------------------------------------------- 1 | mlp = nn.Sequential() 2 | mlp:add(nn.Convert('bchw', 'bf')) -- collapse 3D to 1D 3 | mlp:add(nn.Linear(1*28*28, 200)) 4 | mlp:add(nn.Tanh()) 5 | mlp:add(nn.Linear(200, 200)) 6 | mlp:add(nn.Tanh()) 7 | mlp:add(nn.Linear(200, 10)) 8 | mlp:add(nn.LogSoftMax()) -- for classification problems -------------------------------------------------------------------------------- /doc/article/nll-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}nll\ =\ nn.\textbf{ClassNLLCriterion}() 4 | -------------------------------------------------------------------------------- /doc/article/nll.lua: -------------------------------------------------------------------------------- 1 | nll = nn.ClassNLLCriterion() -------------------------------------------------------------------------------- /doc/article/ram-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}ram\ =\ nn.\textbf{RecurrentAttention}(rnn,\ action,\ nStep,\ hiddenSize) 4 | -------------------------------------------------------------------------------- /doc/article/ram.lua: -------------------------------------------------------------------------------- 1 | ram = nn.RecurrentAttention(rnn, action, nStep, hiddenSize) -------------------------------------------------------------------------------- /doc/article/rec-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}rec\ =\ nn.\textbf{Recursor}(module[,\ rho]) 4 | -------------------------------------------------------------------------------- /doc/article/rec.lua: -------------------------------------------------------------------------------- 1 | rec = nn.Recursor(module[, rho]) -------------------------------------------------------------------------------- /doc/article/rec2-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}lstm\ =\ nn.\textbf{Sequential}() \\ 4 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{LSTM}(100,100))) \\ 5 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{LSTM}(100,100))) 6 | -------------------------------------------------------------------------------- /doc/article/rec2.lua: -------------------------------------------------------------------------------- 
1 | lstm = nn.Sequential() 2 | :add(nn.Sequencer(nn.LSTM(100,100))) 3 | :add(nn.Sequencer(nn.LSTM(100,100))) -------------------------------------------------------------------------------- /doc/article/rec3-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}lstm\ =\ nn.\textbf{Sequencer}( \\ 4 | \mbox{}\ \ \ nn.\textbf{Recursor}( \\ 5 | \mbox{}\ \ \ \ \ \ nn.\textbf{Sequential}() \\ 6 | \mbox{}\ \ \ \ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 7 | \mbox{}\ \ \ \ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 8 | \mbox{}\ \ \ \ \ \ ) \\ 9 | \mbox{}\ \ \ ) 10 | -------------------------------------------------------------------------------- /doc/article/rec3.lua: -------------------------------------------------------------------------------- 1 | lstm = nn.Sequencer( 2 | nn.Recursor( 3 | nn.Sequential() 4 | :add(nn.LSTM(100,100)) 5 | :add(nn.LSTM(100,100)) 6 | ) 7 | ) -------------------------------------------------------------------------------- /doc/article/rec4-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}lstm\ =\ nn.\textbf{Sequencer}( \\ 4 | \mbox{}\ \ \ nn.\textbf{Sequential}() \\ 5 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 6 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 7 | \mbox{}\ \ \ ) 8 | -------------------------------------------------------------------------------- /doc/article/rec4.lua: -------------------------------------------------------------------------------- 1 | lstm = nn.Sequencer( 2 | nn.Sequential() 3 | :add(nn.LSTM(100,100)) 4 | :add(nn.LSTM(100,100)) 5 | ) -------------------------------------------------------------------------------- /doc/article/rec5-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}lstm\ =\ nn.\textbf{Sequencer}( \\ 4 | \mbox{}\ \ \ nn.\textbf{Sequential}() \\ 5 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 6 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{Linear}(100,100)) \\ 7 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LSTM}(100,100)) \\ 8 | \mbox{}\ \ \ ) 9 | -------------------------------------------------------------------------------- /doc/article/rec5.lua: -------------------------------------------------------------------------------- 1 | lstm = nn.Sequencer( 2 | nn.Sequential() 3 | :add(nn.LSTM(100,100)) 4 | :add(nn.Linear(100,100)) 5 | :add(nn.LSTM(100,100)) 6 | ) -------------------------------------------------------------------------------- /doc/article/recurrence-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}rnn\ =\ nn.\textbf{Recurrence}(module,\ outputSize,\ nInputDim,\ [rho]) 4 | -------------------------------------------------------------------------------- /doc/article/recurrence.lua: -------------------------------------------------------------------------------- 1 | rnn = nn.Recurrence(module, outputSize, nInputDim, [rho]) 
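-- A minimal sketch of a concrete `module` argument (hypothetical sizes), mirroring
-- the recurrent module used in rnnlm.lua further on : it maps the table
-- {input[t], output[t-1]} to output[t].
nIndex, hiddenSize = 10000, 100
rm = nn.Sequential()
   :add(nn.ParallelTable()
      :add(nn.LookupTable(nIndex, hiddenSize))
      :add(nn.Linear(hiddenSize, hiddenSize)))
   :add(nn.CAddTable())
   :add(nn.Sigmoid())
rnn = nn.Recurrence(rm, hiddenSize, 1)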
-------------------------------------------------------------------------------- /doc/article/repeater-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}r\ =\ nn.\textbf{Repeater}(module,\ nStep) 4 | -------------------------------------------------------------------------------- /doc/article/repeater.lua: -------------------------------------------------------------------------------- 1 | r = nn.Repeater(module, nStep) -------------------------------------------------------------------------------- /doc/article/rnn-example-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}\textit{-\/-\ generate\ some\ dummy\ inputs\ and\ gradOutputs\ sequences} \\ 4 | \mbox{}inputs,\ gradOutputs\ =\ \{\},\ \{\} \\ 5 | \mbox{}\textbf{for}\ step=1,rho\ \textbf{do} \\ 6 | \mbox{}\ \ \ inputs[step]\ =\ torch.\textbf{randn}(batchSize,inputSize) \\ 7 | \mbox{}\ \ \ gradOutputs[step]\ =\ torch.\textbf{randn}(batchSize,inputSize) \\ 8 | \mbox{}\textbf{end} \\ 9 | \mbox{} \\ 10 | \mbox{}\textit{-\/-\ an\ AbstractRecurrent\ instance} \\ 11 | \mbox{}rnn\ =\ nn.\textbf{Recurrent}( \\ 12 | \mbox{}\ \ \ hiddenSize,\ \textit{-\/-\ size\ of\ the\ input\ layer} \\ 13 | \mbox{}\ \ \ nn.\textbf{Linear}(inputSize,outputSize),\ \textit{-\/-\ input\ layer} \\ 14 | \mbox{}\ \ \ nn.\textbf{Linear}(outputSize,\ outputSize),\ \textit{-\/-\ recurrent\ layer} \\ 15 | \mbox{}\ \ \ nn.\textbf{Sigmoid}(),\ \textit{-\/-\ transfer\ function} \\ 16 | \mbox{}\ \ \ rho\ \textit{-\/-\ maximum\ number\ of\ time-steps\ for\ BPTT} \\ 17 | \mbox{}) \\ 18 | \mbox{} \\ 19 | \mbox{}\textit{-\/-\ feed-forward\ and\ backpropagate\ through\ time\ like\ this\ :} \\ 20 | \mbox{}\textbf{for}\ step=1,rho\ \textbf{do} \\ 21 | \mbox{}\ \ \ \textbf{rnn:forward}(inputs[step]) \\ 22 | \mbox{}\ \ \ \textbf{rnn:backward}(inputs[step],\ gradOutputs[step]) \\ 23 | \mbox{}\textbf{end} \\ 24 | \mbox{}\textbf{rnn:backwardThroughTime}()\ \textit{-\/-\ call\ backward\ on\ the\ internal\ modules} \\ 25 | \mbox{}gradInputs\ =\ rnn.gradInputs \\ 26 | \mbox{}\textbf{rnn:updateParameters}(0.1) \\ 27 | \mbox{}\textbf{rnn:forget}()\ \textit{-\/-\ resets\ the\ time-step\ counter} 28 | -------------------------------------------------------------------------------- /doc/article/rnn-example.lua: -------------------------------------------------------------------------------- 1 | -- generate some dummy inputs and gradOutputs sequences 2 | inputs, gradOutputs = {}, {} 3 | for step=1,rho do 4 | inputs[step] = torch.randn(batchSize,inputSize) 5 | gradOutputs[step] = torch.randn(batchSize,inputSize) 6 | end 7 | 8 | -- an AbstractRecurrent instance 9 | rnn = nn.Recurrent( 10 | hiddenSize, -- size of the input layer 11 | nn.Linear(inputSize,outputSize), -- input layer 12 | nn.Linear(outputSize, outputSize), -- recurrent layer 13 | nn.Sigmoid(), -- transfer function 14 | rho -- maximum number of time-steps for BPTT 15 | ) 16 | 17 | -- feed-forward and backpropagate through time like this : 18 | for step=1,rho do 19 | rnn:forward(inputs[step]) 20 | rnn:backward(inputs[step], gradOutputs[step]) 21 | end 22 | rnn:backwardThroughTime() -- call backward on the internal modules 23 | gradInputs = rnn.gradInputs 24 | rnn:updateParameters(0.1) 25 | rnn:forget() -- resets 
the time-step counter -------------------------------------------------------------------------------- /doc/article/rnn2-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}\textit{-\/-\ recurrent\ module} \\ 4 | \mbox{}rm\ =\ nn.\textbf{Sequential}() \\ 5 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{ParallelTable}() \\ 6 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LookupTable}(nIndex,\ hiddenSize)) \\ 7 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{Linear}(hiddenSize,\ hiddenSize))) \\ 8 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{CAddTable}()) \\ 9 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sigmoid}()) \\ 10 | \mbox{}\textit{-\/-\ full\ RNN} \\ 11 | \mbox{}rnn\ =\ nn.\textbf{Sequential}() \\ 12 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{Recurrence}(rm,\ hiddenSize,\ 1))) \\ 13 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{SelectTable}(-1))\ \textit{-\/-select\ last\ element} \\ 14 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Linear}(hiddenSize,\ nSentiment)) \\ 15 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{LogSoftMax}()) \\ 16 | \mbox{}) 17 | -------------------------------------------------------------------------------- /doc/article/rnn2.lua: -------------------------------------------------------------------------------- 1 | -- recurrent module 2 | rm = nn.Sequential() 3 | :add(nn.ParallelTable() 4 | :add(nn.LookupTable(nIndex, hiddenSize)) 5 | :add(nn.Linear(hiddenSize, hiddenSize))) 6 | :add(nn.CAddTable()) 7 | :add(nn.Sigmoid()) 8 | -- full RNN 9 | rnn = nn.Sequential() 10 | :add(nn.Sequencer(nn.Recurrence(rm, hiddenSize, 1))) 11 | :add(nn.SelectTable(-1)) --select last element 12 | :add(nn.Linear(hiddenSize, nSentiment)) 13 | :add(nn.LogSoftMax()) 14 | ) -------------------------------------------------------------------------------- /doc/article/rnn_library.bbl: -------------------------------------------------------------------------------- 1 | \begin{thebibliography}{10} 2 | 3 | \bibitem{boden2001guide} 4 | M.~Boden. 5 | \newblock A guide to recurrent neural networks and backpropagation. 6 | \newblock 2001. 7 | 8 | \bibitem{collobert2011torch7} 9 | R.~Collobert, K.~Kavukcuoglu, and C.~Farabet. 10 | \newblock Torch7: A matlab-like environment for machine learning. 11 | \newblock In {\em BigLearn, NIPS Workshop}, number EPFL-CONF-192376, 2011. 12 | 13 | \bibitem{graves2013speech} 14 | A.~Graves, A.-r. Mohamed, and G.~Hinton. 15 | \newblock Speech recognition with deep recurrent neural networks. 16 | \newblock In {\em Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE 17 | International Conference on}, pages 6645--6649. IEEE, 2013. 18 | 19 | \bibitem{greff2015lstm} 20 | K.~Greff, R.~K. Srivastava, J.~Koutn{\'\i}k, B.~R. Steunebrink, and 21 | J.~Schmidhuber. 22 | \newblock Lstm: A search space odyssey. 23 | \newblock {\em arXiv preprint arXiv:1503.04069}, 2015. 24 | 25 | \bibitem{hinton2012improving} 26 | G.~E. Hinton, N.~Srivastava, A.~Krizhevsky, I.~Sutskever, and R.~R. 27 | Salakhutdinov. 28 | \newblock Improving neural networks by preventing co-adaptation of feature 29 | detectors. 30 | \newblock {\em arXiv preprint arXiv:1207.0580}, 2012. 31 | 32 | \bibitem{hochreiter1997long} 33 | S.~Hochreiter and J.~Schmidhuber. 34 | \newblock Long short-term memory. 35 | \newblock {\em Neural computation}, 9(8):1735--1780, 1997. 36 | 37 | \bibitem{ierusalimschy1996lua} 38 | R.~Ierusalimschy, L.~H. 
De~Figueiredo, and W.~Celes~Filho. 39 | \newblock Lua-an extensible extension language. 40 | \newblock {\em Softw., Pract. Exper.}, 26(6):635--652, 1996. 41 | 42 | \bibitem{lecun1998mnist} 43 | Y.~LeCun, C.~Cortes, and C.~J. Burges. 44 | \newblock The mnist database of handwritten digits, 1998. 45 | 46 | \bibitem{marcus1993building} 47 | M.~P. Marcus, M.~A. Marcinkiewicz, and B.~Santorini. 48 | \newblock Building a large annotated corpus of english: The penn treebank. 49 | \newblock {\em Computational linguistics}, 19(2):313--330, 1993. 50 | 51 | \bibitem{mikolov2012statistical} 52 | T.~Mikolov. 53 | \newblock Statistical language models based on neural networks. 54 | \newblock {\em Presentation at Google, Mountain View, 2nd April}, 2012. 55 | 56 | \bibitem{mnih2014recurrent} 57 | V.~Mnih, N.~Heess, A.~Graves, et~al. 58 | \newblock Recurrent models of visual attention. 59 | \newblock In {\em Advances in Neural Information Processing Systems}, pages 60 | 2204--2212, 2014. 61 | 62 | \bibitem{pang2008opinion} 63 | B.~Pang and L.~Lee. 64 | \newblock Opinion mining and sentiment analysis. 65 | \newblock {\em Foundations and trends in information retrieval}, 2(1-2):1--135, 66 | 2008. 67 | 68 | \bibitem{pinheiro2013recurrent} 69 | P.~H. Pinheiro and R.~Collobert. 70 | \newblock Recurrent convolutional neural networks for scene parsing. 71 | \newblock {\em arXiv preprint arXiv:1306.2795}, 2013. 72 | 73 | \bibitem{rumelhart2002learning} 74 | D.~E. Rumelhart, G.~E. Hinton, and R.~J. Williams. 75 | \newblock Learning representations by back-propagating errors. 76 | \newblock {\em Cognitive modeling}, 1:213, 2002. 77 | 78 | \bibitem{sutskever2013training} 79 | I.~Sutskever. 80 | \newblock {\em Training recurrent neural networks}. 81 | \newblock PhD thesis, University of Toronto, 2013. 82 | 83 | \bibitem{williams1992simple} 84 | R.~J. Williams. 85 | \newblock Simple statistical gradient-following algorithms for connectionist 86 | reinforcement learning. 87 | \newblock {\em Machine learning}, 8(3-4):229--256, 1992. 88 | 89 | \bibitem{zaremba2014recurrent} 90 | W.~Zaremba, I.~Sutskever, and O.~Vinyals. 91 | \newblock Recurrent neural network regularization. 92 | \newblock {\em arXiv preprint arXiv:1409.2329}, 2014. 93 | 94 | \end{thebibliography} 95 | -------------------------------------------------------------------------------- /doc/article/rnn_library.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{rumelhart2002learning, 3 | title={Learning representations by back-propagating errors}, 4 | author={Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J}, 5 | journal={Cognitive modeling}, 6 | volume={1}, 7 | pages={213}, 8 | year={2002} 9 | } 10 | 11 | @inproceedings{collobert2011torch7, 12 | title={Torch7: A matlab-like environment for machine learning}, 13 | author={Collobert, Ronan and Kavukcuoglu, Koray and Farabet, Cl{\'e}ment}, 14 | booktitle={BigLearn, NIPS Workshop}, 15 | number={EPFL-CONF-192376}, 16 | year={2011} 17 | } 18 | 19 | @inproceedings{mnih2014recurrent, 20 | title={Recurrent models of visual attention}, 21 | author={Mnih, Volodymyr and Heess, Nicolas and Graves, Alex and others}, 22 | booktitle={Advances in Neural Information Processing Systems}, 23 | pages={2204--2212}, 24 | year={2014} 25 | } 26 | 27 | @article{ierusalimschy1996lua, 28 | title={Lua-an extensible extension language}, 29 | author={Ierusalimschy, Roberto and De Figueiredo, Luiz Henrique and Celes Filho, Waldemar}, 30 | journal={Softw., Pract. 
Exper.}, 31 | volume={26}, 32 | number={6}, 33 | pages={635--652}, 34 | year={1996}, 35 | publisher={Citeseer} 36 | } 37 | 38 | @phdthesis{sutskever2013training, 39 | title={Training recurrent neural networks}, 40 | author={Sutskever, Ilya}, 41 | year={2013}, 42 | school={University of Toronto} 43 | } 44 | 45 | @article{mikolov2012statistical, 46 | title={Statistical language models based on neural networks}, 47 | author={Mikolov, Tom{\'a}{\v{s}}}, 48 | journal={Presentation at Google, Mountain View, 2nd April}, 49 | year={2012} 50 | } 51 | 52 | @article{boden2001guide, 53 | title={A guide to recurrent neural networks and backpropagation}, 54 | author={Boden, Mikael}, 55 | year={2001} 56 | } 57 | 58 | @article{zaremba2014recurrent, 59 | title={Recurrent neural network regularization}, 60 | author={Zaremba, Wojciech and Sutskever, Ilya and Vinyals, Oriol}, 61 | journal={arXiv preprint arXiv:1409.2329}, 62 | year={2014} 63 | } 64 | 65 | @inproceedings{graves2013speech, 66 | title={Speech recognition with deep recurrent neural networks}, 67 | author={Graves, Alan and Mohamed, Abdel-rahman and Hinton, Geoffrey}, 68 | booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on}, 69 | pages={6645--6649}, 70 | year={2013}, 71 | organization={IEEE} 72 | } 73 | 74 | @article{greff2015lstm, 75 | title={LSTM: A Search Space Odyssey}, 76 | author={Greff, Klaus and Srivastava, Rupesh Kumar and Koutn{\'\i}k, Jan and Steunebrink, Bas R and Schmidhuber, J{\"u}rgen}, 77 | journal={arXiv preprint arXiv:1503.04069}, 78 | year={2015} 79 | } 80 | 81 | @article{hochreiter1997long, 82 | title={Long short-term memory}, 83 | author={Hochreiter, Sepp and Schmidhuber, J{\"u}rgen}, 84 | journal={Neural computation}, 85 | volume={9}, 86 | number={8}, 87 | pages={1735--1780}, 88 | year={1997}, 89 | publisher={MIT Press} 90 | } 91 | 92 | @article{pinheiro2013recurrent, 93 | title={Recurrent convolutional neural networks for scene parsing}, 94 | author={Pinheiro, Pedro HO and Collobert, Ronan}, 95 | journal={arXiv preprint arXiv:1306.2795}, 96 | year={2013} 97 | } 98 | 99 | @article{williams1992simple, 100 | title={Simple statistical gradient-following algorithms for connectionist reinforcement learning}, 101 | author={Williams, Ronald J}, 102 | journal={Machine learning}, 103 | volume={8}, 104 | number={3-4}, 105 | pages={229--256}, 106 | year={1992}, 107 | publisher={Springer} 108 | } 109 | 110 | @article{pang2008opinion, 111 | title={Opinion mining and sentiment analysis}, 112 | author={Pang, Bo and Lee, Lillian}, 113 | journal={Foundations and trends in information retrieval}, 114 | volume={2}, 115 | number={1-2}, 116 | pages={1--135}, 117 | year={2008}, 118 | publisher={Now Publishers Inc.} 119 | } 120 | 121 | @article{hinton2012improving, 122 | title={Improving neural networks by preventing co-adaptation of feature detectors}, 123 | author={Hinton, Geoffrey E and Srivastava, Nitish and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan R}, 124 | journal={arXiv preprint arXiv:1207.0580}, 125 | year={2012} 126 | } 127 | 128 | @article{marcus1993building, 129 | title={Building a large annotated corpus of English: The Penn Treebank}, 130 | author={Marcus, Mitchell P and Marcinkiewicz, Mary Ann and Santorini, Beatrice}, 131 | journal={Computational linguistics}, 132 | volume={19}, 133 | number={2}, 134 | pages={313--330}, 135 | year={1993}, 136 | publisher={MIT Press} 137 | } 138 | 139 | @misc{lecun1998mnist, 140 | title={The MNIST database of handwritten digits}, 141 | 
author={LeCun, Yann and Cortes, Corinna and Burges, Christopher JC}, 142 | year={1998} 143 | } 144 | -------------------------------------------------------------------------------- /doc/article/rnn_library.blg: -------------------------------------------------------------------------------- 1 | This is BibTeX, Version 0.99c (TeX Live 2009/Debian) 2 | The top-level auxiliary file: rnn_library.aux 3 | The style file: abbrv.bst 4 | Database file #1: rnn_library.bib 5 | Warning--empty journal in boden2001guide 6 | Warning--there's a number but no series in collobert2011torch7 7 | You've used 17 entries, 8 | 2118 wiz_defined-function locations, 9 | 596 strings with 6441 characters, 10 | and the built_in function-call counts, 5471 in all, are: 11 | = -- 535 12 | > -- 272 13 | < -- 3 14 | + -- 109 15 | - -- 90 16 | * -- 368 17 | := -- 932 18 | add.period$ -- 51 19 | call.type$ -- 17 20 | change.case$ -- 94 21 | chr.to.int$ -- 0 22 | cite$ -- 19 23 | duplicate$ -- 206 24 | empty$ -- 412 25 | format.name$ -- 90 26 | if$ -- 1126 27 | int.to.chr$ -- 0 28 | int.to.str$ -- 17 29 | missing$ -- 15 30 | newline$ -- 87 31 | num.names$ -- 34 32 | pop$ -- 97 33 | preamble$ -- 1 34 | purify$ -- 78 35 | quote$ -- 0 36 | skip$ -- 152 37 | stack$ -- 0 38 | substring$ -- 301 39 | swap$ -- 42 40 | text.length$ -- 3 41 | text.prefix$ -- 0 42 | top$ -- 0 43 | type$ -- 68 44 | warning$ -- 2 45 | while$ -- 51 46 | width$ -- 19 47 | write$ -- 180 48 | (There were 2 warnings) 49 | -------------------------------------------------------------------------------- /doc/article/rnn_library.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/article/rnn_library.log -------------------------------------------------------------------------------- /doc/article/rnn_library.out: -------------------------------------------------------------------------------- 1 | \BOOKMARK [1][-]{section.1}{Introduction}{} 2 | \BOOKMARK [1][-]{section.2}{Torch}{} 3 | \BOOKMARK [2][-]{subsection.2.1}{torch7}{section.2} 4 | \BOOKMARK [2][-]{subsection.2.2}{nn}{section.2} 5 | \BOOKMARK [1][-]{section.3}{Package Components}{} 6 | \BOOKMARK [2][-]{subsection.3.1}{First Iteration : Recurrent module}{section.3} 7 | \BOOKMARK [2][-]{subsection.3.2}{Second Iteration : Sequencer and LSTM}{section.3} 8 | \BOOKMARK [3][-]{subsubsection.3.2.1}{Sequencer}{subsection.3.2} 9 | \BOOKMARK [3][-]{subsubsection.3.2.2}{LSTM}{subsection.3.2} 10 | \BOOKMARK [3][-]{subsubsection.3.2.3}{Repeater}{subsection.3.2} 11 | \BOOKMARK [2][-]{subsection.3.3}{Third Iteration}{section.3} 12 | \BOOKMARK [3][-]{subsubsection.3.3.1}{RecurrentAttention}{subsection.3.3} 13 | \BOOKMARK [3][-]{subsubsection.3.3.2}{Recursor}{subsection.3.3} 14 | \BOOKMARK [3][-]{subsubsection.3.3.3}{Recurrence}{subsection.3.3} 15 | \BOOKMARK [1][-]{section.4}{Development Principles}{} 16 | \BOOKMARK [2][-]{subsection.4.1}{Unit Testing}{section.4} 17 | \BOOKMARK [2][-]{subsection.4.2}{Backward Compatibility}{section.4} 18 | \BOOKMARK [2][-]{subsection.4.3}{Supporting Material}{section.4} 19 | \BOOKMARK [2][-]{subsection.4.4}{Core Extensions}{section.4} 20 | \BOOKMARK [1][-]{section.5}{Results}{} 21 | \BOOKMARK [1][-]{section.6}{Conclusion}{} 22 | -------------------------------------------------------------------------------- /doc/article/rnn_library.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/article/rnn_library.pdf -------------------------------------------------------------------------------- /doc/article/rnn_library.synctex.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/article/rnn_library.synctex.gz -------------------------------------------------------------------------------- /doc/article/rnnlm-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}\textit{-\/-\ recurrent\ module} \\ 4 | \mbox{}rm\ =\ nn.\textbf{Sequential}() \\ 5 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{ParallelTable}() \\ 6 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LookupTable}(nIndex,\ hiddenSize)) \\ 7 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{Linear}(hiddenSize,\ hiddenSize))) \\ 8 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{CAddTable}()) \\ 9 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sigmoid}()) \\ 10 | \mbox{} \\ 11 | \mbox{}rnn\ =\ nn.\textbf{Sequencer}( \\ 12 | \mbox{}\ \ \ nn.\textbf{Sequential}() \\ 13 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{Recurrence}(rm,\ hiddenSize,\ 1)) \\ 14 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{Linear}(hiddenSize,\ nIndex)) \\ 15 | \mbox{}\ \ \ \ \ \ :\textbf{add}(nn.\textbf{LogSoftMax}()) \\ 16 | \mbox{}) 17 | -------------------------------------------------------------------------------- /doc/article/rnnlm.lua: -------------------------------------------------------------------------------- 1 | -- recurrent module 2 | rm = nn.Sequential() 3 | :add(nn.ParallelTable() 4 | :add(nn.LookupTable(nIndex, hiddenSize)) 5 | :add(nn.Linear(hiddenSize, hiddenSize))) 6 | :add(nn.CAddTable()) 7 | :add(nn.Sigmoid()) 8 | 9 | rnn = nn.Sequencer( 10 | nn.Sequential() 11 | :add(nn.Recurrence(rm, hiddenSize, 1)) 12 | :add(nn.Linear(hiddenSize, nIndex)) 13 | :add(nn.LogSoftMax()) 14 | ) -------------------------------------------------------------------------------- /doc/article/sequencer-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}seq\ =\ nn.\textbf{Sequencer}(module) 4 | -------------------------------------------------------------------------------- /doc/article/sequencer.lua: -------------------------------------------------------------------------------- 1 | seq = nn.Sequencer(module) -------------------------------------------------------------------------------- /doc/article/srnn-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}rnn\ =\ nn.\textbf{Sequential}() \\ 4 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{Linear}(inputSize,\ hiddenSize))) \\ 5 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{LSTM}(hiddenSize,\ hiddenSize))) \\ 6 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{LSTM}(hiddenSize,\ hiddenSize))) \\ 7 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{Linear}(hiddenSize,\ outputSize))) \\ 8 | \mbox{}\ \ \ :\textbf{add}(nn.\textbf{Sequencer}(nn.\textbf{LogSoftMax}())) 9 | 
-------------------------------------------------------------------------------- /doc/article/srnn.lua: -------------------------------------------------------------------------------- 1 | rnn = nn.Sequential() 2 | :add(nn.Sequencer(nn.Linear(inputSize, hiddenSize))) 3 | :add(nn.Sequencer(nn.LSTM(hiddenSize, hiddenSize))) 4 | :add(nn.Sequencer(nn.LSTM(hiddenSize, hiddenSize))) 5 | :add(nn.Sequencer(nn.Linear(hiddenSize, outputSize))) 6 | :add(nn.Sequencer(nn.LogSoftMax())) -------------------------------------------------------------------------------- /doc/article/trainEpoch-lua.tex: -------------------------------------------------------------------------------- 1 | % Generator: GNU source-highlight, by Lorenzo Bettini, http://www.gnu.org/software/src-highlite 2 | \noindent 3 | \mbox{}\textbf{function}\ \textbf{trainEpoch}(module,\ criterion,\ inputs,\ targets) \\ 4 | \mbox{}\ \ \ \textbf{for}\ i=1,\textbf{inputs:size}(1)\ \textbf{do} \\ 5 | \mbox{}\ \ \ \ \ \ \textbf{local}\ idx\ =\ math.\textbf{random}(1,\textbf{inputs:size}(1)) \\ 6 | \mbox{}\ \ \ \ \ \ \textbf{local}\ input,\ target\ =\ inputs[idx],\ \textbf{targets:narrow}(1,idx,1) \\ 7 | \mbox{}\ \ \ \ \ \ \textit{-\/-\ forward} \\ 8 | \mbox{}\ \ \ \ \ \ \textbf{local}\ output\ =\ \textbf{module:forward}(input) \\ 9 | \mbox{}\ \ \ \ \ \ \textbf{local}\ loss\ =\ \textbf{criterion:forward}(output,\ target) \\ 10 | \mbox{}\ \ \ \ \ \ \textit{-\/-\ backward} \\ 11 | \mbox{}\ \ \ \ \ \ \textbf{local}\ gradOutput\ =\ \textbf{criterion:backward}(output,\ target) \\ 12 | \mbox{}\ \ \ \ \ \ \textbf{module:zeroGradParameters}() \\ 13 | \mbox{}\ \ \ \ \ \ \textbf{local}\ gradInput\ =\ \textbf{module:backward}(input,\ gradOutput) \\ 14 | \mbox{}\ \ \ \ \ \ \textit{-\/-\ update} \\ 15 | \mbox{}\ \ \ \ \ \ \textbf{module:updateParameters}(0.1)\ \textit{-\/-\ W\ =\ W\ -\ 0.1*dL/dW} \\ 16 | \mbox{}\ \ \ \textbf{end} \\ 17 | \mbox{}\textbf{end} 18 | -------------------------------------------------------------------------------- /doc/article/trainEpoch.lua: -------------------------------------------------------------------------------- 1 | function trainEpoch(module, criterion, inputs, targets) 2 | for i=1,inputs:size(1) do 3 | local idx = math.random(1,inputs:size(1)) 4 | local input, target = inputs[idx], targets:narrow(1,idx,1) 5 | -- forward 6 | local output = module:forward(input) 7 | local loss = criterion:forward(output, target) 8 | -- backward 9 | local gradOutput = criterion:backward(output, target) 10 | module:zeroGradParameters() 11 | local gradInput = module:backward(input, gradOutput) 12 | -- update 13 | module:updateParameters(0.1) -- W = W - 0.1*dL/dW 14 | end 15 | end -------------------------------------------------------------------------------- /doc/image/LSTM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/LSTM.png -------------------------------------------------------------------------------- /doc/image/bgru-benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/bgru-benchmark.png -------------------------------------------------------------------------------- /doc/image/bidirectionallm.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/bidirectionallm.png -------------------------------------------------------------------------------- /doc/image/gru-benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/gru-benchmark.png -------------------------------------------------------------------------------- /doc/image/hellofuzzy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/hellofuzzy.png -------------------------------------------------------------------------------- /doc/image/sequence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/doc/image/sequence.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This directory contains various training scripts. 4 | 5 | Torch blog posts 6 | * The torch.ch blog contains detailed posts about the *rnn* package. 7 | 1. [recurrent-visual-attention.lua](recurrent-visual-attention.lua): training script used in [Recurrent Model for Visual Attention](http://torch.ch/blog/2015/09/21/rmva.html). Implements the REINFORCE learning rule to learn an attention mechanism for classifying MNIST digits, sometimes translated. 8 | 2. [noise-contrastive-esimate.lua](noise-contrastive-estimate.lua): one of two training scripts used in [Language modeling a billion words](http://torch.ch/blog/2016/07/25/nce.html). Single-GPU script for training recurrent language models on the Google billion words dataset. 9 | 3. [multigpu-nce-rnnlm.lua](multigpu-nce-rnnlm.lua) : 4-GPU version of `noise-contrastive-estimate.lua` for training larger multi-GPU models. Two of two training scripts used in the [Language modeling a billion words](http://torch.ch/blog/2016/07/25/nce.html). 10 | 11 | Simple training scripts. 12 | * Showcases the fundamental principles of the package. In chronological order of introduction date. 13 | 1. [simple-recurrent-network.lua](simple-recurrent-network.lua): uses the `nn.Recurrent` module to instantiate a Simple RNN. Illustrates the first AbstractRecurrent instance in action. It has since been surpassed by the more flexible `nn.Recursor` and `nn.Recurrence`. The `nn.Recursor` class decorates any module to make it conform to the nn.AbstractRecurrent interface. The `nn.Recurrence` implements the recursive `h[t] <- forward(h[t-1], x[t])`. Together, `nn.Recursor` and `nn.Recurrence` can be used to implement a wide range of experimental recurrent architectures. 14 | 2. [simple-sequencer-network.lua](simple-sequencer-network.lua): uses the `nn.Sequencer` module to accept a batch of sequences as `input` of size `seqlen x batchsize x ...`. Both tables and tensors are accepted as input and produce the same type of output (table->table, tensor->tensor). The `Sequencer` class abstract away the implementation of back-propagation through time. It also provides a `remember(['neither','both'])` method for triggering what the `Sequencer` remembers between iterations (forward,backward,update). 15 | 3. 
[simple-recurrence-network.lua](simple-recurrence-network.lua): uses the `nn.Recurrence` module to define the h[t] <- sigmoid(h[t-1], x[t]) Simple RNN. Decorates it using `nn.Sequencer` so that an entire batch of sequences (`input`) can forward and backward propagated per update. 16 | -------------------------------------------------------------------------------- /examples/encoder-decoder-coupling.lua: -------------------------------------------------------------------------------- 1 | --[[ 2 | 3 | Example of "coupled" separate encoder and decoder networks, e.g. for sequence-to-sequence networks. 4 | 5 | ]]-- 6 | 7 | require 'rnn' 8 | 9 | version = 1.4 -- Uses [get,set]GradHiddenState for LSTM 10 | 11 | local opt = {} 12 | opt.learningRate = 0.1 13 | opt.hiddenSize = 6 14 | opt.numLayers = 1 15 | opt.useSeqLSTM = true -- faster implementation of LSTM + Sequencer 16 | opt.vocabSize = 7 17 | opt.seqLen = 7 -- length of the encoded sequence (with padding) 18 | opt.niter = 1000 19 | 20 | --[[ Forward coupling: Copy encoder cell and output to decoder LSTM ]]-- 21 | function forwardConnect(enc, dec) 22 | for i=1,#enc.lstmLayers do 23 | if opt.useSeqLSTM then 24 | dec.lstmLayers[i].userPrevOutput = enc.lstmLayers[i].output[opt.seqLen] 25 | dec.lstmLayers[i].userPrevCell = enc.lstmLayers[i].cell[opt.seqLen] 26 | else 27 | dec.lstmLayers[i].userPrevOutput = nn.rnn.recursiveCopy(dec.lstmLayers[i].userPrevOutput, enc.lstmLayers[i].outputs[opt.seqLen]) 28 | dec.lstmLayers[i].userPrevCell = nn.rnn.recursiveCopy(dec.lstmLayers[i].userPrevCell, enc.lstmLayers[i].cells[opt.seqLen]) 29 | end 30 | end 31 | end 32 | 33 | --[[ Backward coupling: Copy decoder gradients to encoder LSTM ]]-- 34 | function backwardConnect(enc, dec) 35 | for i=1,#enc.lstmLayers do 36 | if opt.useSeqLSTM then 37 | enc.lstmLayers[i].userNextGradCell = dec.lstmLayers[i].userGradPrevCell 38 | enc.lstmLayers[i].gradPrevOutput = dec.lstmLayers[i].userGradPrevOutput 39 | else 40 | enc:setGradHiddenState(opt.seqLen, dec:getGradHiddenState(0)) 41 | end 42 | end 43 | end 44 | 45 | -- Encoder 46 | local enc = nn.Sequential() 47 | enc:add(nn.LookupTableMaskZero(opt.vocabSize, opt.hiddenSize)) 48 | enc.lstmLayers = {} 49 | for i=1,opt.numLayers do 50 | if opt.useSeqLSTM then 51 | enc.lstmLayers[i] = nn.SeqLSTM(opt.hiddenSize, opt.hiddenSize) 52 | enc.lstmLayers[i]:maskZero() 53 | enc:add(enc.lstmLayers[i]) 54 | else 55 | enc.lstmLayers[i] = nn.LSTM(opt.hiddenSize, opt.hiddenSize):maskZero(1) 56 | enc:add(nn.Sequencer(enc.lstmLayers[i])) 57 | end 58 | end 59 | enc:add(nn.Select(1, -1)) 60 | 61 | -- Decoder 62 | local dec = nn.Sequential() 63 | dec:add(nn.LookupTableMaskZero(opt.vocabSize, opt.hiddenSize)) 64 | dec.lstmLayers = {} 65 | for i=1,opt.numLayers do 66 | if opt.useSeqLSTM then 67 | dec.lstmLayers[i] = nn.SeqLSTM(opt.hiddenSize, opt.hiddenSize) 68 | dec.lstmLayers[i]:maskZero() 69 | dec:add(dec.lstmLayers[i]) 70 | else 71 | dec.lstmLayers[i] = nn.LSTM(opt.hiddenSize, opt.hiddenSize):maskZero(1) 72 | dec:add(nn.Sequencer(dec.lstmLayers[i])) 73 | end 74 | end 75 | dec:add(nn.Sequencer(nn.MaskZero(nn.Linear(opt.hiddenSize, opt.vocabSize), 1))) 76 | dec:add(nn.Sequencer(nn.MaskZero(nn.LogSoftMax(), 1))) 77 | 78 | local criterion = nn.SequencerCriterion(nn.MaskZeroCriterion(nn.ClassNLLCriterion(),1)) 79 | 80 | -- Some example data (batchsize = 2) with variable length input and output sequences 81 | 82 | -- The input sentences to the encoder, padded with zeros from the left 83 | local encInSeq = 
torch.Tensor({{0,0,0,0,1,2,3},{0,0,0,4,3,2,1}}):t() 84 | -- The input sentences to the decoder, padded with zeros from the right. 85 | -- Label '6' represents the start of a sentence (GO). 86 | local decInSeq = torch.Tensor({{6,1,2,3,4,0,0,0},{6,5,4,3,2,1,0,0}}):t() 87 | 88 | -- The expected output from the decoder (it will return one character per time-step), 89 | -- padded with zeros from the right 90 | -- Label '7' represents the end of sentence (EOS). 91 | local decOutSeq = torch.Tensor({{1,2,3,4,7,0,0,0},{5,4,3,2,1,7,0,0}}):t() 92 | 93 | for i=1,opt.niter do 94 | enc:zeroGradParameters() 95 | dec:zeroGradParameters() 96 | 97 | -- Forward pass 98 | local encOut = enc:forward(encInSeq) 99 | forwardConnect(enc, dec) 100 | local decOut = dec:forward(decInSeq) 101 | --print(decOut) 102 | local err = criterion:forward(decOut, decOutSeq) 103 | 104 | print(string.format("Iteration %d ; NLL err = %f ", i, err)) 105 | 106 | -- Backward pass 107 | local gradOutput = criterion:backward(decOut, decOutSeq) 108 | dec:backward(decInSeq, gradOutput) 109 | backwardConnect(enc, dec) 110 | local zeroTensor = torch.Tensor(encOut):zero() 111 | enc:backward(encInSeq, zeroTensor) 112 | 113 | dec:updateParameters(opt.learningRate) 114 | enc:updateParameters(opt.learningRate) 115 | end 116 | -------------------------------------------------------------------------------- /examples/nested-recurrence-lstm.lua: -------------------------------------------------------------------------------- 1 | -- The example demonstates the ability to nest AbstractRecurrent instances. 2 | -- In this case, an FastLSTM is nested withing a Recurrence. 3 | require 'rnn' 4 | 5 | -- hyper-parameters 6 | batchSize = 8 7 | rho = 5 -- sequence length 8 | hiddenSize = 7 9 | nIndex = 10 10 | lr = 0.1 11 | 12 | -- Recurrence.recurrentModule 13 | local rm = nn.Sequential() 14 | :add(nn.ParallelTable() 15 | :add(nn.LookupTable(nIndex, hiddenSize)) 16 | :add(nn.Linear(hiddenSize, hiddenSize))) 17 | :add(nn.CAddTable()) 18 | :add(nn.Sigmoid()) 19 | :add(nn.FastLSTM(hiddenSize,hiddenSize)) -- an AbstractRecurrent instance 20 | :add(nn.Linear(hiddenSize,hiddenSize)) 21 | :add(nn.Sigmoid()) 22 | 23 | local rnn = nn.Sequential() 24 | :add(nn.Recurrence(rm, hiddenSize, 0)) -- another AbstractRecurrent instance 25 | :add(nn.Linear(hiddenSize, nIndex)) 26 | :add(nn.LogSoftMax()) 27 | 28 | -- all following code is exactly the same as the simple-sequencer-network.lua script 29 | -- internally, rnn will be wrapped into a Recursor to make it an AbstractRecurrent instance. 30 | rnn = nn.Sequencer(rnn) 31 | 32 | print(rnn) 33 | 34 | -- build criterion 35 | 36 | criterion = nn.SequencerCriterion(nn.ClassNLLCriterion()) 37 | 38 | -- build dummy dataset (task is to predict next item, given previous) 39 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 40 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 41 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 42 | 43 | offsets = {} 44 | for i=1,batchSize do 45 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 46 | end 47 | offsets = torch.LongTensor(offsets) 48 | 49 | -- training 50 | local iteration = 1 51 | while true do 52 | -- 1. 
create a sequence of rho time-steps 53 | 54 | local inputs, targets = {}, {} 55 | for step=1,rho do 56 | -- a batch of inputs 57 | inputs[step] = sequence:index(1, offsets) 58 | -- incement indices 59 | offsets:add(1) 60 | for j=1,batchSize do 61 | if offsets[j] > sequence:size(1) then 62 | offsets[j] = 1 63 | end 64 | end 65 | targets[step] = sequence:index(1, offsets) 66 | end 67 | 68 | -- 2. forward sequence through rnn 69 | 70 | rnn:zeroGradParameters() 71 | 72 | local outputs = rnn:forward(inputs) 73 | local err = criterion:forward(outputs, targets) 74 | 75 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 76 | 77 | -- 3. backward sequence through rnn (i.e. backprop through time) 78 | 79 | local gradOutputs = criterion:backward(outputs, targets) 80 | local gradInputs = rnn:backward(inputs, gradOutputs) 81 | 82 | -- 4. update 83 | 84 | rnn:updateParameters(lr) 85 | 86 | iteration = iteration + 1 87 | end 88 | -------------------------------------------------------------------------------- /examples/recurrent-time-series.lua: -------------------------------------------------------------------------------- 1 | -- Multi-variate time-series example 2 | 3 | require 'rnn' 4 | 5 | cmd = torch.CmdLine() 6 | cmd:text() 7 | cmd:text('Train a multivariate time-series model using RNN') 8 | cmd:option('--rho', 5, 'maximum number of time steps for back-propagate through time (BPTT)') 9 | cmd:option('--multiSize', 6, 'number of random variables as input and output') 10 | cmd:option('--hiddenSize', 10, 'number of hidden units used at output of the recurrent layer') 11 | cmd:option('--dataSize', 100, 'total number of time-steps in dataset') 12 | cmd:option('--batchSize', 8, 'number of training samples per batch') 13 | cmd:option('--nIterations', 1000, 'max number of training iterations') 14 | cmd:option('--learningRate', 0.001, 'learning rate') 15 | cmd:option('--plot', false, 'plot the errors during training?') 16 | cmd:text() 17 | local opt = cmd:parse(arg or {}) 18 | 19 | if opt.plot then 20 | require 'optim' 21 | logger = optim.Logger(paths.concat('outputs', 'rects_log.txt')) 22 | end 23 | -- For simplicity, the multi-variate dataset in this example is independently distributed. 24 | -- Toy dataset (task is to predict next vector, given previous vectors) following the normal distribution . 25 | -- Generated by sampling a separate normal distribution for each random variable. 
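-- (Concretely, evalPDF below overwrites each entry x of vX with the Gaussian
--  density exp(-((x - mean)/sigma)^2 / 2) / (sigma * sqrt(2*pi)).)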
26 | -- note: vX is used as both input X and output Y to save memory 27 | local function evalPDF(vMean, vSigma, vX) 28 | for i=1,vMean:size(1) do 29 | local b = (vX[i]-vMean[i])/vSigma[i] 30 | vX[i] = math.exp(-b*b/2)/(vSigma[i]*math.sqrt(2*math.pi)) 31 | end 32 | return vX 33 | end 34 | 35 | assert(opt.multiSize > 1, "Multi-variate time-series") 36 | 37 | vBias = torch.randn(opt.multiSize) 38 | vMean = torch.Tensor(opt.multiSize):fill(5) 39 | vSigma = torch.linspace(1,opt.multiSize,opt.multiSize) 40 | sequence = torch.Tensor(opt.dataSize, opt.multiSize) 41 | 42 | j = 0 43 | for i=1,opt.dataSize do 44 | sequence[{i,{}}]:fill(j) 45 | evalPDF(vMean, vSigma, sequence[{i,{}}]) 46 | sequence[{i,{}}]:add(vBias) 47 | j = j + 1 48 | if j>10 then j = 0 end 49 | end 50 | print('Sequence:'); print(sequence) 51 | 52 | -- batch mode 53 | 54 | offsets = torch.LongTensor(opt.batchSize):random(1,opt.dataSize) 55 | 56 | -- RNN 57 | r = nn.Recurrent( 58 | opt.hiddenSize, -- size of output 59 | nn.Linear(opt.multiSize, opt.hiddenSize), -- input layer 60 | nn.Linear(opt.hiddenSize, opt.hiddenSize), -- recurrent layer 61 | nn.Sigmoid(), -- transfer function 62 | opt.rho 63 | ) 64 | 65 | rnn = nn.Sequential() 66 | :add(r) 67 | :add(nn.Linear(opt.hiddenSize, opt.multiSize)) 68 | 69 | criterion = nn.MSECriterion() 70 | 71 | -- use Sequencer for better data handling 72 | rnn = nn.Sequencer(rnn) 73 | 74 | criterion = nn.SequencerCriterion(criterion) 75 | print("Model :") 76 | print(rnn) 77 | 78 | -- train rnn model 79 | minErr = opt.multiSize -- report min error 80 | minK = 0 81 | avgErrs = torch.Tensor(opt.nIterations):fill(0) 82 | for k = 1, opt.nIterations do 83 | 84 | -- 1. create a sequence of rho time-steps 85 | 86 | local inputs, targets = {}, {} 87 | for step = 1, opt.rho do 88 | -- batch of inputs 89 | inputs[step] = inputs[step] or sequence.new() 90 | inputs[step]:index(sequence, 1, offsets) 91 | -- batch of targets 92 | offsets:add(1) -- increase indices by 1 93 | offsets[offsets:gt(opt.dataSize)] = 1 94 | targets[step] = targets[step] or sequence.new() 95 | targets[step]:index(sequence, 1, offsets) 96 | end 97 | 98 | -- 2. forward sequence through rnn 99 | 100 | local outputs = rnn:forward(inputs) 101 | local err = criterion:forward(outputs, targets) 102 | 103 | -- report errors 104 | 105 | print('Iter: ' .. k .. ' Err: ' .. err) 106 | if opt.plot then 107 | logger:add{['Err'] = err} 108 | logger:style{['Err'] = '-'} 109 | logger:plot() 110 | end 111 | 112 | avgErrs[k] = err 113 | if avgErrs[k] < minErr then 114 | minErr = avgErrs[k] 115 | minK = k 116 | end 117 | 118 | -- 3. backward sequence through rnn (i.e. backprop through time) 119 | 120 | rnn:zeroGradParameters() 121 | 122 | local gradOutputs = criterion:backward(outputs, targets) 123 | local gradInputs = rnn:backward(inputs, gradOutputs) 124 | 125 | -- 4. updates parameters 126 | 127 | rnn:updateParameters(opt.learningRate) 128 | end 129 | 130 | print('min err: ' .. minErr .. ' on iteration ' .. 
minK) 131 | -------------------------------------------------------------------------------- /examples/sequence-to-one.lua: -------------------------------------------------------------------------------- 1 | require 'rnn' 2 | 3 | -- hyper-parameters 4 | batchSize = 8 5 | rho = 10 -- sequence length 6 | hiddenSize = 100 7 | nIndex = 100 -- input words 8 | nClass = 7 -- output classes 9 | lr = 0.1 10 | 11 | 12 | -- build simple recurrent neural network 13 | r = nn.Recurrent( 14 | hiddenSize, nn.Identity(), 15 | nn.Linear(hiddenSize, hiddenSize), nn.Sigmoid(), 16 | rho 17 | ) 18 | 19 | rnn = nn.Sequential() 20 | :add(nn.LookupTable(nIndex, hiddenSize)) 21 | :add(nn.SplitTable(1,2)) 22 | :add(nn.Sequencer(r)) 23 | :add(nn.SelectTable(-1)) -- this selects the last time-step of the rnn output sequence 24 | :add(nn.Linear(hiddenSize, nClass)) 25 | :add(nn.LogSoftMax()) 26 | 27 | -- build criterion 28 | 29 | criterion = nn.ClassNLLCriterion() 30 | 31 | -- build dummy dataset (task is to predict class given rho words) 32 | -- similar to sentiment analysis datasets 33 | ds = {} 34 | ds.size = 1000 35 | ds.input = torch.LongTensor(ds.size,rho) 36 | ds.target = torch.LongTensor(ds.size):random(nClass) 37 | 38 | -- this will make the inputs somewhat correlate with the targets, 39 | -- such that the reduction in training error should be more obvious 40 | local correlate = torch.LongTensor(nClass, rho*3):random(nClass) 41 | local indices = torch.LongTensor(rho) 42 | local buffer = torch.LongTensor() 43 | local sortVal, sortIdx = torch.LongTensor(), torch.LongTensor() 44 | for i=1,ds.size do 45 | indices:random(1,rho*3) 46 | buffer:index(correlate[ds.target[i]], 1, indices) 47 | sortVal:sort(sortIdx, buffer, 1) 48 | ds.input[i]:copy(sortVal:view(-1)) 49 | end 50 | 51 | 52 | indices:resize(batchSize) 53 | 54 | -- training 55 | local inputs, targets = torch.LongTensor(), torch.LongTensor() 56 | for iteration = 1, 1000 do 57 | -- 1. create a sequence of rho time-steps 58 | 59 | indices:random(1,ds.size) -- choose some random samples 60 | inputs:index(ds.input, 1,indices) 61 | targets:index(ds.target, 1,indices) 62 | 63 | -- 2. forward sequence through rnn 64 | 65 | rnn:zeroGradParameters() 66 | 67 | local outputs = rnn:forward(inputs) 68 | local err = criterion:forward(outputs, targets) 69 | 70 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 71 | 72 | -- 3. backward sequence through rnn (i.e. backprop through time) 73 | 74 | local gradOutputs = criterion:backward(outputs, targets) 75 | local gradInputs = rnn:backward(inputs, gradOutputs) 76 | 77 | -- 4. update 78 | 79 | rnn:updateParameters(lr) 80 | end 81 | -------------------------------------------------------------------------------- /examples/simple-bisequencer-network-variable.lua: -------------------------------------------------------------------------------- 1 | -- Example BLSTM for variable-length sequences 2 | require 'rnn' 3 | 4 | torch.manualSeed(0) 5 | math.randomseed(0) 6 | 7 | -- hyper-parameters 8 | batchSize = 8 9 | rho = 10 -- sequence length 10 | hiddenSize = 5 11 | nIndex = 10 12 | lr = 0.1 13 | maxIter = 100 14 | 15 | local sharedLookupTable = nn.LookupTableMaskZero(nIndex, hiddenSize) 16 | 17 | -- forward rnn 18 | local fwd = nn.Sequential() 19 | :add(sharedLookupTable) 20 | :add(nn.FastLSTM(hiddenSize, hiddenSize):maskZero(1)) 21 | 22 | -- internally, rnn will be wrapped into a Recursor to make it an AbstractRecurrent instance. 
23 | fwdSeq = nn.Sequencer(fwd) 24 | 25 | -- backward rnn (will be applied in reverse order of input sequence) 26 | local bwd = nn.Sequential() 27 | :add(sharedLookupTable:sharedClone()) 28 | :add(nn.FastLSTM(hiddenSize, hiddenSize):maskZero(1)) 29 | bwdSeq = nn.Sequencer(bwd) 30 | 31 | -- merges the output of one time-step of fwd and bwd rnns. 32 | -- You could also try nn.AddTable(), nn.Identity(), etc. 33 | local merge = nn.JoinTable(1, 1) 34 | mergeSeq = nn.Sequencer(merge) 35 | 36 | -- Assume that two input sequences are given (original and reverse, both are right-padded). 37 | -- Instead of ConcatTable, we use ParallelTable here. 38 | local parallel = nn.ParallelTable() 39 | parallel:add(fwdSeq):add(bwdSeq) 40 | local brnn = nn.Sequential() 41 | :add(parallel) 42 | :add(nn.ZipTable()) 43 | :add(mergeSeq) 44 | 45 | local rnn = nn.Sequential() 46 | :add(brnn) 47 | :add(nn.Sequencer(nn.MaskZero(nn.Linear(hiddenSize*2, nIndex), 1))) -- times two due to JoinTable 48 | :add(nn.Sequencer(nn.MaskZero(nn.LogSoftMax(), 1))) 49 | 50 | print(rnn) 51 | 52 | -- build criterion 53 | 54 | criterion = nn.SequencerCriterion(nn.MaskZeroCriterion(nn.ClassNLLCriterion(), 1)) 55 | 56 | -- build dummy dataset (task is to predict next item, given previous) 57 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 58 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 59 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 60 | 61 | offsets = {} 62 | maxStep = {} 63 | for i=1,batchSize do 64 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 65 | -- variable length for each sample 66 | table.insert(maxStep, math.random(rho)) 67 | end 68 | offsets = torch.LongTensor(offsets) 69 | 70 | -- training 71 | for iteration = 1, maxIter do 72 | -- 1. create a sequence of rho time-steps 73 | 74 | local inputs, inputs_rev, targets = {}, {}, {} 75 | for step=1,rho do 76 | -- a batch of inputs 77 | inputs[step] = sequence:index(1, offsets) 78 | -- increment indices 79 | offsets:add(1) 80 | for j=1,batchSize do 81 | if offsets[j] > sequence:size(1) then 82 | offsets[j] = 1 83 | end 84 | end 85 | targets[step] = sequence:index(1, offsets) 86 | -- padding 87 | for j=1,batchSize do 88 | if step > maxStep[j] then 89 | inputs[step][j] = 0 90 | targets[step][j] = 0 91 | end 92 | end 93 | end 94 | 95 | -- reverse 96 | for step=1,rho do 97 | inputs_rev[step] = torch.LongTensor(batchSize) 98 | for j=1,batchSize do 99 | if step <= maxStep[j] then 100 | inputs_rev[step][j] = inputs[maxStep[j]-step+1][j] 101 | else 102 | inputs_rev[step][j] = 0 103 | end 104 | end 105 | end 106 | 107 | -- 2. forward sequence through rnn 108 | 109 | rnn:zeroGradParameters() 110 | 111 | local outputs = rnn:forward({inputs, inputs_rev}) 112 | local err = criterion:forward(outputs, targets) 113 | 114 | local correct = 0 115 | local total = 0 116 | for step=1,rho do 117 | probs = outputs[step] 118 | _, preds = probs:max(2) 119 | for j=1,batchSize do 120 | local cur_x = inputs[step][j] 121 | local cur_y = targets[step][j] 122 | local cur_t = preds[j][1] 123 | -- print(string.format("x=%d ; y=%d ; pred=%d", cur_x, cur_y, cur_t)) 124 | if step <= maxStep[j] then 125 | if cur_y == cur_t then correct = correct + 1 end 126 | total = total + 1 127 | end 128 | end 129 | end 130 | 131 | local acc = correct*1.0/total 132 | print(string.format("Iteration %d ; NLL err = %f ; ACC = %.2f ", iteration, err, acc)) 133 | 134 | -- 3. backward sequence through rnn (i.e. 
backprop through time) 135 | 136 | local gradOutputs = criterion:backward(outputs, targets) 137 | local gradInputs = rnn:backward({inputs, inputs_rev}, gradOutputs) 138 | 139 | -- 4. update 140 | 141 | rnn:updateParameters(lr) 142 | 143 | end 144 | -------------------------------------------------------------------------------- /examples/simple-bisequencer-network.lua: -------------------------------------------------------------------------------- 1 | require 'rnn' 2 | 3 | -- hyper-parameters 4 | batchSize = 8 5 | rho = 5 -- sequence length 6 | hiddenSize = 7 7 | nIndex = 10 8 | lr = 0.1 9 | 10 | 11 | -- forward rnn 12 | -- build simple recurrent neural network 13 | local fwd = nn.Recurrent( 14 | hiddenSize, nn.LookupTable(nIndex, hiddenSize), 15 | nn.Linear(hiddenSize, hiddenSize), nn.Sigmoid(), 16 | rho 17 | ) 18 | 19 | -- backward rnn (will be applied in reverse order of input sequence) 20 | local bwd = fwd:clone() 21 | bwd:reset() -- reinitializes parameters 22 | 23 | -- merges the output of one time-step of fwd and bwd rnns. 24 | -- You could also try nn.AddTable(), nn.Identity(), etc. 25 | local merge = nn.JoinTable(1, 1) 26 | 27 | -- we use BiSequencerLM because this is a language model (previous and next words to predict current word). 28 | -- If we used BiSequencer, x[t] would be used to predict y[t] = x[t] (which is cheating). 29 | -- Note that bwd and merge argument are optional and will default to the above. 30 | local brnn = nn.BiSequencerLM(fwd, bwd, merge) 31 | 32 | local rnn = nn.Sequential() 33 | :add(brnn) 34 | :add(nn.Sequencer(nn.Linear(hiddenSize*2, nIndex))) -- times two due to JoinTable 35 | :add(nn.Sequencer(nn.LogSoftMax())) 36 | 37 | print(rnn) 38 | 39 | -- build criterion 40 | 41 | criterion = nn.SequencerCriterion(nn.ClassNLLCriterion()) 42 | 43 | -- build dummy dataset (task is to predict next item, given previous) 44 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 45 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 46 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 47 | 48 | offsets = {} 49 | for i=1,batchSize do 50 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 51 | end 52 | offsets = torch.LongTensor(offsets) 53 | 54 | -- training 55 | local iteration = 1 56 | while true do 57 | -- 1. create a sequence of rho time-steps 58 | 59 | local inputs, targets = {}, {} 60 | for step=1,rho do 61 | -- a batch of inputs 62 | inputs[step] = sequence:index(1, offsets) 63 | -- incement indices 64 | offsets:add(1) 65 | for j=1,batchSize do 66 | if offsets[j] > sequence:size(1) then 67 | offsets[j] = 1 68 | end 69 | end 70 | targets[step] = sequence:index(1, offsets) 71 | end 72 | 73 | -- 2. forward sequence through rnn 74 | 75 | rnn:zeroGradParameters() 76 | 77 | local outputs = rnn:forward(inputs) 78 | local err = criterion:forward(outputs, targets) 79 | 80 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 81 | 82 | -- 3. backward sequence through rnn (i.e. backprop through time) 83 | 84 | local gradOutputs = criterion:backward(outputs, targets) 85 | local gradInputs = rnn:backward(inputs, gradOutputs) 86 | 87 | -- 4. 
update 88 | 89 | rnn:updateParameters(lr) 90 | 91 | iteration = iteration + 1 92 | end 93 | -------------------------------------------------------------------------------- /examples/simple-recurrence-network.lua: -------------------------------------------------------------------------------- 1 | -- example use of nn.Recurrence 2 | require 'rnn' 3 | 4 | -- hyper-parameters 5 | batchSize = 8 6 | rho = 5 -- sequence length 7 | hiddenSize = 7 8 | nIndex = 10 9 | lr = 0.1 10 | 11 | -- the internal recurrentModule used by Recurrence 12 | local rm = nn.Sequential() -- input is {x[t], h[t-1]} 13 | :add(nn.ParallelTable() 14 | :add(nn.LookupTable(nIndex, hiddenSize)) -- input layer 15 | :add(nn.Linear(hiddenSize, hiddenSize))) -- recurrent layer 16 | :add(nn.CAddTable()) -- merge 17 | :add(nn.Sigmoid()) -- transfer 18 | 19 | local rnn = nn.Sequential() 20 | :add(nn.Recurrence(rm, hiddenSize, 0)) -- similar to nn.Recurrent, but more general, and no startModule 21 | :add(nn.Linear(hiddenSize, nIndex)) 22 | :add(nn.LogSoftMax()) 23 | 24 | -- all following code is exactly the same as the simple-sequencer-network.lua script 25 | -- internally, rnn will be wrapped into a Recursor to make it an AbstractRecurrent instance. 26 | rnn = nn.Sequencer(rnn) 27 | 28 | print(rnn) 29 | 30 | -- build criterion 31 | 32 | criterion = nn.SequencerCriterion(nn.ClassNLLCriterion()) 33 | 34 | -- build dummy dataset (task is to predict next item, given previous) 35 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 36 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 37 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 38 | 39 | offsets = {} 40 | for i=1,batchSize do 41 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 42 | end 43 | offsets = torch.LongTensor(offsets) 44 | 45 | -- training 46 | local iteration = 1 47 | while true do 48 | -- 1. create a sequence of rho time-steps 49 | 50 | local inputs, targets = {}, {} 51 | for step=1,rho do 52 | -- a batch of inputs 53 | inputs[step] = sequence:index(1, offsets) 54 | -- incement indices 55 | offsets:add(1) 56 | for j=1,batchSize do 57 | if offsets[j] > sequence:size(1) then 58 | offsets[j] = 1 59 | end 60 | end 61 | targets[step] = sequence:index(1, offsets) 62 | end 63 | 64 | -- 2. forward sequence through rnn 65 | 66 | rnn:zeroGradParameters() 67 | 68 | local outputs = rnn:forward(inputs) 69 | local err = criterion:forward(outputs, targets) 70 | 71 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 72 | 73 | -- 3. backward sequence through rnn (i.e. backprop through time) 74 | 75 | local gradOutputs = criterion:backward(outputs, targets) 76 | local gradInputs = rnn:backward(inputs, gradOutputs) 77 | 78 | -- 4. 
update 79 | 80 | rnn:updateParameters(lr) 81 | 82 | iteration = iteration + 1 83 | end 84 | -------------------------------------------------------------------------------- /examples/simple-recurrent-network.lua: -------------------------------------------------------------------------------- 1 | require 'rnn' 2 | 3 | -- hyper-parameters 4 | batchSize = 8 5 | rho = 5 -- sequence length 6 | hiddenSize = 7 7 | nIndex = 10 8 | lr = 0.1 9 | 10 | 11 | -- build simple recurrent neural network 12 | local r = nn.Recurrent( 13 | hiddenSize, nn.LookupTable(nIndex, hiddenSize), 14 | nn.Linear(hiddenSize, hiddenSize), nn.Sigmoid(), 15 | rho 16 | ) 17 | 18 | local rnn = nn.Sequential() 19 | :add(r) 20 | :add(nn.Linear(hiddenSize, nIndex)) 21 | :add(nn.LogSoftMax()) 22 | 23 | -- wrap the non-recurrent module (Sequential) in Recursor. 24 | -- This makes it a recurrent module 25 | -- i.e. Recursor is an AbstractRecurrent instance 26 | rnn = nn.Recursor(rnn, rho) 27 | 28 | print(rnn) 29 | 30 | -- build criterion 31 | 32 | criterion = nn.ClassNLLCriterion() 33 | 34 | -- build dummy dataset (task is to predict next item, given previous) 35 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 36 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 37 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 38 | 39 | offsets = {} 40 | for i=1,batchSize do 41 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 42 | end 43 | offsets = torch.LongTensor(offsets) 44 | 45 | -- training 46 | local iteration = 1 47 | while true do 48 | -- 1. create a sequence of rho time-steps 49 | 50 | local inputs, targets = {}, {} 51 | for step=1,rho do 52 | -- a batch of inputs 53 | inputs[step] = sequence:index(1, offsets) 54 | -- incement indices 55 | offsets:add(1) 56 | for j=1,batchSize do 57 | if offsets[j] > sequence:size(1) then 58 | offsets[j] = 1 59 | end 60 | end 61 | targets[step] = sequence:index(1, offsets) 62 | end 63 | 64 | -- 2. forward sequence through rnn 65 | 66 | rnn:zeroGradParameters() 67 | rnn:forget() -- forget all past time-steps 68 | 69 | local outputs, err = {}, 0 70 | for step=1,rho do 71 | outputs[step] = rnn:forward(inputs[step]) 72 | err = err + criterion:forward(outputs[step], targets[step]) 73 | end 74 | 75 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 76 | 77 | -- 3. backward sequence through rnn (i.e. backprop through time) 78 | 79 | local gradOutputs, gradInputs = {}, {} 80 | for step=rho,1,-1 do -- reverse order of forward calls 81 | gradOutputs[step] = criterion:backward(outputs[step], targets[step]) 82 | gradInputs[step] = rnn:backward(inputs[step], gradOutputs[step]) 83 | end 84 | 85 | -- 4. 
update 86 | 87 | rnn:updateParameters(lr) 88 | 89 | iteration = iteration + 1 90 | end 91 | -------------------------------------------------------------------------------- /examples/simple-sequencer-network.lua: -------------------------------------------------------------------------------- 1 | require 'rnn' 2 | 3 | -- hyper-parameters 4 | batchSize = 8 5 | rho = 5 -- sequence length 6 | hiddenSize = 7 7 | nIndex = 10 8 | lr = 0.1 9 | 10 | 11 | -- build simple recurrent neural network 12 | local r = nn.Recurrent( 13 | hiddenSize, nn.LookupTable(nIndex, hiddenSize), 14 | nn.Linear(hiddenSize, hiddenSize), nn.Sigmoid(), 15 | rho 16 | ) 17 | 18 | local rnn = nn.Sequential() 19 | :add(r) 20 | :add(nn.Linear(hiddenSize, nIndex)) 21 | :add(nn.LogSoftMax()) 22 | 23 | -- internally, rnn will be wrapped into a Recursor to make it an AbstractRecurrent instance. 24 | rnn = nn.Sequencer(rnn) 25 | 26 | print(rnn) 27 | 28 | -- build criterion 29 | 30 | criterion = nn.SequencerCriterion(nn.ClassNLLCriterion()) 31 | 32 | -- build dummy dataset (task is to predict next item, given previous) 33 | sequence_ = torch.LongTensor():range(1,10) -- 1,2,3,4,5,6,7,8,9,10 34 | sequence = torch.LongTensor(100,10):copy(sequence_:view(1,10):expand(100,10)) 35 | sequence:resize(100*10) -- one long sequence of 1,2,3...,10,1,2,3...10... 36 | 37 | offsets = {} 38 | for i=1,batchSize do 39 | table.insert(offsets, math.ceil(math.random()*sequence:size(1))) 40 | end 41 | offsets = torch.LongTensor(offsets) 42 | 43 | -- training 44 | local iteration = 1 45 | while true do 46 | -- 1. create a sequence of rho time-steps 47 | 48 | local inputs, targets = {}, {} 49 | for step=1,rho do 50 | -- a batch of inputs 51 | inputs[step] = sequence:index(1, offsets) 52 | -- incement indices 53 | offsets:add(1) 54 | for j=1,batchSize do 55 | if offsets[j] > sequence:size(1) then 56 | offsets[j] = 1 57 | end 58 | end 59 | targets[step] = sequence:index(1, offsets) 60 | end 61 | 62 | -- 2. forward sequence through rnn 63 | 64 | rnn:zeroGradParameters() 65 | 66 | local outputs = rnn:forward(inputs) 67 | local err = criterion:forward(outputs, targets) 68 | 69 | print(string.format("Iteration %d ; NLL err = %f ", iteration, err)) 70 | 71 | -- 3. backward sequence through rnn (i.e. backprop through time) 72 | 73 | local gradOutputs = criterion:backward(outputs, targets) 74 | local gradInputs = rnn:backward(inputs, gradOutputs) 75 | 76 | -- 4. 
update 77 | 78 | rnn:updateParameters(lr) 79 | 80 | iteration = iteration + 1 81 | end 82 | -------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 1 | require 'dpnn' 2 | require 'torchx' 3 | dpnn.version = dpnn.version or 0 4 | assert(dpnn.version > 1, "Please update dpnn : luarocks install dpnn") 5 | 6 | -- create global rnn table: 7 | rnn = {} 8 | rnn.version = 2 9 | rnn.version = 2.1 -- [get,set][Grad]HiddenState(step) 10 | 11 | unpack = unpack or table.unpack 12 | 13 | torch.include('rnn', 'recursiveUtils.lua') 14 | 15 | -- extensions to nn.Module 16 | torch.include('rnn', 'Module.lua') 17 | 18 | -- override nn.Dropout 19 | torch.include('rnn', 'Dropout.lua') 20 | 21 | -- for testing: 22 | torch.include('rnn', 'test/test.lua') 23 | torch.include('rnn', 'test/bigtest.lua') 24 | 25 | -- support modules 26 | torch.include('rnn', 'ZeroGrad.lua') 27 | torch.include('rnn', 'LinearNoBias.lua') 28 | torch.include('rnn', 'SAdd.lua') 29 | torch.include('rnn', 'CopyGrad.lua') 30 | 31 | -- recurrent modules 32 | torch.include('rnn', 'LookupTableMaskZero.lua') 33 | torch.include('rnn', 'MaskZero.lua') 34 | torch.include('rnn', 'TrimZero.lua') 35 | torch.include('rnn', 'AbstractRecurrent.lua') 36 | torch.include('rnn', 'Recurrent.lua') 37 | torch.include('rnn', 'LSTM.lua') 38 | torch.include('rnn', 'FastLSTM.lua') 39 | torch.include('rnn', 'GRU.lua') 40 | torch.include('rnn', 'Mufuru.lua') 41 | torch.include('rnn', 'Recursor.lua') 42 | torch.include('rnn', 'Recurrence.lua') 43 | torch.include('rnn', 'NormStabilizer.lua') 44 | 45 | -- sequencer modules 46 | torch.include('rnn', 'AbstractSequencer.lua') 47 | torch.include('rnn', 'Repeater.lua') 48 | torch.include('rnn', 'Sequencer.lua') 49 | torch.include('rnn', 'BiSequencer.lua') 50 | torch.include('rnn', 'BiSequencerLM.lua') 51 | torch.include('rnn', 'RecurrentAttention.lua') 52 | 53 | -- sequencer + recurrent modules 54 | torch.include('rnn', 'SeqLSTM.lua') 55 | torch.include('rnn', 'SeqLSTMP.lua') 56 | torch.include('rnn', 'SeqGRU.lua') 57 | torch.include('rnn', 'SeqReverseSequence.lua') 58 | torch.include('rnn', 'SeqBRNN.lua') 59 | 60 | -- recurrent criterions: 61 | torch.include('rnn', 'SequencerCriterion.lua') 62 | torch.include('rnn', 'RepeaterCriterion.lua') 63 | torch.include('rnn', 'MaskZeroCriterion.lua') 64 | 65 | -- prevent likely name conflicts 66 | nn.rnn = rnn 67 | -------------------------------------------------------------------------------- /recursiveUtils.lua: -------------------------------------------------------------------------------- 1 | 2 | function rnn.recursiveResizeAs(t1,t2) 3 | if torch.type(t2) == 'table' then 4 | t1 = (torch.type(t1) == 'table') and t1 or {t1} 5 | for key,_ in pairs(t2) do 6 | t1[key], t2[key] = rnn.recursiveResizeAs(t1[key], t2[key]) 7 | end 8 | elseif torch.isTensor(t2) then 9 | t1 = torch.isTensor(t1) and t1 or t2.new() 10 | t1:resizeAs(t2) 11 | else 12 | error("expecting nested tensors or tables. Got ".. 13 | torch.type(t1).." and "..torch.type(t2).." 
instead") 14 | end 15 | return t1, t2 16 | end 17 | 18 | function rnn.recursiveSet(t1,t2) 19 | if torch.type(t2) == 'table' then 20 | t1 = (torch.type(t1) == 'table') and t1 or {t1} 21 | for key,_ in pairs(t2) do 22 | t1[key], t2[key] = rnn.recursiveSet(t1[key], t2[key]) 23 | end 24 | elseif torch.isTensor(t2) then 25 | t1 = torch.isTensor(t1) and t1 or t2.new() 26 | t1:set(t2) 27 | else 28 | error("expecting nested tensors or tables. Got ".. 29 | torch.type(t1).." and "..torch.type(t2).." instead") 30 | end 31 | return t1, t2 32 | end 33 | 34 | function rnn.recursiveCopy(t1,t2) 35 | if torch.type(t2) == 'table' then 36 | t1 = (torch.type(t1) == 'table') and t1 or {t1} 37 | for key,_ in pairs(t2) do 38 | t1[key], t2[key] = rnn.recursiveCopy(t1[key], t2[key]) 39 | end 40 | elseif torch.isTensor(t2) then 41 | t1 = torch.isTensor(t1) and t1 or t2.new() 42 | t1:resizeAs(t2):copy(t2) 43 | else 44 | error("expecting nested tensors or tables. Got ".. 45 | torch.type(t1).." and "..torch.type(t2).." instead") 46 | end 47 | return t1, t2 48 | end 49 | 50 | function rnn.recursiveAdd(t1, t2) 51 | if torch.type(t2) == 'table' then 52 | t1 = (torch.type(t1) == 'table') and t1 or {t1} 53 | for key,_ in pairs(t2) do 54 | t1[key], t2[key] = rnn.recursiveAdd(t1[key], t2[key]) 55 | end 56 | elseif torch.isTensor(t1) and torch.isTensor(t2) then 57 | t1:add(t2) 58 | else 59 | error("expecting nested tensors or tables. Got ".. 60 | torch.type(t1).." and "..torch.type(t2).." instead") 61 | end 62 | return t1, t2 63 | end 64 | 65 | function rnn.recursiveTensorEq(t1, t2) 66 | if torch.type(t2) == 'table' then 67 | local isEqual = true 68 | if torch.type(t1) ~= 'table' then 69 | return false 70 | end 71 | for key,_ in pairs(t2) do 72 | isEqual = isEqual and rnn.recursiveTensorEq(t1[key], t2[key]) 73 | end 74 | return isEqual 75 | elseif torch.isTensor(t1) and torch.isTensor(t2) then 76 | local diff = t1-t2 77 | local err = diff:abs():max() 78 | return err < 0.00001 79 | else 80 | error("expecting nested tensors or tables. Got ".. 81 | torch.type(t1).." and "..torch.type(t2).." instead") 82 | end 83 | end 84 | 85 | function rnn.recursiveNormal(t2) 86 | if torch.type(t2) == 'table' then 87 | for key,_ in pairs(t2) do 88 | t2[key] = rnn.recursiveNormal(t2[key]) 89 | end 90 | elseif torch.isTensor(t2) then 91 | t2:normal() 92 | else 93 | error("expecting tensor or table thereof. Got " 94 | ..torch.type(t2).." instead") 95 | end 96 | return t2 97 | end 98 | 99 | function rnn.recursiveFill(t2, val) 100 | if torch.type(t2) == 'table' then 101 | for key,_ in pairs(t2) do 102 | t2[key] = rnn.recursiveFill(t2[key], val) 103 | end 104 | elseif torch.isTensor(t2) then 105 | t2:fill(val) 106 | else 107 | error("expecting tensor or table thereof. Got " 108 | ..torch.type(t2).." instead") 109 | end 110 | return t2 111 | end 112 | 113 | function rnn.recursiveType(param, type_str) 114 | if torch.type(param) == 'table' then 115 | for i = 1, #param do 116 | param[i] = rnn.recursiveType(param[i], type_str) 117 | end 118 | else 119 | if torch.typename(param) and 120 | torch.typename(param):find('torch%..+Tensor') then 121 | param = param:type(type_str) 122 | end 123 | end 124 | return param 125 | end 126 | 127 | function rnn.recursiveSum(t2) 128 | local sum = 0 129 | if torch.type(t2) == 'table' then 130 | for key,_ in pairs(t2) do 131 | sum = sum + rnn.recursiveSum(t2[key], val) 132 | end 133 | elseif torch.isTensor(t2) then 134 | return t2:sum() 135 | else 136 | error("expecting tensor or table thereof. 
Got " 137 | ..torch.type(t2).." instead") 138 | end 139 | return sum 140 | end 141 | 142 | function rnn.recursiveNew(t2) 143 | if torch.type(t2) == 'table' then 144 | local t1 = {} 145 | for key,_ in pairs(t2) do 146 | t1[key] = rnn.recursiveNew(t2[key]) 147 | end 148 | return t1 149 | elseif torch.isTensor(t2) then 150 | return t2.new() 151 | else 152 | error("expecting tensor or table thereof. Got " 153 | ..torch.type(t2).." instead") 154 | end 155 | end 156 | -------------------------------------------------------------------------------- /rocks/rnn-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "rnn" 2 | version = "scm-1" 3 | 4 | source = { 5 | url = "git://github.com/Element-Research/rnn", 6 | tag = "master" 7 | } 8 | 9 | description = { 10 | summary = "A Recurrent Neural Network library that extends Torch's nn", 11 | detailed = [[ 12 | A library to build RNNs, LSTMs, GRUs, BRNNs, BLSTMs, and so forth and so on. 13 | ]], 14 | homepage = "https://github.com/Element-Research/rnn", 15 | license = "BSD" 16 | } 17 | 18 | dependencies = { 19 | "torch >= 7.0", 20 | "nn >= 1.0", 21 | "dpnn >= 1.0", 22 | "torchx >= 1.0" 23 | } 24 | 25 | build = { 26 | type = "command", 27 | build_command = [[ 28 | cmake -E make_directory build; 29 | cd build; 30 | cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)"; 31 | $(MAKE) 32 | ]], 33 | install_command = "cd build && $(MAKE) install" 34 | } 35 | -------------------------------------------------------------------------------- /scripts/evaluate-rnnlm.lua: -------------------------------------------------------------------------------- 1 | require 'nngraph' 2 | require 'rnn' 3 | local dl = require 'dataload' 4 | 5 | 6 | --[[command line arguments]]-- 7 | cmd = torch.CmdLine() 8 | cmd:text() 9 | cmd:text('Evaluate a RNNLM') 10 | cmd:text('Options:') 11 | cmd:option('--xplogpath', '', 'path to a previously saved xplog containing model') 12 | cmd:option('--cuda', false, 'model was saved with cuda') 13 | cmd:option('--device', 1, 'which GPU device to use') 14 | cmd:option('--nsample', -1, 'sample this many words from the language model') 15 | cmd:option('--temperature', 1, 'temperature of multinomial. Increase to sample wildly, reduce to be more deterministic.') 16 | cmd:option('--dumpcsv', false, 'dump training and validation error to CSV file') 17 | cmd:text() 18 | local opt = cmd:parse(arg or {}) 19 | 20 | assert(opt.temperature > 0) 21 | 22 | -- check that saved model exists 23 | assert(paths.filep(opt.xplogpath), opt.xplogpath..' 
does not exist') 24 | 25 | if opt.cuda then 26 | require 'cunn' 27 | cutorch.setDevice(opt.device) 28 | end 29 | 30 | local xplog = torch.load(opt.xplogpath) 31 | local lm = xplog.model 32 | local criterion = xplog.criterion 33 | local targetmodule = xplog.targetmodule 34 | 35 | print("Hyper-parameters (xplog.opt):") 36 | print(xplog.opt) 37 | 38 | local trainerr = xplog.trainnceloss or xplog.trainppl 39 | local validerr = xplog.valnceloss or xplog.valppl 40 | 41 | print(string.format("Error (epoch=%d): training=%f; validation=%f", xplog.epoch, trainerr[#trainerr], validerr[#validerr])) 42 | 43 | if opt.dumpcsv then 44 | local csvfile = opt.xplogpath:match('([^/]+)[.]t7$')..'.csv' 45 | paths.mkdir('learningcurves') 46 | csvpath = paths.concat('learningcurves', csvfile) 47 | 48 | local file = io.open(csvpath, 'w') 49 | file:write("epoch,trainerr,validerr\n") 50 | for i=1,#trainerr do 51 | file:write(string.format('%d,%f,%f\n', i, trainerr[i], validerr[i])) 52 | end 53 | file:close() 54 | 55 | print("CSV file saved to "..csvpath) 56 | os.exit() 57 | end 58 | 59 | local trainset, validset, testset 60 | if xplog.dataset == 'PennTreeBank' then 61 | print"Loading Penn Tree Bank test set" 62 | trainset, validset, testset = dl.loadPTB({50, 1, 1}) 63 | assert(trainset.vocab['the'] == xplog.vocab['the']) 64 | elseif xplog.dataset == 'GoogleBillionWords' then 65 | print"Loading Google Billion Words test set" 66 | trainset, validset, testset = dl.loadGBW({50,1,1}, 'train_tiny.th7') 67 | else 68 | error"Unrecognized dataset" 69 | end 70 | 71 | 72 | for i,nce in ipairs(lm:findModules('nn.NCEModule')) do 73 | nce.normalized = true 74 | nce.logsoftmax = true 75 | if not opt.nce then 76 | print"Found NCEModule" 77 | criterion = nn.SequencerCriterion(nn.MaskZeroCriterion(nn.ClassNLLCriterion(), 1)) 78 | if opt.cuda then criterion:cuda() end 79 | opt.nce = true 80 | end 81 | end 82 | 83 | print(lm) 84 | 85 | lm:forget() 86 | lm:evaluate() 87 | 88 | if opt.nsample > 0 then 89 | if xplog.dataset == 'GoogleBillionWords' then 90 | local sampletext = {''} 91 | local prevword = trainset.vocab[''] 92 | assert(prevword) 93 | local inputs = torch.LongTensor(1,1) -- seqlen x batchsize 94 | local targets = opt.cuda and torch.CudaTensor(1) or torch.LongTensor(1) -- dummy tensor 95 | local buffer = torch.FloatTensor() 96 | for i=1,opt.nsample do 97 | inputs:fill(prevword) 98 | local output = lm:forward({inputs,{targets}})[1][1] 99 | buffer:resize(output:size()):copy(output) 100 | buffer:div(opt.temperature) 101 | buffer:exp() 102 | local sample = torch.multinomial(buffer, 1, true) 103 | local currentword = trainset.ivocab[sample[1]] 104 | table.insert(sampletext, currentword) 105 | if currentword == '' then 106 | -- sentences were trained independently, so we explicitly tell it to start a new sentence 107 | lm:forget() 108 | prevword = trainset.vocab[''] 109 | table.insert(sampletext, '\n') 110 | else 111 | prevword = sample[1] 112 | end 113 | end 114 | print(table.concat(sampletext, ' ')) 115 | else 116 | local sampletext = {} 117 | local prevword = trainset.vocab[''] 118 | assert(prevword) 119 | local inputs = torch.LongTensor(1,1) -- seqlen x batchsize 120 | if opt.cuda then inputs = inputs:cuda() end 121 | local buffer = torch.FloatTensor() 122 | for i=1,opt.nsample do 123 | inputs:fill(prevword) 124 | local output = lm:forward(inputs)[1][1] 125 | buffer:resize(output:size()):copy(output) 126 | buffer:div(opt.temperature) 127 | buffer:exp() 128 | local sample = torch.multinomial(buffer, 1, true) 129 | local 
currentword = trainset.ivocab[sample[1]] 130 | table.insert(sampletext, currentword) 131 | prevword = sample[1] 132 | end 133 | print(table.concat(sampletext, ' ')) 134 | end 135 | else 136 | local sumErr, count = 0, 0 137 | 138 | for i, inputs, targets in testset:subiter(xplog.opt.seqlen or 100) do 139 | inputs:apply(function(x) 140 | if x > 0 then 141 | count = count + 1 142 | end 143 | end) 144 | local targets = targetmodule:forward(targets) 145 | local inputs = opt.nce and {inputs, targets} or inputs 146 | local outputs = lm:forward(inputs) 147 | local err = criterion:forward(outputs, targets) 148 | sumErr = sumErr + err 149 | end 150 | 151 | if count ~= testset:size() then 152 | local meanseqlen = testset:size()/(testset:size() - count) 153 | print("mean sequence length : "..meanseqlen) 154 | end 155 | 156 | local ppl = torch.exp(sumErr/count) 157 | print("Test PPL : "..ppl) 158 | end 159 | 160 | -------------------------------------------------------------------------------- /scripts/evaluate-rva.lua: -------------------------------------------------------------------------------- 1 | require 'dp' 2 | require 'rnn' 3 | require 'optim' 4 | 5 | -- References : 6 | -- A. http://papers.nips.cc/paper/5542-recurrent-models-of-visual-attention.pdf 7 | -- B. http://incompleteideas.net/sutton/williams-92.pdf 8 | 9 | --[[command line arguments]]-- 10 | cmd = torch.CmdLine() 11 | cmd:text() 12 | cmd:text('Evaluate a Recurrent Model for Visual Attention') 13 | cmd:text('Options:') 14 | cmd:option('--xpPath', '', 'path to a previously saved model') 15 | cmd:option('--cuda', false, 'model was saved with cuda') 16 | cmd:option('--evalTest', false, 'model was saved with cuda') 17 | cmd:option('--stochastic', false, 'evaluate the model stochatically. Generate glimpses stochastically') 18 | cmd:option('--dataset', 'Mnist', 'which dataset to use : Mnist | TranslattedMnist | etc') 19 | cmd:option('--overwrite', false, 'overwrite checkpoint') 20 | cmd:text() 21 | local opt = cmd:parse(arg or {}) 22 | 23 | -- check that saved model exists 24 | assert(paths.filep(opt.xpPath), opt.xpPath..' does not exist') 25 | 26 | if opt.cuda then 27 | require 'cunn' 28 | end 29 | 30 | xp = torch.load(opt.xpPath) 31 | model = xp:model().module 32 | tester = xp:tester() or xp:validator() -- dp.Evaluator 33 | tester:sampler()._epoch_size = nil 34 | conf = tester:feedback() -- dp.Confusion 35 | cm = conf._cm -- optim.ConfusionMatrix 36 | 37 | print("Last evaluation of "..(xp:tester() and 'test' or 'valid').." set :") 38 | print(cm) 39 | 40 | if opt.dataset == 'TranslatedMnist' then 41 | ds = torch.checkpoint( 42 | paths.concat(dp.DATA_DIR, 'checkpoint/dp.TranslatedMnist_test.t7'), 43 | function() 44 | local ds = dp[opt.dataset]{load_all=false} 45 | ds:loadTest() 46 | return ds 47 | end, 48 | opt.overwrite 49 | ) 50 | else 51 | ds = dp[opt.dataset]() 52 | end 53 | 54 | ra = model:findModules('nn.RecurrentAttention')[1] 55 | sg = model:findModules('nn.SpatialGlimpse')[1] 56 | 57 | -- stochastic or deterministic 58 | for i=1,#ra.actions do 59 | local rn = ra.action:getStepModule(i):findModules('nn.ReinforceNormal')[1] 60 | rn.stochastic = opt.stochastic 61 | end 62 | 63 | if opt.evalTest then 64 | conf:reset() 65 | tester:propagateEpoch(ds:testSet()) 66 | 67 | print((opt.stochastic and "Stochastic" or "Deterministic") .. 
"evaluation of test set :") 68 | print(cm) 69 | end 70 | 71 | inputs = ds:get('test','inputs') 72 | targets = ds:get('test','targets', 'b') 73 | 74 | input = inputs:narrow(1,1,10) 75 | model:training() -- otherwise the rnn doesn't save intermediate time-step states 76 | if not opt.stochastic then 77 | for i=1,#ra.actions do 78 | local rn = ra.action:getStepModule(i):findModules('nn.ReinforceNormal')[1] 79 | rn.stdev = 0 -- deterministic 80 | end 81 | end 82 | output = model:forward(input) 83 | 84 | function drawBox(img, bbox, channel) 85 | channel = channel or 1 86 | 87 | local x1, y1 = torch.round(bbox[1]), torch.round(bbox[2]) 88 | local x2, y2 = torch.round(bbox[1] + bbox[3]), torch.round(bbox[2] + bbox[4]) 89 | 90 | x1, y1 = math.max(1, x1), math.max(1, y1) 91 | x2, y2 = math.min(img:size(3), x2), math.min(img:size(2), y2) 92 | 93 | local max = img:max() 94 | 95 | for i=x1,x2 do 96 | img[channel][y1][i] = max 97 | img[channel][y2][i] = max 98 | end 99 | for i=y1,y2 do 100 | img[channel][i][x1] = max 101 | img[channel][i][x2] = max 102 | end 103 | 104 | return img 105 | end 106 | 107 | locations = ra.actions 108 | 109 | input = nn.Convert(ds:ioShapes(),'bchw'):forward(input) 110 | glimpses = {} 111 | patches = {} 112 | 113 | params = nil 114 | for i=1,input:size(1) do 115 | local img = input[i] 116 | for j,location in ipairs(locations) do 117 | local glimpse = glimpses[j] or {} 118 | glimpses[j] = glimpse 119 | local patch = patches[j] or {} 120 | patches[j] = patch 121 | 122 | local xy = location[i] 123 | -- (-1,-1) top left corner, (1,1) bottom right corner of image 124 | local x, y = xy:select(1,1), xy:select(1,2) 125 | -- (0,0), (1,1) 126 | x, y = (x+1)/2, (y+1)/2 127 | -- (1,1), (input:size(3), input:size(4)) 128 | x, y = x*(input:size(3)-1)+1, y*(input:size(4)-1)+1 129 | 130 | local gimg = img:clone() 131 | for d=1,sg.depth do 132 | local size = sg.height*(sg.scale^(d-1)) 133 | local bbox = {y-size/2, x-size/2, size, size} 134 | drawBox(gimg, bbox, 1) 135 | end 136 | glimpse[i] = gimg 137 | 138 | local sg_, ps 139 | if j == 1 then 140 | sg_ = ra.rnn.initialModule:findModules('nn.SpatialGlimpse')[1] 141 | else 142 | sg_ = ra.rnn.sharedClones[j]:findModules('nn.SpatialGlimpse')[1] 143 | end 144 | patch[i] = image.scale(img:clone():float(), sg_.output[i]:narrow(1,1,1):float()) 145 | 146 | collectgarbage() 147 | end 148 | end 149 | 150 | paths.mkdir('glimpse') 151 | for j,glimpse in ipairs(glimpses) do 152 | local g = image.toDisplayTensor{input=glimpse,nrow=10,padding=3} 153 | local p = image.toDisplayTensor{input=patches[j],nrow=10,padding=3} 154 | image.save("glimpse/glimpse"..j..".png", g) 155 | image.save("glimpse/patch"..j..".png", p) 156 | end 157 | 158 | 159 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | FILE(GLOB luasrc *.lua) 2 | ADD_TORCH_PACKAGE(rnn/test "${src}" "${luasrc}") 3 | -------------------------------------------------------------------------------- /test/GRU_test.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'nn' 3 | 4 | require 'GRU' 5 | local gradcheck = require 'util.gradcheck' 6 | local tests = {} 7 | local tester = torch.Tester() 8 | 9 | 10 | local function check_size(x, dims) 11 | tester:assert(x:dim() == #dims) 12 | for i, d in ipairs(dims) do 13 | tester:assert(x:size(i) == d) 14 | end 15 | end 16 | 17 | 18 | function tests.testForward() 19 | 
local N, T, D, H = 3, 4, 5, 6 20 | 21 | local h0 = torch.randn(N, H) 22 | local x = torch.randn(N, T, D) 23 | 24 | local gru = nn.GRU(D, H) 25 | local h = gru:forward{h0, x} 26 | 27 | -- Do a naive forward pass 28 | local naive_h = torch.Tensor(N, T, H) 29 | 30 | 31 | -- Unpack weight, bias for each gate 32 | local Wxu = gru.weight[{{1, D}, {1, H}}] 33 | local Wxr = gru.weight[{{1, D}, {H + 1, 2 * H}}] 34 | local Wxhc = gru.weight[{{1, D}, {2 * H + 1, 3 * H}}] 35 | 36 | 37 | local Whu = gru.weight[{{D + 1, D + H}, {1, H}}] 38 | local Whr = gru.weight[{{D + 1, D + H}, {H + 1, 2 * H}}] 39 | local Whhc = gru.weight[{{D + 1, D + H}, {2 * H + 1, 3 * H}}] 40 | 41 | 42 | local bu = gru.bias[{{1, H}}]:view(1, H):expand(N, H) 43 | local br = gru.bias[{{H + 1, 2 * H}}]:view(1, H):expand(N, H) 44 | local bhc = gru.bias[{{2 * H + 1, 3 * H}}]:view(1, H):expand(N, H) 45 | 46 | 47 | local prev_h = h0:clone() 48 | for t = 1, T do 49 | local xt = x[{{}, t}] 50 | local u = torch.sigmoid(torch.mm(xt, Wxu) + torch.mm(prev_h, Whu) + bu) 51 | local r = torch.sigmoid(torch.mm(xt, Wxr) + torch.mm(prev_h, Whr) + br) 52 | local hc = torch.tanh(torch.mm(xt, Wxhc) + torch.mm(torch.cmul(prev_h,r), Whhc) + bhc) 53 | local next_h = torch.cmul(hc, u) + prev_h - torch.cmul(prev_h, u) 54 | 55 | naive_h[{{}, t}] = next_h 56 | 57 | prev_h = next_h 58 | end 59 | 60 | tester:assertTensorEq(naive_h, h, 1e-10) 61 | end 62 | 63 | 64 | function tests.gradcheck() 65 | local N, T, D, H = 2, 3, 4, 5 66 | 67 | local x = torch.randn(N, T, D) 68 | local h0 = torch.randn(N, H) 69 | 70 | 71 | local gru = nn.GRU(D, H) 72 | local h = gru:forward{h0, x} 73 | 74 | local dh = torch.randn(#h) 75 | 76 | gru:zeroGradParameters() 77 | local dh0, dx = unpack(gru:backward({h0, x}, dh)) 78 | local dw = gru.gradWeight:clone() 79 | local db = gru.gradBias:clone() 80 | 81 | local function fx(x) return gru:forward{h0, x} end 82 | local function fh0(h0) return gru:forward{h0, x} end 83 | 84 | local function fw(w) 85 | local old_w = gru.weight 86 | gru.weight = w 87 | local out = gru:forward{ h0, x} 88 | gru.weight = old_w 89 | return out 90 | end 91 | 92 | local function fb(b) 93 | local old_b = gru.bias 94 | gru.bias = b 95 | local out = gru:forward{h0, x} 96 | gru.bias = old_b 97 | return out 98 | end 99 | 100 | local dx_num = gradcheck.numeric_gradient(fx, x, dh) 101 | local dh0_num = gradcheck.numeric_gradient(fh0, h0, dh) 102 | 103 | local dw_num = gradcheck.numeric_gradient(fw, gru.weight, dh) 104 | local db_num = gradcheck.numeric_gradient(fb, gru.bias, dh) 105 | 106 | local dx_error = gradcheck.relative_error(dx_num, dx) 107 | local dh0_error = gradcheck.relative_error(dh0_num, dh0) 108 | 109 | local dw_error = gradcheck.relative_error(dw_num, dw) 110 | local db_error = gradcheck.relative_error(db_num, db) 111 | 112 | tester:assertle(dh0_error, 1e-4) 113 | 114 | tester:assertle(dx_error, 1e-5) 115 | tester:assertle(dw_error, 1e-4) 116 | tester:assertle(db_error, 1e-5) 117 | end 118 | 119 | 120 | -- Make sure that everything works correctly when we don't pass an initial cell 121 | -- state; in this case we do pass an initial hidden state and an input sequence 122 | function tests.noCellTest() 123 | local N, T, D, H = 4, 5, 6, 7 124 | local gru = nn.GRU(D, H) 125 | 126 | for t = 1, 3 do 127 | local x = torch.randn(N, T, D) 128 | local h0 = torch.randn(N, H) 129 | local dout = torch.randn(N, T, H) 130 | 131 | local out = gru:forward{h0, x} 132 | local din = gru:backward({h0, x}, dout) 133 | 134 | tester:assert(torch.type(din) == 'table') 135 | 
tester:assert(#din == 2) 136 | check_size(din[1], {N, H}) 137 | check_size(din[2], {N, T, D}) 138 | 139 | -- Make sure the initial cell state got reset to zero 140 | --tester:assertTensorEq(gru.c0, torch.zeros(N, H), 0) 141 | end 142 | end 143 | 144 | 145 | -- Make sure that everything works when we don't pass initial hidden or initial 146 | -- cell state; in this case we only pass input sequence of vectors 147 | function tests.noHiddenTest() 148 | local N, T, D, H = 4, 5, 6, 7 149 | local gru = nn.GRU(D, H) 150 | 151 | for t = 1, 3 do 152 | local x = torch.randn(N, T, D) 153 | local dout = torch.randn(N, T, H) 154 | 155 | local out = gru:forward(x) 156 | local din = gru:backward(x, dout) 157 | 158 | tester:assert(torch.isTensor(din)) 159 | check_size(din, {N, T, D}) 160 | 161 | -- Make sure the initial cell state and initial hidden state are zero 162 | --tester:assertTensorEq(gru.c0, torch.zeros(N, H), 0) 163 | tester:assertTensorEq(gru.h0, torch.zeros(N, H), 0) 164 | end 165 | end 166 | 167 | 168 | function tests.rememberStatesTest() 169 | local N, T, D, H = 5, 6, 7, 8 170 | local gru = nn.GRU(D, H) 171 | gru.remember_states = true 172 | 173 | local final_h = nil 174 | for t = 1, 4 do 175 | local x = torch.randn(N, T, D) 176 | local dout = torch.randn(N, T, H) 177 | local out = gru:forward(x) 178 | local din = gru:backward(x, dout) 179 | 180 | if t == 1 then 181 | tester:assertTensorEq(gru.h0, torch.zeros(N, H), 0) 182 | elseif t > 1 then 183 | tester:assertTensorEq(gru.h0, final_h, 0) 184 | end 185 | final_h = out[{{}, T}]:clone() 186 | end 187 | 188 | -- Initial states should reset to zero after we call resetStates 189 | gru:resetStates() 190 | local x = torch.randn(N, T, D) 191 | local dout = torch.randn(N, T, H) 192 | gru:forward(x) 193 | gru:backward(x, dout) 194 | tester:assertTensorEq(gru.h0, torch.zeros(N, H), 0) 195 | end 196 | 197 | 198 | tester:add(tests) 199 | tester:run() 200 | -------------------------------------------------------------------------------- /test/bigtest.lua: -------------------------------------------------------------------------------- 1 | local _ = require 'moses' 2 | local rnnbigtest = {} 3 | local precision = 1e-5 4 | local mytester 5 | 6 | function rnnbigtest.NCE_nan() 7 | local success, dl = pcall(function() return require 'dataload' end) 8 | if not success then 9 | return 10 | end 11 | if not pcall(function() require 'cunn' end) then 12 | return 13 | end 14 | 15 | local datapath = paths.concat(dl.DATA_PATH, 'BillionWords') 16 | local wordfreq = torch.load(paths.concat(datapath, 'word_freq.th7')) 17 | local unigram = wordfreq:float()--:add(0.0000001):log() 18 | print("U", unigram:min(), unigram:mean(), unigram:std(), unigram:max()) 19 | 20 | local batchsize = 128 21 | local seqlen = 50 22 | local hiddensize = 200 23 | local vocabsize = unigram:size(1) 24 | local k = 400 25 | 26 | local tinyset = dl.MultiSequenceGBW(datapath, 'train_tiny.th7', batchsize, verbose) 27 | 28 | local lm = nn.Sequential() 29 | local lookup = nn.LookupTableMaskZero(vocabsize, hiddensize) 30 | lm:add(lookup) 31 | 32 | for i=1,2 do 33 | local rnn = nn.SeqLSTM(hiddensize, hiddensize) 34 | rnn.maskzero = true 35 | lm:add(rnn) 36 | end 37 | 38 | lm:add(nn.SplitTable(1)) 39 | 40 | local ncemodule = nn.NCEModule(hiddensize, vocabsize, k, unigram, 1) 41 | 42 | lm = nn.Sequential() 43 | :add(nn.ParallelTable() 44 | :add(lm):add(nn.Identity())) 45 | :add(nn.ZipTable()) 46 | 47 | lm:add(nn.Sequencer(nn.MaskZero(ncemodule, 1))) 48 | lm:remember() 49 | 50 | local crit = 
nn.MaskZeroCriterion(nn.NCECriterion(), 0) 51 | local targetmodule = nn.Sequential():add(nn.Convert()):add(nn.SplitTable(1)) 52 | local criterion = nn.SequencerCriterion(crit) 53 | 54 | for k,param in ipairs(lm:parameters()) do 55 | param:uniform(-0.1, 0.1) 56 | end 57 | 58 | -- comment this out to see the difference 59 | ncemodule:reset() 60 | 61 | lm:training() 62 | 63 | lm:cuda() 64 | criterion:cuda() 65 | targetmodule:cuda() 66 | 67 | local sumErr = 0 68 | local _ = require 'moses' 69 | for k,inputs, targets in tinyset:subiter(seqlen, 512) do 70 | local targets = targetmodule:forward(targets) 71 | local inputs = {inputs, targets} 72 | -- forward 73 | local outputs = lm:forward(inputs) 74 | for i,output in ipairs(outputs) do 75 | assert(not _.isNaN(output[1]:sum()), tostring(i)) 76 | assert(not _.isNaN(output[2]:sum()), tostring(i)) 77 | assert(not _.isNaN(output[3]:sum()), tostring(i)) 78 | assert(not _.isNaN(output[4]:sum()), tostring(i)) 79 | end 80 | local err = criterion:forward(outputs, targets) 81 | assert(not _.isNaN(err)) 82 | sumErr = sumErr + err 83 | -- backward 84 | local gradOutputs = criterion:backward(outputs, targets) 85 | 86 | for i,gradOutput in ipairs(gradOutputs) do 87 | assert(not _.isNaN(gradOutput[1]:sum()), tostring(i)) 88 | assert(not _.isNaN(gradOutput[2]:sum()), tostring(i)) 89 | end 90 | lm:zeroGradParameters() 91 | lm:backward(inputs, gradOutputs) 92 | lm:updateParameters(0.7) 93 | local params, gradParams = lm:parameters() 94 | 95 | for i,param in ipairs(params) do 96 | assert(not _.isNaN(param:sum()), tostring(i)) 97 | assert(not _.isNaN(gradParams[i]:sum()), tostring(i)) 98 | end 99 | 100 | local counts = {} 101 | inputs[1]:float():apply(function(x) 102 | counts[x] = (counts[x] or 0) + 1 103 | end) 104 | 105 | print("Top freqs", unpack(_.last(_.sort(_.values(counts)), 5))) 106 | print("Batch : "..k..", err="..err) 107 | for name,module in pairs{LT=lookup, NCE=ncemodule} do 108 | print(name..".gradWeight : "..module.gradWeight:norm()..", .weight : "..module.weight:norm()) 109 | if name == 'NCE' then 110 | print(name..".gradBias : "..module.gradBias:norm()..", .bias : "..module.bias:norm()) 111 | end 112 | end 113 | end 114 | 115 | end 116 | 117 | function rnn.bigtest(tests) 118 | mytester = torch.Tester() 119 | mytester:add(rnnbigtest) 120 | math.randomseed(os.time()) 121 | mytester:run(tests) 122 | end 123 | -------------------------------------------------------------------------------- /test/mnistsample.t7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Element-Research/rnn/ba937a08f26e116db98b5cd6a690f49ea8f8316e/test/mnistsample.t7 -------------------------------------------------------------------------------- /test/test_trimzero.lua: -------------------------------------------------------------------------------- 1 | require 'rnn' 2 | require 'dp' 3 | require 'sys' 4 | 5 | torch.manualSeed(123) 6 | 7 | batch_size = 200 8 | sentence_length = 26 9 | vocabulary_size = 1000 10 | word_embedding_size = 200 11 | rnn_size = 300 12 | 13 | x = torch.ceil(torch.rand(batch_size,sentence_length)*vocabulary_size) 14 | t = torch.ceil(torch.rand(batch_size)*10) 15 | 16 | -- variable sentence lengths 17 | for i=1,batch_size do 18 | idx = torch.floor(torch.rand(1)[1]*(sentence_length)) 19 | if idx > 0 then x[i][{{1,idx}}]:fill(0) end 20 | end 21 | 22 | rnns = {'FastLSTM','GRU'} 23 | methods = {'maskZero', 'trimZero'} 24 | 25 | for ir,arch in pairs(rnns) do 26 | local rnn = 
nn[arch](word_embedding_size, rnn_size) 27 | local model = nn.Sequential() 28 | :add(nn.LookupTableMaskZero(vocabulary_size, word_embedding_size)) 29 | :add(nn.SplitTable(2)) 30 | :add(nn.Sequencer(rnn)) 31 | :add(nn.SelectTable(sentence_length)) 32 | :add(nn.Linear(rnn_size, 10)) 33 | model:getParameters():uniform(-0.1, 0.1) 34 | collectgarbage() 35 | criterion = nn.CrossEntropyCriterion() 36 | local models = {} 37 | for j=1,#methods do 38 | table.insert(models, model:clone()) 39 | end 40 | collectgarbage() 41 | for im,method in pairs(methods) do 42 | print('-- '..arch..' with '..method) 43 | model = models[im] 44 | rnn = model:get(3).module 45 | rnn[method](rnn, 1) -- decorate the recurrent module with maskZero or trimZero (nInputDim=1) 46 | sys.tic() 47 | for i=1,3 do 48 | model:zeroGradParameters() 49 | y = model:forward(x) 50 | loss = criterion:forward(y,t) 51 | print('loss:', loss) 52 | collectgarbage() 53 | dy = criterion:backward(y,t) 54 | model:backward(x, dy) 55 | w,dw = model:parameters() 56 | model:updateParameters(.5) 57 | collectgarbage() 58 | end 59 | elapse = sys.toc() -- seconds elapsed over the 3 forward/backward passes 60 | print('elapsed time:', elapse) 61 | end 62 | end 63 | --------------------------------------------------------------------------------
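-- The example below is a minimal, self-contained sketch of the zero-masking pattern used in
-- examples/simple-bisequencer-network-variable.lua and benchmarked in test/test_trimzero.lua:
-- wrap the recurrent module with maskZero(1) and the criterion with MaskZeroCriterion so that
-- zero-padded steps of variable-length sequences produce neither outputs nor gradients.
-- The hyper-parameter values and tensor shapes chosen below are illustrative assumptions,
-- not taken from any file in this repository.

require 'rnn'

local nIndex, hiddenSize, batchSize, seqLen = 10, 8, 4, 5

-- one time-step of the model : masked lookup -> masked LSTM -> masked linear classifier
local step = nn.Sequential()
   :add(nn.LookupTableMaskZero(nIndex, hiddenSize))       -- index 0 maps to a zero embedding
   :add(nn.FastLSTM(hiddenSize, hiddenSize):maskZero(1))  -- zero input rows produce zero output rows
   :add(nn.MaskZero(nn.Linear(hiddenSize, nIndex), 1))
   :add(nn.MaskZero(nn.LogSoftMax(), 1))

-- Sequencer applies the step module over a table of time-steps (wrapping it in a Recursor internally)
local model = nn.Sequencer(step)
local criterion = nn.SequencerCriterion(nn.MaskZeroCriterion(nn.ClassNLLCriterion(), 1))

-- build a right-padded batch : index 0 marks padding (here the last step is padding for every sample)
local inputs, targets = {}, {}
for t = 1, seqLen do
   inputs[t] = torch.LongTensor(batchSize):random(1, nIndex)
   targets[t] = torch.LongTensor(batchSize):random(1, nIndex)
end
inputs[seqLen]:fill(0)
targets[seqLen]:fill(0)

-- one training step : padded positions contribute nothing to the error or the gradients
model:zeroGradParameters()
local outputs = model:forward(inputs)
local err = criterion:forward(outputs, targets)
local gradOutputs = criterion:backward(outputs, targets)
model:backward(inputs, gradOutputs)
model:updateParameters(0.1)
print(string.format("masked NLL err = %f", err))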